mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-15 04:17:30 +01:00
Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix
This commit is contained in:
@@ -12,7 +12,7 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"github.com/mattn/go-sqlite3"
|
||||
"github.com/qustavo/sqlhooks/v2"
|
||||
@@ -51,7 +51,7 @@ func setupSqlite(db *sql.DB) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func Connect(driver string, db string) {
|
||||
func Connect(db string) {
|
||||
var err error
|
||||
var dbHandle *sqlx.DB
|
||||
|
||||
@@ -64,39 +64,31 @@ func Connect(driver string, db string) {
|
||||
ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime,
|
||||
}
|
||||
|
||||
switch driver {
|
||||
case "sqlite3":
|
||||
// TODO: Have separate DB handles for Writes and Reads
|
||||
// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
|
||||
connectionURLParams := make(url.Values)
|
||||
connectionURLParams.Add("_txlock", "immediate")
|
||||
connectionURLParams.Add("_journal_mode", "WAL")
|
||||
connectionURLParams.Add("_busy_timeout", "5000")
|
||||
connectionURLParams.Add("_synchronous", "NORMAL")
|
||||
connectionURLParams.Add("_cache_size", "1000000000")
|
||||
connectionURLParams.Add("_foreign_keys", "true")
|
||||
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
|
||||
// TODO: Have separate DB handles for Writes and Reads
|
||||
// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
|
||||
connectionURLParams := make(url.Values)
|
||||
connectionURLParams.Add("_txlock", "immediate")
|
||||
connectionURLParams.Add("_journal_mode", "WAL")
|
||||
connectionURLParams.Add("_busy_timeout", "5000")
|
||||
connectionURLParams.Add("_synchronous", "NORMAL")
|
||||
connectionURLParams.Add("_cache_size", "1000000000")
|
||||
connectionURLParams.Add("_foreign_keys", "true")
|
||||
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
|
||||
|
||||
if cclog.Loglevel() == "debug" {
|
||||
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
|
||||
dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL)
|
||||
} else {
|
||||
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
|
||||
}
|
||||
|
||||
err = setupSqlite(dbHandle.DB)
|
||||
if err != nil {
|
||||
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
|
||||
}
|
||||
case "mysql":
|
||||
opts.URL += "?multiStatements=true"
|
||||
dbHandle, err = sqlx.Open("mysql", opts.URL)
|
||||
default:
|
||||
cclog.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver)
|
||||
if cclog.Loglevel() == "debug" {
|
||||
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
|
||||
dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL)
|
||||
} else {
|
||||
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
cclog.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error())
|
||||
cclog.Abortf("DB Connection: Could not connect to SQLite database with sqlx.Open().\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
err = setupSqlite(dbHandle.DB)
|
||||
if err != nil {
|
||||
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
dbHandle.SetMaxOpenConns(opts.MaxOpenConnections)
|
||||
@@ -104,8 +96,8 @@ func Connect(driver string, db string) {
|
||||
dbHandle.SetConnMaxLifetime(opts.ConnectionMaxLifetime)
|
||||
dbHandle.SetConnMaxIdleTime(opts.ConnectionMaxIdleTime)
|
||||
|
||||
dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
|
||||
err = checkDBVersion(driver, dbHandle.DB)
|
||||
dbConnInstance = &DBConnection{DB: dbHandle}
|
||||
err = checkDBVersion(dbHandle.DB)
|
||||
if err != nil {
|
||||
cclog.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error())
|
||||
}
|
||||
@@ -119,3 +111,26 @@ func GetConnection() *DBConnection {
|
||||
|
||||
return dbConnInstance
|
||||
}
|
||||
|
||||
// ResetConnection closes the current database connection and resets the connection state.
|
||||
// This function is intended for testing purposes only to allow test isolation.
|
||||
func ResetConnection() error {
|
||||
if dbConnInstance != nil && dbConnInstance.DB != nil {
|
||||
if err := dbConnInstance.DB.Close(); err != nil {
|
||||
return fmt.Errorf("failed to close database connection: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
dbConnInstance = nil
|
||||
dbConnOnce = sync.Once{}
|
||||
jobRepoInstance = nil
|
||||
jobRepoOnce = sync.Once{}
|
||||
nodeRepoInstance = nil
|
||||
nodeRepoOnce = sync.Once{}
|
||||
userRepoInstance = nil
|
||||
userRepoOnce = sync.Once{}
|
||||
userCfgRepoInstance = nil
|
||||
userCfgRepoOnce = sync.Once{}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -2,13 +2,14 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
)
|
||||
|
||||
// Hooks satisfies the sqlhook.Hooks interface
|
||||
|
||||
274
internal/repository/hooks_test.go
Normal file
274
internal/repository/hooks_test.go
Normal file
@@ -0,0 +1,274 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type MockJobHook struct {
|
||||
startCalled bool
|
||||
stopCalled bool
|
||||
startJobs []*schema.Job
|
||||
stopJobs []*schema.Job
|
||||
}
|
||||
|
||||
func (m *MockJobHook) JobStartCallback(job *schema.Job) {
|
||||
m.startCalled = true
|
||||
m.startJobs = append(m.startJobs, job)
|
||||
}
|
||||
|
||||
func (m *MockJobHook) JobStopCallback(job *schema.Job) {
|
||||
m.stopCalled = true
|
||||
m.stopJobs = append(m.stopJobs, job)
|
||||
}
|
||||
|
||||
func TestRegisterJobHook(t *testing.T) {
|
||||
t.Run("register single hook", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock := &MockJobHook{}
|
||||
|
||||
RegisterJobHook(mock)
|
||||
|
||||
assert.NotNil(t, hooks)
|
||||
assert.Len(t, hooks, 1)
|
||||
assert.Equal(t, mock, hooks[0])
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("register multiple hooks", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock1 := &MockJobHook{}
|
||||
mock2 := &MockJobHook{}
|
||||
|
||||
RegisterJobHook(mock1)
|
||||
RegisterJobHook(mock2)
|
||||
|
||||
assert.Len(t, hooks, 2)
|
||||
assert.Equal(t, mock1, hooks[0])
|
||||
assert.Equal(t, mock2, hooks[1])
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("register nil hook does not add to hooks", func(t *testing.T) {
|
||||
hooks = nil
|
||||
RegisterJobHook(nil)
|
||||
|
||||
if hooks != nil {
|
||||
assert.Len(t, hooks, 0, "Nil hook should not be added")
|
||||
}
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallJobStartHooks(t *testing.T) {
|
||||
t.Run("call start hooks with single job", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock := &MockJobHook{}
|
||||
RegisterJobHook(mock)
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 123,
|
||||
User: "testuser",
|
||||
Cluster: "testcluster",
|
||||
}
|
||||
|
||||
CallJobStartHooks([]*schema.Job{job})
|
||||
|
||||
assert.True(t, mock.startCalled)
|
||||
assert.False(t, mock.stopCalled)
|
||||
assert.Len(t, mock.startJobs, 1)
|
||||
assert.Equal(t, int64(123), mock.startJobs[0].JobID)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("call start hooks with multiple jobs", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock := &MockJobHook{}
|
||||
RegisterJobHook(mock)
|
||||
|
||||
jobs := []*schema.Job{
|
||||
{JobID: 1, User: "user1", Cluster: "cluster1"},
|
||||
{JobID: 2, User: "user2", Cluster: "cluster2"},
|
||||
{JobID: 3, User: "user3", Cluster: "cluster3"},
|
||||
}
|
||||
|
||||
CallJobStartHooks(jobs)
|
||||
|
||||
assert.True(t, mock.startCalled)
|
||||
assert.Len(t, mock.startJobs, 3)
|
||||
assert.Equal(t, int64(1), mock.startJobs[0].JobID)
|
||||
assert.Equal(t, int64(2), mock.startJobs[1].JobID)
|
||||
assert.Equal(t, int64(3), mock.startJobs[2].JobID)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("call start hooks with multiple registered hooks", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock1 := &MockJobHook{}
|
||||
mock2 := &MockJobHook{}
|
||||
RegisterJobHook(mock1)
|
||||
RegisterJobHook(mock2)
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 456, User: "testuser", Cluster: "testcluster",
|
||||
}
|
||||
|
||||
CallJobStartHooks([]*schema.Job{job})
|
||||
|
||||
assert.True(t, mock1.startCalled)
|
||||
assert.True(t, mock2.startCalled)
|
||||
assert.Len(t, mock1.startJobs, 1)
|
||||
assert.Len(t, mock2.startJobs, 1)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("call start hooks with nil hooks", func(t *testing.T) {
|
||||
hooks = nil
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 789, User: "testuser", Cluster: "testcluster",
|
||||
}
|
||||
|
||||
CallJobStartHooks([]*schema.Job{job})
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("call start hooks with empty job list", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock := &MockJobHook{}
|
||||
RegisterJobHook(mock)
|
||||
|
||||
CallJobStartHooks([]*schema.Job{})
|
||||
|
||||
assert.False(t, mock.startCalled)
|
||||
assert.Len(t, mock.startJobs, 0)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
}
|
||||
|
||||
func TestCallJobStopHooks(t *testing.T) {
|
||||
t.Run("call stop hooks with single job", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock := &MockJobHook{}
|
||||
RegisterJobHook(mock)
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 123,
|
||||
User: "testuser",
|
||||
Cluster: "testcluster",
|
||||
}
|
||||
|
||||
CallJobStopHooks(job)
|
||||
|
||||
assert.True(t, mock.stopCalled)
|
||||
assert.False(t, mock.startCalled)
|
||||
assert.Len(t, mock.stopJobs, 1)
|
||||
assert.Equal(t, int64(123), mock.stopJobs[0].JobID)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("call stop hooks with multiple registered hooks", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock1 := &MockJobHook{}
|
||||
mock2 := &MockJobHook{}
|
||||
RegisterJobHook(mock1)
|
||||
RegisterJobHook(mock2)
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 456, User: "testuser", Cluster: "testcluster",
|
||||
}
|
||||
|
||||
CallJobStopHooks(job)
|
||||
|
||||
assert.True(t, mock1.stopCalled)
|
||||
assert.True(t, mock2.stopCalled)
|
||||
assert.Len(t, mock1.stopJobs, 1)
|
||||
assert.Len(t, mock2.stopJobs, 1)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
|
||||
t.Run("call stop hooks with nil hooks", func(t *testing.T) {
|
||||
hooks = nil
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 789, User: "testuser", Cluster: "testcluster",
|
||||
}
|
||||
|
||||
CallJobStopHooks(job)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
}
|
||||
|
||||
func TestSQLHooks(t *testing.T) {
|
||||
_ = setup(t)
|
||||
|
||||
t.Run("hooks log queries in debug mode", func(t *testing.T) {
|
||||
h := &Hooks{}
|
||||
|
||||
ctx := context.Background()
|
||||
query := "SELECT * FROM job WHERE job_id = ?"
|
||||
args := []any{123}
|
||||
|
||||
ctxWithTime, err := h.Before(ctx, query, args...)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, ctxWithTime)
|
||||
|
||||
beginTime := ctxWithTime.Value("begin")
|
||||
require.NotNil(t, beginTime)
|
||||
_, ok := beginTime.(time.Time)
|
||||
assert.True(t, ok, "Begin time should be time.Time")
|
||||
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
|
||||
ctxAfter, err := h.After(ctxWithTime, query, args...)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, ctxAfter)
|
||||
})
|
||||
}
|
||||
|
||||
func TestHookIntegration(t *testing.T) {
|
||||
t.Run("hooks are called during job lifecycle", func(t *testing.T) {
|
||||
hooks = nil
|
||||
mock := &MockJobHook{}
|
||||
RegisterJobHook(mock)
|
||||
|
||||
job := &schema.Job{
|
||||
JobID: 999,
|
||||
User: "integrationuser",
|
||||
Cluster: "integrationcluster",
|
||||
}
|
||||
|
||||
CallJobStartHooks([]*schema.Job{job})
|
||||
assert.True(t, mock.startCalled)
|
||||
assert.Equal(t, 1, len(mock.startJobs))
|
||||
|
||||
CallJobStopHooks(job)
|
||||
assert.True(t, mock.stopCalled)
|
||||
assert.Equal(t, 1, len(mock.stopJobs))
|
||||
|
||||
assert.Equal(t, mock.startJobs[0].JobID, mock.stopJobs[0].JobID)
|
||||
|
||||
hooks = nil
|
||||
})
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,14 +2,15 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
@@ -29,6 +30,27 @@ const NamedJobInsert string = `INSERT INTO job (
|
||||
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||
);`
|
||||
|
||||
// InsertJobDirect inserts a job directly into the job table (not job_cache).
|
||||
// Use this when the returned ID will be used for operations on the job table
|
||||
// (e.g., adding tags), or for imported jobs that are already completed.
|
||||
func (r *JobRepository) InsertJobDirect(job *schema.Job) (int64, error) {
|
||||
r.Mutex.Lock()
|
||||
defer r.Mutex.Unlock()
|
||||
|
||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while NamedJobInsert (direct)")
|
||||
return 0, err
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting last insert ID (direct)")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||
r.Mutex.Lock()
|
||||
defer r.Mutex.Unlock()
|
||||
@@ -70,8 +92,9 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
|
||||
// Use INSERT OR IGNORE to skip jobs already transferred by the stop path
|
||||
_, err = r.DB.Exec(
|
||||
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
|
||||
"INSERT OR IGNORE INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
|
||||
if err != nil {
|
||||
cclog.Warnf("Error while Job sync: %v", err)
|
||||
return nil, err
|
||||
@@ -83,9 +106,48 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Resolve correct job.id from the job table. The IDs read from job_cache
|
||||
// are from a different auto-increment sequence and must not be used to
|
||||
// query the job table.
|
||||
for _, job := range jobs {
|
||||
var newID int64
|
||||
if err := sq.Select("job.id").From("job").
|
||||
Where("job.job_id = ? AND job.cluster = ? AND job.start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&newID); err != nil {
|
||||
cclog.Warnf("SyncJobs: could not resolve job table id for job %d on %s: %v",
|
||||
job.JobID, job.Cluster, err)
|
||||
continue
|
||||
}
|
||||
job.ID = &newID
|
||||
}
|
||||
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// TransferCachedJobToMain moves a job from job_cache to the job table.
|
||||
// Caller must hold r.Mutex. Returns the new job table ID.
|
||||
func (r *JobRepository) TransferCachedJobToMain(cacheID int64) (int64, error) {
|
||||
res, err := r.DB.Exec(
|
||||
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache WHERE id = ?",
|
||||
cacheID)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("transferring cached job %d to main table failed: %w", cacheID, err)
|
||||
}
|
||||
|
||||
newID, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("getting new job ID after transfer failed: %w", err)
|
||||
}
|
||||
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", cacheID)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("deleting cached job %d after transfer failed: %w", cacheID, err)
|
||||
}
|
||||
|
||||
return newID, nil
|
||||
}
|
||||
|
||||
// Start inserts a new job in the table, returning the unique job ID.
|
||||
// Statistics are not transferred!
|
||||
func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
|
||||
@@ -107,41 +169,46 @@ func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
|
||||
return r.InsertJob(job)
|
||||
}
|
||||
|
||||
// StartDirect inserts a new job directly into the job table (not job_cache).
|
||||
// Use this when the returned ID will immediately be used for job table
|
||||
// operations such as adding tags.
|
||||
func (r *JobRepository) StartDirect(job *schema.Job) (id int64, err error) {
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
||||
}
|
||||
|
||||
return r.InsertJobDirect(job)
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) Stop(
|
||||
jobId int64,
|
||||
jobID int64,
|
||||
duration int32,
|
||||
state schema.JobState,
|
||||
monitoringStatus int32,
|
||||
) (err error) {
|
||||
// Invalidate cache entries as job state is changing
|
||||
r.cache.Del(fmt.Sprintf("metadata:%d", jobId))
|
||||
r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobId))
|
||||
r.cache.Del(fmt.Sprintf("metadata:%d", jobID))
|
||||
r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobID))
|
||||
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
Where("job.id = ?", jobID)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *JobRepository) StopCached(
|
||||
jobId int64,
|
||||
duration int32,
|
||||
state schema.JobState,
|
||||
monitoringStatus int32,
|
||||
) (err error) {
|
||||
// Note: StopCached updates job_cache table, not the main job table
|
||||
// Cache invalidation happens when job is synced to main table
|
||||
stmt := sq.Update("job_cache").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job_cache.id = ?", jobId)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return err
|
||||
}
|
||||
|
||||
607
internal/repository/jobCreate_test.go
Normal file
607
internal/repository/jobCreate_test.go
Normal file
@@ -0,0 +1,607 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// createTestJob creates a minimal valid job for testing
|
||||
func createTestJob(jobID int64, cluster string) *schema.Job {
|
||||
return &schema.Job{
|
||||
JobID: jobID,
|
||||
User: "testuser",
|
||||
Project: "testproject",
|
||||
Cluster: cluster,
|
||||
SubCluster: "main",
|
||||
Partition: "batch",
|
||||
NumNodes: 1,
|
||||
NumHWThreads: 4,
|
||||
NumAcc: 0,
|
||||
Shared: "none",
|
||||
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||
SMT: 1,
|
||||
State: schema.JobStateRunning,
|
||||
StartTime: 1234567890,
|
||||
Duration: 0,
|
||||
Walltime: 3600,
|
||||
Resources: []*schema.Resource{
|
||||
{
|
||||
Hostname: "node01",
|
||||
HWThreads: []int{0, 1, 2, 3},
|
||||
},
|
||||
},
|
||||
Footprint: map[string]float64{
|
||||
"cpu_load": 50.0,
|
||||
"mem_used": 8000.0,
|
||||
"flops_any": 0.5,
|
||||
"mem_bw": 10.0,
|
||||
"net_bw": 2.0,
|
||||
"file_bw": 1.0,
|
||||
"cpu_used": 2.0,
|
||||
"cpu_load_core": 12.5,
|
||||
},
|
||||
MetaData: map[string]string{
|
||||
"jobName": "test_job",
|
||||
"queue": "normal",
|
||||
"qosName": "default",
|
||||
"accountName": "testaccount",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestInsertJob(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("successful insertion", func(t *testing.T) {
|
||||
job := createTestJob(999001, "testcluster")
|
||||
job.RawResources, _ = json.Marshal(job.Resources)
|
||||
job.RawFootprint, _ = json.Marshal(job.Footprint)
|
||||
job.RawMetaData, _ = json.Marshal(job.MetaData)
|
||||
|
||||
id, err := r.InsertJob(job)
|
||||
require.NoError(t, err, "InsertJob should succeed")
|
||||
assert.Greater(t, id, int64(0), "Should return valid insert ID")
|
||||
|
||||
// Verify job was inserted into job_cache
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?",
|
||||
job.JobID, job.Cluster).Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 1, count, "Job should be in job_cache table")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("insertion with all fields", func(t *testing.T) {
|
||||
job := createTestJob(999002, "testcluster")
|
||||
job.ArrayJobID = 5000
|
||||
job.Energy = 1500.5
|
||||
job.RawResources, _ = json.Marshal(job.Resources)
|
||||
job.RawFootprint, _ = json.Marshal(job.Footprint)
|
||||
job.RawMetaData, _ = json.Marshal(job.MetaData)
|
||||
|
||||
id, err := r.InsertJob(job)
|
||||
require.NoError(t, err)
|
||||
assert.Greater(t, id, int64(0))
|
||||
|
||||
// Verify all fields were stored correctly
|
||||
var retrievedJob schema.Job
|
||||
err = r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, energy
|
||||
FROM job_cache WHERE id = ?`, id).Scan(
|
||||
&retrievedJob.JobID, &retrievedJob.User, &retrievedJob.Project,
|
||||
&retrievedJob.Cluster, &retrievedJob.ArrayJobID, &retrievedJob.Energy)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, job.JobID, retrievedJob.JobID)
|
||||
assert.Equal(t, job.User, retrievedJob.User)
|
||||
assert.Equal(t, job.Project, retrievedJob.Project)
|
||||
assert.Equal(t, job.Cluster, retrievedJob.Cluster)
|
||||
assert.Equal(t, job.ArrayJobID, retrievedJob.ArrayJobID)
|
||||
assert.Equal(t, job.Energy, retrievedJob.Energy)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestStart(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("successful job start with JSON encoding", func(t *testing.T) {
|
||||
job := createTestJob(999003, "testcluster")
|
||||
|
||||
id, err := r.Start(job)
|
||||
require.NoError(t, err, "Start should succeed")
|
||||
assert.Greater(t, id, int64(0), "Should return valid insert ID")
|
||||
|
||||
// Verify job was inserted and JSON fields were encoded
|
||||
var rawResources, rawFootprint, rawMetaData []byte
|
||||
err = r.DB.QueryRow(`SELECT resources, footprint, meta_data FROM job_cache WHERE id = ?`, id).Scan(
|
||||
&rawResources, &rawFootprint, &rawMetaData)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify resources JSON
|
||||
var resources []*schema.Resource
|
||||
err = json.Unmarshal(rawResources, &resources)
|
||||
require.NoError(t, err, "Resources should be valid JSON")
|
||||
assert.Equal(t, 1, len(resources))
|
||||
assert.Equal(t, "node01", resources[0].Hostname)
|
||||
|
||||
// Verify footprint JSON
|
||||
var footprint map[string]float64
|
||||
err = json.Unmarshal(rawFootprint, &footprint)
|
||||
require.NoError(t, err, "Footprint should be valid JSON")
|
||||
assert.Equal(t, 50.0, footprint["cpu_load"])
|
||||
assert.Equal(t, 8000.0, footprint["mem_used"])
|
||||
|
||||
// Verify metadata JSON
|
||||
var metaData map[string]string
|
||||
err = json.Unmarshal(rawMetaData, &metaData)
|
||||
require.NoError(t, err, "MetaData should be valid JSON")
|
||||
assert.Equal(t, "test_job", metaData["jobName"])
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("job start with empty footprint", func(t *testing.T) {
|
||||
job := createTestJob(999004, "testcluster")
|
||||
job.Footprint = map[string]float64{}
|
||||
|
||||
id, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
assert.Greater(t, id, int64(0))
|
||||
|
||||
// Verify empty footprint was encoded as empty JSON object
|
||||
var rawFootprint []byte
|
||||
err = r.DB.QueryRow(`SELECT footprint FROM job_cache WHERE id = ?`, id).Scan(&rawFootprint)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, []byte("{}"), rawFootprint)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("job start with nil metadata", func(t *testing.T) {
|
||||
job := createTestJob(999005, "testcluster")
|
||||
job.MetaData = nil
|
||||
|
||||
id, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
assert.Greater(t, id, int64(0))
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestStop(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("successful job stop", func(t *testing.T) {
|
||||
// First insert a job using Start
|
||||
job := createTestJob(999106, "testcluster")
|
||||
id, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Move from job_cache to job table (simulate SyncJobs) - exclude id to let it auto-increment
|
||||
_, err = r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
|
||||
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
|
||||
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint)
|
||||
SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
|
||||
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
|
||||
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint
|
||||
FROM job_cache WHERE id = ?`, id)
|
||||
require.NoError(t, err)
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get the new job id in the job table
|
||||
err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&id)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Stop the job
|
||||
duration := int32(3600)
|
||||
state := schema.JobStateCompleted
|
||||
monitoringStatus := int32(schema.MonitoringStatusArchivingSuccessful)
|
||||
|
||||
err = r.Stop(id, duration, state, monitoringStatus)
|
||||
require.NoError(t, err, "Stop should succeed")
|
||||
|
||||
// Verify job was updated
|
||||
var retrievedDuration int32
|
||||
var retrievedState string
|
||||
var retrievedMonStatus int32
|
||||
err = r.DB.QueryRow(`SELECT duration, job_state, monitoring_status FROM job WHERE id = ?`, id).Scan(
|
||||
&retrievedDuration, &retrievedState, &retrievedMonStatus)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, duration, retrievedDuration)
|
||||
assert.Equal(t, string(state), retrievedState)
|
||||
assert.Equal(t, monitoringStatus, retrievedMonStatus)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("stop updates job state transitions", func(t *testing.T) {
|
||||
// Insert a job
|
||||
job := createTestJob(999107, "testcluster")
|
||||
id, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Move to job table
|
||||
_, err = r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
|
||||
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
|
||||
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint)
|
||||
SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
|
||||
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
|
||||
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint
|
||||
FROM job_cache WHERE id = ?`, id)
|
||||
require.NoError(t, err)
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get the new job id in the job table
|
||||
err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&id)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Stop the job with different duration
|
||||
err = r.Stop(id, 7200, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful))
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the duration was updated correctly
|
||||
var duration int32
|
||||
err = r.DB.QueryRow(`SELECT duration FROM job WHERE id = ?`, id).Scan(&duration)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, int32(7200), duration, "Duration should be updated to 7200")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("stop with different states", func(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
jobID int64
|
||||
state schema.JobState
|
||||
monitoringStatus int32
|
||||
}{
|
||||
{"completed", 999108, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful)},
|
||||
{"failed", 999118, schema.JobStateFailed, int32(schema.MonitoringStatusArchivingSuccessful)},
|
||||
{"cancelled", 999119, schema.JobStateCancelled, int32(schema.MonitoringStatusArchivingSuccessful)},
|
||||
{"timeout", 999120, schema.JobStateTimeout, int32(schema.MonitoringStatusArchivingSuccessful)},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
job := createTestJob(tc.jobID, "testcluster")
|
||||
id, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Move to job table
|
||||
_, err = r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
|
||||
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
|
||||
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint)
|
||||
SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
|
||||
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
|
||||
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint
|
||||
FROM job_cache WHERE id = ?`, id)
|
||||
require.NoError(t, err)
|
||||
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get the new job id in the job table
|
||||
err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
job.JobID, job.Cluster, job.StartTime).Scan(&id)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Stop with specific state
|
||||
err = r.Stop(id, 1800, tc.state, tc.monitoringStatus)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify state was set correctly
|
||||
var retrievedState string
|
||||
err = r.DB.QueryRow(`SELECT job_state FROM job WHERE id = ?`, id).Scan(&retrievedState)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, string(tc.state), retrievedState)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransferCachedJobToMain(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("successful transfer from cache to main", func(t *testing.T) {
|
||||
// Insert a job in job_cache
|
||||
job := createTestJob(999009, "testcluster")
|
||||
cacheID, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Transfer the cached job to the main table
|
||||
r.Mutex.Lock()
|
||||
newID, err := r.TransferCachedJobToMain(cacheID)
|
||||
r.Mutex.Unlock()
|
||||
require.NoError(t, err, "TransferCachedJobToMain should succeed")
|
||||
assert.NotEqual(t, cacheID, newID, "New ID should differ from cache ID")
|
||||
|
||||
// Verify job exists in job table
|
||||
var count int
|
||||
err = r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE id = ?`, newID).Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 1, count, "Job should exist in main table")
|
||||
|
||||
// Verify job was removed from job_cache
|
||||
err = r.DB.QueryRow(`SELECT COUNT(*) FROM job_cache WHERE id = ?`, cacheID).Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, count, "Job should be removed from cache")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", newID)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("transfer preserves job data", func(t *testing.T) {
|
||||
// Insert a job in job_cache
|
||||
job := createTestJob(999010, "testcluster")
|
||||
cacheID, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Transfer the cached job
|
||||
r.Mutex.Lock()
|
||||
newID, err := r.TransferCachedJobToMain(cacheID)
|
||||
r.Mutex.Unlock()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the transferred job has the correct data
|
||||
var jobID int64
|
||||
var cluster string
|
||||
err = r.DB.QueryRow(`SELECT job_id, cluster FROM job WHERE id = ?`, newID).Scan(&jobID, &cluster)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, job.JobID, jobID)
|
||||
assert.Equal(t, job.Cluster, cluster)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", newID)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestSyncJobs(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("sync jobs from cache to main table", func(t *testing.T) {
|
||||
// Ensure cache is empty first
|
||||
_, err := r.DB.Exec("DELETE FROM job_cache")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Insert multiple jobs in job_cache
|
||||
job1 := createTestJob(999011, "testcluster")
|
||||
job2 := createTestJob(999012, "testcluster")
|
||||
job3 := createTestJob(999013, "testcluster")
|
||||
|
||||
_, err = r.Start(job1)
|
||||
require.NoError(t, err)
|
||||
_, err = r.Start(job2)
|
||||
require.NoError(t, err)
|
||||
_, err = r.Start(job3)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify jobs are in job_cache
|
||||
var cacheCount int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
|
||||
job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 3, cacheCount, "All jobs should be in job_cache")
|
||||
|
||||
// Sync jobs
|
||||
jobs, err := r.SyncJobs()
|
||||
require.NoError(t, err, "SyncJobs should succeed")
|
||||
assert.Equal(t, 3, len(jobs), "Should return 3 synced jobs")
|
||||
|
||||
// Verify jobs were moved to job table
|
||||
var jobCount int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE job_id IN (?, ?, ?)",
|
||||
job1.JobID, job2.JobID, job3.JobID).Scan(&jobCount)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 3, jobCount, "All jobs should be in job table")
|
||||
|
||||
// Verify job_cache was cleared
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
|
||||
job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, cacheCount, "job_cache should be empty after sync")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE job_id IN (?, ?, ?)", job1.JobID, job2.JobID, job3.JobID)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("sync preserves job data", func(t *testing.T) {
|
||||
// Ensure cache is empty first
|
||||
_, err := r.DB.Exec("DELETE FROM job_cache")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Insert a job with specific data
|
||||
job := createTestJob(999014, "testcluster")
|
||||
job.ArrayJobID = 7777
|
||||
job.Energy = 2500.75
|
||||
job.Duration = 1800
|
||||
|
||||
id, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Update some fields to simulate job progress
|
||||
result, err := r.DB.Exec(`UPDATE job_cache SET duration = ?, energy = ? WHERE id = ?`,
|
||||
3600, 3000.5, id)
|
||||
require.NoError(t, err)
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
require.Equal(t, int64(1), rowsAffected, "UPDATE should affect exactly 1 row")
|
||||
|
||||
// Verify the update worked
|
||||
var checkDuration int32
|
||||
var checkEnergy float64
|
||||
err = r.DB.QueryRow(`SELECT duration, energy FROM job_cache WHERE id = ?`, id).Scan(&checkDuration, &checkEnergy)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, int32(3600), checkDuration, "Duration should be updated to 3600 before sync")
|
||||
require.Equal(t, 3000.5, checkEnergy, "Energy should be updated to 3000.5 before sync")
|
||||
|
||||
// Sync jobs
|
||||
jobs, err := r.SyncJobs()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, len(jobs), "Should return exactly 1 synced job")
|
||||
|
||||
// Verify in database
|
||||
var dbJob schema.Job
|
||||
err = r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, duration, energy
|
||||
FROM job WHERE job_id = ? AND cluster = ?`, job.JobID, job.Cluster).Scan(
|
||||
&dbJob.JobID, &dbJob.User, &dbJob.Project, &dbJob.Cluster,
|
||||
&dbJob.ArrayJobID, &dbJob.Duration, &dbJob.Energy)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, job.JobID, dbJob.JobID)
|
||||
assert.Equal(t, int32(3600), dbJob.Duration)
|
||||
assert.Equal(t, 3000.5, dbJob.Energy)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("sync returns job table IDs not cache IDs", func(t *testing.T) {
|
||||
// Ensure cache is empty first
|
||||
_, err := r.DB.Exec("DELETE FROM job_cache")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Insert a job into job_cache
|
||||
job := createTestJob(999015, "testcluster")
|
||||
cacheID, err := r.Start(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Sync jobs
|
||||
jobs, err := r.SyncJobs()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 1, len(jobs))
|
||||
|
||||
// The returned ID must refer to the job table, not job_cache
|
||||
var jobTableID int64
|
||||
err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
|
||||
jobs[0].JobID, jobs[0].Cluster, jobs[0].StartTime).Scan(&jobTableID)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, jobTableID, *jobs[0].ID,
|
||||
"returned ID should match the job table row, not the cache ID (%d)", cacheID)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("sync with empty cache returns empty list", func(t *testing.T) {
|
||||
// Ensure cache is empty
|
||||
_, err := r.DB.Exec("DELETE FROM job_cache")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Sync should return empty list
|
||||
jobs, err := r.SyncJobs()
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, len(jobs), "Should return empty list when cache is empty")
|
||||
})
|
||||
}
|
||||
|
||||
func TestInsertJobDirect(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("inserts into job table not cache", func(t *testing.T) {
|
||||
job := createTestJob(999020, "testcluster")
|
||||
job.RawResources, _ = json.Marshal(job.Resources)
|
||||
job.RawFootprint, _ = json.Marshal(job.Footprint)
|
||||
job.RawMetaData, _ = json.Marshal(job.MetaData)
|
||||
|
||||
id, err := r.InsertJobDirect(job)
|
||||
require.NoError(t, err, "InsertJobDirect should succeed")
|
||||
assert.Greater(t, id, int64(0), "Should return valid insert ID")
|
||||
|
||||
// Verify job is in job table
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE id = ?", id).Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 1, count, "Job should be in job table")
|
||||
|
||||
// Verify job is NOT in job_cache
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?",
|
||||
job.JobID, job.Cluster).Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, count, "Job should NOT be in job_cache")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("returned ID works for tag operations", func(t *testing.T) {
|
||||
job := createTestJob(999021, "testcluster")
|
||||
job.RawResources, _ = json.Marshal(job.Resources)
|
||||
job.RawFootprint, _ = json.Marshal(job.Footprint)
|
||||
job.RawMetaData, _ = json.Marshal(job.MetaData)
|
||||
|
||||
id, err := r.InsertJobDirect(job)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Adding a tag using the returned ID should succeed (FK constraint on jobtag)
|
||||
err = r.ImportTag(id, "test_type", "test_name", "global")
|
||||
require.NoError(t, err, "ImportTag should succeed with direct insert ID")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM jobtag WHERE job_id = ?", id)
|
||||
require.NoError(t, err)
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestStartDirect(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("inserts into job table with JSON encoding", func(t *testing.T) {
|
||||
job := createTestJob(999022, "testcluster")
|
||||
|
||||
id, err := r.StartDirect(job)
|
||||
require.NoError(t, err, "StartDirect should succeed")
|
||||
assert.Greater(t, id, int64(0))
|
||||
|
||||
// Verify job is in job table with encoded JSON
|
||||
var rawResources []byte
|
||||
err = r.DB.QueryRow("SELECT resources FROM job WHERE id = ?", id).Scan(&rawResources)
|
||||
require.NoError(t, err)
|
||||
|
||||
var resources []*schema.Resource
|
||||
err = json.Unmarshal(rawResources, &resources)
|
||||
require.NoError(t, err, "Resources should be valid JSON")
|
||||
assert.Equal(t, "node01", resources[0].Hostname)
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
@@ -2,6 +2,7 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
@@ -11,8 +12,8 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
@@ -22,13 +23,17 @@ import (
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) Find(
|
||||
jobId *int64,
|
||||
jobID *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) (*schema.Job, error) {
|
||||
if jobID == nil {
|
||||
return nil, fmt.Errorf("jobID cannot be nil")
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
Where("job.job_id = ?", *jobID)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
@@ -37,19 +42,29 @@ func (r *JobRepository) Find(
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
|
||||
q = q.OrderBy("job.id DESC").Limit(1) // always use newest matching job by db id if more than one match
|
||||
|
||||
cclog.Debugf("Timer Find %s", time.Since(start))
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindCached executes a SQL query to find a specific batch job from the job_cache table.
|
||||
// The job is queried using the batch job id, and optionally filtered by cluster name
|
||||
// and start time (UNIX epoch time seconds). This method uses cached job data which
|
||||
// may be stale but provides faster access than Find().
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindCached(
|
||||
jobId *int64,
|
||||
jobID *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) (*schema.Job, error) {
|
||||
if jobID == nil {
|
||||
return nil, fmt.Errorf("jobID cannot be nil")
|
||||
}
|
||||
|
||||
q := sq.Select(jobCacheColumns...).From("job_cache").
|
||||
Where("job_cache.job_id = ?", *jobId)
|
||||
Where("job_cache.job_id = ?", *jobID)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job_cache.cluster = ?", *cluster)
|
||||
@@ -58,24 +73,28 @@ func (r *JobRepository) FindCached(
|
||||
q = q.Where("job_cache.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
q = q.OrderBy("job_cache.id DESC") // always use newest matching job by db id if more than one match
|
||||
q = q.OrderBy("job_cache.id DESC").Limit(1) // always use newest matching job by db id if more than one match
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
// FindAll executes a SQL query to find all batch jobs matching the given criteria.
|
||||
// Jobs are queried using the batch job id, and optionally filtered by cluster name
|
||||
// and start time (UNIX epoch time seconds).
|
||||
// It returns a slice of pointers to schema.Job data structures and an error variable.
|
||||
// An empty slice is returned if no matching jobs are found.
|
||||
func (r *JobRepository) FindAll(
|
||||
jobId *int64,
|
||||
jobID *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) ([]*schema.Job, error) {
|
||||
if jobID == nil {
|
||||
return nil, fmt.Errorf("jobID cannot be nil")
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
Where("job.job_id = ?", *jobID)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
@@ -86,8 +105,8 @@ func (r *JobRepository) FindAll(
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
cclog.Errorf("Error while running FindAll query for jobID=%d: %v", *jobID, err)
|
||||
return nil, fmt.Errorf("failed to execute FindAll query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
@@ -95,8 +114,8 @@ func (r *JobRepository) FindAll(
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
cclog.Warnf("Error while scanning rows in FindAll: %v", err)
|
||||
return nil, fmt.Errorf("failed to scan job row: %w", err)
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
@@ -119,8 +138,8 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
cclog.Errorf("Error while running GetJobList query (limit=%d, offset=%d): %v", limit, offset, err)
|
||||
return nil, fmt.Errorf("failed to execute GetJobList query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
@@ -129,23 +148,23 @@ func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
|
||||
var id int64
|
||||
err := rows.Scan(&id)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
cclog.Warnf("Error while scanning rows in GetJobList: %v", err)
|
||||
return nil, fmt.Errorf("failed to scan job ID: %w", err)
|
||||
}
|
||||
jl = append(jl, id)
|
||||
}
|
||||
|
||||
cclog.Infof("Return job count %d", len(jl))
|
||||
cclog.Debugf("JobRepository.GetJobList(): Return job count %d", len(jl))
|
||||
return jl, nil
|
||||
}
|
||||
|
||||
// FindById executes a SQL query to find a specific batch job.
|
||||
// FindByID executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
|
||||
func (r *JobRepository) FindByID(ctx context.Context, jobID int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
From("job").Where("job.id = ?", jobID)
|
||||
|
||||
q, qerr := SecurityCheck(ctx, q)
|
||||
if qerr != nil {
|
||||
@@ -155,14 +174,14 @@ func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job,
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByIdWithUser executes a SQL query to find a specific batch job.
|
||||
// FindByIDWithUser executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id. The user is passed directly,
|
||||
// instead of as part of the context.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
|
||||
func (r *JobRepository) FindByIDWithUser(user *schema.User, jobID int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
From("job").Where("job.id = ?", jobID)
|
||||
|
||||
q, qerr := SecurityCheckWithUser(user, q)
|
||||
if qerr != nil {
|
||||
@@ -172,24 +191,24 @@ func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schem
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByIdDirect executes a SQL query to find a specific batch job.
|
||||
// FindByIDDirect executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
|
||||
func (r *JobRepository) FindByIDDirect(jobID int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
From("job").Where("job.id = ?", jobID)
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByJobId executes a SQL query to find a specific batch job.
|
||||
// FindByJobID executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the slurm id, the start time, and the cluster name.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
|
||||
func (r *JobRepository) FindByJobID(ctx context.Context, jobID int64, startTime int64, cluster string) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").
|
||||
Where("job.job_id = ?", jobId).
|
||||
Where("job.job_id = ?", jobID).
|
||||
Where("job.cluster = ?", cluster).
|
||||
Where("job.start_time = ?", startTime)
|
||||
|
||||
@@ -201,19 +220,22 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// IsJobOwner executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the slurm id,a username and the cluster.
|
||||
// It returns a bool.
|
||||
// If job was found, user is owner: test err != sql.ErrNoRows
|
||||
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
|
||||
// IsJobOwner checks if the specified user owns the batch job identified by jobID,
|
||||
// startTime, and cluster. Returns true if the user is the owner, false otherwise.
|
||||
// This method does not return errors; it returns false for both non-existent jobs
|
||||
// and jobs owned by other users.
|
||||
func (r *JobRepository) IsJobOwner(jobID int64, startTime int64, user string, cluster string) bool {
|
||||
q := sq.Select("id").
|
||||
From("job").
|
||||
Where("job.job_id = ?", jobId).
|
||||
Where("job.job_id = ?", jobID).
|
||||
Where("job.hpc_user = ?", user).
|
||||
Where("job.cluster = ?", cluster).
|
||||
Where("job.start_time = ?", startTime)
|
||||
|
||||
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Warnf("IsJobOwner: unexpected error for jobID=%d, user=%s, cluster=%s: %v", jobID, user, cluster, err)
|
||||
}
|
||||
return err != sql.ErrNoRows
|
||||
}
|
||||
|
||||
@@ -231,6 +253,11 @@ func (r *JobRepository) FindConcurrentJobs(
|
||||
}
|
||||
|
||||
query = query.Where("cluster = ?", job.Cluster)
|
||||
|
||||
if len(job.Resources) == 0 {
|
||||
return nil, fmt.Errorf("job has no resources defined")
|
||||
}
|
||||
|
||||
var startTime int64
|
||||
var stopTime int64
|
||||
|
||||
@@ -243,25 +270,28 @@ func (r *JobRepository) FindConcurrentJobs(
|
||||
stopTime = startTime + int64(job.Duration)
|
||||
}
|
||||
|
||||
// Add 200s overlap for jobs start time at the end
|
||||
startTimeTail := startTime + 10
|
||||
stopTimeTail := stopTime - 200
|
||||
startTimeFront := startTime + 200
|
||||
// Time buffer constant for finding overlapping jobs
|
||||
// overlapBufferEnd: 200s buffer at job end to account for scheduling/cleanup overlap
|
||||
const overlapBufferEnd = 200
|
||||
|
||||
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTime)
|
||||
stopTimeTail := stopTime - overlapBufferEnd
|
||||
startTimeFront := startTime + overlapBufferEnd
|
||||
|
||||
queryRunning := query.Where("job.job_state = ?", "running").
|
||||
Where("job.start_time <= ?", stopTimeTail)
|
||||
// Get At Least One Exact Hostname Match from JSON Resources Array in Database
|
||||
queryRunning = queryRunning.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)
|
||||
|
||||
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
|
||||
query = query.Where("job.job_state != ?", "running").
|
||||
Where("job.start_time < ?", stopTimeTail).
|
||||
Where("(job.start_time + job.duration) > ?", startTimeFront)
|
||||
// Get At Least One Exact Hostname Match from JSON Resources Array in Database
|
||||
query = query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
cclog.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
cclog.Errorf("Error while running concurrent jobs query: %v", err)
|
||||
return nil, fmt.Errorf("failed to execute concurrent jobs query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
@@ -269,44 +299,44 @@ func (r *JobRepository) FindConcurrentJobs(
|
||||
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
var id, jobID, startTime sql.NullInt64
|
||||
|
||||
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
if err = rows.Scan(&id, &jobID, &startTime); err != nil {
|
||||
cclog.Warnf("Error while scanning concurrent job rows: %v", err)
|
||||
return nil, fmt.Errorf("failed to scan concurrent job row: %w", err)
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
JobID: int(jobID.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
rows, err = queryRunning.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
cclog.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
cclog.Errorf("Error while running concurrent running jobs query: %v", err)
|
||||
return nil, fmt.Errorf("failed to execute concurrent running jobs query: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
var id, jobID, startTime sql.NullInt64
|
||||
|
||||
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
cclog.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
if err := rows.Scan(&id, &jobID, &startTime); err != nil {
|
||||
cclog.Warnf("Error while scanning running concurrent job rows: %v", err)
|
||||
return nil, fmt.Errorf("failed to scan running concurrent job row: %w", err)
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
JobID: int(jobID.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,16 +2,45 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
// JobHook interface allows external components to hook into job lifecycle events.
|
||||
// Implementations can perform actions when jobs start or stop, such as tagging,
|
||||
// logging, notifications, or triggering external workflows.
|
||||
//
|
||||
// Example implementation:
|
||||
//
|
||||
// type MyJobTagger struct{}
|
||||
//
|
||||
// func (t *MyJobTagger) JobStartCallback(job *schema.Job) {
|
||||
// if job.NumNodes > 100 {
|
||||
// // Tag large jobs automatically
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// func (t *MyJobTagger) JobStopCallback(job *schema.Job) {
|
||||
// if job.State == schema.JobStateFailed {
|
||||
// // Log or alert on failed jobs
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// Register hooks during application initialization:
|
||||
//
|
||||
// repository.RegisterJobHook(&MyJobTagger{})
|
||||
type JobHook interface {
|
||||
// JobStartCallback is invoked when one or more jobs start.
|
||||
// This is called synchronously, so implementations should be fast.
|
||||
JobStartCallback(job *schema.Job)
|
||||
|
||||
// JobStopCallback is invoked when a job completes.
|
||||
// This is called synchronously, so implementations should be fast.
|
||||
JobStopCallback(job *schema.Job)
|
||||
}
|
||||
|
||||
@@ -20,7 +49,13 @@ var (
|
||||
hooks []JobHook
|
||||
)
|
||||
|
||||
func RegisterJobJook(hook JobHook) {
|
||||
// RegisterJobHook registers a JobHook to receive job lifecycle callbacks.
|
||||
// Multiple hooks can be registered and will be called in registration order.
|
||||
// This function is safe to call multiple times and is typically called during
|
||||
// application initialization.
|
||||
//
|
||||
// Nil hooks are silently ignored to simplify conditional registration.
|
||||
func RegisterJobHook(hook JobHook) {
|
||||
initOnce.Do(func() {
|
||||
hooks = make([]JobHook, 0)
|
||||
})
|
||||
@@ -30,6 +65,12 @@ func RegisterJobJook(hook JobHook) {
|
||||
}
|
||||
}
|
||||
|
||||
// CallJobStartHooks invokes all registered JobHook.JobStartCallback methods
|
||||
// for each job in the provided slice. This is called internally by the repository
|
||||
// when jobs are started (e.g., via StartJob or batch job imports).
|
||||
//
|
||||
// Hooks are called synchronously in registration order. If a hook panics,
|
||||
// the panic will propagate to the caller.
|
||||
func CallJobStartHooks(jobs []*schema.Job) {
|
||||
if hooks == nil {
|
||||
return
|
||||
@@ -44,6 +85,12 @@ func CallJobStartHooks(jobs []*schema.Job) {
|
||||
}
|
||||
}
|
||||
|
||||
// CallJobStopHooks invokes all registered JobHook.JobStopCallback methods
|
||||
// for the provided job. This is called internally by the repository when a
|
||||
// job completes (e.g., via StopJob or job state updates).
|
||||
//
|
||||
// Hooks are called synchronously in registration order. If a hook panics,
|
||||
// the panic will propagate to the caller.
|
||||
func CallJobStopHooks(job *schema.Job) {
|
||||
if hooks == nil {
|
||||
return
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package repository provides job query functionality with filtering, pagination,
|
||||
// and security controls. This file contains the main query builders and security
|
||||
// checks for job retrieval operations.
|
||||
package repository
|
||||
|
||||
import (
|
||||
@@ -14,11 +18,27 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
const (
|
||||
// Default initial capacity for job result slices
|
||||
defaultJobsCapacity = 50
|
||||
)
|
||||
|
||||
// QueryJobs retrieves jobs from the database with optional filtering, pagination,
|
||||
// and sorting. Security controls are automatically applied based on the user context.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context containing user authentication information
|
||||
// - filters: Optional job filters (cluster, state, user, time ranges, etc.)
|
||||
// - page: Optional pagination parameters (page number and items per page)
|
||||
// - order: Optional sorting specification (column or footprint field)
|
||||
//
|
||||
// Returns a slice of jobs matching the criteria, or an error if the query fails.
|
||||
// The function enforces role-based access control through SecurityCheck.
|
||||
func (r *JobRepository) QueryJobs(
|
||||
ctx context.Context,
|
||||
filters []*model.JobFilter,
|
||||
@@ -33,26 +53,24 @@ func (r *JobRepository) QueryJobs(
|
||||
if order != nil {
|
||||
field := toSnakeCase(order.Field)
|
||||
if order.Type == "col" {
|
||||
// "col": Fixed column name query
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
|
||||
return nil, errors.New("invalid sorting order for column")
|
||||
}
|
||||
} else {
|
||||
// "foot": Order by footprint JSON field values
|
||||
// Verify and Search Only in Valid Jsons
|
||||
query = query.Where("JSON_VALID(meta_data)")
|
||||
// Order by footprint JSON field values
|
||||
query = query.Where("JSON_VALID(footprint)")
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
|
||||
return nil, errors.New("invalid sorting order for footprint")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -69,29 +87,35 @@ func (r *JobRepository) QueryJobs(
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
queryString, queryVars, _ := query.ToSql()
|
||||
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("query failed [%s] %v: %w", queryString, queryVars, err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
jobs := make([]*schema.Job, 0, 50)
|
||||
jobs := make([]*schema.Job, 0, defaultJobsCapacity)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
cclog.Warn("Error while scanning rows (Jobs)")
|
||||
return nil, err
|
||||
cclog.Warnf("Error scanning job row: %v", err)
|
||||
return nil, fmt.Errorf("failed to scan job row: %w", err)
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error iterating job rows: %w", err)
|
||||
}
|
||||
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// CountJobs returns the total number of jobs matching the given filters.
|
||||
// Security controls are automatically applied based on the user context.
|
||||
// Uses DISTINCT count to handle tag filters correctly (jobs may appear multiple
|
||||
// times when joined with the tag table).
|
||||
func (r *JobRepository) CountJobs(
|
||||
ctx context.Context,
|
||||
filters []*model.JobFilter,
|
||||
) (int, error) {
|
||||
// DISTINCT count for tag filters; does not affect other queries
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
|
||||
if qerr != nil {
|
||||
return 0, qerr
|
||||
@@ -103,12 +127,22 @@ func (r *JobRepository) CountJobs(
|
||||
|
||||
var count int
|
||||
if err := query.RunWith(r.DB).Scan(&count); err != nil {
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("failed to count jobs: %w", err)
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// SecurityCheckWithUser applies role-based access control filters to a job query
|
||||
// based on the provided user's roles and permissions.
|
||||
//
|
||||
// Access rules by role:
|
||||
// - API role (exclusive): Full access to all jobs
|
||||
// - Admin/Support roles: Full access to all jobs
|
||||
// - Manager role: Access to jobs in managed projects plus own jobs
|
||||
// - User role: Access only to own jobs
|
||||
//
|
||||
// Returns an error if the user is nil or has no recognized roles.
|
||||
func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||
if user == nil {
|
||||
var qnil sq.SelectBuilder
|
||||
@@ -116,84 +150,68 @@ func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.Select
|
||||
}
|
||||
|
||||
switch {
|
||||
case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
|
||||
case len(user.Roles) == 1 && user.HasRole(schema.RoleAPI):
|
||||
return query, nil
|
||||
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
|
||||
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}):
|
||||
return query, nil
|
||||
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
|
||||
case user.HasRole(schema.RoleManager):
|
||||
if len(user.Projects) != 0 {
|
||||
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
|
||||
} else {
|
||||
cclog.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
|
||||
return query.Where("job.hpc_user = ?", user.Username), nil
|
||||
}
|
||||
case user.HasRole(schema.RoleUser): // User : Only personal jobs
|
||||
cclog.Debugf("Manager '%s' has no assigned projects, restricting to personal jobs", user.Username)
|
||||
return query.Where("job.hpc_user = ?", user.Username), nil
|
||||
default: // No known Role, return error
|
||||
case user.HasRole(schema.RoleUser):
|
||||
return query.Where("job.hpc_user = ?", user.Username), nil
|
||||
default:
|
||||
var qnil sq.SelectBuilder
|
||||
return qnil, fmt.Errorf("user has no or unknown roles")
|
||||
}
|
||||
}
|
||||
|
||||
// SecurityCheck extracts the user from the context and applies role-based access
|
||||
// control filters to the query. This is a convenience wrapper around SecurityCheckWithUser.
|
||||
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||
user := GetUserFromContext(ctx)
|
||||
|
||||
return SecurityCheckWithUser(user, query)
|
||||
}
|
||||
|
||||
// Build a sq.SelectBuilder out of a schema.JobFilter.
|
||||
// BuildWhereClause constructs SQL WHERE conditions from a JobFilter and applies
|
||||
// them to the query. Supports filtering by job properties (cluster, state, user),
|
||||
// time ranges, resource usage, tags, and JSON field searches in meta_data,
|
||||
// footprint, and resources columns.
|
||||
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if filter.Tags != nil {
|
||||
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
|
||||
}
|
||||
// Primary Key
|
||||
if filter.DbID != nil {
|
||||
dbIDs := make([]string, len(filter.DbID))
|
||||
copy(dbIDs, filter.DbID)
|
||||
query = query.Where(sq.Eq{"job.id": dbIDs})
|
||||
}
|
||||
if filter.JobID != nil {
|
||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||
}
|
||||
if filter.ArrayJobID != nil {
|
||||
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
|
||||
}
|
||||
if filter.User != nil {
|
||||
query = buildStringCondition("job.hpc_user", filter.User, query)
|
||||
}
|
||||
if filter.Project != nil {
|
||||
query = buildStringCondition("job.project", filter.Project, query)
|
||||
}
|
||||
if filter.JobName != nil {
|
||||
query = buildMetaJsonCondition("jobName", filter.JobName, query)
|
||||
}
|
||||
// Explicit indices
|
||||
if filter.Cluster != nil {
|
||||
query = buildStringCondition("job.cluster", filter.Cluster, query)
|
||||
}
|
||||
if filter.SubCluster != nil {
|
||||
query = buildStringCondition("job.subcluster", filter.SubCluster, query)
|
||||
}
|
||||
if filter.Partition != nil {
|
||||
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
|
||||
}
|
||||
if filter.StartTime != nil {
|
||||
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
||||
}
|
||||
if filter.Duration != nil {
|
||||
query = buildIntCondition("job.duration", filter.Duration, query)
|
||||
}
|
||||
if filter.MinRunningFor != nil {
|
||||
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
|
||||
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
|
||||
}
|
||||
if filter.Shared != nil {
|
||||
query = query.Where("job.shared = ?", *filter.Shared)
|
||||
}
|
||||
if filter.State != nil {
|
||||
states := make([]string, len(filter.State))
|
||||
for i, val := range filter.State {
|
||||
states[i] = string(val)
|
||||
}
|
||||
|
||||
query = query.Where(sq.Eq{"job.job_state": states})
|
||||
}
|
||||
if filter.Shared != nil {
|
||||
query = query.Where("job.shared = ?", *filter.Shared)
|
||||
}
|
||||
if filter.Project != nil {
|
||||
query = buildStringCondition("job.project", filter.Project, query)
|
||||
}
|
||||
if filter.User != nil {
|
||||
query = buildStringCondition("job.hpc_user", filter.User, query)
|
||||
}
|
||||
if filter.NumNodes != nil {
|
||||
query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
|
||||
}
|
||||
@@ -203,33 +221,95 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
||||
if filter.NumHWThreads != nil {
|
||||
query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
|
||||
}
|
||||
if filter.Node != nil {
|
||||
query = buildResourceJsonCondition("hostname", filter.Node, query)
|
||||
if filter.ArrayJobID != nil {
|
||||
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
|
||||
}
|
||||
if filter.StartTime != nil {
|
||||
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
||||
}
|
||||
if filter.Duration != nil {
|
||||
query = buildIntCondition("job.duration", filter.Duration, query)
|
||||
}
|
||||
if filter.Energy != nil {
|
||||
query = buildFloatCondition("job.energy", filter.Energy, query)
|
||||
}
|
||||
// Indices on Tag Table
|
||||
if filter.Tags != nil {
|
||||
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
|
||||
}
|
||||
// No explicit Indices
|
||||
if filter.JobID != nil {
|
||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||
}
|
||||
// Queries Within JSONs
|
||||
if filter.MetricStats != nil {
|
||||
for _, ms := range filter.MetricStats {
|
||||
query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
|
||||
query = buildFloatJSONCondition(ms.MetricName, ms.Range, query)
|
||||
}
|
||||
}
|
||||
if filter.Node != nil {
|
||||
query = buildResourceJSONCondition("hostname", filter.Node, query)
|
||||
}
|
||||
if filter.JobName != nil {
|
||||
query = buildMetaJSONCondition("jobName", filter.JobName, query)
|
||||
}
|
||||
if filter.Schedule != nil {
|
||||
interactiveJobname := "interactive"
|
||||
switch *filter.Schedule {
|
||||
case "interactive":
|
||||
iFilter := model.StringInput{Eq: &interactiveJobname}
|
||||
query = buildMetaJSONCondition("jobName", &iFilter, query)
|
||||
case "batch":
|
||||
sFilter := model.StringInput{Neq: &interactiveJobname}
|
||||
query = buildMetaJSONCondition("jobName", &sFilter, query)
|
||||
}
|
||||
}
|
||||
|
||||
// Configurable Filter to exclude recently started jobs, see config.go: ShortRunningJobsDuration
|
||||
if filter.MinRunningFor != nil {
|
||||
now := time.Now().Unix()
|
||||
// Only jobs whose start timestamp is more than MinRunningFor seconds in the past
|
||||
// If a job completed within the configured timeframe, it will still show up after the start_time matches the condition!
|
||||
query = query.Where(sq.Lt{"job.start_time": (now - int64(*filter.MinRunningFor))})
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
// buildIntCondition creates clauses for integer range filters, using BETWEEN only if required.
|
||||
func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
if cond.From != 1 && cond.To != 0 {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
} else if cond.From != 1 && cond.To == 0 {
|
||||
return query.Where(field+" >= ?", cond.From)
|
||||
} else if cond.From == 1 && cond.To != 0 {
|
||||
return query.Where(field+" <= ?", cond.To)
|
||||
} else {
|
||||
return query
|
||||
}
|
||||
}
|
||||
|
||||
// buildFloatCondition creates clauses for float range filters, using BETWEEN only if required.
|
||||
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
if cond.From != 1.0 && cond.To != 0.0 {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
} else if cond.From != 1.0 && cond.To == 0.0 {
|
||||
return query.Where(field+" >= ?", cond.From)
|
||||
} else if cond.From == 1.0 && cond.To != 0.0 {
|
||||
return query.Where(field+" <= ?", cond.To)
|
||||
} else {
|
||||
return query
|
||||
}
|
||||
}
|
||||
|
||||
// buildTimeCondition creates time range filters supporting absolute timestamps,
|
||||
// relative time ranges (last6h, last24h, last7d, last30d), or open-ended ranges.
|
||||
// Reminder: BETWEEN queries are slower and don't use indices as frequently: only use them if both conditions are required
|
||||
func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if cond.From != nil && cond.To != nil {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
|
||||
} else if cond.From != nil {
|
||||
return query.Where("? <= "+field, cond.From.Unix())
|
||||
return query.Where(field+" >= ?", cond.From.Unix())
|
||||
} else if cond.To != nil {
|
||||
return query.Where(field+" <= ?", cond.To.Unix())
|
||||
} else if cond.Range != "" {
|
||||
@@ -248,18 +328,28 @@ func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBui
|
||||
cclog.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
|
||||
return query
|
||||
}
|
||||
return query.Where(field+" BETWEEN ? AND ?", then, now)
|
||||
return query.Where(field+" >= ?", then)
|
||||
} else {
|
||||
return query
|
||||
}
|
||||
}
|
||||
|
||||
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
// Verify and Search Only in Valid Jsons
|
||||
// buildFloatJSONCondition creates a filter on a numeric field within the footprint JSON column, using BETWEEN only if required.
|
||||
func buildFloatJSONCondition(jsonField string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
query = query.Where("JSON_VALID(footprint)")
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
|
||||
if cond.From != 1.0 && cond.To != 0.0 {
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") BETWEEN ? AND ?", cond.From, cond.To)
|
||||
} else if cond.From != 1.0 && cond.To == 0.0 {
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") >= ?", cond.From)
|
||||
} else if cond.From == 1.0 && cond.To != 0.0 {
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") <= ?", cond.To)
|
||||
} else {
|
||||
return query
|
||||
}
|
||||
}
|
||||
|
||||
// buildStringCondition creates filters for string fields supporting equality,
|
||||
// inequality, prefix, suffix, substring, and IN list matching.
|
||||
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if cond.Eq != nil {
|
||||
return query.Where(field+" = ?", *cond.Eq)
|
||||
@@ -284,10 +374,9 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
|
||||
return query
|
||||
}
|
||||
|
||||
func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
// Verify and Search Only in Valid Jsons
|
||||
// buildMetaJSONCondition creates filters on fields within the meta_data JSON column.
|
||||
func buildMetaJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
query = query.Where("JSON_VALID(meta_data)")
|
||||
// add "AND" Sql query Block for field match
|
||||
if cond.Eq != nil {
|
||||
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") = ?", *cond.Eq)
|
||||
}
|
||||
@@ -306,10 +395,10 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.
|
||||
return query
|
||||
}
|
||||
|
||||
func buildResourceJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
// Verify and Search Only in Valid Jsons
|
||||
// buildResourceJSONCondition creates filters on fields within the resources JSON array column.
|
||||
// Uses json_each to search within array elements.
|
||||
func buildResourceJSONCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
query = query.Where("JSON_VALID(resources)")
|
||||
// add "AND" Sql query Block for field match
|
||||
if cond.Eq != nil {
|
||||
return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") = ?)", *cond.Eq)
|
||||
}
|
||||
@@ -333,15 +422,16 @@ var (
|
||||
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
)
|
||||
|
||||
// toSnakeCase converts camelCase strings to snake_case for SQL column names.
|
||||
// Includes security checks to prevent SQL injection attempts.
|
||||
// Panics if potentially dangerous characters are detected.
|
||||
func toSnakeCase(str string) string {
|
||||
for _, c := range str {
|
||||
if c == '\'' || c == '\\' {
|
||||
cclog.Panic("toSnakeCase() attack vector!")
|
||||
if c == '\'' || c == '\\' || c == '"' || c == ';' || c == '-' || c == ' ' {
|
||||
cclog.Panicf("toSnakeCase: potentially dangerous character detected in input: %q", str)
|
||||
}
|
||||
}
|
||||
|
||||
str = strings.ReplaceAll(str, "'", "")
|
||||
str = strings.ReplaceAll(str, "\\", "")
|
||||
snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
|
||||
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
|
||||
return strings.ToLower(snake)
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
@@ -33,7 +33,7 @@ func TestFind(t *testing.T) {
|
||||
func TestFindById(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
job, err := r.FindById(getContext(t), 338)
|
||||
job, err := r.FindByID(getContext(t), 338)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -78,7 +78,7 @@ func TestFindJobsBetween(t *testing.T) {
|
||||
|
||||
// 1. Find a job to use (Find all jobs)
|
||||
// We use a large time range to ensure we get something if it exists
|
||||
jobs, err := r.FindJobsBetween(0, 9999999999, false)
|
||||
jobs, err := r.FindJobsBetween(0, 9999999999, "none")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -88,21 +88,21 @@ func TestFindJobsBetween(t *testing.T) {
|
||||
|
||||
targetJob := jobs[0]
|
||||
|
||||
// 2. Create a tag
|
||||
tagName := fmt.Sprintf("testtag_%d", time.Now().UnixNano())
|
||||
tagId, err := r.CreateTag("testtype", tagName, "global")
|
||||
// 2. Create an auto-tagger tag (type "app")
|
||||
appTagName := fmt.Sprintf("apptag_%d", time.Now().UnixNano())
|
||||
appTagID, err := r.CreateTag("app", appTagName, "global")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// 3. Link Tag (Manually to avoid archive dependency side-effects in unit test)
|
||||
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, tagId)
|
||||
// 3. Link auto-tagger tag to job
|
||||
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, appTagID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// 4. Search with omitTagged = false (Should find the job)
|
||||
jobsFound, err := r.FindJobsBetween(0, 9999999999, false)
|
||||
// 4. Search with omitTagged = "none" (Should find the job)
|
||||
jobsFound, err := r.FindJobsBetween(0, 9999999999, "none")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -115,18 +115,58 @@ func TestFindJobsBetween(t *testing.T) {
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("Target job %d should be found when omitTagged=false", *targetJob.ID)
|
||||
t.Errorf("Target job %d should be found when omitTagged=none", *targetJob.ID)
|
||||
}
|
||||
|
||||
// 5. Search with omitTagged = true (Should NOT find the job)
|
||||
jobsFiltered, err := r.FindJobsBetween(0, 9999999999, true)
|
||||
// 5. Search with omitTagged = "all" (Should NOT find the job — it has a tag)
|
||||
jobsFiltered, err := r.FindJobsBetween(0, 9999999999, "all")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, j := range jobsFiltered {
|
||||
if *j.ID == *targetJob.ID {
|
||||
t.Errorf("Target job %d should NOT be found when omitTagged=true", *targetJob.ID)
|
||||
t.Errorf("Target job %d should NOT be found when omitTagged=all", *targetJob.ID)
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Search with omitTagged = "user": auto-tagger tag ("app") should NOT exclude the job
|
||||
jobsUserFilter, err := r.FindJobsBetween(0, 9999999999, "user")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
found = false
|
||||
for _, j := range jobsUserFilter {
|
||||
if *j.ID == *targetJob.ID {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("Target job %d should be found when omitTagged=user (only has auto-tagger tag)", *targetJob.ID)
|
||||
}
|
||||
|
||||
// 7. Add a user-created tag (type "testtype") to the same job
|
||||
userTagName := fmt.Sprintf("usertag_%d", time.Now().UnixNano())
|
||||
userTagID, err := r.CreateTag("testtype", userTagName, "global")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, userTagID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// 8. Now omitTagged = "user" should exclude the job (has a user-created tag)
|
||||
jobsUserFilter2, err := r.FindJobsBetween(0, 9999999999, "user")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
for _, j := range jobsUserFilter2 {
|
||||
if *j.ID == *targetJob.ID {
|
||||
t.Errorf("Target job %d should NOT be found when omitTagged=user (has user-created tag)", *targetJob.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,52 +10,48 @@ import (
|
||||
"embed"
|
||||
"fmt"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/golang-migrate/migrate/v4"
|
||||
"github.com/golang-migrate/migrate/v4/database/mysql"
|
||||
"github.com/golang-migrate/migrate/v4/database/sqlite3"
|
||||
"github.com/golang-migrate/migrate/v4/source/iofs"
|
||||
)
|
||||
|
||||
// Version is the current database schema version required by this version of cc-backend.
|
||||
// When the database schema changes, this version is incremented and a new migration file
|
||||
// is added to internal/repository/migrations/sqlite3/.
|
||||
//
|
||||
// Version history:
|
||||
// - Version 10: Current version
|
||||
//
|
||||
// Migration files are embedded at build time from the migrations directory.
|
||||
const Version uint = 10
|
||||
|
||||
//go:embed migrations/*
|
||||
var migrationFiles embed.FS
|
||||
|
||||
func checkDBVersion(backend string, db *sql.DB) error {
|
||||
var m *migrate.Migrate
|
||||
// checkDBVersion verifies that the database schema version matches the expected version.
|
||||
// This is called automatically during Connect() to ensure schema compatibility.
|
||||
//
|
||||
// Returns an error if:
|
||||
// - Database version is older than expected (needs migration)
|
||||
// - Database version is newer than expected (needs app upgrade)
|
||||
// - Database is in a dirty state (failed migration)
|
||||
//
|
||||
// A "dirty" database indicates a migration was started but not completed successfully.
|
||||
// This requires manual intervention to fix the database and force the version.
|
||||
func checkDBVersion(db *sql.DB) error {
|
||||
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch backend {
|
||||
case "sqlite3":
|
||||
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithInstance("iofs", d, "sqlite3", driver)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case "mysql":
|
||||
driver, err := mysql.WithInstance(db, &mysql.Config{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d, err := iofs.New(migrationFiles, "migrations/mysql")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithInstance("iofs", d, "mysql", driver)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
|
||||
m, err := migrate.NewWithInstance("iofs", d, "sqlite3", driver)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
v, dirty, err := m.Version()
|
||||
@@ -80,37 +76,41 @@ func checkDBVersion(backend string, db *sql.DB) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) {
|
||||
switch backend {
|
||||
case "sqlite3":
|
||||
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
|
||||
if err != nil {
|
||||
cclog.Fatal(err)
|
||||
}
|
||||
// getMigrateInstance creates a new migration instance for the given database file.
|
||||
// This is used internally by MigrateDB, RevertDB, and ForceDB.
|
||||
func getMigrateInstance(db string) (m *migrate.Migrate, err error) {
|
||||
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
|
||||
if err != nil {
|
||||
return m, err
|
||||
}
|
||||
case "mysql":
|
||||
d, err := iofs.New(migrationFiles, "migrations/mysql")
|
||||
if err != nil {
|
||||
return m, err
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
|
||||
if err != nil {
|
||||
return m, err
|
||||
}
|
||||
default:
|
||||
cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
|
||||
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func MigrateDB(backend string, db string) error {
|
||||
m, err := getMigrateInstance(backend, db)
|
||||
// MigrateDB applies all pending database migrations to bring the schema up to date.
|
||||
// This should be run with the -migrate-db flag before starting the application
|
||||
// after upgrading to a new version that requires schema changes.
|
||||
//
|
||||
// Process:
|
||||
// 1. Checks current database version
|
||||
// 2. Applies all migrations from current version to target Version
|
||||
// 3. Updates schema_migrations table to track applied migrations
|
||||
//
|
||||
// Important:
|
||||
// - Always backup your database before running migrations
|
||||
// - Migrations are irreversible without manual intervention
|
||||
// - If a migration fails, the database is marked "dirty" and requires manual fix
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// cc-backend -migrate-db
|
||||
func MigrateDB(db string) error {
|
||||
m, err := getMigrateInstance(db)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -118,7 +118,7 @@ func MigrateDB(backend string, db string) error {
|
||||
v, dirty, err := m.Version()
|
||||
if err != nil {
|
||||
if err == migrate.ErrNilVersion {
|
||||
cclog.Warn("Legacy database without version or missing database file!")
|
||||
cclog.Info("Legacy database without version or missing database file!")
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
@@ -144,8 +144,19 @@ func MigrateDB(backend string, db string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func RevertDB(backend string, db string) error {
|
||||
m, err := getMigrateInstance(backend, db)
|
||||
// RevertDB rolls back the database schema to the previous version (Version - 1).
|
||||
// This is primarily used for testing or emergency rollback scenarios.
|
||||
//
|
||||
// Warning:
|
||||
// - This may cause data loss if newer schema added columns/tables
|
||||
// - Always backup before reverting
|
||||
// - Not all migrations are safely reversible
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// cc-backend -revert-db
|
||||
func RevertDB(db string) error {
|
||||
m, err := getMigrateInstance(db)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -162,8 +173,23 @@ func RevertDB(backend string, db string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func ForceDB(backend string, db string) error {
|
||||
m, err := getMigrateInstance(backend, db)
|
||||
// ForceDB forces the database schema version to the current Version without running migrations.
|
||||
// This is only used to recover from failed migrations that left the database in a "dirty" state.
|
||||
//
|
||||
// When to use:
|
||||
// - After manually fixing a failed migration
|
||||
// - When you've manually applied schema changes and need to update the version marker
|
||||
//
|
||||
// Warning:
|
||||
// - This does NOT apply any schema changes
|
||||
// - Only use after manually verifying the schema is correct
|
||||
// - Improper use can cause schema/version mismatch
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// cc-backend -force-db
|
||||
func ForceDB(db string) error {
|
||||
m, err := getMigrateInstance(db)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
DROP TABLE IF EXISTS job;
|
||||
DROP TABLE IF EXISTS tags;
|
||||
DROP TABLE IF EXISTS jobtag;
|
||||
DROP TABLE IF EXISTS configuration;
|
||||
DROP TABLE IF EXISTS user;
|
||||
@@ -1,66 +0,0 @@
|
||||
CREATE TABLE IF NOT EXISTS job (
|
||||
id INTEGER AUTO_INCREMENT PRIMARY KEY ,
|
||||
job_id BIGINT NOT NULL,
|
||||
cluster VARCHAR(255) NOT NULL,
|
||||
subcluster VARCHAR(255) NOT NULL,
|
||||
start_time BIGINT NOT NULL, -- Unix timestamp
|
||||
|
||||
user VARCHAR(255) NOT NULL,
|
||||
project VARCHAR(255) NOT NULL,
|
||||
`partition` VARCHAR(255) NOT NULL,
|
||||
array_job_id BIGINT NOT NULL,
|
||||
duration INT NOT NULL DEFAULT 0,
|
||||
walltime INT NOT NULL DEFAULT 0,
|
||||
job_state VARCHAR(255) NOT NULL
|
||||
CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled',
|
||||
'stopped', 'timeout', 'preempted', 'out_of_memory')),
|
||||
meta_data TEXT, -- JSON
|
||||
resources TEXT NOT NULL, -- JSON
|
||||
|
||||
num_nodes INT NOT NULL,
|
||||
num_hwthreads INT NOT NULL,
|
||||
num_acc INT NOT NULL,
|
||||
smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),
|
||||
exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),
|
||||
|
||||
mem_used_max REAL NOT NULL DEFAULT 0.0,
|
||||
flops_any_avg REAL NOT NULL DEFAULT 0.0,
|
||||
mem_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
load_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
net_data_vol_total REAL NOT NULL DEFAULT 0.0,
|
||||
file_bw_avg REAL NOT NULL DEFAULT 0.0,
|
||||
file_data_vol_total REAL NOT NULL DEFAULT 0.0,
|
||||
UNIQUE (job_id, cluster, start_time)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS tag (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tag_type VARCHAR(255) NOT NULL,
|
||||
tag_name VARCHAR(255) NOT NULL,
|
||||
UNIQUE (tag_type, tag_name));
|
||||
|
||||
CREATE TABLE IF NOT EXISTS jobtag (
|
||||
job_id INTEGER,
|
||||
tag_id INTEGER,
|
||||
PRIMARY KEY (job_id, tag_id),
|
||||
FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS user (
|
||||
username varchar(255) PRIMARY KEY NOT NULL,
|
||||
password varchar(255) DEFAULT NULL,
|
||||
ldap tinyint NOT NULL DEFAULT 0, /* col called "ldap" for historic reasons, fills the "AuthSource" */
|
||||
name varchar(255) DEFAULT NULL,
|
||||
roles varchar(255) NOT NULL DEFAULT "[]",
|
||||
email varchar(255) DEFAULT NULL);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS configuration (
|
||||
username varchar(255),
|
||||
confkey varchar(255),
|
||||
value varchar(255),
|
||||
PRIMARY KEY (username, confkey),
|
||||
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
DROP INDEX IF EXISTS job_stats;
|
||||
DROP INDEX IF EXISTS job_by_user;
|
||||
DROP INDEX IF EXISTS job_by_starttime;
|
||||
DROP INDEX IF EXISTS job_by_job_id;
|
||||
DROP INDEX IF EXISTS job_list;
|
||||
DROP INDEX IF EXISTS job_list_user;
|
||||
DROP INDEX IF EXISTS job_list_users;
|
||||
DROP INDEX IF EXISTS job_list_users_start;
|
||||
@@ -1,8 +0,0 @@
|
||||
CREATE INDEX IF NOT EXISTS job_stats ON job (cluster,subcluster,user);
|
||||
CREATE INDEX IF NOT EXISTS job_by_user ON job (user);
|
||||
CREATE INDEX IF NOT EXISTS job_by_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS job_by_job_id ON job (job_id);
|
||||
CREATE INDEX IF NOT EXISTS job_list ON job (cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS job_list_user ON job (user, cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS job_list_users ON job (user, job_state);
|
||||
CREATE INDEX IF NOT EXISTS job_list_users_start ON job (start_time, user, job_state);
|
||||
@@ -1 +0,0 @@
|
||||
ALTER TABLE user DROP COLUMN projects;
|
||||
@@ -1 +0,0 @@
|
||||
ALTER TABLE user ADD COLUMN projects varchar(255) NOT NULL DEFAULT "[]";
|
||||
@@ -1,5 +0,0 @@
|
||||
ALTER TABLE job
|
||||
MODIFY `partition` VARCHAR(255) NOT NULL,
|
||||
MODIFY array_job_id BIGINT NOT NULL,
|
||||
MODIFY num_hwthreads INT NOT NULL,
|
||||
MODIFY num_acc INT NOT NULL;
|
||||
@@ -1,5 +0,0 @@
|
||||
ALTER TABLE job
|
||||
MODIFY `partition` VARCHAR(255),
|
||||
MODIFY array_job_id BIGINT,
|
||||
MODIFY num_hwthreads INT,
|
||||
MODIFY num_acc INT;
|
||||
@@ -1,2 +0,0 @@
|
||||
ALTER TABLE tag DROP COLUMN insert_time;
|
||||
ALTER TABLE jobtag DROP COLUMN insert_time;
|
||||
@@ -1,2 +0,0 @@
|
||||
ALTER TABLE tag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;
|
||||
ALTER TABLE jobtag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;
|
||||
@@ -1 +0,0 @@
|
||||
ALTER TABLE configuration MODIFY value VARCHAR(255);
|
||||
@@ -1 +0,0 @@
|
||||
ALTER TABLE configuration MODIFY value TEXT;
|
||||
@@ -1,3 +0,0 @@
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
ALTER TABLE tag MODIFY id INTEGER;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -1,3 +0,0 @@
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -1,83 +0,0 @@
|
||||
ALTER TABLE job DROP energy;
|
||||
ALTER TABLE job DROP energy_footprint;
|
||||
ALTER TABLE job ADD COLUMN flops_any_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_used_max;
|
||||
ALTER TABLE job ADD COLUMN load_avg;
|
||||
ALTER TABLE job ADD COLUMN net_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN net_data_vol_total;
|
||||
ALTER TABLE job ADD COLUMN file_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN file_data_vol_total;
|
||||
|
||||
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
|
||||
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
|
||||
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
|
||||
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
|
||||
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
|
||||
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
|
||||
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
|
||||
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
|
||||
|
||||
ALTER TABLE job DROP footprint;
|
||||
-- Do not use reserved keywords anymore
|
||||
RENAME TABLE hpc_user TO `user`;
|
||||
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
|
||||
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_subcluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_user;
|
||||
DROP INDEX IF EXISTS jobs_user_starttime;
|
||||
DROP INDEX IF EXISTS jobs_user_duration;
|
||||
DROP INDEX IF EXISTS jobs_user_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_project;
|
||||
DROP INDEX IF EXISTS jobs_project_user;
|
||||
DROP INDEX IF EXISTS jobs_project_starttime;
|
||||
DROP INDEX IF EXISTS jobs_project_duration;
|
||||
DROP INDEX IF EXISTS jobs_project_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_starttime;
|
||||
DROP INDEX IF EXISTS jobs_duration;
|
||||
DROP INDEX IF EXISTS jobs_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numnodes_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numacc_starttime;
|
||||
DROP INDEX IF EXISTS jobs_energy_starttime;
|
||||
@@ -1,123 +0,0 @@
|
||||
DROP INDEX IF EXISTS job_stats ON job;
|
||||
DROP INDEX IF EXISTS job_by_user ON job;
|
||||
DROP INDEX IF EXISTS job_by_starttime ON job;
|
||||
DROP INDEX IF EXISTS job_by_job_id ON job;
|
||||
DROP INDEX IF EXISTS job_list ON job;
|
||||
DROP INDEX IF EXISTS job_list_user ON job;
|
||||
DROP INDEX IF EXISTS job_list_users ON job;
|
||||
DROP INDEX IF EXISTS job_list_users_start ON job;
|
||||
|
||||
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
|
||||
ALTER TABLE job ADD COLUMN energy_footprint JSON;
|
||||
|
||||
ALTER TABLE job ADD COLUMN footprint JSON;
|
||||
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
|
||||
|
||||
-- Do not use reserved keywords anymore
|
||||
RENAME TABLE `user` TO hpc_user;
|
||||
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
|
||||
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
|
||||
|
||||
ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
|
||||
|
||||
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
|
||||
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
|
||||
|
||||
ALTER TABLE job DROP flops_any_avg;
|
||||
ALTER TABLE job DROP mem_bw_avg;
|
||||
ALTER TABLE job DROP mem_used_max;
|
||||
ALTER TABLE job DROP load_avg;
|
||||
ALTER TABLE job DROP net_bw_avg;
|
||||
ALTER TABLE job DROP net_data_vol_total;
|
||||
ALTER TABLE job DROP file_bw_avg;
|
||||
ALTER TABLE job DROP file_data_vol_total;
|
||||
|
||||
-- Indices for: Single filters, combined filters, sorting, sorting with filters
|
||||
-- Cluster Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
|
||||
-- Cluster+Partition Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||
|
||||
-- Cluster+Partition+Jobstate Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
|
||||
-- Cluster+Partition+Jobstate Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||
-- Cluster+JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||
|
||||
-- User Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
|
||||
-- User Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||
|
||||
-- Project Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||
-- Project Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||
|
||||
-- JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||
-- JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||
|
||||
-- ArrayJob Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||
|
||||
-- Sorting without active filters
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||
|
||||
-- Optimize DB index usage
|
||||
@@ -118,104 +118,116 @@ DROP TABLE lookup_exclusive;
|
||||
DROP TABLE job; -- Deletes All Existing 'job' Indices; Recreate after Renaming
|
||||
ALTER TABLE job_new RENAME TO job;
|
||||
|
||||
-- Recreate Indices from 08_add-footprint, include new submit_time indices
|
||||
-- Recreate Indices from 08_add-footprint; include new 'shared' column
|
||||
-- Cluster Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_submittime ON job (cluster, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
|
||||
|
||||
-- Cluster Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration_starttime ON job (cluster, duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime_duration ON job (cluster, start_time, duration);
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_user ON job (cluster, cluster_partition, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_project ON job (cluster, cluster_partition, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_shared ON job (cluster, cluster_partition, shared);
|
||||
|
||||
-- Cluster+Partition Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_submittime ON job (cluster, cluster_partition, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
|
||||
|
||||
-- Cluster+Partition+Jobstate Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
|
||||
-- Cluster+Partition+Jobstate Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_submittime ON job (cluster, cluster_partition, job_state, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
|
||||
-- Cluster+Partition Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration_starttime ON job (cluster, cluster_partition, duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime_duration ON job (cluster, cluster_partition, start_time, duration);
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||
-- Cluster+JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_submittime ON job (cluster, job_state, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
|
||||
|
||||
-- Cluster+JobState Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime_duration ON job (cluster, job_state, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration_starttime ON job (cluster, job_state, duration, start_time);
|
||||
|
||||
-- Cluster+Shared Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_user ON job (cluster, shared, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_project ON job (cluster, shared, project);
|
||||
-- Cluster+Shared Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numnodes ON job (cluster, shared, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numhwthreads ON job (cluster, shared, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numacc ON job (cluster, shared, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_energy ON job (cluster, shared, energy);
|
||||
|
||||
-- Cluster+Shared Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_starttime_duration ON job (cluster, shared, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_duration_starttime ON job (cluster, shared, duration, start_time);
|
||||
|
||||
-- User Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
|
||||
-- User Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
|
||||
|
||||
-- User Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime_duration ON job (hpc_user, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration_starttime ON job (hpc_user, duration, start_time);
|
||||
|
||||
-- Project Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||
-- Project Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
|
||||
|
||||
-- Project Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime_duration ON job (project, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration_starttime ON job (project, duration, start_time);
|
||||
|
||||
-- JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||
-- JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
|
||||
|
||||
-- JobState Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime_duration ON job (job_state, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration_starttime ON job (job_state, duration, start_time);
|
||||
|
||||
-- Shared Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_user ON job (shared, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_project ON job (shared, project);
|
||||
-- Shared Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_numnodes ON job (shared, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_numhwthreads ON job (shared, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_numacc ON job (shared, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_energy ON job (shared, energy);
|
||||
|
||||
-- Shared Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_starttime_duration ON job (shared, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_duration_starttime ON job (shared, duration, start_time);
|
||||
|
||||
-- ArrayJob Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||
|
||||
-- Sorting without active filters
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||
@@ -223,6 +235,22 @@ CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, st
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||
|
||||
-- Single filters with duration sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime_duration ON job (start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_duration ON job (num_nodes, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_duration ON job (num_hwthreads, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_duration ON job (num_acc, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_duration ON job (energy, duration);
|
||||
|
||||
-- Backup Indices For High Variety Columns
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
|
||||
-- Notes:
|
||||
-- Cluster+Partition+Jobstate Filter: Tested -> Full array of combinations not required
|
||||
-- Cluster+JobState+Shared Filter: Tested -> No further timing improvement
|
||||
-- JobState+Shared Filter: Tested -> No further timing improvement
|
||||
|
||||
-- Optimize DB index usage
|
||||
PRAGMA optimize;
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@ CREATE TABLE "node_state" (
|
||||
CHECK (health_state IN (
|
||||
'full', 'partial', 'failed'
|
||||
)),
|
||||
health_metrics TEXT, -- JSON array of strings
|
||||
node_id INTEGER,
|
||||
FOREIGN KEY (node_id) REFERENCES node (id)
|
||||
);
|
||||
@@ -33,12 +34,11 @@ CREATE INDEX IF NOT EXISTS nodes_cluster_subcluster ON node (cluster, subcluster
|
||||
|
||||
-- Add NEW Indices For New Node_State Table Fields
|
||||
CREATE INDEX IF NOT EXISTS nodestates_timestamp ON node_state (time_stamp);
|
||||
CREATE INDEX IF NOT EXISTS nodestates_state ON node_state (node_state);
|
||||
CREATE INDEX IF NOT EXISTS nodestates_health ON node_state (health_state);
|
||||
CREATE INDEX IF NOT EXISTS nodestates_state_timestamp ON node_state (node_state, time_stamp);
|
||||
CREATE INDEX IF NOT EXISTS nodestates_health_timestamp ON node_state (health_state, time_stamp);
|
||||
CREATE INDEX IF NOT EXISTS nodestates_nodeid_state ON node_state (node_id, node_state);
|
||||
CREATE INDEX IF NOT EXISTS nodestates_nodeid_health ON node_state (node_id, health_state);
|
||||
CREATE INDEX IF NOT EXISTS nodestates_nodeid_timestamp ON node_state (node_id, time_stamp DESC);
|
||||
|
||||
-- Add NEW Indices For Increased Amounts of Tags
|
||||
CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id);
|
||||
|
||||
@@ -10,14 +10,17 @@ import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/lrucache"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/lrucache"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
@@ -49,6 +52,38 @@ func GetNodeRepository() *NodeRepository {
|
||||
return nodeRepoInstance
|
||||
}
|
||||
|
||||
// latestStateCondition returns a squirrel expression that restricts node_state
|
||||
// rows to the latest per node_id using a correlated subquery.
|
||||
// Requires the query to join node and node_state tables.
|
||||
func latestStateCondition() sq.Sqlizer {
|
||||
return sq.Expr(
|
||||
"node_state.id = (SELECT ns2.id FROM node_state ns2 WHERE ns2.node_id = node.id ORDER BY ns2.time_stamp DESC LIMIT 1)",
|
||||
)
|
||||
}
|
||||
|
||||
// applyNodeFilters applies common NodeFilter conditions to a query that joins
|
||||
// the node and node_state tables with latestStateCondition.
|
||||
func applyNodeFilters(query sq.SelectBuilder, filters []*model.NodeFilter) sq.SelectBuilder {
|
||||
for _, f := range filters {
|
||||
if f.Cluster != nil {
|
||||
query = buildStringCondition("node.cluster", f.Cluster, query)
|
||||
}
|
||||
if f.SubCluster != nil {
|
||||
query = buildStringCondition("node.subcluster", f.SubCluster, query)
|
||||
}
|
||||
if f.Hostname != nil {
|
||||
query = buildStringCondition("node.hostname", f.Hostname, query)
|
||||
}
|
||||
if f.SchedulerState != nil {
|
||||
query = query.Where("node_state.node_state = ?", f.SchedulerState)
|
||||
}
|
||||
if f.HealthState != nil {
|
||||
query = query.Where("node_state.health_state = ?", f.HealthState)
|
||||
}
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[string]string, error) {
|
||||
start := time.Now()
|
||||
|
||||
@@ -79,17 +114,16 @@ func (r *NodeRepository) FetchMetadata(hostname string, cluster string) (map[str
|
||||
|
||||
func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool) (*schema.Node, error) {
|
||||
node := &schema.Node{}
|
||||
var timestamp int
|
||||
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
|
||||
"node_state.health_state", "MAX(node_state.time_stamp) as time").
|
||||
From("node_state").
|
||||
Join("node ON node_state.node_id = node.id").
|
||||
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
|
||||
"node_state.node_state", "node_state.health_state").
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()).
|
||||
Where("node.hostname = ?", hostname).
|
||||
Where("node.cluster = ?", cluster).
|
||||
GroupBy("node_state.node_id").
|
||||
RunWith(r.DB).
|
||||
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, ×tamp); err != nil {
|
||||
cclog.Warnf("Error while querying node '%s' at time '%d' from database: %v", hostname, timestamp, err)
|
||||
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
|
||||
cclog.Warnf("Error while querying node '%s' from database: %v", hostname, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -106,31 +140,28 @@ func (r *NodeRepository) GetNode(hostname string, cluster string, withMeta bool)
|
||||
return node, nil
|
||||
}
|
||||
|
||||
func (r *NodeRepository) GetNodeById(id int64, withMeta bool) (*schema.Node, error) {
|
||||
func (r *NodeRepository) GetNodeByID(id int64, withMeta bool) (*schema.Node, error) {
|
||||
node := &schema.Node{}
|
||||
var timestamp int
|
||||
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
|
||||
"node_state.health_state", "MAX(node_state.time_stamp) as time").
|
||||
From("node_state").
|
||||
Join("node ON node_state.node_id = node.id").
|
||||
if err := sq.Select("node.hostname", "node.cluster", "node.subcluster",
|
||||
"node_state.node_state", "node_state.health_state").
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()).
|
||||
Where("node.id = ?", id).
|
||||
GroupBy("node_state.node_id").
|
||||
RunWith(r.DB).
|
||||
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState, ×tamp); err != nil {
|
||||
cclog.Warnf("Error while querying node ID '%d' at time '%d' from database: %v", id, timestamp, err)
|
||||
QueryRow().Scan(&node.Hostname, &node.Cluster, &node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
|
||||
cclog.Warnf("Error while querying node ID '%d' from database: %v", id, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// NEEDS METADATA BY ID
|
||||
// if withMeta {
|
||||
// var err error
|
||||
// var meta map[string]string
|
||||
// if meta, err = r.FetchMetadata(hostname, cluster); err != nil {
|
||||
// cclog.Warnf("Error while fetching metadata for node '%s'", hostname)
|
||||
// return nil, err
|
||||
// }
|
||||
// node.MetaData = meta
|
||||
// }
|
||||
if withMeta {
|
||||
meta, metaErr := r.FetchMetadata(node.Hostname, node.Cluster)
|
||||
if metaErr != nil {
|
||||
cclog.Warnf("Error while fetching metadata for node ID '%d': %v", id, metaErr)
|
||||
return nil, metaErr
|
||||
}
|
||||
node.MetaData = meta
|
||||
}
|
||||
|
||||
return node, nil
|
||||
}
|
||||
@@ -166,9 +197,10 @@ func (r *NodeRepository) AddNode(node *schema.NodeDB) (int64, error) {
|
||||
}
|
||||
|
||||
const NamedNodeStateInsert string = `
|
||||
INSERT INTO node_state (time_stamp, node_state, health_state, cpus_allocated,
|
||||
memory_allocated, gpus_allocated, jobs_running, node_id)
|
||||
VALUES (:time_stamp, :node_state, :health_state, :cpus_allocated, :memory_allocated, :gpus_allocated, :jobs_running, :node_id);`
|
||||
INSERT INTO node_state (time_stamp, node_state, health_state, health_metrics,
|
||||
cpus_allocated, memory_allocated, gpus_allocated, jobs_running, node_id)
|
||||
VALUES (:time_stamp, :node_state, :health_state, :health_metrics,
|
||||
:cpus_allocated, :memory_allocated, :gpus_allocated, :jobs_running, :node_id);`
|
||||
|
||||
// TODO: Add real Monitoring Health State
|
||||
|
||||
@@ -194,8 +226,7 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
|
||||
return err
|
||||
}
|
||||
|
||||
cclog.Infof("Added node '%s' to database", hostname)
|
||||
return nil
|
||||
cclog.Debugf("Added node '%s' to database", hostname)
|
||||
} else {
|
||||
cclog.Warnf("Error while querying node '%v' from database", id)
|
||||
return err
|
||||
@@ -209,7 +240,7 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
|
||||
cclog.Errorf("Error while adding node state for '%v' to database", hostname)
|
||||
return err
|
||||
}
|
||||
cclog.Infof("Updated node state for '%s' in database", hostname)
|
||||
cclog.Debugf("Updated node state for '%s' in database", hostname)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -222,6 +253,77 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
|
||||
// return nil
|
||||
// }
|
||||
|
||||
// NodeStateWithNode combines a node state row with denormalized node info.
// It is the element type returned by FindNodeStatesBefore: the first nine
// fields are read from the node_state table, the last three from the joined
// node table.
type NodeStateWithNode struct {
// ID is the id of the node_state row.
ID int64 `db:"id"`
// TimeStamp is the row's time_stamp value, the column compared against the
// cutoff by FindNodeStatesBefore.
// NOTE(review): presumably Unix seconds; confirm against the writer.
TimeStamp int64 `db:"time_stamp"`
// NodeState is the node_state column of the row.
NodeState string `db:"node_state"`
// HealthState is the health_state column of the row.
HealthState string `db:"health_state"`
// HealthMetrics is the raw text of the health_metrics column;
// FindNodeStatesBefore stores "" here when the column is NULL.
HealthMetrics string `db:"health_metrics"`
// CpusAllocated is the cpus_allocated column of the row.
CpusAllocated int `db:"cpus_allocated"`
// MemoryAllocated is the memory_allocated column of the row.
MemoryAllocated int64 `db:"memory_allocated"`
// GpusAllocated is the gpus_allocated column of the row.
GpusAllocated int `db:"gpus_allocated"`
// JobsRunning is the jobs_running column of the row.
JobsRunning int `db:"jobs_running"`
// Hostname is the hostname of the node the state row belongs to.
Hostname string `db:"hostname"`
// Cluster is the cluster of the node the state row belongs to.
Cluster string `db:"cluster"`
// SubCluster is the subcluster of the node the state row belongs to.
SubCluster string `db:"subcluster"`
}
|
||||
|
||||
// FindNodeStatesBefore returns all node_state rows with time_stamp < cutoff,
|
||||
// joined with node info for denormalized archiving.
|
||||
func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) {
|
||||
rows, err := sq.Select(
|
||||
"node_state.id", "node_state.time_stamp", "node_state.node_state",
|
||||
"node_state.health_state", "node_state.health_metrics",
|
||||
"node_state.cpus_allocated", "node_state.memory_allocated",
|
||||
"node_state.gpus_allocated", "node_state.jobs_running",
|
||||
"node.hostname", "node.cluster", "node.subcluster",
|
||||
).
|
||||
From("node_state").
|
||||
Join("node ON node_state.node_id = node.id").
|
||||
Where(sq.Lt{"node_state.time_stamp": cutoff}).
|
||||
Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))").
|
||||
OrderBy("node.cluster ASC", "node.subcluster ASC", "node.hostname ASC", "node_state.time_stamp ASC").
|
||||
RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var result []NodeStateWithNode
|
||||
for rows.Next() {
|
||||
var ns NodeStateWithNode
|
||||
var healthMetrics sql.NullString
|
||||
if err := rows.Scan(&ns.ID, &ns.TimeStamp, &ns.NodeState,
|
||||
&ns.HealthState, &healthMetrics,
|
||||
&ns.CpusAllocated, &ns.MemoryAllocated,
|
||||
&ns.GpusAllocated, &ns.JobsRunning,
|
||||
&ns.Hostname, &ns.Cluster, &ns.SubCluster); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ns.HealthMetrics = healthMetrics.String
|
||||
result = append(result, ns)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// DeleteNodeStatesBefore removes node_state rows with time_stamp < cutoff,
// but always preserves the row with the latest timestamp per node_id.
//
// It returns the number of rows the DELETE statement affected. If executing
// the statement fails, it returns 0 and that error; an error from reading the
// affected-row count is passed through as well.
func (r *NodeRepository) DeleteNodeStatesBefore(cutoff int64) (int64, error) {
// The NOT IN subquery selects, for every node_id, the ids of the rows holding
// that node's maximum time_stamp, so those rows survive even when they are
// older than cutoff.
res, err := r.DB.Exec(
`DELETE FROM node_state WHERE time_stamp < ?
AND id NOT IN (
SELECT id FROM node_state ns2
WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id)
)`,
cutoff,
)
if err != nil {
return 0, err
}
return res.RowsAffected()
}
|
||||
|
||||
func (r *NodeRepository) DeleteNode(id int64) error {
|
||||
_, err := r.DB.Exec(`DELETE FROM node WHERE node.id = ?`, id)
|
||||
if err != nil {
|
||||
@@ -241,38 +343,17 @@ func (r *NodeRepository) QueryNodes(
|
||||
order *model.OrderByInput, // Currently unused!
|
||||
) ([]*schema.Node, error) {
|
||||
query, qerr := AccessCheck(ctx,
|
||||
sq.Select("hostname", "cluster", "subcluster", "node_state", "health_state", "MAX(time_stamp) as time").
|
||||
sq.Select("node.hostname", "node.cluster", "node.subcluster",
|
||||
"node_state.node_state", "node_state.health_state").
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id"))
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
if f.Cluster != nil {
|
||||
query = buildStringCondition("cluster", f.Cluster, query)
|
||||
}
|
||||
if f.Subcluster != nil {
|
||||
query = buildStringCondition("subcluster", f.Subcluster, query)
|
||||
}
|
||||
if f.Hostname != nil {
|
||||
query = buildStringCondition("hostname", f.Hostname, query)
|
||||
}
|
||||
if f.SchedulerState != nil {
|
||||
query = query.Where("node_state = ?", f.SchedulerState)
|
||||
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
|
||||
now := time.Now().Unix()
|
||||
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
|
||||
}
|
||||
if f.HealthState != nil {
|
||||
query = query.Where("health_state = ?", f.HealthState)
|
||||
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
|
||||
now := time.Now().Unix()
|
||||
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
|
||||
}
|
||||
}
|
||||
|
||||
query = query.GroupBy("node_id").OrderBy("hostname ASC")
|
||||
query = applyNodeFilters(query, filters)
|
||||
query = query.OrderBy("node.hostname ASC")
|
||||
|
||||
if page != nil && page.ItemsPerPage != -1 {
|
||||
limit := uint64(page.ItemsPerPage)
|
||||
@@ -290,11 +371,10 @@ func (r *NodeRepository) QueryNodes(
|
||||
nodes := make([]*schema.Node, 0)
|
||||
for rows.Next() {
|
||||
node := schema.Node{}
|
||||
var timestamp int
|
||||
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
|
||||
&node.NodeState, &node.HealthState, ×tamp); err != nil {
|
||||
&node.NodeState, &node.HealthState); err != nil {
|
||||
rows.Close()
|
||||
cclog.Warnf("Error while scanning rows (QueryNodes) at time '%d'", timestamp)
|
||||
cclog.Warn("Error while scanning rows (QueryNodes)")
|
||||
return nil, err
|
||||
}
|
||||
nodes = append(nodes, &node)
|
||||
@@ -386,73 +466,115 @@ func (r *NodeRepository) QueryNodesWithMeta(
|
||||
return nodes, nil
|
||||
}
|
||||
|
||||
// CountNodes returns the total matched nodes based on a node filter. It always operates
|
||||
// on the last state (largest timestamp).
|
||||
func (r *NodeRepository) CountNodes(
|
||||
// QueryNodesWithMeta returns a list of nodes based on a node filter. It always operates
|
||||
// on the last state (largest timestamp). It includes both (!) optional JSON column data
|
||||
func (r *NodeRepository) QueryNodesWithMeta(
|
||||
ctx context.Context,
|
||||
filters []*model.NodeFilter,
|
||||
) (int, error) {
|
||||
page *model.PageRequest,
|
||||
order *model.OrderByInput, // Currently unused!
|
||||
) ([]*schema.Node, error) {
|
||||
query, qerr := AccessCheck(ctx,
|
||||
sq.Select("time_stamp", "count(*) as countRes").
|
||||
sq.Select("node.hostname", "node.cluster", "node.subcluster",
|
||||
"node_state.node_state", "node_state.health_state",
|
||||
"node.meta_data", "node_state.health_metrics").
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id"))
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()))
|
||||
if qerr != nil {
|
||||
return 0, qerr
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
if f.Cluster != nil {
|
||||
query = buildStringCondition("cluster", f.Cluster, query)
|
||||
}
|
||||
if f.Subcluster != nil {
|
||||
query = buildStringCondition("subcluster", f.Subcluster, query)
|
||||
}
|
||||
if f.Hostname != nil {
|
||||
query = buildStringCondition("hostname", f.Hostname, query)
|
||||
}
|
||||
if f.SchedulerState != nil {
|
||||
query = query.Where("node_state = ?", f.SchedulerState)
|
||||
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
|
||||
now := time.Now().Unix()
|
||||
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
|
||||
}
|
||||
if f.HealthState != nil {
|
||||
query = query.Where("health_state = ?", f.HealthState)
|
||||
// Requires Additional time_stamp Filter: Else the last (past!) time_stamp with queried state will be returned
|
||||
now := time.Now().Unix()
|
||||
query = query.Where(sq.Gt{"time_stamp": (now - 60)})
|
||||
}
|
||||
}
|
||||
query = applyNodeFilters(query, filters)
|
||||
query = query.OrderBy("node.hostname ASC")
|
||||
|
||||
query = query.GroupBy("time_stamp").OrderBy("time_stamp DESC").Limit(1)
|
||||
if page != nil && page.ItemsPerPage != -1 {
|
||||
limit := uint64(page.ItemsPerPage)
|
||||
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
queryString, queryVars, _ := query.ToSql()
|
||||
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nodes := make([]*schema.Node, 0)
|
||||
for rows.Next() {
|
||||
node := schema.Node{}
|
||||
RawMetaData := make([]byte, 0)
|
||||
RawMetricHealth := make([]byte, 0)
|
||||
|
||||
if err := rows.Scan(&node.Hostname, &node.Cluster, &node.SubCluster,
|
||||
&node.NodeState, &node.HealthState, &RawMetaData, &RawMetricHealth); err != nil {
|
||||
rows.Close()
|
||||
cclog.Warn("Error while scanning rows (QueryNodes)")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(RawMetaData) == 0 {
|
||||
node.MetaData = nil
|
||||
} else {
|
||||
metaData := make(map[string]string)
|
||||
if err := json.Unmarshal(RawMetaData, &metaData); err != nil {
|
||||
cclog.Warn("Error while unmarshaling raw metadata json")
|
||||
return nil, err
|
||||
}
|
||||
node.MetaData = metaData
|
||||
}
|
||||
|
||||
if len(RawMetricHealth) == 0 {
|
||||
node.HealthData = nil
|
||||
} else {
|
||||
healthData := make(map[string][]string)
|
||||
if err := json.Unmarshal(RawMetricHealth, &healthData); err != nil {
|
||||
cclog.Warn("Error while unmarshaling raw healthdata json")
|
||||
return nil, err
|
||||
}
|
||||
node.HealthData = healthData
|
||||
}
|
||||
|
||||
nodes = append(nodes, &node)
|
||||
}
|
||||
|
||||
return nodes, nil
|
||||
}
|
||||
|
||||
// CountNodes returns the total matched nodes based on a node filter. It always operates
|
||||
// on the last state (largest timestamp) per node.
|
||||
func (r *NodeRepository) CountNodes(
|
||||
ctx context.Context,
|
||||
filters []*model.NodeFilter,
|
||||
) (int, error) {
|
||||
query, qerr := AccessCheck(ctx,
|
||||
sq.Select("COUNT(*)").
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()))
|
||||
if qerr != nil {
|
||||
return 0, qerr
|
||||
}
|
||||
|
||||
query = applyNodeFilters(query, filters)
|
||||
|
||||
var count int
|
||||
if err := query.RunWith(r.stmtCache).QueryRow().Scan(&count); err != nil {
|
||||
queryString, queryVars, _ := query.ToSql()
|
||||
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var totalNodes int
|
||||
for rows.Next() {
|
||||
var timestamp int
|
||||
if err := rows.Scan(×tamp, &totalNodes); err != nil {
|
||||
rows.Close()
|
||||
cclog.Warnf("Error while scanning rows (CountNodes) at time '%d'", timestamp)
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
return totalNodes, nil
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
|
||||
q := sq.Select("node.hostname", "node.cluster", "node.subcluster", "node_state.node_state",
|
||||
"node_state.health_state", "MAX(node_state.time_stamp) as time").
|
||||
q := sq.Select("node.hostname", "node.cluster", "node.subcluster",
|
||||
"node_state.node_state", "node_state.health_state").
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()).
|
||||
Where("node.cluster = ?", cluster).
|
||||
GroupBy("node_state.node_id").
|
||||
OrderBy("node.hostname ASC")
|
||||
|
||||
rows, err := q.RunWith(r.DB).Query()
|
||||
@@ -464,10 +586,9 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
node := &schema.Node{}
|
||||
var timestamp int
|
||||
if err := rows.Scan(&node.Hostname, &node.Cluster,
|
||||
&node.SubCluster, &node.NodeState, &node.HealthState, ×tamp); err != nil {
|
||||
cclog.Warnf("Error while scanning node list (ListNodes) at time '%d'", timestamp)
|
||||
&node.SubCluster, &node.NodeState, &node.HealthState); err != nil {
|
||||
cclog.Warn("Error while scanning node list (ListNodes)")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -478,11 +599,11 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
|
||||
}
|
||||
|
||||
func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
|
||||
q := sq.Select("node.hostname", "node_state.node_state", "MAX(node_state.time_stamp) as time").
|
||||
q := sq.Select("node.hostname", "node_state.node_state").
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()).
|
||||
Where("node.cluster = ?", cluster).
|
||||
GroupBy("node_state.node_id").
|
||||
OrderBy("node.hostname ASC")
|
||||
|
||||
rows, err := q.RunWith(r.DB).Query()
|
||||
@@ -495,9 +616,8 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var hostname, nodestate string
|
||||
var timestamp int
|
||||
if err := rows.Scan(&hostname, &nodestate, ×tamp); err != nil {
|
||||
cclog.Warnf("Error while scanning node list (MapNodes) at time '%d'", timestamp)
|
||||
if err := rows.Scan(&hostname, &nodestate); err != nil {
|
||||
cclog.Warn("Error while scanning node list (MapNodes)")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -509,37 +629,15 @@ func (r *NodeRepository) MapNodes(cluster string) (map[string]string, error) {
|
||||
|
||||
func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeFilter, column string) ([]*model.NodeStates, error) {
|
||||
query, qerr := AccessCheck(ctx,
|
||||
sq.Select(column, "COUNT(*) as count").
|
||||
sq.Select(column).
|
||||
From("node").
|
||||
Join("node_state ON node_state.node_id = node.id").
|
||||
Where(latestStateCondition()).
|
||||
GroupBy(column))
|
||||
Where(latestStateCondition()))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
query = query.Join("node_state ON node_state.node_id = node.id")
|
||||
|
||||
for _, f := range filters {
|
||||
if f.Hostname != nil {
|
||||
query = buildStringCondition("hostname", f.Hostname, query)
|
||||
}
|
||||
if f.Cluster != nil {
|
||||
query = buildStringCondition("cluster", f.Cluster, query)
|
||||
}
|
||||
if f.Subcluster != nil {
|
||||
query = buildStringCondition("subcluster", f.Subcluster, query)
|
||||
}
|
||||
if f.SchedulerState != nil {
|
||||
query = query.Where("node_state = ?", f.SchedulerState)
|
||||
}
|
||||
if f.HealthState != nil {
|
||||
query = query.Where("health_state = ?", f.HealthState)
|
||||
}
|
||||
}
|
||||
|
||||
// Add Group and Order
|
||||
query = query.GroupBy("hostname").OrderBy("hostname DESC")
|
||||
query = applyNodeFilters(query, filters)
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
@@ -549,6 +647,18 @@ func (r *NodeRepository) CountStates(ctx context.Context, filters []*model.NodeF
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
stateMap := map[string]int{}
|
||||
for rows.Next() {
|
||||
var state string
|
||||
if err := rows.Scan(&state); err != nil {
|
||||
rows.Close()
|
||||
cclog.Warn("Error while scanning rows (CountStates)")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
stateMap[state] += 1
|
||||
}
|
||||
|
||||
nodes := make([]*model.NodeStates, 0)
|
||||
for rows.Next() {
|
||||
var state string
|
||||
@@ -587,8 +697,8 @@ func (r *NodeRepository) CountStatesTimed(ctx context.Context, filters []*model.
|
||||
if f.Cluster != nil {
|
||||
query = buildStringCondition("cluster", f.Cluster, query)
|
||||
}
|
||||
if f.Subcluster != nil {
|
||||
query = buildStringCondition("subcluster", f.Subcluster, query)
|
||||
if f.SubCluster != nil {
|
||||
query = buildStringCondition("subcluster", f.SubCluster, query)
|
||||
}
|
||||
if f.SchedulerState != nil {
|
||||
query = query.Where("node_state = ?", f.SchedulerState)
|
||||
@@ -640,6 +750,132 @@ func (r *NodeRepository) CountStatesTimed(ctx context.Context, filters []*model.
|
||||
return timedStates, nil
|
||||
}
|
||||
|
||||
func (r *NodeRepository) GetNodesForList(
|
||||
ctx context.Context,
|
||||
cluster string,
|
||||
subCluster string,
|
||||
stateFilter string,
|
||||
nodeFilter string,
|
||||
page *model.PageRequest,
|
||||
) ([]string, map[string]string, int, bool, error) {
|
||||
// Init Return Vars
|
||||
nodes := make([]string, 0)
|
||||
stateMap := make(map[string]string)
|
||||
countNodes := 0
|
||||
hasNextPage := false
|
||||
|
||||
// Build Filters
|
||||
queryFilters := make([]*model.NodeFilter, 0)
|
||||
if cluster != "" {
|
||||
queryFilters = append(queryFilters, &model.NodeFilter{Cluster: &model.StringInput{Eq: &cluster}})
|
||||
}
|
||||
if subCluster != "" {
|
||||
queryFilters = append(queryFilters, &model.NodeFilter{SubCluster: &model.StringInput{Eq: &subCluster}})
|
||||
}
|
||||
if nodeFilter != "" && stateFilter != "notindb" {
|
||||
queryFilters = append(queryFilters, &model.NodeFilter{Hostname: &model.StringInput{Contains: &nodeFilter}})
|
||||
}
|
||||
if stateFilter != "all" && stateFilter != "notindb" {
|
||||
queryState := schema.SchedulerState(stateFilter)
|
||||
queryFilters = append(queryFilters, &model.NodeFilter{SchedulerState: &queryState})
|
||||
}
|
||||
// if healthFilter != "all" {
|
||||
// filters = append(filters, &model.NodeFilter{HealthState: &healthFilter})
|
||||
// }
|
||||
|
||||
// Special Case: Disable Paging for missing nodes filter, save IPP for later
|
||||
var backupItems int
|
||||
if stateFilter == "notindb" {
|
||||
backupItems = page.ItemsPerPage
|
||||
page.ItemsPerPage = -1
|
||||
}
|
||||
|
||||
// Query Nodes From DB
|
||||
rawNodes, serr := r.QueryNodes(ctx, queryFilters, page, nil) // Order not Used
|
||||
if serr != nil {
|
||||
cclog.Warn("error while loading node database data (Resolver.NodeMetricsList)")
|
||||
return nil, nil, 0, false, serr
|
||||
}
|
||||
|
||||
// Intermediate Node Result Info
|
||||
for _, node := range rawNodes {
|
||||
if node == nil {
|
||||
continue
|
||||
}
|
||||
nodes = append(nodes, node.Hostname)
|
||||
stateMap[node.Hostname] = string(node.NodeState)
|
||||
}
|
||||
|
||||
// Special Case: Find Nodes not in DB node table but in metricStore only
|
||||
if stateFilter == "notindb" {
|
||||
// Reapply Original Paging
|
||||
page.ItemsPerPage = backupItems
|
||||
// Get Nodes From Topology
|
||||
var topoNodes []string
|
||||
if subCluster != "" {
|
||||
scNodes := archive.NodeLists[cluster][subCluster]
|
||||
topoNodes = scNodes.PrintList()
|
||||
} else {
|
||||
subClusterNodeLists := archive.NodeLists[cluster]
|
||||
for _, nodeList := range subClusterNodeLists {
|
||||
topoNodes = append(topoNodes, nodeList.PrintList()...)
|
||||
}
|
||||
}
|
||||
// Compare to all nodes from cluster/subcluster in DB
|
||||
var missingNodes []string
|
||||
for _, scanNode := range topoNodes {
|
||||
if !slices.Contains(nodes, scanNode) {
|
||||
missingNodes = append(missingNodes, scanNode)
|
||||
}
|
||||
}
|
||||
// Filter nodes by name
|
||||
if nodeFilter != "" {
|
||||
filteredNodesByName := []string{}
|
||||
for _, missingNode := range missingNodes {
|
||||
if strings.Contains(missingNode, nodeFilter) {
|
||||
filteredNodesByName = append(filteredNodesByName, missingNode)
|
||||
}
|
||||
}
|
||||
missingNodes = filteredNodesByName
|
||||
}
|
||||
// Sort Missing Nodes Alphanumerically
|
||||
slices.Sort(missingNodes)
|
||||
// Total Missing
|
||||
countNodes = len(missingNodes)
|
||||
// Apply paging
|
||||
if countNodes > page.ItemsPerPage {
|
||||
start := (page.Page - 1) * page.ItemsPerPage
|
||||
end := start + page.ItemsPerPage
|
||||
if end > countNodes {
|
||||
end = countNodes
|
||||
hasNextPage = false
|
||||
} else {
|
||||
hasNextPage = true
|
||||
}
|
||||
nodes = missingNodes[start:end]
|
||||
} else {
|
||||
nodes = missingNodes
|
||||
}
|
||||
|
||||
} else {
|
||||
// DB Nodes: Count and derive hasNextPage from count
|
||||
var cerr error
|
||||
countNodes, cerr = r.CountNodes(ctx, queryFilters)
|
||||
if cerr != nil {
|
||||
cclog.Warn("error while counting node database data (Resolver.NodeMetricsList)")
|
||||
return nil, nil, 0, false, cerr
|
||||
}
|
||||
hasNextPage = page.Page*page.ItemsPerPage < countNodes
|
||||
}
|
||||
|
||||
// Fallback for non-init'd node table in DB; Ignores stateFilter
|
||||
if stateFilter == "all" && countNodes == 0 {
|
||||
nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page)
|
||||
}
|
||||
|
||||
return nodes, stateMap, countNodes, hasNextPage, nil
|
||||
}
|
||||
|
||||
func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||
user := GetUserFromContext(ctx)
|
||||
return AccessCheckWithUser(user, query)
|
||||
@@ -661,3 +897,51 @@ func AccessCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBu
|
||||
return qnil, fmt.Errorf("user has no or unknown roles")
|
||||
}
|
||||
}
|
||||
|
||||
func getNodesFromTopol(cluster string, subCluster string, nodeFilter string, page *model.PageRequest) ([]string, int, bool) {
|
||||
// 0) Init additional vars
|
||||
hasNextPage := false
|
||||
totalNodes := 0
|
||||
|
||||
// 1) Get list of all nodes
|
||||
var topolNodes []string
|
||||
if subCluster != "" {
|
||||
scNodes := archive.NodeLists[cluster][subCluster]
|
||||
topolNodes = scNodes.PrintList()
|
||||
} else {
|
||||
subClusterNodeLists := archive.NodeLists[cluster]
|
||||
for _, nodeList := range subClusterNodeLists {
|
||||
topolNodes = append(topolNodes, nodeList.PrintList()...)
|
||||
}
|
||||
}
|
||||
|
||||
// 2) Filter nodes
|
||||
if nodeFilter != "" {
|
||||
filteredNodes := []string{}
|
||||
for _, node := range topolNodes {
|
||||
if strings.Contains(node, nodeFilter) {
|
||||
filteredNodes = append(filteredNodes, node)
|
||||
}
|
||||
}
|
||||
topolNodes = filteredNodes
|
||||
}
|
||||
|
||||
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
|
||||
totalNodes = len(topolNodes)
|
||||
sort.Strings(topolNodes)
|
||||
|
||||
// 3) Apply paging
|
||||
if len(topolNodes) > page.ItemsPerPage {
|
||||
start := (page.Page - 1) * page.ItemsPerPage
|
||||
end := start + page.ItemsPerPage
|
||||
if end >= len(topolNodes) {
|
||||
end = len(topolNodes)
|
||||
hasNextPage = false
|
||||
} else {
|
||||
hasNextPage = true
|
||||
}
|
||||
topolNodes = topolNodes[start:end]
|
||||
}
|
||||
|
||||
return topolNodes, totalNodes, hasNextPage
|
||||
}
|
||||
|
||||
@@ -15,9 +15,9 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
@@ -26,7 +26,7 @@ func nodeTestSetup(t *testing.T) {
|
||||
"main": {
|
||||
"addr": "0.0.0.0:8080",
|
||||
"validate": false,
|
||||
"apiAllowedIPs": [
|
||||
"api-allowed-ips": [
|
||||
"*"
|
||||
]
|
||||
},
|
||||
@@ -38,18 +38,7 @@ func nodeTestSetup(t *testing.T) {
|
||||
"jwts": {
|
||||
"max-age": "2m"
|
||||
}
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "testcluster",
|
||||
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
|
||||
"filterRanges": {
|
||||
"numNodes": { "from": 1, "to": 64 },
|
||||
"duration": { "from": 0, "to": 86400 },
|
||||
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
|
||||
}
|
||||
}
|
||||
]
|
||||
}`
|
||||
const testclusterJSON = `{
|
||||
"name": "testcluster",
|
||||
@@ -130,7 +119,7 @@ func nodeTestSetup(t *testing.T) {
|
||||
}
|
||||
|
||||
dbfilepath := filepath.Join(tmpdir, "test.db")
|
||||
err := MigrateDB("sqlite3", dbfilepath)
|
||||
err := MigrateDB(dbfilepath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -144,19 +133,22 @@ func nodeTestSetup(t *testing.T) {
|
||||
|
||||
// Load and check main configuration
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||
config.Init(cfg, clustercfg)
|
||||
} else {
|
||||
cclog.Abort("Cluster configuration must be present")
|
||||
}
|
||||
config.Init(cfg)
|
||||
} else {
|
||||
cclog.Abort("Main configuration must be present")
|
||||
}
|
||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||
|
||||
Connect("sqlite3", dbfilepath)
|
||||
if err := ResetConnection(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
ResetConnection()
|
||||
})
|
||||
|
||||
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||
Connect(dbfilepath)
|
||||
|
||||
if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
@@ -164,8 +156,12 @@ func nodeTestSetup(t *testing.T) {
|
||||
func TestUpdateNodeState(t *testing.T) {
|
||||
nodeTestSetup(t)
|
||||
|
||||
repo := GetNodeRepository()
|
||||
now := time.Now().Unix()
|
||||
|
||||
nodeState := schema.NodeStateDB{
|
||||
TimeStamp: time.Now().Unix(), NodeState: "allocated",
|
||||
TimeStamp: now,
|
||||
NodeState: "allocated",
|
||||
CpusAllocated: 72,
|
||||
MemoryAllocated: 480,
|
||||
GpusAllocated: 0,
|
||||
@@ -173,18 +169,152 @@ func TestUpdateNodeState(t *testing.T) {
|
||||
JobsRunning: 1,
|
||||
}
|
||||
|
||||
repo := GetNodeRepository()
|
||||
err := repo.UpdateNodeState("host124", "testcluster", &nodeState)
|
||||
if err != nil {
|
||||
return
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
node, err := repo.GetNode("host124", "testcluster", false)
|
||||
if err != nil {
|
||||
return
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if node.NodeState != "allocated" {
|
||||
t.Errorf("wrong node state\ngot: %s \nwant: allocated ", node.NodeState)
|
||||
}
|
||||
|
||||
t.Run("FindBeforeEmpty", func(t *testing.T) {
|
||||
// Only the current-timestamp row exists, so nothing should be found before now
|
||||
rows, err := repo.FindNodeStatesBefore(now)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(rows) != 0 {
|
||||
t.Errorf("expected 0 rows, got %d", len(rows))
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("DeleteOldRows", func(t *testing.T) {
|
||||
// Insert 2 more old rows for host124
|
||||
for i, ts := range []int64{now - 7200, now - 3600} {
|
||||
ns := schema.NodeStateDB{
|
||||
TimeStamp: ts,
|
||||
NodeState: "allocated",
|
||||
HealthState: schema.MonitoringStateFull,
|
||||
CpusAllocated: 72,
|
||||
MemoryAllocated: 480,
|
||||
JobsRunning: i,
|
||||
}
|
||||
if err := repo.UpdateNodeState("host124", "testcluster", &ns); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Delete rows older than 30 minutes
|
||||
cutoff := now - 1800
|
||||
cnt, err := repo.DeleteNodeStatesBefore(cutoff)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Should delete the 2 old rows
|
||||
if cnt != 2 {
|
||||
t.Errorf("expected 2 deleted rows, got %d", cnt)
|
||||
}
|
||||
|
||||
// Latest row should still exist
|
||||
node, err := repo.GetNode("host124", "testcluster", false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if node.NodeState != "allocated" {
|
||||
t.Errorf("expected node state 'allocated', got %s", node.NodeState)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("PreservesLatestPerNode", func(t *testing.T) {
|
||||
// Insert a single old row for host125 — it's the latest per node so it must survive
|
||||
ns := schema.NodeStateDB{
|
||||
TimeStamp: now - 7200,
|
||||
NodeState: "idle",
|
||||
HealthState: schema.MonitoringStateFull,
|
||||
CpusAllocated: 0,
|
||||
MemoryAllocated: 0,
|
||||
JobsRunning: 0,
|
||||
}
|
||||
if err := repo.UpdateNodeState("host125", "testcluster", &ns); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Delete everything older than now — the latest per node should be preserved
|
||||
_, err := repo.DeleteNodeStatesBefore(now)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// The latest row for host125 must still exist
|
||||
node, err := repo.GetNode("host125", "testcluster", false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if node.NodeState != "idle" {
|
||||
t.Errorf("expected node state 'idle', got %s", node.NodeState)
|
||||
}
|
||||
|
||||
// Verify exactly 1 row remains for host125
|
||||
var countAfter int
|
||||
if err := repo.DB.QueryRow(
|
||||
"SELECT COUNT(*) FROM node_state WHERE node_id = (SELECT id FROM node WHERE hostname = 'host125')").
|
||||
Scan(&countAfter); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if countAfter != 1 {
|
||||
t.Errorf("expected 1 row remaining for host125, got %d", countAfter)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("FindBeforeWithJoin", func(t *testing.T) {
|
||||
// Insert old and current rows for host123
|
||||
for _, ts := range []int64{now - 7200, now} {
|
||||
ns := schema.NodeStateDB{
|
||||
TimeStamp: ts,
|
||||
NodeState: "allocated",
|
||||
HealthState: schema.MonitoringStateFull,
|
||||
CpusAllocated: 8,
|
||||
MemoryAllocated: 1024,
|
||||
GpusAllocated: 1,
|
||||
JobsRunning: 1,
|
||||
}
|
||||
if err := repo.UpdateNodeState("host123", "testcluster", &ns); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Find rows older than 30 minutes, excluding latest per node
|
||||
cutoff := now - 1800
|
||||
rows, err := repo.FindNodeStatesBefore(cutoff)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Should find the old host123 row
|
||||
found := false
|
||||
for _, row := range rows {
|
||||
if row.Hostname == "host123" && row.TimeStamp == now-7200 {
|
||||
found = true
|
||||
if row.Cluster != "testcluster" {
|
||||
t.Errorf("expected cluster 'testcluster', got %s", row.Cluster)
|
||||
}
|
||||
if row.SubCluster != "sc1" {
|
||||
t.Errorf("expected subcluster 'sc1', got %s", row.SubCluster)
|
||||
}
|
||||
if row.CpusAllocated != 8 {
|
||||
t.Errorf("expected cpus_allocated 8, got %d", row.CpusAllocated)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Errorf("expected to find old host123 row among %d results", len(rows))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -6,11 +6,13 @@ package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
@@ -46,7 +48,7 @@ func BenchmarkSelect1(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkDB_FindJobById(b *testing.B) {
|
||||
var jobId int64 = 1677322
|
||||
var jobID int64 = 1677322
|
||||
|
||||
b.Run("FindJobById", func(b *testing.B) {
|
||||
db := setup(b)
|
||||
@@ -55,7 +57,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_, err := db.FindById(getContext(b), jobId)
|
||||
_, err := db.FindByID(getContext(b), jobID)
|
||||
noErr(b, err)
|
||||
}
|
||||
})
|
||||
@@ -63,7 +65,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkDB_FindJob(b *testing.B) {
|
||||
var jobId int64 = 107266
|
||||
var jobID int64 = 107266
|
||||
var startTime int64 = 1657557241
|
||||
cluster := "fritz"
|
||||
|
||||
@@ -74,7 +76,7 @@ func BenchmarkDB_FindJob(b *testing.B) {
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_, err := db.Find(&jobId, &cluster, &startTime)
|
||||
_, err := db.Find(&jobID, &cluster, &startTime)
|
||||
noErr(b, err)
|
||||
}
|
||||
})
|
||||
@@ -148,10 +150,24 @@ func getContext(tb testing.TB) context.Context {
|
||||
func setup(tb testing.TB) *JobRepository {
|
||||
tb.Helper()
|
||||
cclog.Init("warn", true)
|
||||
dbfile := "testdata/job.db"
|
||||
err := MigrateDB("sqlite3", dbfile)
|
||||
|
||||
// Copy test DB to a temp file for test isolation
|
||||
srcData, err := os.ReadFile("testdata/job.db")
|
||||
noErr(tb, err)
|
||||
Connect("sqlite3", dbfile)
|
||||
dbfile := filepath.Join(tb.TempDir(), "job.db")
|
||||
err = os.WriteFile(dbfile, srcData, 0o644)
|
||||
noErr(tb, err)
|
||||
|
||||
// Reset singletons so Connect uses the new temp DB
|
||||
err = ResetConnection()
|
||||
noErr(tb, err)
|
||||
tb.Cleanup(func() {
|
||||
ResetConnection()
|
||||
})
|
||||
|
||||
err = MigrateDB(dbfile)
|
||||
noErr(tb, err)
|
||||
Connect(dbfile)
|
||||
return GetJobRepository()
|
||||
}
|
||||
|
||||
|
||||
@@ -2,6 +2,44 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file contains job statistics and histogram generation functionality for the JobRepository.
|
||||
//
|
||||
// # Job Statistics
|
||||
//
|
||||
// The statistics methods provide aggregated metrics about jobs including total jobs, users,
|
||||
// walltime, and resource usage (nodes, cores, accelerators). Statistics can be computed:
|
||||
// - Overall (JobsStats): Single aggregate across all matching jobs
|
||||
// - Grouped (JobsStatsGrouped): Aggregated by user, project, cluster, or subcluster
|
||||
// - Counts (JobCountGrouped, AddJobCount): Simple job counts with optional filtering
|
||||
//
|
||||
// All statistics methods support filtering via JobFilter and respect security contexts.
|
||||
//
|
||||
// # Histograms
|
||||
//
|
||||
// Histogram methods generate distribution data for visualization:
|
||||
// - Duration, nodes, cores, accelerators (AddHistograms)
|
||||
// - Job metrics like CPU load, memory usage (AddMetricHistograms)
|
||||
//
|
||||
// Histograms use intelligent binning:
|
||||
// - Duration: Variable bin sizes (1m, 10m, 1h, 6h, 12h, 24h) with zero-padding
|
||||
// - Resources: Natural value-based bins
|
||||
// - Metrics: Normalized to peak values with configurable bin counts
|
||||
//
|
||||
// # Running vs. Completed Jobs
|
||||
//
|
||||
// Statistics handle running jobs specially:
|
||||
// - Duration calculated as (now - start_time) for running jobs
|
||||
// - Metric histograms for running jobs load data from metric backend instead of footprint
|
||||
// - Job state filtering distinguishes running/completed jobs
|
||||
//
|
||||
// # Performance Considerations
|
||||
//
|
||||
// - All queries use prepared statements via stmtCache
|
||||
// - Complex aggregations use SQL for efficiency
|
||||
// - Histogram pre-initialization ensures consistent bin ranges
|
||||
// - Metric histogram queries limited to 5000 jobs for running job analysis
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
@@ -12,14 +50,16 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// GraphQL validation should make sure that no unknown values can be specified.
|
||||
// groupBy2column maps GraphQL Aggregate enum values to their corresponding database column names.
|
||||
// Used by JobsStatsGrouped and JobCountGrouped to translate user-facing grouping dimensions
|
||||
// into SQL GROUP BY clauses. GraphQL validation ensures only valid enum values are accepted.
|
||||
var groupBy2column = map[model.Aggregate]string{
|
||||
model.AggregateUser: "job.hpc_user",
|
||||
model.AggregateProject: "job.project",
|
||||
@@ -27,6 +67,9 @@ var groupBy2column = map[model.Aggregate]string{
|
||||
model.AggregateSubcluster: "job.subcluster",
|
||||
}
|
||||
|
||||
// sortBy2column maps GraphQL SortByAggregate enum values to their corresponding computed column names.
|
||||
// Used by JobsStatsGrouped to translate sort preferences into SQL ORDER BY clauses.
|
||||
// Column names match the AS aliases used in buildStatsQuery.
|
||||
var sortBy2column = map[model.SortByAggregate]string{
|
||||
model.SortByAggregateTotaljobs: "totalJobs",
|
||||
model.SortByAggregateTotalusers: "totalUsers",
|
||||
@@ -39,6 +82,21 @@ var sortBy2column = map[model.SortByAggregate]string{
|
||||
model.SortByAggregateTotalacchours: "totalAccHours",
|
||||
}
|
||||
|
||||
// buildCountQuery constructs a SQL query to count jobs with optional grouping and filtering.
|
||||
//
|
||||
// Parameters:
|
||||
// - filter: Job filters to apply (cluster, user, time range, etc.)
|
||||
// - kind: Special filter - "running" for running jobs only, "short" for jobs under threshold
|
||||
// - col: Column name to GROUP BY; empty string for total count without grouping
|
||||
//
|
||||
// Returns a SelectBuilder that produces either:
|
||||
// - Single count: COUNT(job.id) when col is empty
|
||||
// - Grouped counts: col, COUNT(job.id) when col is specified
|
||||
//
|
||||
// The kind parameter enables counting specific job categories:
|
||||
// - "running": Only jobs with job_state = 'running'
|
||||
// - "short": Only jobs with duration < ShortRunningJobsDuration config value
|
||||
// - empty: All jobs matching filters
|
||||
func (r *JobRepository) buildCountQuery(
|
||||
filter []*model.JobFilter,
|
||||
kind string,
|
||||
@@ -47,10 +105,8 @@ func (r *JobRepository) buildCountQuery(
|
||||
var query sq.SelectBuilder
|
||||
|
||||
if col != "" {
|
||||
// Scan columns: id, cnt
|
||||
query = sq.Select(col, "COUNT(job.id)").From("job").GroupBy(col)
|
||||
} else {
|
||||
// Scan columns: cnt
|
||||
query = sq.Select("COUNT(job.id)").From("job")
|
||||
}
|
||||
|
||||
@@ -68,42 +124,58 @@ func (r *JobRepository) buildCountQuery(
|
||||
return query
|
||||
}
|
||||
|
||||
// buildStatsQuery constructs a SQL query to compute comprehensive job statistics with optional grouping.
|
||||
//
|
||||
// Parameters:
|
||||
// - filter: Job filters to apply (cluster, user, time range, etc.)
|
||||
// - col: Column name to GROUP BY; empty string for overall statistics without grouping
|
||||
//
|
||||
// Returns a SelectBuilder that produces comprehensive statistics:
|
||||
// - totalJobs: Count of jobs
|
||||
// - totalUsers: Count of distinct users (always 0 when grouping by user)
|
||||
// - totalWalltime: Sum of job durations in hours
|
||||
// - totalNodes: Sum of nodes used across all jobs
|
||||
// - totalNodeHours: Sum of (duration × num_nodes) in hours
|
||||
// - totalCores: Sum of hardware threads used across all jobs
|
||||
// - totalCoreHours: Sum of (duration × num_hwthreads) in hours
|
||||
// - totalAccs: Sum of accelerators used across all jobs
|
||||
// - totalAccHours: Sum of (duration × num_acc) in hours
|
||||
//
|
||||
// Special handling:
|
||||
// - Running jobs: Duration calculated as (now - start_time) instead of stored duration
|
||||
// - Grouped queries: Also select grouping column and user's display name from hpc_user table
|
||||
// - All time values converted from seconds to hours (÷ 3600) and rounded
|
||||
func (r *JobRepository) buildStatsQuery(
|
||||
filter []*model.JobFilter,
|
||||
col string,
|
||||
) sq.SelectBuilder {
|
||||
var query sq.SelectBuilder
|
||||
castType := r.getCastType()
|
||||
|
||||
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
||||
|
||||
if col != "" {
|
||||
// Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select(
|
||||
col,
|
||||
"name",
|
||||
"COUNT(job.id) as totalJobs",
|
||||
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s) as totalCores`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int) as totalWalltime`, time.Now().Unix()),
|
||||
`CAST(SUM(job.num_nodes) as int) as totalNodes`,
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int) as totalNodeHours`, time.Now().Unix()),
|
||||
`CAST(SUM(job.num_hwthreads) as int) as totalCores`,
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int) as totalCoreHours`, time.Now().Unix()),
|
||||
`CAST(SUM(job.num_acc) as int) as totalAccs`,
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int) as totalAccHours`, time.Now().Unix()),
|
||||
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
|
||||
} else {
|
||||
// Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select(
|
||||
"COUNT(job.id) as totalJobs",
|
||||
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s)`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s)`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int)`, time.Now().Unix()),
|
||||
`CAST(SUM(job.num_nodes) as int)`,
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int)`, time.Now().Unix()),
|
||||
`CAST(SUM(job.num_hwthreads) as int)`,
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int)`, time.Now().Unix()),
|
||||
`CAST(SUM(job.num_acc) as int)`,
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int)`, time.Now().Unix()),
|
||||
).From("job")
|
||||
}
|
||||
|
||||
@@ -114,21 +186,25 @@ func (r *JobRepository) buildStatsQuery(
|
||||
return query
|
||||
}
|
||||
|
||||
func (r *JobRepository) getCastType() string {
|
||||
var castType string
|
||||
|
||||
switch r.driver {
|
||||
case "sqlite3":
|
||||
castType = "int"
|
||||
case "mysql":
|
||||
castType = "unsigned"
|
||||
default:
|
||||
castType = ""
|
||||
}
|
||||
|
||||
return castType
|
||||
}
|
||||
|
||||
// JobsStatsGrouped computes comprehensive job statistics grouped by a dimension (user, project, cluster, or subcluster).
|
||||
//
|
||||
// This is the primary method for generating aggregated statistics views in the UI, providing
|
||||
// metrics like total jobs, walltime, and resource usage broken down by the specified grouping.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for security checks and cancellation
|
||||
// - filter: Filters to apply (time range, cluster, job state, etc.)
|
||||
// - page: Optional pagination (ItemsPerPage: -1 disables pagination)
|
||||
// - sortBy: Optional sort column (totalJobs, totalWalltime, totalCoreHours, etc.)
|
||||
// - groupBy: Required grouping dimension (User, Project, Cluster, or SubCluster)
|
||||
//
|
||||
// Returns a slice of JobsStatistics, one per group, with:
|
||||
// - ID: The group identifier (username, project name, cluster name, etc.)
|
||||
// - Name: Display name (for users, from hpc_user.name; empty for other groups)
|
||||
// - Statistics: totalJobs, totalUsers, totalWalltime, resource usage metrics
|
||||
//
|
||||
// Security: Respects user roles via SecurityCheck - users see only their own data unless admin/support.
|
||||
// Performance: Results are sorted in SQL and pagination applied before scanning rows.
|
||||
func (r *JobRepository) JobsStatsGrouped(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
@@ -253,6 +329,21 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// JobsStats computes overall job statistics across all matching jobs without grouping.
|
||||
//
|
||||
// This method provides a single aggregate view of job metrics, useful for dashboard
|
||||
// summaries and overall system utilization reports.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for security checks and cancellation
|
||||
// - filter: Filters to apply (time range, cluster, job state, etc.)
|
||||
//
|
||||
// Returns a single-element slice containing aggregate statistics:
|
||||
// - totalJobs, totalUsers, totalWalltime
|
||||
// - totalNodeHours, totalCoreHours, totalAccHours
|
||||
//
|
||||
// Unlike JobsStatsGrouped, this returns overall totals without breaking down by dimension.
|
||||
// Security checks are applied via SecurityCheck to respect user access levels.
|
||||
func (r *JobRepository) JobsStats(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
@@ -300,6 +391,15 @@ func (r *JobRepository) JobsStats(
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// LoadJobStat retrieves a specific statistic for a metric from a job's statistics.
|
||||
// Returns 0.0 if the metric is not found or statType is invalid.
|
||||
//
|
||||
// Parameters:
|
||||
// - job: Job struct with populated Statistics field
|
||||
// - metric: Name of the metric to query (e.g., "cpu_load", "mem_used")
|
||||
// - statType: Type of statistic: "avg", "min", or "max"
|
||||
//
|
||||
// Returns the requested statistic value or 0.0 if not found.
|
||||
func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
switch statType {
|
||||
@@ -317,6 +417,17 @@ func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
|
||||
return 0.0
|
||||
}
|
||||
|
||||
// JobCountGrouped counts jobs grouped by a dimension without computing detailed statistics.
|
||||
//
|
||||
// This is a lightweight alternative to JobsStatsGrouped when only job counts are needed,
|
||||
// avoiding the overhead of calculating walltime and resource usage metrics.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for security checks
|
||||
// - filter: Filters to apply
|
||||
// - groupBy: Grouping dimension (User, Project, Cluster, or SubCluster)
|
||||
//
|
||||
// Returns JobsStatistics with only ID and TotalJobs populated for each group.
|
||||
func (r *JobRepository) JobCountGrouped(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
@@ -362,6 +473,20 @@ func (r *JobRepository) JobCountGrouped(
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// AddJobCountGrouped augments existing statistics with additional job counts by category.
|
||||
//
|
||||
// This method enriches JobsStatistics returned by JobsStatsGrouped or JobCountGrouped
|
||||
// with counts of running or short-running (based on ShortRunningJobsDuration) jobs, matched by group ID.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for security checks
|
||||
// - filter: Filters to apply
|
||||
// - groupBy: Grouping dimension (must match the dimension used for stats parameter)
|
||||
// - stats: Existing statistics to augment (modified in-place by ID matching)
|
||||
// - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
|
||||
//
|
||||
// Returns the same stats slice with RunningJobs or ShortJobs fields populated per group.
|
||||
// Groups without matching jobs will have 0 for the added field.
|
||||
func (r *JobRepository) AddJobCountGrouped(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
@@ -416,6 +541,18 @@ func (r *JobRepository) AddJobCountGrouped(
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// AddJobCount augments existing overall statistics with additional job counts by category.
|
||||
//
|
||||
// Similar to AddJobCountGrouped but for ungrouped statistics. Applies the same count
|
||||
// to all statistics entries (typically just one).
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for security checks
|
||||
// - filter: Filters to apply
|
||||
// - stats: Existing statistics to augment (modified in-place)
|
||||
// - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
|
||||
//
|
||||
// Returns the same stats slice with RunningJobs or ShortJobs fields set to the total count.
|
||||
func (r *JobRepository) AddJobCount(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
@@ -451,6 +588,26 @@ func (r *JobRepository) AddJobCount(
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// AddHistograms augments statistics with distribution histograms for job properties.
|
||||
//
|
||||
// Generates histogram data for visualization of job duration, node count, core count,
|
||||
// and accelerator count distributions. Duration histogram uses intelligent binning based
|
||||
// on the requested resolution.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for security checks
|
||||
// - filter: Filters to apply to jobs included in histograms
|
||||
// - stat: Statistics struct to augment (modified in-place)
|
||||
// - durationBins: Bin size - "1m", "10m", "1h", "6h", "12h", or "24h" (default)
|
||||
//
|
||||
// Populates these fields in stat:
|
||||
// - HistDuration: Job duration distribution (zero-padded bins)
|
||||
// - HistNumNodes: Node count distribution
|
||||
// - HistNumCores: Core (hwthread) count distribution
|
||||
// - HistNumAccs: Accelerator count distribution
|
||||
//
|
||||
// Duration bins are pre-initialized with zeros to ensure consistent ranges for visualization.
|
||||
// Bin size determines both the width and maximum duration displayed (e.g., "1h" = 48 bins × 1h = 48h max).
|
||||
func (r *JobRepository) AddHistograms(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
@@ -461,20 +618,20 @@ func (r *JobRepository) AddHistograms(
|
||||
|
||||
var targetBinCount int
|
||||
var targetBinSize int
|
||||
switch {
|
||||
case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
|
||||
switch *durationBins {
|
||||
case "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
|
||||
targetBinCount = 60
|
||||
targetBinSize = 60
|
||||
case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
|
||||
case "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
|
||||
targetBinCount = 72
|
||||
targetBinSize = 600
|
||||
case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
|
||||
case "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
|
||||
targetBinCount = 48
|
||||
targetBinSize = 3600
|
||||
case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
|
||||
case "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
|
||||
targetBinCount = 12
|
||||
targetBinSize = 21600
|
||||
case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
|
||||
case "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
|
||||
targetBinCount = 14
|
||||
targetBinSize = 43200
|
||||
default: // 24h
|
||||
@@ -482,10 +639,9 @@ func (r *JobRepository) AddHistograms(
|
||||
targetBinSize = 3600
|
||||
}
|
||||
|
||||
castType := r.getCastType()
|
||||
var err error
|
||||
// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
|
||||
value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType)
|
||||
value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as int) as value`, time.Now().Unix(), targetBinSize)
|
||||
stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while loading job statistics histogram: job duration")
|
||||
@@ -514,7 +670,30 @@ func (r *JobRepository) AddHistograms(
|
||||
return stat, nil
|
||||
}
|
||||
|
||||
// Requires thresholds for metric from config for cluster? Of all clusters and use largest? split to 10 + 1 for artifacts?
|
||||
// AddMetricHistograms augments statistics with distribution histograms for job metrics.
|
||||
//
|
||||
// Generates histogram data for metrics like CPU load, memory usage, etc. Handles running
|
||||
// and completed jobs differently: running jobs load data from metric backend, completed jobs
|
||||
// use footprint data from database.
|
||||
//
|
||||
// Parameters:
|
||||
// - ctx: Context for security checks
|
||||
// - filter: Filters to apply (MUST contain State filter for running jobs)
|
||||
// - metrics: List of metric names to histogram (e.g., ["cpu_load", "mem_used"])
|
||||
// - stat: Statistics struct to augment (modified in-place)
|
||||
// - targetBinCount: Number of histogram bins (default: 10)
|
||||
//
|
||||
// Populates HistMetrics field in stat with MetricHistoPoints for each metric.
|
||||
//
|
||||
// Binning algorithm:
|
||||
// - Values normalized to metric's peak value from cluster configuration
|
||||
// - Bins evenly distributed from 0 to peak
|
||||
// - Pre-initialized with zeros for consistent visualization
|
||||
//
|
||||
// Limitations:
|
||||
// - Running jobs: Limited to 5000 jobs for performance
|
||||
// - Requires valid cluster configuration with metric peak values
|
||||
// - Uses footprint statistic (avg/max/min) configured per metric
|
||||
func (r *JobRepository) AddMetricHistograms(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
@@ -549,7 +728,16 @@ func (r *JobRepository) AddMetricHistograms(
|
||||
return stat, nil
|
||||
}
|
||||
|
||||
// `value` must be the column grouped by, but renamed to "value"
|
||||
// jobsStatisticsHistogram generates a simple histogram by grouping on a column value.
|
||||
//
|
||||
// Used for histograms where the column value directly represents the bin (e.g., node count, core count).
|
||||
// Unlike duration/metric histograms, this doesn't pre-initialize bins with zeros.
|
||||
//
|
||||
// Parameters:
|
||||
// - value: SQL expression that produces the histogram value, aliased as "value"
|
||||
// - filters: Job filters to apply
|
||||
//
|
||||
// Returns histogram points with Value (from column) and Count (number of jobs).
|
||||
func (r *JobRepository) jobsStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
value string,
|
||||
@@ -594,6 +782,26 @@ func (r *JobRepository) jobsStatisticsHistogram(
|
||||
return points, nil
|
||||
}
|
||||
|
||||
// jobsDurationStatisticsHistogram generates a duration histogram with pre-initialized bins.
|
||||
//
|
||||
// Bins are zero-padded to provide consistent ranges for visualization, unlike simple
|
||||
// histograms which only return bins with data. The value parameter should compute
|
||||
// the bin number from job duration.
|
||||
//
|
||||
// Parameters:
|
||||
// - value: SQL expression computing bin number from duration, aliased as "value"
|
||||
// - filters: Job filters to apply
|
||||
// - binSizeSeconds: Width of each bin in seconds
|
||||
// - targetBinCount: Number of bins to pre-initialize
|
||||
//
|
||||
// Returns histogram points with Value (bin_number × binSizeSeconds) and Count.
|
||||
// All bins from 1 to targetBinCount are returned, with Count=0 for empty bins.
|
||||
//
|
||||
// Algorithm:
|
||||
// 1. Pre-initialize targetBinCount bins with zero counts
|
||||
// 2. Query database for actual counts per bin
|
||||
// 3. Match query results to pre-initialized bins by value
|
||||
// 4. Bins without matches remain at zero
|
||||
func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
value string,
|
||||
@@ -609,7 +817,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
// Setup Array
|
||||
// Each bin represents a duration range: bin N = [(N-1)*binSizeSeconds, N*binSizeSeconds), labelled by its upper edge N*binSizeSeconds
|
||||
// Example: binSizeSeconds=3600 (1 hour), bin 1 = 0-1h, bin 2 = 1-2h, etc.
|
||||
points := make([]*model.HistoPoint, 0)
|
||||
for i := 1; i <= *targetBinCount; i++ {
|
||||
point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
|
||||
@@ -627,7 +836,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
// Fill Array at matching $Value
|
||||
// Match query results to pre-initialized bins.
|
||||
// point.Value from query is the bin number; multiply by binSizeSeconds to match bin.Value.
|
||||
for rows.Next() {
|
||||
point := model.HistoPoint{}
|
||||
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
||||
@@ -637,9 +847,6 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||
|
||||
for _, e := range points {
|
||||
if e.Value == (point.Value * binSizeSeconds) {
|
||||
// Note:
|
||||
// Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
|
||||
// causes the frontend to loop into the highest targetBinCount, because the zoom condition is instantly fulfilled (cause unknown)
|
||||
e.Count = point.Count
|
||||
break
|
||||
}
|
||||
@@ -654,18 +861,43 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||
return points, nil
|
||||
}
|
||||
|
||||
// jobsMetricStatisticsHistogram generates a metric histogram using footprint data from completed jobs.
|
||||
//
|
||||
// Values are normalized to the metric's peak value and distributed into bins. The algorithm
|
||||
// is based on SQL histogram generation techniques, extracting metric values from JSON footprint
|
||||
// and computing bin assignments in SQL.
|
||||
//
|
||||
// Parameters:
|
||||
// - metric: Metric name (e.g., "cpu_load", "mem_used")
|
||||
// - filters: Job filters to apply
|
||||
// - bins: Number of bins to generate
|
||||
//
|
||||
// Returns MetricHistoPoints with metric name, unit, footprint stat type, and binned data.
|
||||
//
|
||||
// Algorithm:
|
||||
// 1. Determine peak value from cluster configuration (filtered cluster or max across all)
|
||||
// 2. Generate SQL that extracts footprint value, normalizes to [0,1], multiplies by bin count
|
||||
// 3. Pre-initialize bins with min/max ranges based on peak value
|
||||
// 4. Query database for counts per bin
|
||||
// 5. Match results to pre-initialized bins
|
||||
//
|
||||
// Special handling: Values exactly equal to peak are forced into the last bin by multiplying
|
||||
// peak by 0.999999999 to avoid creating an extra bin.
|
||||
func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
metric string,
|
||||
filters []*model.JobFilter,
|
||||
bins *int,
|
||||
) (*model.MetricHistoPoints, error) {
|
||||
// Get specific Peak or largest Peak
|
||||
// Peak value defines the upper bound for binning: values are distributed across
|
||||
// bins from 0 to peak. First try to get peak from filtered cluster, otherwise
|
||||
// scan all clusters to find the maximum peak value.
|
||||
var metricConfig *schema.MetricConfig
|
||||
var peak float64
|
||||
var unit string
|
||||
var footprintStat string
|
||||
|
||||
// Try to get metric config from filtered cluster
|
||||
for _, f := range filters {
|
||||
if f.Cluster != nil {
|
||||
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
||||
@@ -676,6 +908,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
}
|
||||
}
|
||||
|
||||
// If no cluster filter or peak not found, find largest peak across all clusters
|
||||
// This ensures histogram can accommodate all possible values
|
||||
if peak == 0.0 {
|
||||
for _, c := range archive.Clusters {
|
||||
for _, m := range c.MetricConfig {
|
||||
@@ -694,11 +928,14 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
}
|
||||
}
|
||||
|
||||
// cclog.Debugf("Metric %s, Peak %f, Unit %s", metric, peak, unit)
|
||||
// Make bins, see https://jereze.com/code/sql-histogram/ (Modified here)
|
||||
// Construct SQL histogram bins using normalized values.
|
||||
// Algorithm based on: https://jereze.com/code/sql-histogram/ (modified)
|
||||
start := time.Now()
|
||||
|
||||
// Find Jobs' Value Bin Number: Divide Value by Peak, Multiply by RequestedBins, then CAST to INT: Gets Bin-Number of Job
|
||||
// Bin calculation formula:
|
||||
// bin_number = CAST( (value / peak) * num_bins AS INTEGER ) + 1
|
||||
// Special case: value == peak would create bin N+1, so we test for equality
|
||||
// and multiply peak by 0.999999999 to force it into bin N.
|
||||
binQuery := fmt.Sprintf(`CAST(
|
||||
((case when json_extract(footprint, "$.%s") = %f then %f*0.999999999 else json_extract(footprint, "$.%s") end) / %f)
|
||||
* %v as INTEGER )`,
|
||||
@@ -707,24 +944,19 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
mainQuery := sq.Select(
|
||||
fmt.Sprintf(`%s + 1 as bin`, binQuery),
|
||||
`count(*) as count`,
|
||||
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
|
||||
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
|
||||
).From("job").Where(
|
||||
"JSON_VALID(footprint)",
|
||||
).Where(fmt.Sprintf(`json_extract(footprint, "$.%s") is not null and json_extract(footprint, "$.%s") <= %f`, (metric + "_" + footprintStat), (metric + "_" + footprintStat), peak))
|
||||
|
||||
// Only accessible Jobs...
|
||||
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
// Filters...
|
||||
for _, f := range filters {
|
||||
mainQuery = BuildWhereClause(f, mainQuery)
|
||||
}
|
||||
|
||||
// Finalize query with Grouping and Ordering
|
||||
mainQuery = mainQuery.GroupBy("bin").OrderBy("bin")
|
||||
|
||||
rows, err := mainQuery.RunWith(r.DB).Query()
|
||||
@@ -734,7 +966,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
// Setup Return Array With Bin-Numbers for Match and Min/Max based on Peak
|
||||
// Pre-initialize bins with calculated min/max ranges.
|
||||
// Example: peak=1000, bins=10 -> bin 1=[0,100), bin 2=[100,200), ..., bin 10=[900,1000]
|
||||
points := make([]*model.MetricHistoPoint, 0)
|
||||
binStep := int(peak) / *bins
|
||||
for i := 1; i <= *bins; i++ {
|
||||
@@ -744,26 +977,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
points = append(points, &epoint)
|
||||
}
|
||||
|
||||
for rows.Next() { // Fill Count if Bin-No. Matches (Not every Bin exists in DB!)
|
||||
// Match query results to pre-initialized bins.
|
||||
for rows.Next() {
|
||||
rpoint := model.MetricHistoPoint{}
|
||||
if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil { // Required for Debug: &rpoint.Min, &rpoint.Max
|
||||
if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil {
|
||||
cclog.Warnf("Error while scanning rows for %s", metric)
|
||||
return nil, err // FIXME: Totally bricks cc-backend if returned and if all metrics requested?
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, e := range points {
|
||||
if e.Bin != nil && rpoint.Bin != nil {
|
||||
if *e.Bin == *rpoint.Bin {
|
||||
e.Count = rpoint.Count
|
||||
// Only Required For Debug: Check DB returned Min/Max against Backend Init above
|
||||
// if rpoint.Min != nil {
|
||||
// cclog.Warnf(">>>> Bin %d Min Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Min, *e.Min)
|
||||
// }
|
||||
// if rpoint.Max != nil {
|
||||
// cclog.Warnf(">>>> Bin %d Max Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Max, *e.Max)
|
||||
// }
|
||||
break
|
||||
}
|
||||
if e.Bin != nil && rpoint.Bin != nil && *e.Bin == *rpoint.Bin {
|
||||
e.Count = rpoint.Count
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -778,6 +1003,28 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
// runningJobsMetricStatisticsHistogram generates metric histograms for running jobs using live data.
|
||||
//
|
||||
// Unlike completed jobs which use footprint data from the database, running jobs require
|
||||
// fetching current metric averages from the metric backend (via metricdispatch).
|
||||
//
|
||||
// Parameters:
|
||||
// - metrics: List of metric names
|
||||
// - filters: Job filters (should filter to running jobs only)
|
||||
// - bins: Number of histogram bins
|
||||
//
|
||||
// Returns slice of MetricHistoPoints, one per metric.
|
||||
//
|
||||
// Limitations:
|
||||
// - Maximum 5000 jobs (returns nil if more jobs match)
|
||||
// - Requires metric backend availability
|
||||
// - Bins based on metric peak values from cluster configuration
|
||||
//
|
||||
// Algorithm:
|
||||
// 1. Query first 5001 jobs to check count limit
|
||||
// 2. Load metric averages for all jobs via metricdispatch
|
||||
// 3. For each metric, create bins based on peak value
|
||||
// 4. Iterate averages and count jobs per bin
|
||||
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
metrics []string,
|
||||
@@ -785,13 +1032,13 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||
bins *int,
|
||||
) []*model.MetricHistoPoints {
|
||||
// Get Jobs
|
||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 5000 + 1}, nil)
|
||||
if err != nil {
|
||||
cclog.Errorf("Error while querying jobs for footprint: %s", err)
|
||||
return nil
|
||||
}
|
||||
if len(jobs) > 500 {
|
||||
cclog.Errorf("too many jobs matched (max: %d)", 500)
|
||||
if len(jobs) > 5000 {
|
||||
cclog.Errorf("too many jobs matched (max: %d)", 5000)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -806,7 +1053,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||
continue
|
||||
}
|
||||
|
||||
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
if err := metricdispatch.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
cclog.Errorf("Error while loading averages for histogram: %s", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -25,11 +25,14 @@ func TestBuildJobStatsQuery(t *testing.T) {
|
||||
func TestJobStats(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
filter := &model.JobFilter{}
|
||||
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{filter})
|
||||
var expectedCount int
|
||||
err := r.DB.QueryRow(`SELECT COUNT(*) FROM job`).Scan(&expectedCount)
|
||||
noErr(t, err)
|
||||
|
||||
if stats[0].TotalJobs != 544 {
|
||||
t.Fatalf("Want 544, Got %d", stats[0].TotalJobs)
|
||||
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{})
|
||||
noErr(t, err)
|
||||
|
||||
if stats[0].TotalJobs != expectedCount {
|
||||
t.Fatalf("Want %d, Got %d", expectedCount, stats[0].TotalJobs)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,35 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package repository provides data access and persistence layer for ClusterCockpit.
|
||||
//
|
||||
// This file implements tag management functionality for job categorization and classification.
|
||||
// Tags support both manual assignment (via REST/GraphQL APIs) and automatic detection
|
||||
// (via tagger plugins). The implementation includes role-based access control through
|
||||
// tag scopes and maintains bidirectional consistency between the SQL database and
|
||||
// the file-based job archive.
|
||||
//
|
||||
// Database Schema:
|
||||
//
|
||||
// CREATE TABLE tag (
|
||||
// id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
// tag_type VARCHAR(255) NOT NULL,
|
||||
// tag_name VARCHAR(255) NOT NULL,
|
||||
// tag_scope VARCHAR(255) NOT NULL DEFAULT "global",
|
||||
// CONSTRAINT tag_unique UNIQUE (tag_type, tag_name, tag_scope)
|
||||
// );
|
||||
//
|
||||
// CREATE TABLE jobtag (
|
||||
// job_id INTEGER,
|
||||
// tag_id INTEGER,
|
||||
// PRIMARY KEY (job_id, tag_id),
|
||||
// FOREIGN KEY (job_id) REFERENCES job(id) ON DELETE CASCADE,
|
||||
// FOREIGN KEY (tag_id) REFERENCES tag(id) ON DELETE CASCADE
|
||||
// );
|
||||
//
|
||||
// The jobtag junction table enables many-to-many relationships between jobs and tags.
|
||||
// CASCADE deletion ensures referential integrity when jobs or tags are removed.
|
||||
package repository
|
||||
|
||||
import (
|
||||
@@ -10,15 +39,39 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// Tag Scope Rules:
|
||||
//
|
||||
// Tags in ClusterCockpit have three visibility scopes that control who can see and use them:
|
||||
//
|
||||
// 1. "global" - Visible to all users, can be used by anyone
|
||||
// Example: System-generated tags like "energy-efficient", "failed", "short"
|
||||
//
|
||||
// 2. "private" - Only visible to the creating user
|
||||
// Example: Personal notes like "needs-review", "interesting-case"
|
||||
//
|
||||
// 3. "admin" - Only visible to users with admin or support roles
|
||||
// Example: Internal notes like "hardware-issue", "billing-problem"
|
||||
//
|
||||
// Authorization Rules:
|
||||
// - Regular users can only create/see "global" and their own "private" tags
|
||||
// - Admin/Support can create/see all scopes including "admin" tags
|
||||
// - Users can only add tags to jobs they have permission to view
|
||||
// - Tag scope is enforced at query time in GetTags() and CountTags()
|
||||
|
||||
// AddTag adds the tag with the database id `tag` to the job with the database id `job`.
|
||||
// Requires user authentication for security checks.
|
||||
//
|
||||
// The user must have permission to view the job. Tag visibility is determined by scope:
|
||||
// - "global" tags: visible to all users
|
||||
// - "private" tags: only visible to the tag creator
|
||||
// - "admin" tags: only visible to admin/support users
|
||||
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
|
||||
j, err := r.FindByIdWithUser(user, job)
|
||||
j, err := r.FindByIDWithUser(user, job)
|
||||
if err != nil {
|
||||
cclog.Warnf("Error finding job %d for user %s: %v", job, user.Username, err)
|
||||
return nil, err
|
||||
@@ -32,7 +85,7 @@ func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*sche
|
||||
// AddTagDirect adds a tag without user security checks.
|
||||
// Use only for internal/admin operations.
|
||||
func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
j, err := r.FindByIdDirect(job)
|
||||
j, err := r.FindByIDDirect(job)
|
||||
if err != nil {
|
||||
cclog.Warnf("Error finding job %d: %v", job, err)
|
||||
return nil, err
|
||||
@@ -43,12 +96,12 @@ func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error
|
||||
})
|
||||
}
|
||||
|
||||
// Removes a tag from a job by tag id.
|
||||
// Used by GraphQL API
|
||||
// RemoveTag removes the tag with the database id `tag` from the job with the database id `job`.
|
||||
// Requires user authentication for security checks. Used by GraphQL API.
|
||||
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
|
||||
j, err := r.FindByIdWithUser(user, job)
|
||||
j, err := r.FindByIDWithUser(user, job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while finding job by id")
|
||||
cclog.Warnf("Error while finding job %d for user %s during tag removal: %v", job, user.Username, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -68,27 +121,27 @@ func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.
|
||||
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting tags for job")
|
||||
cclog.Warnf("Error while getting archive tags for job %d in RemoveTag: %v", job, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
}
|
||||
|
||||
// Removes a tag from a job by tag info
|
||||
// Used by REST API
|
||||
// RemoveJobTagByRequest removes a tag from the job with the database id `job` by tag type, name, and scope.
|
||||
// Requires user authentication for security checks. Used by REST API.
|
||||
func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
|
||||
// Get Tag ID to delete
|
||||
tagID, exists := r.TagId(tagType, tagName, tagScope)
|
||||
tagID, exists := r.TagID(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||
return nil, fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||
}
|
||||
|
||||
// Get Job
|
||||
j, err := r.FindByIdWithUser(user, job)
|
||||
j, err := r.FindByIDWithUser(user, job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while finding job by id")
|
||||
cclog.Warnf("Error while finding job %d for user %s during tag removal by request: %v", job, user.Username, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -103,19 +156,30 @@ func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagT
|
||||
|
||||
tags, err := r.GetTags(user, &job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting tags for job")
|
||||
cclog.Warnf("Error while getting tags for job %d in RemoveJobTagByRequest: %v", job, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while getting tags for job")
|
||||
cclog.Warnf("Error while getting archive tags for job %d in RemoveJobTagByRequest: %v", job, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
}
|
||||
|
||||
// removeTagFromArchiveJobs updates the job archive for all affected jobs after a tag deletion.
|
||||
//
|
||||
// This function is called asynchronously (via goroutine) after removing a tag from the database
|
||||
// to synchronize the file-based job archive with the database state. Errors are logged but not
|
||||
// returned since this runs in the background.
|
||||
//
|
||||
// Parameters:
|
||||
// - jobIds: Database IDs of all jobs that had the deleted tag
|
||||
//
|
||||
// Implementation note: Each job is processed individually to handle partial failures gracefully.
|
||||
// If one job fails to update, others will still be processed.
|
||||
func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
|
||||
for _, j := range jobIds {
|
||||
tags, err := r.getArchiveTags(&j)
|
||||
@@ -124,7 +188,7 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
|
||||
continue
|
||||
}
|
||||
|
||||
job, err := r.FindByIdDirect(j)
|
||||
job, err := r.FindByIDDirect(j)
|
||||
if err != nil {
|
||||
cclog.Warnf("Error while getting job %d", j)
|
||||
continue
|
||||
@@ -138,18 +202,18 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
|
||||
// Used by REST API. Does not update tagged jobs in Job archive.
|
||||
func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
|
||||
// Get Tag ID to delete
|
||||
tagID, exists := r.TagId(tagType, tagName, tagScope)
|
||||
tagID, exists := r.TagID(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||
return fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
|
||||
}
|
||||
|
||||
return r.RemoveTagById(tagID)
|
||||
return r.RemoveTagByID(tagID)
|
||||
}
|
||||
|
||||
// RemoveTagByID removes a tag from the database by its tag id.
|
||||
// Used by GraphQL API.
|
||||
func (r *JobRepository) RemoveTagById(tagID int64) error {
|
||||
func (r *JobRepository) RemoveTagByID(tagID int64) error {
|
||||
jobIds, err := r.FindJobIdsByTag(tagID)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -179,8 +243,16 @@ func (r *JobRepository) RemoveTagById(tagID int64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateTag creates a new tag with the specified type and name and returns its database id.
|
||||
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||
// CreateTag creates a new tag with the specified type, name, and scope.
|
||||
// Returns the database ID of the newly created tag.
|
||||
//
|
||||
// Scope defaults to "global" if empty string is provided.
|
||||
// Valid scopes: "global", "private", "admin"
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// tagID, err := repo.CreateTag("performance", "high-memory", "global")
|
||||
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagID int64, err error) {
|
||||
// Default to "Global" scope if none defined
|
||||
if tagScope == "" {
|
||||
tagScope = "global"
|
||||
@@ -198,8 +270,14 @@ func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope strin
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
// CountTags returns all tags visible to the user and the count of jobs for each tag.
|
||||
// Applies scope-based filtering to respect tag visibility rules.
|
||||
//
|
||||
// Returns:
|
||||
// - tags: slice of tags the user can see
|
||||
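// - counts: map of job counts keyed by fmt.Sprint(tagType, tagName, tagID), i.e. tag type, tag name and database id concatenated without separators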
|
||||
// - err: any error encountered
|
||||
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
|
||||
// Fetch all Tags in DB for Display in Frontend Tag-View
|
||||
tags = make([]schema.Tag, 0, 100)
|
||||
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
|
||||
if err != nil {
|
||||
@@ -228,10 +306,10 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
}
|
||||
|
||||
// Query and Count Jobs with attached Tags
|
||||
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
|
||||
q := sq.Select("t.tag_type, t.tag_name, t.id, count(jt.tag_id)").
|
||||
From("tag t").
|
||||
LeftJoin("jobtag jt ON t.id = jt.tag_id").
|
||||
GroupBy("t.tag_name")
|
||||
GroupBy("t.tag_type, t.tag_name")
|
||||
|
||||
// Build scope list for filtering
|
||||
var scopeBuilder strings.Builder
|
||||
@@ -265,14 +343,15 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
|
||||
counts = make(map[string]int)
|
||||
for rows.Next() {
|
||||
var tagType string
|
||||
var tagName string
|
||||
var tagId int
|
||||
var tagID int
|
||||
var count int
|
||||
if err = rows.Scan(&tagName, &tagId, &count); err != nil {
|
||||
if err = rows.Scan(&tagType, &tagName, &tagID, &count); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// Use the tag type and tag id as additional map-key components to differentiate tags with identical names
|
||||
counts[fmt.Sprint(tagName, tagId)] = count
|
||||
counts[fmt.Sprint(tagType, tagName, tagID)] = count
|
||||
}
|
||||
err = rows.Err()
|
||||
|
||||
@@ -280,18 +359,44 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
}
|
||||
|
||||
var (
|
||||
ErrTagNotFound = errors.New("the tag does not exist")
|
||||
ErrJobNotOwned = errors.New("user is not owner of job")
|
||||
ErrTagNoAccess = errors.New("user not permitted to use that tag")
|
||||
ErrTagPrivateScope = errors.New("tag is private to another user")
|
||||
ErrTagAdminScope = errors.New("tag requires admin privileges")
|
||||
// ErrTagNotFound is returned when a tag ID or tag identifier (type, name, scope) does not exist in the database.
|
||||
ErrTagNotFound = errors.New("the tag does not exist")
|
||||
|
||||
// ErrJobNotOwned is returned when a user attempts to tag a job they do not have permission to access.
|
||||
ErrJobNotOwned = errors.New("user is not owner of job")
|
||||
|
||||
// ErrTagNoAccess is returned when a user attempts to use a tag they cannot access due to scope restrictions.
|
||||
ErrTagNoAccess = errors.New("user not permitted to use that tag")
|
||||
|
||||
// ErrTagPrivateScope is returned when a user attempts to access another user's private tag.
|
||||
ErrTagPrivateScope = errors.New("tag is private to another user")
|
||||
|
||||
// ErrTagAdminScope is returned when a non-admin user attempts to use an admin-scoped tag.
|
||||
ErrTagAdminScope = errors.New("tag requires admin privileges")
|
||||
|
||||
// ErrTagsIncompatScopes is returned when attempting to combine admin and non-admin scoped tags in a single operation.
|
||||
ErrTagsIncompatScopes = errors.New("combining admin and non-admin scoped tags not allowed")
|
||||
)
|
||||
|
||||
// addJobTag is a helper function that inserts a job-tag association and updates the archive.
|
||||
// Returns the updated tag list for the job.
|
||||
func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
|
||||
//
|
||||
// This function performs three operations in sequence (NOTE(review): no transaction is visible here, so do not rely on atomicity):
|
||||
// 1. Inserts the job-tag association into the jobtag junction table
|
||||
// 2. Retrieves the updated tag list for the job (using the provided getTags callback)
|
||||
// 3. Updates the job archive with the new tags to maintain database-archive consistency
|
||||
//
|
||||
// Parameters:
|
||||
// - jobId: Database ID of the job
|
||||
// - tagId: Database ID of the tag to associate
|
||||
// - job: Full job object needed for archive update
|
||||
// - getTags: Callback function to retrieve updated tags (allows different security contexts)
|
||||
//
|
||||
// Returns the complete updated tag list for the job or an error.
|
||||
//
|
||||
// Note: This function does NOT validate tag scope permissions - callers must perform
|
||||
// authorization checks before invoking this helper.
|
||||
func (r *JobRepository) addJobTag(jobID int64, tagID int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
@@ -301,13 +406,13 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get
|
||||
|
||||
tags, err := getTags()
|
||||
if err != nil {
|
||||
cclog.Warnf("Error getting tags for job %d: %v", jobId, err)
|
||||
cclog.Warnf("Error getting tags for job %d: %v", jobID, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
archiveTags, err := r.getArchiveTags(&jobId)
|
||||
archiveTags, err := r.getArchiveTags(&jobID)
|
||||
if err != nil {
|
||||
cclog.Warnf("Error getting archive tags for job %d: %v", jobId, err)
|
||||
cclog.Warnf("Error getting archive tags for job %d: %v", jobID, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -316,7 +421,7 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get
|
||||
|
||||
// AddTagOrCreate adds the tag with the specified type, name and scope to the job with the database id `jobID`.
|
||||
// If such a tag does not yet exist, it is created.
|
||||
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobID int64, tagType string, tagName string, tagScope string) (tagID int64, err error) {
|
||||
// Default to "Global" scope if none defined
|
||||
if tagScope == "" {
|
||||
tagScope = "global"
|
||||
@@ -330,44 +435,45 @@ func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType s
|
||||
return 0, fmt.Errorf("cannot write tag scope with current authorization")
|
||||
}
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
tagID, exists := r.TagID(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
tagID, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := r.AddTag(user, jobId, tagId); err != nil {
|
||||
if _, err := r.AddTag(user, jobID, tagID); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return tagId, nil
|
||||
return tagID, nil
|
||||
}
|
||||
|
||||
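// AddTagOrCreateDirect adds a "global"-scope tag to a job, creating the tag if it does not exist yet. Used in auto tagger plugins.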
|
||||
func (r *JobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
|
||||
func (r *JobRepository) AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) {
|
||||
tagScope := "global"
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
tagID, exists := r.TagID(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
tagID, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := r.AddTagDirect(jobId, tagId); err != nil {
|
||||
cclog.Infof("Adding tag %s:%s:%s (direct)", tagType, tagName, tagScope)
|
||||
|
||||
if _, err := r.AddTagDirect(jobID, tagID); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return tagId, nil
|
||||
return tagID, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
|
||||
func (r *JobRepository) HasTag(jobID int64, tagType string, tagName string) bool {
|
||||
var id int64
|
||||
q := sq.Select("id").From("tag").Join("jobtag ON jobtag.tag_id = tag.id").
|
||||
Where("jobtag.job_id = ?", jobId).Where("tag.tag_type = ?", tagType).
|
||||
Where("jobtag.job_id = ?", jobID).Where("tag.tag_type = ?", tagType).
|
||||
Where("tag.tag_name = ?", tagName)
|
||||
err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
|
||||
if err != nil {
|
||||
@@ -377,21 +483,21 @@ func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool
|
||||
}
|
||||
}
|
||||
|
||||
// TagId returns the database id of the tag with the specified type and name.
|
||||
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
||||
// TagID returns the database id of the tag with the specified type and name.
|
||||
func (r *JobRepository) TagID(tagType string, tagName string, tagScope string) (tagID int64, exists bool) {
|
||||
exists = true
|
||||
if err := sq.Select("id").From("tag").
|
||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&tagID); err != nil {
|
||||
exists = false
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// TagInfo returns the database infos of the tag with the specified id.
|
||||
func (r *JobRepository) TagInfo(tagId int64) (tagType string, tagName string, tagScope string, exists bool) {
|
||||
func (r *JobRepository) TagInfo(tagID int64) (tagType string, tagName string, tagScope string, exists bool) {
|
||||
exists = true
|
||||
if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagId).
|
||||
if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagID).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&tagType, &tagName, &tagScope); err != nil {
|
||||
exists = false
|
||||
}
|
||||
@@ -417,7 +523,7 @@ func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, e
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
cclog.Warn("Error while scanning rows")
|
||||
cclog.Warnf("Error while scanning tag rows in GetTags: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||
@@ -455,7 +561,7 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
cclog.Warn("Error while scanning rows")
|
||||
cclog.Warnf("Error while scanning tag rows in GetTagsDirect: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
tags = append(tags, tag)
|
||||
@@ -468,7 +574,18 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
||||
// getArchiveTags returns all tags for a job WITHOUT applying scope-based filtering.
|
||||
//
|
||||
// This internal function is used exclusively for job archive synchronization where we need
|
||||
// to store all tags regardless of the current user's permissions. Unlike GetTags() which
|
||||
// filters by scope, this returns the complete unfiltered tag list.
|
||||
//
|
||||
// Parameters:
|
||||
// - job: Pointer to job database ID, or nil to return all tags in the system
|
||||
//
|
||||
// Returns all tags without scope filtering, used only for archive operations.
|
||||
//
|
||||
// WARNING: Do NOT expose this function to user-facing APIs as it bypasses authorization.
|
||||
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||
if job != nil {
|
||||
@@ -487,7 +604,7 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
cclog.Warn("Error while scanning rows")
|
||||
cclog.Warnf("Error while scanning tag rows in getArchiveTags: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
tags = append(tags, tag)
|
||||
@@ -500,18 +617,18 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
|
||||
func (r *JobRepository) ImportTag(jobID int64, tagType string, tagName string, tagScope string) (err error) {
|
||||
// Import has no scope ctx, only import from metafile to DB (No recursive archive update required), only returns err
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
tagID, exists := r.TagID(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
tagID, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
@@ -522,16 +639,38 @@ func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, t
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkScopeAuth validates whether a user is authorized to perform an operation on a tag with the given scope.
|
||||
//
|
||||
// This function implements the tag scope authorization matrix:
|
||||
//
|
||||
// Scope | Read Access | Write Access
|
||||
// -------------|----------------------------------|----------------------------------
|
||||
// "global" | All users | Admin, Support, API-only
|
||||
// "admin" | Admin, Support | Admin, API-only
|
||||
// <username> | Owner only | Owner only (private tags)
|
||||
//
|
||||
// Parameters:
|
||||
// - user: User attempting the operation (must not be nil)
|
||||
// - operation: Either "read" or "write"
|
||||
// - scope: Tag scope value ("global", "admin", or username for private tags)
|
||||
//
|
||||
// Returns:
|
||||
// - pass: true if authorized, false if denied
|
||||
// - err: error only if operation is invalid or user is nil
|
||||
//
|
||||
// Special cases:
|
||||
// - API-only users (single role: RoleApi) can write to admin and global scopes for automation
|
||||
// - Private tags use the username as scope, granting exclusive access to that user
|
||||
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
|
||||
if user != nil {
|
||||
switch {
|
||||
case operation == "write" && scope == "admin":
|
||||
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleAPI)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case operation == "write" && scope == "global":
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleAPI)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
|
||||
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
@@ -62,7 +62,7 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
|
||||
func (r *JobRepository) TransactionAddNamed(
|
||||
t *Transaction,
|
||||
query string,
|
||||
args ...interface{},
|
||||
args ...any,
|
||||
) (int64, error) {
|
||||
if t.tx == nil {
|
||||
return 0, fmt.Errorf("transaction is nil or already completed")
|
||||
@@ -82,7 +82,7 @@ func (r *JobRepository) TransactionAddNamed(
|
||||
}
|
||||
|
||||
// TransactionAdd executes a query within the transaction.
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...any) (int64, error) {
|
||||
if t.tx == nil {
|
||||
return 0, fmt.Errorf("transaction is nil or already completed")
|
||||
}
|
||||
|
||||
311
internal/repository/transaction_test.go
Normal file
311
internal/repository/transaction_test.go
Normal file
@@ -0,0 +1,311 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestTransactionInit(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("successful transaction init", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err, "TransactionInit should succeed")
|
||||
require.NotNil(t, tx, "Transaction should not be nil")
|
||||
require.NotNil(t, tx.tx, "Transaction.tx should not be nil")
|
||||
|
||||
// Clean up
|
||||
err = tx.Rollback()
|
||||
require.NoError(t, err, "Rollback should succeed")
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransactionCommit(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("commit after successful operations", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Insert a test tag
|
||||
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_tag_commit", "global")
|
||||
require.NoError(t, err, "TransactionAdd should succeed")
|
||||
|
||||
// Commit the transaction
|
||||
err = tx.Commit()
|
||||
require.NoError(t, err, "Commit should succeed")
|
||||
|
||||
// Verify the tag was inserted
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_commit").Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 1, count, "Tag should be committed to database")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_tag_commit")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("commit on already committed transaction", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
err = tx.Commit()
|
||||
require.NoError(t, err, "First commit should succeed")
|
||||
|
||||
err = tx.Commit()
|
||||
assert.Error(t, err, "Second commit should fail")
|
||||
assert.Contains(t, err.Error(), "transaction already committed or rolled back")
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransactionRollback(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("rollback after operations", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Insert a test tag
|
||||
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_tag_rollback", "global")
|
||||
require.NoError(t, err, "TransactionAdd should succeed")
|
||||
|
||||
// Rollback the transaction
|
||||
err = tx.Rollback()
|
||||
require.NoError(t, err, "Rollback should succeed")
|
||||
|
||||
// Verify the tag was NOT inserted
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_rollback").Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, count, "Tag should not be in database after rollback")
|
||||
})
|
||||
|
||||
t.Run("rollback on already rolled back transaction", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
err = tx.Rollback()
|
||||
require.NoError(t, err, "First rollback should succeed")
|
||||
|
||||
err = tx.Rollback()
|
||||
assert.NoError(t, err, "Second rollback should be safe (no-op)")
|
||||
})
|
||||
|
||||
t.Run("rollback on committed transaction", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
err = tx.Commit()
|
||||
require.NoError(t, err)
|
||||
|
||||
err = tx.Rollback()
|
||||
assert.NoError(t, err, "Rollback after commit should be safe (no-op)")
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransactionAdd(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("insert with TransactionAdd", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
defer tx.Rollback()
|
||||
|
||||
id, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_add", "global")
|
||||
require.NoError(t, err, "TransactionAdd should succeed")
|
||||
assert.Greater(t, id, int64(0), "Should return valid insert ID")
|
||||
})
|
||||
|
||||
t.Run("error on nil transaction", func(t *testing.T) {
|
||||
tx := &Transaction{tx: nil}
|
||||
|
||||
_, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_nil", "global")
|
||||
assert.Error(t, err, "Should error on nil transaction")
|
||||
assert.Contains(t, err.Error(), "transaction is nil or already completed")
|
||||
})
|
||||
|
||||
t.Run("error on invalid SQL", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
defer tx.Rollback()
|
||||
|
||||
_, err = r.TransactionAdd(tx, "INVALID SQL STATEMENT")
|
||||
assert.Error(t, err, "Should error on invalid SQL")
|
||||
})
|
||||
|
||||
t.Run("error after transaction committed", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
err = tx.Commit()
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_after_commit", "global")
|
||||
assert.Error(t, err, "Should error when transaction is already committed")
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransactionAddNamed(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("insert with TransactionAddNamed", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
defer tx.Rollback()
|
||||
|
||||
type TagArgs struct {
|
||||
Type string `db:"type"`
|
||||
Name string `db:"name"`
|
||||
Scope string `db:"scope"`
|
||||
}
|
||||
|
||||
args := TagArgs{
|
||||
Type: "test_type",
|
||||
Name: "test_named",
|
||||
Scope: "global",
|
||||
}
|
||||
|
||||
id, err := r.TransactionAddNamed(tx,
|
||||
"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
|
||||
args)
|
||||
require.NoError(t, err, "TransactionAddNamed should succeed")
|
||||
assert.Greater(t, id, int64(0), "Should return valid insert ID")
|
||||
})
|
||||
|
||||
t.Run("error on nil transaction", func(t *testing.T) {
|
||||
tx := &Transaction{tx: nil}
|
||||
|
||||
_, err := r.TransactionAddNamed(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
|
||||
map[string]any{"type": "test", "name": "test", "scope": "global"})
|
||||
assert.Error(t, err, "Should error on nil transaction")
|
||||
assert.Contains(t, err.Error(), "transaction is nil or already completed")
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransactionMultipleOperations(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("multiple inserts in single transaction", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
defer tx.Rollback()
|
||||
|
||||
// Insert multiple tags
|
||||
for i := range 5 {
|
||||
_, err = r.TransactionAdd(tx,
|
||||
"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_multi_"+string(rune('a'+i)), "global")
|
||||
require.NoError(t, err, "Insert %d should succeed", i)
|
||||
}
|
||||
|
||||
err = tx.Commit()
|
||||
require.NoError(t, err, "Commit should succeed")
|
||||
|
||||
// Verify all tags were inserted
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_multi_%'").Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 5, count, "All 5 tags should be committed")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name LIKE 'test_multi_%'")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("rollback undoes all operations", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Insert multiple tags
|
||||
for i := range 3 {
|
||||
_, err = r.TransactionAdd(tx,
|
||||
"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_rollback_"+string(rune('a'+i)), "global")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
err = tx.Rollback()
|
||||
require.NoError(t, err, "Rollback should succeed")
|
||||
|
||||
// Verify no tags were inserted
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_rollback_%'").Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 0, count, "No tags should be in database after rollback")
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransactionEnd(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("deprecated TransactionEnd calls Commit", func(t *testing.T) {
|
||||
tx, err := r.TransactionInit()
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_end", "global")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Use deprecated method
|
||||
err = r.TransactionEnd(tx)
|
||||
require.NoError(t, err, "TransactionEnd should succeed")
|
||||
|
||||
// Verify the tag was committed
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_end").Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 1, count, "Tag should be committed")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_end")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestTransactionDeferPattern(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
t.Run("defer rollback pattern", func(t *testing.T) {
|
||||
insertTag := func() error {
|
||||
tx, err := r.TransactionInit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback() // Safe to call even after commit
|
||||
|
||||
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
|
||||
"test_type", "test_defer", "global")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
err := insertTag()
|
||||
require.NoError(t, err, "Function should succeed")
|
||||
|
||||
// Verify the tag was committed
|
||||
var count int
|
||||
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_defer").Scan(&count)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 1, count, "Tag should be committed despite defer rollback")
|
||||
|
||||
// Clean up
|
||||
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_defer")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
@@ -10,18 +10,38 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
)
|
||||
|
||||
// Authentication and Role System:
|
||||
//
|
||||
// ClusterCockpit supports multiple authentication sources:
|
||||
// - Local: Username/password stored in database (password hashed with bcrypt)
|
||||
// - LDAP: External LDAP/Active Directory authentication
|
||||
// - JWT: Token-based authentication for API access
|
||||
//
|
||||
// Role Hierarchy (from highest to lowest privilege):
|
||||
// 1. "admin" - Full system access, can manage all users and jobs
|
||||
// 2. "support" - Can view all jobs but limited management capabilities
|
||||
// 3. "manager" - Can manage specific projects and their users
|
||||
// 4. "api" - Programmatic access for job submission/management
|
||||
// 5. "user" - Default role, can only view own jobs
|
||||
//
|
||||
// Project Association:
|
||||
// - Managers have a list of projects they oversee
|
||||
// - Regular users' project membership is determined by job data
|
||||
// - Managers can view/manage all jobs within their projects
|
||||
|
||||
var (
|
||||
userRepoOnce sync.Once
|
||||
userRepoInstance *UserRepository
|
||||
@@ -44,6 +64,9 @@ func GetUserRepository() *UserRepository {
|
||||
return userRepoInstance
|
||||
}
|
||||
|
||||
// GetUser retrieves a user by username from the database.
|
||||
// Returns the complete user record including hashed password, roles, and projects.
|
||||
// Password field contains bcrypt hash for local auth users, empty for LDAP users.
|
||||
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
||||
user := &schema.User{Username: username}
|
||||
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
|
||||
@@ -93,12 +116,18 @@ func (r *UserRepository) GetLdapUsernames() ([]string, error) {
|
||||
return users, nil
|
||||
}
|
||||
|
||||
// AddUser creates a new user in the database.
|
||||
// Passwords are automatically hashed with bcrypt before storage.
|
||||
// Auth source determines authentication method (local, LDAP, etc.).
|
||||
//
|
||||
// Required fields: Username, Roles
|
||||
// Optional fields: Name, Email, Password, Projects, AuthSource
|
||||
func (r *UserRepository) AddUser(user *schema.User) error {
|
||||
rolesJson, _ := json.Marshal(user.Roles)
|
||||
projectsJson, _ := json.Marshal(user.Projects)
|
||||
|
||||
cols := []string{"username", "roles", "projects"}
|
||||
vals := []interface{}{user.Username, string(rolesJson), string(projectsJson)}
|
||||
vals := []any{user.Username, string(rolesJson), string(projectsJson)}
|
||||
|
||||
if user.Name != "" {
|
||||
cols = append(cols, "name")
|
||||
@@ -159,8 +188,8 @@ func (r *UserRepository) AddUser(user *schema.User) error {
|
||||
}
|
||||
|
||||
func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
|
||||
// user contains updated info, apply to dbuser
|
||||
// TODO: Discuss updatable fields
|
||||
// user contains updated info -> Apply to dbUser
|
||||
// --- Simple Name Update ---
|
||||
if dbUser.Name != user.Name {
|
||||
if _, err := sq.Update("hpc_user").Set("name", user.Name).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
cclog.Errorf("error while updating name of user '%s'", user.Username)
|
||||
@@ -168,13 +197,64 @@ func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) erro
|
||||
}
|
||||
}
|
||||
|
||||
// Toggled until greenlit
|
||||
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
|
||||
// projects, _ := json.Marshal(user.Projects)
|
||||
// if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
// --- Def Helpers ---
|
||||
// Helper to update roles
|
||||
updateRoles := func(roles []string) error {
|
||||
rolesJSON, _ := json.Marshal(roles)
|
||||
_, err := sq.Update("hpc_user").Set("roles", rolesJSON).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
|
||||
return err
|
||||
}
|
||||
|
||||
// Helper to update projects
|
||||
updateProjects := func(projects []string) error {
|
||||
projectsJSON, _ := json.Marshal(projects)
|
||||
_, err := sq.Update("hpc_user").Set("projects", projectsJSON).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
|
||||
return err
|
||||
}
|
||||
|
||||
// Helper to clear projects
|
||||
clearProjects := func() error {
|
||||
_, err := sq.Update("hpc_user").Set("projects", "[]").Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec()
|
||||
return err
|
||||
}
|
||||
|
||||
// --- Manager Role Handling ---
|
||||
if dbUser.HasRole(schema.RoleManager) && user.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
|
||||
// Existing Manager: update projects
|
||||
if err := updateProjects(user.Projects); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if dbUser.HasRole(schema.RoleUser) && user.HasRole(schema.RoleManager) && user.HasNotRoles([]schema.Role{schema.RoleAdmin}) {
|
||||
// New Manager: update roles and projects
|
||||
if err := updateRoles(user.Roles); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := updateProjects(user.Projects); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if dbUser.HasRole(schema.RoleManager) && user.HasNotRoles([]schema.Role{schema.RoleAdmin, schema.RoleManager}) {
|
||||
// Remove Manager: update roles and clear projects
|
||||
if err := updateRoles(user.Roles); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := clearProjects(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// --- Support Role Handling ---
|
||||
if dbUser.HasRole(schema.RoleUser) && dbUser.HasNotRoles([]schema.Role{schema.RoleSupport}) &&
|
||||
user.HasRole(schema.RoleSupport) && user.HasNotRoles([]schema.Role{schema.RoleAdmin}) {
|
||||
// New Support: update roles
|
||||
if err := updateRoles(user.Roles); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if dbUser.HasRole(schema.RoleSupport) && user.HasNotRoles([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
// Remove Support: update roles
|
||||
if err := updateRoles(user.Roles); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -229,6 +309,14 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||
return users, nil
|
||||
}
|
||||
|
||||
// AddRole adds a role to a user's role list.
|
||||
// Role string is automatically lowercased.
|
||||
// Valid roles: admin, support, manager, api, user
|
||||
//
|
||||
// Returns error if:
|
||||
// - User doesn't exist
|
||||
// - Role is invalid
|
||||
// - User already has the role
|
||||
func (r *UserRepository) AddRole(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
@@ -258,6 +346,11 @@ func (r *UserRepository) AddRole(
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemoveRole removes a role from a user's role list.
|
||||
//
|
||||
// Special rules:
|
||||
// - Cannot remove "manager" role while user has assigned projects
|
||||
// - Must remove all projects first before removing manager role
|
||||
func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryrole string) error {
|
||||
oldRole := strings.ToLower(queryrole)
|
||||
user, err := r.GetUser(username)
|
||||
@@ -294,6 +387,12 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddProject assigns a project to a manager user.
|
||||
// Only users with the "manager" role can have assigned projects.
|
||||
//
|
||||
// Returns error if:
|
||||
// - User doesn't have manager role
|
||||
// - User already manages the project
|
||||
func (r *UserRepository) AddProject(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
@@ -345,7 +444,7 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
||||
}
|
||||
|
||||
if exists {
|
||||
var result interface{}
|
||||
var result any
|
||||
if len(newprojects) == 0 {
|
||||
result = "[]"
|
||||
} else {
|
||||
|
||||
@@ -12,9 +12,9 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/lrucache"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/lrucache"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
|
||||
@@ -10,9 +10,9 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
@@ -20,33 +20,40 @@ func setupUserTest(t *testing.T) *UserCfgRepo {
|
||||
const testconfig = `{
|
||||
"main": {
|
||||
"addr": "0.0.0.0:8080",
|
||||
"apiAllowedIPs": [
|
||||
"api-allowed-ips": [
|
||||
"*"
|
||||
]
|
||||
},
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "testcluster",
|
||||
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
|
||||
"filterRanges": {
|
||||
"numNodes": { "from": 1, "to": 64 },
|
||||
"duration": { "from": 0, "to": 86400 },
|
||||
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
|
||||
}
|
||||
}]
|
||||
}`
|
||||
|
||||
cclog.Init("info", true)
|
||||
dbfilepath := "testdata/job.db"
|
||||
err := MigrateDB("sqlite3", dbfilepath)
|
||||
|
||||
// Copy test DB to a temp file for test isolation
|
||||
srcData, err := os.ReadFile("testdata/job.db")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
Connect("sqlite3", dbfilepath)
|
||||
dbfilepath := filepath.Join(t.TempDir(), "job.db")
|
||||
if err := os.WriteFile(dbfilepath, srcData, 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := ResetConnection(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
t.Cleanup(func() {
|
||||
ResetConnection()
|
||||
})
|
||||
|
||||
err = MigrateDB(dbfilepath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
Connect(dbfilepath)
|
||||
|
||||
tmpdir := t.TempDir()
|
||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||
@@ -58,11 +65,7 @@ func setupUserTest(t *testing.T) *UserCfgRepo {
|
||||
|
||||
// Load and check main configuration
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||
config.Init(cfg, clustercfg)
|
||||
} else {
|
||||
t.Fatal("Cluster configuration must be present")
|
||||
}
|
||||
config.Init(cfg)
|
||||
} else {
|
||||
t.Fatal("Main configuration must be present")
|
||||
}
|
||||
|
||||
596
internal/repository/user_test.go
Normal file
596
internal/repository/user_test.go
Normal file
@@ -0,0 +1,596 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
)
|
||||
|
||||
func TestAddUser(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
|
||||
t.Run("add user with all fields", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "testuser1",
|
||||
Name: "Test User One",
|
||||
Email: "test1@example.com",
|
||||
Password: "testpassword123",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{"project1", "project2"},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
retrievedUser, err := r.GetUser("testuser1")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, user.Username, retrievedUser.Username)
|
||||
assert.Equal(t, user.Name, retrievedUser.Name)
|
||||
assert.Equal(t, user.Email, retrievedUser.Email)
|
||||
assert.Equal(t, user.Roles, retrievedUser.Roles)
|
||||
assert.Equal(t, user.Projects, retrievedUser.Projects)
|
||||
assert.NotEmpty(t, retrievedUser.Password)
|
||||
err = bcrypt.CompareHashAndPassword([]byte(retrievedUser.Password), []byte("testpassword123"))
|
||||
assert.NoError(t, err, "Password should be hashed correctly")
|
||||
|
||||
err = r.DelUser("testuser1")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("add user with minimal fields", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "testuser2",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLDAP,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
retrievedUser, err := r.GetUser("testuser2")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, user.Username, retrievedUser.Username)
|
||||
assert.Equal(t, "", retrievedUser.Name)
|
||||
assert.Equal(t, "", retrievedUser.Email)
|
||||
assert.Equal(t, "", retrievedUser.Password)
|
||||
|
||||
err = r.DelUser("testuser2")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("add duplicate user fails", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "testuser3",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.AddUser(user)
|
||||
assert.Error(t, err, "Adding duplicate user should fail")
|
||||
|
||||
err = r.DelUser("testuser3")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetUser(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
|
||||
t.Run("get existing user", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "getuser1",
|
||||
Name: "Get User",
|
||||
Email: "getuser@example.com",
|
||||
Roles: []string{"user", "admin"},
|
||||
Projects: []string{"proj1"},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
retrieved, err := r.GetUser("getuser1")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, user.Username, retrieved.Username)
|
||||
assert.Equal(t, user.Name, retrieved.Name)
|
||||
assert.Equal(t, user.Email, retrieved.Email)
|
||||
assert.ElementsMatch(t, user.Roles, retrieved.Roles)
|
||||
assert.ElementsMatch(t, user.Projects, retrieved.Projects)
|
||||
|
||||
err = r.DelUser("getuser1")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("get non-existent user", func(t *testing.T) {
|
||||
_, err := r.GetUser("nonexistent")
|
||||
assert.Error(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestUpdateUser(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
|
||||
t.Run("update user name", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "updateuser1",
|
||||
Name: "Original Name",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
dbUser, err := r.GetUser("updateuser1")
|
||||
require.NoError(t, err)
|
||||
|
||||
updatedUser := &schema.User{
|
||||
Username: "updateuser1",
|
||||
Name: "Updated Name",
|
||||
}
|
||||
|
||||
err = r.UpdateUser(dbUser, updatedUser)
|
||||
require.NoError(t, err)
|
||||
|
||||
retrieved, err := r.GetUser("updateuser1")
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "Updated Name", retrieved.Name)
|
||||
|
||||
err = r.DelUser("updateuser1")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("update with no changes", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "updateuser2",
|
||||
Name: "Same Name",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
dbUser, err := r.GetUser("updateuser2")
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.UpdateUser(dbUser, dbUser)
|
||||
assert.NoError(t, err)
|
||||
|
||||
err = r.DelUser("updateuser2")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestDelUser(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
|
||||
t.Run("delete existing user", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "deluser1",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.DelUser("deluser1")
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = r.GetUser("deluser1")
|
||||
assert.Error(t, err, "User should not exist after deletion")
|
||||
})
|
||||
|
||||
t.Run("delete non-existent user", func(t *testing.T) {
|
||||
err := r.DelUser("nonexistent")
|
||||
assert.NoError(t, err, "Deleting non-existent user should not error")
|
||||
})
|
||||
}
|
||||
|
||||
func TestListUsers(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
|
||||
user1 := &schema.User{
|
||||
Username: "listuser1",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
user2 := &schema.User{
|
||||
Username: "listuser2",
|
||||
Roles: []string{"admin"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
user3 := &schema.User{
|
||||
Username: "listuser3",
|
||||
Roles: []string{"manager"},
|
||||
Projects: []string{"proj1"},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user1)
|
||||
require.NoError(t, err)
|
||||
err = r.AddUser(user2)
|
||||
require.NoError(t, err)
|
||||
err = r.AddUser(user3)
|
||||
require.NoError(t, err)
|
||||
|
||||
t.Run("list all users", func(t *testing.T) {
|
||||
users, err := r.ListUsers(false)
|
||||
require.NoError(t, err)
|
||||
assert.GreaterOrEqual(t, len(users), 3)
|
||||
|
||||
usernames := make([]string, len(users))
|
||||
for i, u := range users {
|
||||
usernames[i] = u.Username
|
||||
}
|
||||
assert.Contains(t, usernames, "listuser1")
|
||||
assert.Contains(t, usernames, "listuser2")
|
||||
assert.Contains(t, usernames, "listuser3")
|
||||
})
|
||||
|
||||
t.Run("list special users only", func(t *testing.T) {
|
||||
users, err := r.ListUsers(true)
|
||||
require.NoError(t, err)
|
||||
|
||||
usernames := make([]string, len(users))
|
||||
for i, u := range users {
|
||||
usernames[i] = u.Username
|
||||
}
|
||||
assert.Contains(t, usernames, "listuser2")
|
||||
assert.Contains(t, usernames, "listuser3")
|
||||
})
|
||||
|
||||
err = r.DelUser("listuser1")
|
||||
require.NoError(t, err)
|
||||
err = r.DelUser("listuser2")
|
||||
require.NoError(t, err)
|
||||
err = r.DelUser("listuser3")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestGetLdapUsernames(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
|
||||
ldapUser := &schema.User{
|
||||
Username: "ldapuser1",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLDAP,
|
||||
}
|
||||
localUser := &schema.User{
|
||||
Username: "localuser1",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(ldapUser)
|
||||
require.NoError(t, err)
|
||||
err = r.AddUser(localUser)
|
||||
require.NoError(t, err)
|
||||
|
||||
usernames, err := r.GetLdapUsernames()
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, usernames, "ldapuser1")
|
||||
assert.NotContains(t, usernames, "localuser1")
|
||||
|
||||
err = r.DelUser("ldapuser1")
|
||||
require.NoError(t, err)
|
||||
err = r.DelUser("localuser1")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestAddRole(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("add valid role", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "roleuser1",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.AddRole(ctx, "roleuser1", "admin")
|
||||
require.NoError(t, err)
|
||||
|
||||
retrieved, err := r.GetUser("roleuser1")
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, retrieved.Roles, "admin")
|
||||
assert.Contains(t, retrieved.Roles, "user")
|
||||
|
||||
err = r.DelUser("roleuser1")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("add duplicate role", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "roleuser2",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.AddRole(ctx, "roleuser2", "user")
|
||||
assert.Error(t, err, "Adding duplicate role should fail")
|
||||
assert.Contains(t, err.Error(), "already has role")
|
||||
|
||||
err = r.DelUser("roleuser2")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("add invalid role", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "roleuser3",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.AddRole(ctx, "roleuser3", "invalidrole")
|
||||
assert.Error(t, err, "Adding invalid role should fail")
|
||||
assert.Contains(t, err.Error(), "no valid option")
|
||||
|
||||
err = r.DelUser("roleuser3")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRemoveRole(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("remove existing role", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "rmroleuser1",
|
||||
Roles: []string{"user", "admin"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.RemoveRole(ctx, "rmroleuser1", "admin")
|
||||
require.NoError(t, err)
|
||||
|
||||
retrieved, err := r.GetUser("rmroleuser1")
|
||||
require.NoError(t, err)
|
||||
assert.NotContains(t, retrieved.Roles, "admin")
|
||||
assert.Contains(t, retrieved.Roles, "user")
|
||||
|
||||
err = r.DelUser("rmroleuser1")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("remove non-existent role", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "rmroleuser2",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.RemoveRole(ctx, "rmroleuser2", "admin")
|
||||
assert.Error(t, err, "Removing non-existent role should fail")
|
||||
assert.Contains(t, err.Error(), "already deleted")
|
||||
|
||||
err = r.DelUser("rmroleuser2")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("remove manager role with projects", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "rmroleuser3",
|
||||
Roles: []string{"manager"},
|
||||
Projects: []string{"proj1", "proj2"},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.RemoveRole(ctx, "rmroleuser3", "manager")
|
||||
assert.Error(t, err, "Removing manager role with projects should fail")
|
||||
assert.Contains(t, err.Error(), "still has assigned project")
|
||||
|
||||
err = r.DelUser("rmroleuser3")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestAddProject(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("add project to manager", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "projuser1",
|
||||
Roles: []string{"manager"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.AddProject(ctx, "projuser1", "newproject")
|
||||
require.NoError(t, err)
|
||||
|
||||
retrieved, err := r.GetUser("projuser1")
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, retrieved.Projects, "newproject")
|
||||
|
||||
err = r.DelUser("projuser1")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("add project to non-manager", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "projuser2",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.AddProject(ctx, "projuser2", "newproject")
|
||||
assert.Error(t, err, "Adding project to non-manager should fail")
|
||||
assert.Contains(t, err.Error(), "not a manager")
|
||||
|
||||
err = r.DelUser("projuser2")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("add duplicate project", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "projuser3",
|
||||
Roles: []string{"manager"},
|
||||
Projects: []string{"existingproject"},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.AddProject(ctx, "projuser3", "existingproject")
|
||||
assert.Error(t, err, "Adding duplicate project should fail")
|
||||
assert.Contains(t, err.Error(), "already manages")
|
||||
|
||||
err = r.DelUser("projuser3")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRemoveProject(t *testing.T) {
|
||||
_ = setup(t)
|
||||
r := GetUserRepository()
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("remove existing project", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "rmprojuser1",
|
||||
Roles: []string{"manager"},
|
||||
Projects: []string{"proj1", "proj2"},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.RemoveProject(ctx, "rmprojuser1", "proj1")
|
||||
require.NoError(t, err)
|
||||
|
||||
retrieved, err := r.GetUser("rmprojuser1")
|
||||
require.NoError(t, err)
|
||||
assert.NotContains(t, retrieved.Projects, "proj1")
|
||||
assert.Contains(t, retrieved.Projects, "proj2")
|
||||
|
||||
err = r.DelUser("rmprojuser1")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("remove non-existent project", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "rmprojuser2",
|
||||
Roles: []string{"manager"},
|
||||
Projects: []string{"proj1"},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.RemoveProject(ctx, "rmprojuser2", "nonexistent")
|
||||
assert.Error(t, err, "Removing non-existent project should fail")
|
||||
|
||||
err = r.DelUser("rmprojuser2")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("remove project from non-manager", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "rmprojuser3",
|
||||
Roles: []string{"user"},
|
||||
Projects: []string{},
|
||||
AuthSource: schema.AuthViaLocalPassword,
|
||||
}
|
||||
|
||||
err := r.AddUser(user)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.RemoveProject(ctx, "rmprojuser3", "proj1")
|
||||
assert.Error(t, err, "Removing project from non-manager should fail")
|
||||
assert.Contains(t, err.Error(), "not a manager")
|
||||
|
||||
err = r.DelUser("rmprojuser3")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetUserFromContext(t *testing.T) {
|
||||
t.Run("get user from context", func(t *testing.T) {
|
||||
user := &schema.User{
|
||||
Username: "contextuser",
|
||||
Roles: []string{"user"},
|
||||
}
|
||||
|
||||
ctx := context.WithValue(context.Background(), ContextUserKey, user)
|
||||
retrieved := GetUserFromContext(ctx)
|
||||
|
||||
require.NotNil(t, retrieved)
|
||||
assert.Equal(t, user.Username, retrieved.Username)
|
||||
})
|
||||
|
||||
t.Run("get user from empty context", func(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
retrieved := GetUserFromContext(ctx)
|
||||
|
||||
assert.Nil(t, retrieved)
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user