This commit is contained in:
Christoph Kluge
2026-01-15 10:12:02 +01:00
32 changed files with 5483 additions and 574 deletions

View File

@@ -27,7 +27,7 @@ type GetClustersAPIResponse struct {
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
// @produce json
// @param cluster query string false "Job Cluster"
// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
// @success 200 {object} api.GetClustersAPIResponse "Array of clusters"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"

File diff suppressed because it is too large Load Diff

View File

@@ -104,7 +104,7 @@ type JobMetricWithName struct {
// @param items-per-page query int false "Items per page (Default: 25)"
// @param page query int false "Page Number (Default: 1)"
// @param with-metadata query bool false "Include metadata (e.g. jobScript) in response"
// @success 200 {object} api.GetJobsApiResponse "Job array and page info"
// @success 200 {object} api.GetJobsAPIResponse "Job array and page info"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -232,7 +232,7 @@ func (api *RestAPI) getJobs(rw http.ResponseWriter, r *http.Request) {
// @produce json
// @param id path int true "Database ID of Job"
// @param all-metrics query bool false "Include all available metrics"
// @success 200 {object} api.GetJobApiResponse "Job resource"
// @success 200 {object} api.GetJobAPIResponse "Job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -324,8 +324,8 @@ func (api *RestAPI) getCompleteJobByID(rw http.ResponseWriter, r *http.Request)
// @accept json
// @produce json
// @param id path int true "Database ID of Job"
// @param request body api.GetJobApiRequest true "Array of metric names"
// @success 200 {object} api.GetJobApiResponse "Job resource"
// @param request body api.GetJobAPIRequest true "Array of metric names"
// @success 200 {object} api.GetJobAPIResponse "Job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -478,7 +478,7 @@ func (api *RestAPI) editMeta(rw http.ResponseWriter, r *http.Request) {
// @accept json
// @produce json
// @param id path int true "Job Database ID"
// @param request body api.TagJobApiRequest true "Array of tag-objects to add"
// @param request body api.TagJobAPIRequest true "Array of tag-objects to add"
// @success 200 {object} schema.Job "Updated job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -542,7 +542,7 @@ func (api *RestAPI) tagJob(rw http.ResponseWriter, r *http.Request) {
// @accept json
// @produce json
// @param id path int true "Job Database ID"
// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
// @param request body api.TagJobAPIRequest true "Array of tag-objects to remove"
// @success 200 {object} schema.Job "Updated job resource"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -606,7 +606,7 @@ func (api *RestAPI) removeTagJob(rw http.ResponseWriter, r *http.Request) {
// @description Tag wills be removed from respective archive files.
// @accept json
// @produce plain
// @param request body api.TagJobApiRequest true "Array of tag-objects to remove"
// @param request body api.TagJobAPIRequest true "Array of tag-objects to remove"
// @success 200 {string} string "Success Response"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -650,7 +650,7 @@ func (api *RestAPI) removeTags(rw http.ResponseWriter, r *http.Request) {
// @accept json
// @produce json
// @param request body schema.Job true "Job to add"
// @success 201 {object} api.DefaultApiResponse "Job added successfully"
// @success 201 {object} api.DefaultAPIResponse "Job added successfully"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -728,7 +728,7 @@ func (api *RestAPI) startJob(rw http.ResponseWriter, r *http.Request) {
// @description Job to stop is specified by request body. All fields are required in this case.
// @description Returns full job resource information according to 'Job' scheme.
// @produce json
// @param request body api.StopJobApiRequest true "All fields required"
// @param request body api.StopJobAPIRequest true "All fields required"
// @success 200 {object} schema.Job "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
@@ -754,7 +754,6 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
return
}
// cclog.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
if err != nil {
// Try cached jobs if not found in main repository
@@ -776,7 +775,7 @@ func (api *RestAPI) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// @description Job to remove is specified by database ID. This will not remove the job from the job archive.
// @produce json
// @param id path int true "Database ID of Job"
// @success 200 {object} api.DefaultApiResponse "Success message"
// @success 200 {object} api.DefaultAPIResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -820,8 +819,8 @@ func (api *RestAPI) deleteJobByID(rw http.ResponseWriter, r *http.Request) {
// @description Job to delete is specified by request body. All fields are required in this case.
// @accept json
// @produce json
// @param request body api.DeleteJobApiRequest true "All fields required"
// @success 200 {object} api.DefaultApiResponse "Success message"
// @param request body api.DeleteJobAPIRequest true "All fields required"
// @success 200 {object} api.DefaultAPIResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
@@ -873,7 +872,7 @@ func (api *RestAPI) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
// @produce json
// @param ts path int true "Unix epoch timestamp"
// @success 200 {object} api.DefaultApiResponse "Success message"
// @success 200 {object} api.DefaultAPIResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"

View File

@@ -47,7 +47,7 @@ func determineState(states []string) schema.SchedulerState {
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
// @produce json
// @param request body UpdateNodeStatesRequest true "Request body containing nodes and their states"
// @success 200 {object} api.DefaultApiResponse "Success message"
// @success 200 {object} api.DefaultAPIResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"

View File

@@ -31,7 +31,7 @@ type APIReturnedUser struct {
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
// @produce json
// @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
// @success 200 {array} api.ApiReturnedUser "List of users returned successfully"
// @success 200 {array} api.APIReturnedUser "List of users returned successfully"
// @failure 400 {string} string "Bad Request"
// @failure 401 {string} string "Unauthorized"
// @failure 403 {string} string "Forbidden"

View File

@@ -10815,7 +10815,7 @@ func (ec *executionContext) _SubCluster_metricConfig(ctx context.Context, field
return obj.MetricConfig, nil
},
nil,
ec.marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfigᚄ,
ec.marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfigᚄ,
true,
true,
)
@@ -18466,11 +18466,7 @@ func (ec *executionContext) marshalNJobsStatistics2ᚖgithubᚗcomᚋClusterCock
return ec._JobsStatistics(ctx, sel, v)
}
func (ec *executionContext) marshalNMetricConfig2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfig(ctx context.Context, sel ast.SelectionSet, v schema.MetricConfig) graphql.Marshaler {
return ec._MetricConfig(ctx, sel, &v)
}
func (ec *executionContext) marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfigᚄ(ctx context.Context, sel ast.SelectionSet, v []schema.MetricConfig) graphql.Marshaler {
func (ec *executionContext) marshalNMetricConfig2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfig(ctx context.Context, sel ast.SelectionSet, v []*schema.MetricConfig) graphql.Marshaler {
ret := make(graphql.Array, len(v))
var wg sync.WaitGroup
isLen1 := len(v) == 1
@@ -18494,7 +18490,7 @@ func (ec *executionContext) marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpi
if !isLen1 {
defer wg.Done()
}
ret[i] = ec.marshalNMetricConfig2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfig(ctx, sel, v[i])
ret[i] = ec.marshalNMetricConfig2githubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfig(ctx, sel, v[i])
}
if isLen1 {
f(i)
@@ -18514,6 +18510,16 @@ func (ec *executionContext) marshalNMetricConfig2ᚕgithubᚗcomᚋClusterCockpi
return ret
}
func (ec *executionContext) marshalNMetricConfig2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐMetricConfig(ctx context.Context, sel ast.SelectionSet, v *schema.MetricConfig) graphql.Marshaler {
if v == nil {
if !graphql.HasFieldError(ctx, graphql.GetFieldContext(ctx)) {
graphql.AddErrorf(ctx, "the requested element is null which the schema does not allow")
}
return graphql.Null
}
return ec._MetricConfig(ctx, sel, v)
}
func (ec *executionContext) marshalNMetricFootprints2ᚕᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐMetricFootprintsᚄ(ctx context.Context, sel ast.SelectionSet, v []*model.MetricFootprints) graphql.Marshaler {
ret := make(graphql.Array, len(v))
var wg sync.WaitGroup

View File

@@ -3,7 +3,7 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver
// implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.84
// Code generated by github.com/99designs/gqlgen version v0.17.85
import (
"context"
@@ -283,7 +283,7 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
// Test Access: Admins && Admin Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && (tscope == "global" || tscope == "admin") || user.Username == tscope {
// Remove from DB
if err = r.Repo.RemoveTagById(tid); err != nil {
if err = r.Repo.RemoveTagByID(tid); err != nil {
cclog.Warn("Error while removing tag")
return nil, err
} else {

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
import (

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
import (

View File

@@ -74,7 +74,7 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
cclog.Debugf("[METRICSTORE]> Using %d workers for checkpoint/archive operations\n", Keys.NumWorkers)
// Helper function to add metric configuration
addMetricConfig := func(mc schema.MetricConfig) {
addMetricConfig := func(mc *schema.MetricConfig) {
agg, err := AssignAggregationStrategy(mc.Aggregation)
if err != nil {
cclog.Warnf("Could not find aggregation strategy for metric config '%s': %s", mc.Name, err.Error())
@@ -88,7 +88,7 @@ func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
for _, c := range archive.Clusters {
for _, mc := range c.MetricConfig {
addMetricConfig(*mc)
addMetricConfig(mc)
}
for _, sc := range c.SubClusters {

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (

View File

@@ -0,0 +1,274 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"testing"
"time"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
type MockJobHook struct {
startCalled bool
stopCalled bool
startJobs []*schema.Job
stopJobs []*schema.Job
}
func (m *MockJobHook) JobStartCallback(job *schema.Job) {
m.startCalled = true
m.startJobs = append(m.startJobs, job)
}
func (m *MockJobHook) JobStopCallback(job *schema.Job) {
m.stopCalled = true
m.stopJobs = append(m.stopJobs, job)
}
func TestRegisterJobHook(t *testing.T) {
t.Run("register single hook", func(t *testing.T) {
hooks = nil
mock := &MockJobHook{}
RegisterJobHook(mock)
assert.NotNil(t, hooks)
assert.Len(t, hooks, 1)
assert.Equal(t, mock, hooks[0])
hooks = nil
})
t.Run("register multiple hooks", func(t *testing.T) {
hooks = nil
mock1 := &MockJobHook{}
mock2 := &MockJobHook{}
RegisterJobHook(mock1)
RegisterJobHook(mock2)
assert.Len(t, hooks, 2)
assert.Equal(t, mock1, hooks[0])
assert.Equal(t, mock2, hooks[1])
hooks = nil
})
t.Run("register nil hook does not add to hooks", func(t *testing.T) {
hooks = nil
RegisterJobHook(nil)
if hooks != nil {
assert.Len(t, hooks, 0, "Nil hook should not be added")
}
hooks = nil
})
}
func TestCallJobStartHooks(t *testing.T) {
t.Run("call start hooks with single job", func(t *testing.T) {
hooks = nil
mock := &MockJobHook{}
RegisterJobHook(mock)
job := &schema.Job{
JobID: 123,
User: "testuser",
Cluster: "testcluster",
}
CallJobStartHooks([]*schema.Job{job})
assert.True(t, mock.startCalled)
assert.False(t, mock.stopCalled)
assert.Len(t, mock.startJobs, 1)
assert.Equal(t, int64(123), mock.startJobs[0].JobID)
hooks = nil
})
t.Run("call start hooks with multiple jobs", func(t *testing.T) {
hooks = nil
mock := &MockJobHook{}
RegisterJobHook(mock)
jobs := []*schema.Job{
{JobID: 1, User: "user1", Cluster: "cluster1"},
{JobID: 2, User: "user2", Cluster: "cluster2"},
{JobID: 3, User: "user3", Cluster: "cluster3"},
}
CallJobStartHooks(jobs)
assert.True(t, mock.startCalled)
assert.Len(t, mock.startJobs, 3)
assert.Equal(t, int64(1), mock.startJobs[0].JobID)
assert.Equal(t, int64(2), mock.startJobs[1].JobID)
assert.Equal(t, int64(3), mock.startJobs[2].JobID)
hooks = nil
})
t.Run("call start hooks with multiple registered hooks", func(t *testing.T) {
hooks = nil
mock1 := &MockJobHook{}
mock2 := &MockJobHook{}
RegisterJobHook(mock1)
RegisterJobHook(mock2)
job := &schema.Job{
JobID: 456, User: "testuser", Cluster: "testcluster",
}
CallJobStartHooks([]*schema.Job{job})
assert.True(t, mock1.startCalled)
assert.True(t, mock2.startCalled)
assert.Len(t, mock1.startJobs, 1)
assert.Len(t, mock2.startJobs, 1)
hooks = nil
})
t.Run("call start hooks with nil hooks", func(t *testing.T) {
hooks = nil
job := &schema.Job{
JobID: 789, User: "testuser", Cluster: "testcluster",
}
CallJobStartHooks([]*schema.Job{job})
hooks = nil
})
t.Run("call start hooks with empty job list", func(t *testing.T) {
hooks = nil
mock := &MockJobHook{}
RegisterJobHook(mock)
CallJobStartHooks([]*schema.Job{})
assert.False(t, mock.startCalled)
assert.Len(t, mock.startJobs, 0)
hooks = nil
})
}
func TestCallJobStopHooks(t *testing.T) {
t.Run("call stop hooks with single job", func(t *testing.T) {
hooks = nil
mock := &MockJobHook{}
RegisterJobHook(mock)
job := &schema.Job{
JobID: 123,
User: "testuser",
Cluster: "testcluster",
}
CallJobStopHooks(job)
assert.True(t, mock.stopCalled)
assert.False(t, mock.startCalled)
assert.Len(t, mock.stopJobs, 1)
assert.Equal(t, int64(123), mock.stopJobs[0].JobID)
hooks = nil
})
t.Run("call stop hooks with multiple registered hooks", func(t *testing.T) {
hooks = nil
mock1 := &MockJobHook{}
mock2 := &MockJobHook{}
RegisterJobHook(mock1)
RegisterJobHook(mock2)
job := &schema.Job{
JobID: 456, User: "testuser", Cluster: "testcluster",
}
CallJobStopHooks(job)
assert.True(t, mock1.stopCalled)
assert.True(t, mock2.stopCalled)
assert.Len(t, mock1.stopJobs, 1)
assert.Len(t, mock2.stopJobs, 1)
hooks = nil
})
t.Run("call stop hooks with nil hooks", func(t *testing.T) {
hooks = nil
job := &schema.Job{
JobID: 789, User: "testuser", Cluster: "testcluster",
}
CallJobStopHooks(job)
hooks = nil
})
}
func TestSQLHooks(t *testing.T) {
_ = setup(t)
t.Run("hooks log queries in debug mode", func(t *testing.T) {
h := &Hooks{}
ctx := context.Background()
query := "SELECT * FROM job WHERE job_id = ?"
args := []any{123}
ctxWithTime, err := h.Before(ctx, query, args...)
require.NoError(t, err)
assert.NotNil(t, ctxWithTime)
beginTime := ctxWithTime.Value("begin")
require.NotNil(t, beginTime)
_, ok := beginTime.(time.Time)
assert.True(t, ok, "Begin time should be time.Time")
time.Sleep(10 * time.Millisecond)
ctxAfter, err := h.After(ctxWithTime, query, args...)
require.NoError(t, err)
assert.NotNil(t, ctxAfter)
})
}
func TestHookIntegration(t *testing.T) {
t.Run("hooks are called during job lifecycle", func(t *testing.T) {
hooks = nil
mock := &MockJobHook{}
RegisterJobHook(mock)
job := &schema.Job{
JobID: 999,
User: "integrationuser",
Cluster: "integrationcluster",
}
CallJobStartHooks([]*schema.Job{job})
assert.True(t, mock.startCalled)
assert.Equal(t, 1, len(mock.startJobs))
CallJobStopHooks(job)
assert.True(t, mock.stopCalled)
assert.Equal(t, 1, len(mock.stopJobs))
assert.Equal(t, mock.startJobs[0].JobID, mock.stopJobs[0].JobID)
hooks = nil
})
}

View File

@@ -80,18 +80,33 @@ import (
)
var (
jobRepoOnce sync.Once
// jobRepoOnce ensures singleton initialization of the JobRepository
jobRepoOnce sync.Once
// jobRepoInstance holds the single instance of JobRepository
jobRepoInstance *JobRepository
)
// JobRepository provides database access for job-related operations.
// It implements the repository pattern to abstract database interactions
// and provides caching for improved performance.
//
// The repository is a singleton initialized via GetJobRepository().
// All database queries use prepared statements via stmtCache for efficiency.
// Frequently accessed data (metadata, energy footprints) is cached in an LRU cache.
type JobRepository struct {
DB *sqlx.DB
stmtCache *sq.StmtCache
cache *lrucache.Cache
driver string
Mutex sync.Mutex
DB *sqlx.DB // Database connection pool
stmtCache *sq.StmtCache // Prepared statement cache for query optimization
cache *lrucache.Cache // LRU cache for metadata and footprint data
driver string // Database driver name (e.g., "sqlite3")
Mutex sync.Mutex // Mutex for thread-safe operations
}
// GetJobRepository returns the singleton instance of JobRepository.
// The repository is initialized lazily on first access with database connection,
// prepared statement cache, and LRU cache configured from repoConfig.
//
// This function is thread-safe and ensures only one instance is created.
// It must be called after Connect() has established a database connection.
func GetJobRepository() *JobRepository {
jobRepoOnce.Do(func() {
db := GetConnection()
@@ -107,6 +122,8 @@ func GetJobRepository() *JobRepository {
return jobRepoInstance
}
// jobColumns defines the standard set of columns selected from the job table.
// Used consistently across all job queries to ensure uniform data retrieval.
var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
@@ -115,6 +132,8 @@ var jobColumns []string = []string{
"job.footprint", "job.energy",
}
// jobCacheColumns defines columns from the job_cache table, mirroring jobColumns.
// Used for queries against cached job data for performance optimization.
var jobCacheColumns []string = []string{
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
@@ -124,6 +143,14 @@ var jobCacheColumns []string = []string{
"job_cache.footprint", "job_cache.energy",
}
// scanJob converts a database row into a schema.Job struct.
// It handles JSON unmarshaling of resources and footprint fields,
// and calculates accurate duration for running jobs.
//
// Parameters:
// - row: Database row implementing Scan() interface (sql.Row or sql.Rows)
//
// Returns the populated Job struct or an error if scanning or unmarshaling fails.
func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
job := &schema.Job{}
@@ -186,6 +213,16 @@ func (r *JobRepository) Flush() error {
return nil
}
// FetchMetadata retrieves and unmarshals the metadata JSON for a job.
// Metadata is cached with a 24-hour TTL to improve performance.
//
// The metadata field stores arbitrary key-value pairs associated with a job,
// such as tags, labels, or custom attributes added by external systems.
//
// Parameters:
// - job: Job struct with valid ID field, metadata will be populated in job.MetaData
//
// Returns the metadata map or an error if the job is nil or database query fails.
func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
if job == nil {
return nil, fmt.Errorf("job cannot be nil")
@@ -218,6 +255,16 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
return job.MetaData, nil
}
// UpdateMetadata adds or updates a single metadata key-value pair for a job.
// The entire metadata map is re-marshaled and stored, and the cache is invalidated.
// Also triggers archive metadata update via archive.UpdateMetadata.
//
// Parameters:
// - job: Job struct with valid ID, existing metadata will be fetched if not present
// - key: Metadata key to set
// - val: Metadata value to set
//
// Returns an error if the job is nil, metadata fetch fails, or database update fails.
func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err error) {
if job == nil {
return fmt.Errorf("job cannot be nil")
@@ -228,7 +275,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
if job.MetaData == nil {
if _, err = r.FetchMetadata(job); err != nil {
cclog.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
return err
return fmt.Errorf("failed to fetch metadata for job %d: %w", job.ID, err)
}
}
@@ -243,7 +290,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
cclog.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
return err
return fmt.Errorf("failed to marshal metadata for job %d: %w", job.ID, err)
}
if _, err = sq.Update("job").
@@ -251,13 +298,23 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
Where("job.id = ?", job.ID).
RunWith(r.stmtCache).Exec(); err != nil {
cclog.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err
return fmt.Errorf("failed to update metadata in database for job %d: %w", job.ID, err)
}
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
return archive.UpdateMetadata(job, job.MetaData)
}
// FetchFootprint retrieves and unmarshals the performance footprint JSON for a job.
// Unlike FetchMetadata, footprints are NOT cached as they can be large and change frequently.
//
// The footprint contains summary statistics (avg/min/max) for monitored metrics,
// stored as JSON with keys like "cpu_load_avg", "mem_used_max", etc.
//
// Parameters:
// - job: Job struct with valid ID, footprint will be populated in job.Footprint
//
// Returns the footprint map or an error if the job is nil or database query fails.
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
if job == nil {
return nil, fmt.Errorf("job cannot be nil")
@@ -284,6 +341,16 @@ func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, err
return job.Footprint, nil
}
// FetchEnergyFootprint retrieves and unmarshals the energy footprint JSON for a job.
// Energy footprints are cached with a 24-hour TTL as they are frequently accessed but rarely change.
//
// The energy footprint contains calculated energy consumption (in kWh) per metric,
// stored as JSON with keys like "power_avg", "acc_power_avg", etc.
//
// Parameters:
// - job: Job struct with valid ID, energy footprint will be populated in job.EnergyFootprint
//
// Returns the energy footprint map or an error if the job is nil or database query fails.
func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
if job == nil {
return nil, fmt.Errorf("job cannot be nil")
@@ -316,6 +383,18 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6
return job.EnergyFootprint, nil
}
// DeleteJobsBefore removes jobs older than the specified start time.
// Optionally preserves tagged jobs to protect important data from deletion.
// Cache entries for deleted jobs are automatically invalidated.
//
// This is typically used for data retention policies and cleanup operations.
// WARNING: This is a destructive operation that permanently deletes job records.
//
// Parameters:
// - startTime: Unix timestamp, jobs with start_time < this value will be deleted
// - omitTagged: If true, skip jobs that have associated tags (jobtag entries)
//
// Returns the count of deleted jobs or an error if the operation fails.
func (r *JobRepository) DeleteJobsBefore(startTime int64, omitTagged bool) (int, error) {
var cnt int
q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
@@ -371,6 +450,13 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64, omitTagged bool) (int,
return cnt, err
}
// DeleteJobByID permanently removes a single job by its database ID.
// Cache entries for the deleted job are automatically invalidated.
//
// Parameters:
// - id: Database ID (primary key) of the job to delete
//
// Returns an error if the deletion fails.
func (r *JobRepository) DeleteJobByID(id int64) error {
// Invalidate cache entries before deletion
r.cache.Del(fmt.Sprintf("metadata:%d", id))
@@ -388,6 +474,24 @@ func (r *JobRepository) DeleteJobByID(id int64) error {
return err
}
// FindUserOrProjectOrJobname attempts to interpret a search term as a job ID,
// username, project ID, or job name by querying the database.
//
// Search logic (in priority order):
// 1. If searchterm is numeric, treat as job ID (returned immediately)
// 2. Try exact match in job.hpc_user column (username)
// 3. Try LIKE match in hpc_user.name column (real name)
// 4. Try exact match in job.project column (project ID)
// 5. If no matches, return searchterm as jobname for GraphQL query
//
// This powers the searchbar functionality for flexible job searching.
// Requires authenticated user for database lookups (returns empty if user is nil).
//
// Parameters:
// - user: Authenticated user context, required for database access
// - searchterm: Search string to interpret
//
// Returns up to one non-empty value among (jobid, username, project, jobname).
func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
if searchterm == "" {
return "", "", "", ""
@@ -423,6 +527,19 @@ var (
ErrForbidden = errors.New("not authorized")
)
// FindColumnValue performs a generic column lookup in a database table with role-based access control.
// Only users with admin, support, or manager roles can execute this query.
//
// Parameters:
// - user: User context for authorization check
// - searchterm: Value to search for (exact match or LIKE pattern)
// - table: Database table name to query
// - selectColumn: Column name to return in results
// - whereColumn: Column name to filter on
// - isLike: If true, use LIKE with wildcards; if false, use exact equality
//
// Returns the first matching value, ErrForbidden if user lacks permission,
// or ErrNotFound if no matches are found.
func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) {
if user == nil {
return "", fmt.Errorf("user cannot be nil")
@@ -453,6 +570,19 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta
}
}
// FindColumnValues performs a generic column lookup returning multiple matches with role-based access control.
// Similar to FindColumnValue but returns all matching values instead of just the first.
// Only users with admin, support, or manager roles can execute this query.
//
// Parameters:
// - user: User context for authorization check
// - query: Search pattern (always uses LIKE with wildcards)
// - table: Database table name to query
// - selectColumn: Column name to return in results
// - whereColumn: Column name to filter on
//
// Returns a slice of matching values, ErrForbidden if user lacks permission,
// or ErrNotFound if no matches are found.
func (r *JobRepository) FindColumnValues(user *schema.User, query string, table string, selectColumn string, whereColumn string) (results []string, err error) {
if user == nil {
return nil, fmt.Errorf("user cannot be nil")
@@ -487,6 +617,13 @@ func (r *JobRepository) FindColumnValues(user *schema.User, query string, table
}
}
// Partitions returns a list of distinct cluster partitions for a given cluster.
// Results are cached with a 1-hour TTL to improve performance.
//
// Parameters:
// - cluster: Cluster name to query partitions for
//
// Returns a slice of partition names or an error if the database query fails.
func (r *JobRepository) Partitions(cluster string) ([]string, error) {
var err error
start := time.Now()
@@ -549,7 +686,19 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
return subclusters, nil
}
// FIXME: Set duration to requested walltime?
// StopJobsExceedingWalltimeBy marks running jobs as failed if they exceed their walltime limit.
// This is typically called periodically to clean up stuck or orphaned jobs.
//
// Jobs are marked with:
// - monitoring_status: MonitoringStatusArchivingFailed
// - duration: 0
// - job_state: JobStateFailed
//
// Parameters:
// - seconds: Grace period beyond walltime before marking as failed
//
// Returns an error if the database update fails.
// Logs the number of jobs marked as failed if any were affected.
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
currentTime := time.Now().Unix()
@@ -579,6 +728,12 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
return nil
}
// FindJobIdsByTag returns all job database IDs associated with a specific tag.
//
// Parameters:
// - tagID: Database ID of the tag to search for
//
// Returns a slice of job IDs or an error if the query fails.
func (r *JobRepository) FindJobIdsByTag(tagID int64) ([]int64, error) {
query := sq.Select("job.id").From("job").
Join("jobtag ON jobtag.job_id = job.id").
@@ -606,7 +761,13 @@ func (r *JobRepository) FindJobIdsByTag(tagID int64) ([]int64, error) {
return jobIds, nil
}
// FIXME: Reconsider filtering short jobs with harcoded threshold
// FindRunningJobs returns all currently running jobs for a specific cluster.
// Filters out short-running jobs based on repoConfig.MinRunningJobDuration threshold.
//
// Parameters:
// - cluster: Cluster name to filter jobs
//
// Returns a slice of running job objects or an error if the query fails.
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where("job.cluster = ?", cluster).
@@ -634,6 +795,12 @@ func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
return jobs, nil
}
// UpdateDuration recalculates and updates the duration field for all running jobs.
// Called periodically to keep job durations current without querying individual jobs.
//
// Duration is calculated as: current_time - job.start_time
//
// Returns an error if the database update fails.
func (r *JobRepository) UpdateDuration() error {
stmnt := sq.Update("job").
Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
@@ -648,6 +815,16 @@ func (r *JobRepository) UpdateDuration() error {
return nil
}
// FindJobsBetween returns jobs within a specified time range.
// If startTimeBegin is 0, returns all jobs before startTimeEnd.
// Optionally excludes tagged jobs from results.
//
// Parameters:
// - startTimeBegin: Unix timestamp for range start (use 0 for unbounded start)
// - startTimeEnd: Unix timestamp for range end
// - omitTagged: If true, exclude jobs with associated tags
//
// Returns a slice of jobs or an error if the time range is invalid or query fails.
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64, omitTagged bool) ([]*schema.Job, error) {
var query sq.SelectBuilder
@@ -688,6 +865,14 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
return jobs, nil
}
// UpdateMonitoringStatus updates the monitoring status for a job and invalidates its cache entries.
// Cache invalidation affects both metadata and energy footprint to ensure consistency.
//
// Parameters:
// - job: Database ID of the job to update
// - monitoringStatus: New monitoring status value (see schema.MonitoringStatus constants)
//
// Returns an error if the database update fails.
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
// Invalidate cache entries as monitoring status affects job state
r.cache.Del(fmt.Sprintf("metadata:%d", job))
@@ -704,6 +889,13 @@ func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32
return nil
}
// Execute runs a Squirrel UpdateBuilder statement against the database.
// This is a generic helper for executing pre-built update queries.
//
// Parameters:
// - stmt: Squirrel UpdateBuilder with prepared update query
//
// Returns an error if the execution fails.
func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
cclog.Errorf("Error while executing statement: %v", err)
@@ -713,6 +905,14 @@ func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
return nil
}
// MarkArchived adds monitoring status update to an existing UpdateBuilder statement.
// This is a builder helper used when constructing multi-field update queries.
//
// Parameters:
// - stmt: Existing UpdateBuilder to modify
// - monitoringStatus: Monitoring status value to set
//
// Returns the modified UpdateBuilder for method chaining.
func (r *JobRepository) MarkArchived(
stmt sq.UpdateBuilder,
monitoringStatus int32,
@@ -720,11 +920,22 @@ func (r *JobRepository) MarkArchived(
return stmt.Set("monitoring_status", monitoringStatus)
}
// UpdateEnergy calculates and updates the energy consumption for a job.
// This is called for running jobs during intermediate updates or when archiving.
//
// Energy calculation formula:
// - For "power" metrics: Energy (kWh) = (Power_avg * NumNodes * Duration_hours) / 1000
// - For "energy" metrics: Currently not implemented (would need sum statistics)
//
// The calculation accounts for:
// - Multi-node jobs: Multiplies by NumNodes to get total cluster energy
// - Shared jobs: Node average is already based on partial resources, so NumNodes=1
// - Unit conversion: Watts * hours / 1000 = kilowatt-hours (kWh)
// - Rounding: Results rounded to 2 decimal places
func (r *JobRepository) UpdateEnergy(
stmt sq.UpdateBuilder,
jobMeta *schema.Job,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
cclog.Errorf("cannot get subcluster: %s", err.Error())
@@ -732,25 +943,27 @@ func (r *JobRepository) UpdateEnergy(
}
energyFootprint := make(map[string]float64)
// Total Job Energy Outside Loop
// Accumulate total energy across all energy-related metrics
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
// Calculate energy for this specific metric
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
switch sc.MetricConfig[i].Energy {
case "energy": // this metric has energy as unit (Joules or Wh)
case "energy": // Metric already in energy units (Joules or Wh)
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
case "power": // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
// FIXME: Needs sum as stats type to accumulate energy values over time
case "power": // Metric in power units (Watts)
// Energy (kWh) = Power (W) × Time (h) / 1000
// Formula: (avg_power_per_node * num_nodes) * (duration_sec / 3600) / 1000
//
// Breakdown:
// LoadJobStat(jobMeta, fp, "avg") = average power per node (W)
// jobMeta.NumNodes = number of nodes (1 for shared jobs)
// jobMeta.Duration / 3600.0 = duration in hours
// / 1000.0 = convert Wh to kWh
rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0 // Round to 2 decimal places
}
} else {
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
@@ -758,8 +971,6 @@ func (r *JobRepository) UpdateEnergy(
energyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
// cclog.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
}
var rawFootprint []byte
@@ -771,11 +982,19 @@ func (r *JobRepository) UpdateEnergy(
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
}
// UpdateFootprint calculates and updates the performance footprint for a job.
// This is called for running jobs during intermediate updates or when archiving.
//
// A footprint is a summary statistic (avg/min/max) for each monitored metric.
// The specific statistic type is defined in the cluster config's Footprint field.
// Results are stored as JSON with keys like "metric_avg", "metric_max", etc.
//
// Example: For a "cpu_load" metric with Footprint="avg", this stores
// the average CPU load across all nodes as "cpu_load_avg": 85.3
func (r *JobRepository) UpdateFootprint(
stmt sq.UpdateBuilder,
jobMeta *schema.Job,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
cclog.Errorf("cannot get subcluster: %s", err.Error())
@@ -783,7 +1002,10 @@ func (r *JobRepository) UpdateFootprint(
}
footprint := make(map[string]float64)
// Build footprint map with metric_stattype as keys
for _, fp := range sc.Footprint {
// Determine which statistic to use: avg, min, or max
// First check global metric config, then cluster-specific config
var statType string
for _, gm := range archive.GlobalMetricList {
if gm.Name == fp {
@@ -791,15 +1013,18 @@ func (r *JobRepository) UpdateFootprint(
}
}
// Validate statistic type
if statType != "avg" && statType != "min" && statType != "max" {
cclog.Warnf("unknown statType for footprint update: %s", statType)
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
}
// Override with cluster-specific config if available
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
// Store as "metric_stattype": value (e.g., "cpu_load_avg": 85.3)
name := fmt.Sprintf("%s_%s", fp, statType)
footprint[name] = LoadJobStat(jobMeta, fp, statType)
}

View File

@@ -0,0 +1,500 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"encoding/json"
"testing"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// createTestJob creates a minimal valid job for testing
func createTestJob(jobID int64, cluster string) *schema.Job {
return &schema.Job{
JobID: jobID,
User: "testuser",
Project: "testproject",
Cluster: cluster,
SubCluster: "main",
Partition: "batch",
NumNodes: 1,
NumHWThreads: 4,
NumAcc: 0,
Shared: "none",
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
SMT: 1,
State: schema.JobStateRunning,
StartTime: 1234567890,
Duration: 0,
Walltime: 3600,
Resources: []*schema.Resource{
{
Hostname: "node01",
HWThreads: []int{0, 1, 2, 3},
},
},
Footprint: map[string]float64{
"cpu_load": 50.0,
"mem_used": 8000.0,
"flops_any": 0.5,
"mem_bw": 10.0,
"net_bw": 2.0,
"file_bw": 1.0,
"cpu_used": 2.0,
"cpu_load_core": 12.5,
},
MetaData: map[string]string{
"jobName": "test_job",
"queue": "normal",
"qosName": "default",
"accountName": "testaccount",
},
}
}
func TestInsertJob(t *testing.T) {
r := setup(t)
t.Run("successful insertion", func(t *testing.T) {
job := createTestJob(999001, "testcluster")
job.RawResources, _ = json.Marshal(job.Resources)
job.RawFootprint, _ = json.Marshal(job.Footprint)
job.RawMetaData, _ = json.Marshal(job.MetaData)
id, err := r.InsertJob(job)
require.NoError(t, err, "InsertJob should succeed")
assert.Greater(t, id, int64(0), "Should return valid insert ID")
// Verify job was inserted into job_cache
var count int
err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id = ? AND cluster = ?",
job.JobID, job.Cluster).Scan(&count)
require.NoError(t, err)
assert.Equal(t, 1, count, "Job should be in job_cache table")
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
require.NoError(t, err)
})
t.Run("insertion with all fields", func(t *testing.T) {
job := createTestJob(999002, "testcluster")
job.ArrayJobID = 5000
job.Energy = 1500.5
job.RawResources, _ = json.Marshal(job.Resources)
job.RawFootprint, _ = json.Marshal(job.Footprint)
job.RawMetaData, _ = json.Marshal(job.MetaData)
id, err := r.InsertJob(job)
require.NoError(t, err)
assert.Greater(t, id, int64(0))
// Verify all fields were stored correctly
var retrievedJob schema.Job
err = r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, energy
FROM job_cache WHERE id = ?`, id).Scan(
&retrievedJob.JobID, &retrievedJob.User, &retrievedJob.Project,
&retrievedJob.Cluster, &retrievedJob.ArrayJobID, &retrievedJob.Energy)
require.NoError(t, err)
assert.Equal(t, job.JobID, retrievedJob.JobID)
assert.Equal(t, job.User, retrievedJob.User)
assert.Equal(t, job.Project, retrievedJob.Project)
assert.Equal(t, job.Cluster, retrievedJob.Cluster)
assert.Equal(t, job.ArrayJobID, retrievedJob.ArrayJobID)
assert.Equal(t, job.Energy, retrievedJob.Energy)
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
})
}
func TestStart(t *testing.T) {
r := setup(t)
t.Run("successful job start with JSON encoding", func(t *testing.T) {
job := createTestJob(999003, "testcluster")
id, err := r.Start(job)
require.NoError(t, err, "Start should succeed")
assert.Greater(t, id, int64(0), "Should return valid insert ID")
// Verify job was inserted and JSON fields were encoded
var rawResources, rawFootprint, rawMetaData []byte
err = r.DB.QueryRow(`SELECT resources, footprint, meta_data FROM job_cache WHERE id = ?`, id).Scan(
&rawResources, &rawFootprint, &rawMetaData)
require.NoError(t, err)
// Verify resources JSON
var resources []*schema.Resource
err = json.Unmarshal(rawResources, &resources)
require.NoError(t, err, "Resources should be valid JSON")
assert.Equal(t, 1, len(resources))
assert.Equal(t, "node01", resources[0].Hostname)
// Verify footprint JSON
var footprint map[string]float64
err = json.Unmarshal(rawFootprint, &footprint)
require.NoError(t, err, "Footprint should be valid JSON")
assert.Equal(t, 50.0, footprint["cpu_load"])
assert.Equal(t, 8000.0, footprint["mem_used"])
// Verify metadata JSON
var metaData map[string]string
err = json.Unmarshal(rawMetaData, &metaData)
require.NoError(t, err, "MetaData should be valid JSON")
assert.Equal(t, "test_job", metaData["jobName"])
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
})
t.Run("job start with empty footprint", func(t *testing.T) {
job := createTestJob(999004, "testcluster")
job.Footprint = map[string]float64{}
id, err := r.Start(job)
require.NoError(t, err)
assert.Greater(t, id, int64(0))
// Verify empty footprint was encoded as empty JSON object
var rawFootprint []byte
err = r.DB.QueryRow(`SELECT footprint FROM job_cache WHERE id = ?`, id).Scan(&rawFootprint)
require.NoError(t, err)
assert.Equal(t, []byte("{}"), rawFootprint)
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
})
t.Run("job start with nil metadata", func(t *testing.T) {
job := createTestJob(999005, "testcluster")
job.MetaData = nil
id, err := r.Start(job)
require.NoError(t, err)
assert.Greater(t, id, int64(0))
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
})
}
func TestStop(t *testing.T) {
r := setup(t)
t.Run("successful job stop", func(t *testing.T) {
// First insert a job using Start
job := createTestJob(999106, "testcluster")
id, err := r.Start(job)
require.NoError(t, err)
// Move from job_cache to job table (simulate SyncJobs) - exclude id to let it auto-increment
_, err = r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint)
SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint
FROM job_cache WHERE id = ?`, id)
require.NoError(t, err)
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
// Get the new job id in the job table
err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&id)
require.NoError(t, err)
// Stop the job
duration := int32(3600)
state := schema.JobStateCompleted
monitoringStatus := int32(schema.MonitoringStatusArchivingSuccessful)
err = r.Stop(id, duration, state, monitoringStatus)
require.NoError(t, err, "Stop should succeed")
// Verify job was updated
var retrievedDuration int32
var retrievedState string
var retrievedMonStatus int32
err = r.DB.QueryRow(`SELECT duration, job_state, monitoring_status FROM job WHERE id = ?`, id).Scan(
&retrievedDuration, &retrievedState, &retrievedMonStatus)
require.NoError(t, err)
assert.Equal(t, duration, retrievedDuration)
assert.Equal(t, string(state), retrievedState)
assert.Equal(t, monitoringStatus, retrievedMonStatus)
// Clean up
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
require.NoError(t, err)
})
t.Run("stop updates job state transitions", func(t *testing.T) {
// Insert a job
job := createTestJob(999107, "testcluster")
id, err := r.Start(job)
require.NoError(t, err)
// Move to job table
_, err = r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint)
SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint
FROM job_cache WHERE id = ?`, id)
require.NoError(t, err)
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
// Get the new job id in the job table
err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&id)
require.NoError(t, err)
// Stop the job with different duration
err = r.Stop(id, 7200, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful))
require.NoError(t, err)
// Verify the duration was updated correctly
var duration int32
err = r.DB.QueryRow(`SELECT duration FROM job WHERE id = ?`, id).Scan(&duration)
require.NoError(t, err)
assert.Equal(t, int32(7200), duration, "Duration should be updated to 7200")
// Clean up
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
require.NoError(t, err)
})
t.Run("stop with different states", func(t *testing.T) {
testCases := []struct {
name string
jobID int64
state schema.JobState
monitoringStatus int32
}{
{"completed", 999108, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful)},
{"failed", 999118, schema.JobStateFailed, int32(schema.MonitoringStatusArchivingSuccessful)},
{"cancelled", 999119, schema.JobStateCancelled, int32(schema.MonitoringStatusArchivingSuccessful)},
{"timeout", 999120, schema.JobStateTimeout, int32(schema.MonitoringStatusArchivingSuccessful)},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
job := createTestJob(tc.jobID, "testcluster")
id, err := r.Start(job)
require.NoError(t, err)
// Move to job table
_, err = r.DB.Exec(`INSERT INTO job (job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint)
SELECT job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources, num_nodes,
num_hwthreads, num_acc, smt, shared, monitoring_status, energy, energy_footprint, footprint
FROM job_cache WHERE id = ?`, id)
require.NoError(t, err)
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
// Get the new job id in the job table
err = r.DB.QueryRow("SELECT id FROM job WHERE job_id = ? AND cluster = ? AND start_time = ?",
job.JobID, job.Cluster, job.StartTime).Scan(&id)
require.NoError(t, err)
// Stop with specific state
err = r.Stop(id, 1800, tc.state, tc.monitoringStatus)
require.NoError(t, err)
// Verify state was set correctly
var retrievedState string
err = r.DB.QueryRow(`SELECT job_state FROM job WHERE id = ?`, id).Scan(&retrievedState)
require.NoError(t, err)
assert.Equal(t, string(tc.state), retrievedState)
// Clean up
_, err = r.DB.Exec("DELETE FROM job WHERE id = ?", id)
require.NoError(t, err)
})
}
})
}
func TestStopCached(t *testing.T) {
r := setup(t)
t.Run("successful stop cached job", func(t *testing.T) {
// Insert a job in job_cache
job := createTestJob(999009, "testcluster")
id, err := r.Start(job)
require.NoError(t, err)
// Stop the cached job
duration := int32(3600)
state := schema.JobStateCompleted
monitoringStatus := int32(schema.MonitoringStatusArchivingSuccessful)
err = r.StopCached(id, duration, state, monitoringStatus)
require.NoError(t, err, "StopCached should succeed")
// Verify job was updated in job_cache table
var retrievedDuration int32
var retrievedState string
var retrievedMonStatus int32
err = r.DB.QueryRow(`SELECT duration, job_state, monitoring_status FROM job_cache WHERE id = ?`, id).Scan(
&retrievedDuration, &retrievedState, &retrievedMonStatus)
require.NoError(t, err)
assert.Equal(t, duration, retrievedDuration)
assert.Equal(t, string(state), retrievedState)
assert.Equal(t, monitoringStatus, retrievedMonStatus)
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
})
t.Run("stop cached job does not affect job table", func(t *testing.T) {
// Insert a job in job_cache
job := createTestJob(999010, "testcluster")
id, err := r.Start(job)
require.NoError(t, err)
// Stop the cached job
err = r.StopCached(id, 3600, schema.JobStateCompleted, int32(schema.MonitoringStatusArchivingSuccessful))
require.NoError(t, err)
// Verify job table was not affected
var count int
err = r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE job_id = ? AND cluster = ?`,
job.JobID, job.Cluster).Scan(&count)
require.NoError(t, err)
assert.Equal(t, 0, count, "Job table should not be affected by StopCached")
// Clean up
_, err = r.DB.Exec("DELETE FROM job_cache WHERE id = ?", id)
require.NoError(t, err)
})
}
func TestSyncJobs(t *testing.T) {
r := setup(t)
t.Run("sync jobs from cache to main table", func(t *testing.T) {
// Ensure cache is empty first
_, err := r.DB.Exec("DELETE FROM job_cache")
require.NoError(t, err)
// Insert multiple jobs in job_cache
job1 := createTestJob(999011, "testcluster")
job2 := createTestJob(999012, "testcluster")
job3 := createTestJob(999013, "testcluster")
_, err = r.Start(job1)
require.NoError(t, err)
_, err = r.Start(job2)
require.NoError(t, err)
_, err = r.Start(job3)
require.NoError(t, err)
// Verify jobs are in job_cache
var cacheCount int
err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
require.NoError(t, err)
assert.Equal(t, 3, cacheCount, "All jobs should be in job_cache")
// Sync jobs
jobs, err := r.SyncJobs()
require.NoError(t, err, "SyncJobs should succeed")
assert.Equal(t, 3, len(jobs), "Should return 3 synced jobs")
// Verify jobs were moved to job table
var jobCount int
err = r.DB.QueryRow("SELECT COUNT(*) FROM job WHERE job_id IN (?, ?, ?)",
job1.JobID, job2.JobID, job3.JobID).Scan(&jobCount)
require.NoError(t, err)
assert.Equal(t, 3, jobCount, "All jobs should be in job table")
// Verify job_cache was cleared
err = r.DB.QueryRow("SELECT COUNT(*) FROM job_cache WHERE job_id IN (?, ?, ?)",
job1.JobID, job2.JobID, job3.JobID).Scan(&cacheCount)
require.NoError(t, err)
assert.Equal(t, 0, cacheCount, "job_cache should be empty after sync")
// Clean up
_, err = r.DB.Exec("DELETE FROM job WHERE job_id IN (?, ?, ?)", job1.JobID, job2.JobID, job3.JobID)
require.NoError(t, err)
})
t.Run("sync preserves job data", func(t *testing.T) {
// Ensure cache is empty first
_, err := r.DB.Exec("DELETE FROM job_cache")
require.NoError(t, err)
// Insert a job with specific data
job := createTestJob(999014, "testcluster")
job.ArrayJobID = 7777
job.Energy = 2500.75
job.Duration = 1800
id, err := r.Start(job)
require.NoError(t, err)
// Update some fields to simulate job progress
result, err := r.DB.Exec(`UPDATE job_cache SET duration = ?, energy = ? WHERE id = ?`,
3600, 3000.5, id)
require.NoError(t, err)
rowsAffected, _ := result.RowsAffected()
require.Equal(t, int64(1), rowsAffected, "UPDATE should affect exactly 1 row")
// Verify the update worked
var checkDuration int32
var checkEnergy float64
err = r.DB.QueryRow(`SELECT duration, energy FROM job_cache WHERE id = ?`, id).Scan(&checkDuration, &checkEnergy)
require.NoError(t, err)
require.Equal(t, int32(3600), checkDuration, "Duration should be updated to 3600 before sync")
require.Equal(t, 3000.5, checkEnergy, "Energy should be updated to 3000.5 before sync")
// Sync jobs
jobs, err := r.SyncJobs()
require.NoError(t, err)
require.Equal(t, 1, len(jobs), "Should return exactly 1 synced job")
// Verify in database
var dbJob schema.Job
err = r.DB.QueryRow(`SELECT job_id, hpc_user, project, cluster, array_job_id, duration, energy
FROM job WHERE job_id = ? AND cluster = ?`, job.JobID, job.Cluster).Scan(
&dbJob.JobID, &dbJob.User, &dbJob.Project, &dbJob.Cluster,
&dbJob.ArrayJobID, &dbJob.Duration, &dbJob.Energy)
require.NoError(t, err)
assert.Equal(t, job.JobID, dbJob.JobID)
assert.Equal(t, int32(3600), dbJob.Duration)
assert.Equal(t, 3000.5, dbJob.Energy)
// Clean up
_, err = r.DB.Exec("DELETE FROM job WHERE job_id = ? AND cluster = ?", job.JobID, job.Cluster)
require.NoError(t, err)
})
t.Run("sync with empty cache returns empty list", func(t *testing.T) {
// Ensure cache is empty
_, err := r.DB.Exec("DELETE FROM job_cache")
require.NoError(t, err)
// Sync should return empty list
jobs, err := r.SyncJobs()
require.NoError(t, err)
assert.Equal(t, 0, len(jobs), "Should return empty list when cache is empty")
})
}

View File

@@ -2,6 +2,7 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
@@ -10,8 +11,36 @@ import (
"github.com/ClusterCockpit/cc-lib/v2/schema"
)
// JobHook interface allows external components to hook into job lifecycle events.
// Implementations can perform actions when jobs start or stop, such as tagging,
// logging, notifications, or triggering external workflows.
//
// Example implementation:
//
// type MyJobTagger struct{}
//
// func (t *MyJobTagger) JobStartCallback(job *schema.Job) {
// if job.NumNodes > 100 {
// // Tag large jobs automatically
// }
// }
//
// func (t *MyJobTagger) JobStopCallback(job *schema.Job) {
// if job.State == schema.JobStateFailed {
// // Log or alert on failed jobs
// }
// }
//
// Register hooks during application initialization:
//
// repository.RegisterJobHook(&MyJobTagger{})
type JobHook interface {
// JobStartCallback is invoked when one or more jobs start.
// This is called synchronously, so implementations should be fast.
JobStartCallback(job *schema.Job)
// JobStopCallback is invoked when a job completes.
// This is called synchronously, so implementations should be fast.
JobStopCallback(job *schema.Job)
}
@@ -20,7 +49,13 @@ var (
hooks []JobHook
)
func RegisterJobJook(hook JobHook) {
// RegisterJobHook registers a JobHook to receive job lifecycle callbacks.
// Multiple hooks can be registered and will be called in registration order.
// This function is safe to call multiple times and is typically called during
// application initialization.
//
// Nil hooks are silently ignored to simplify conditional registration.
func RegisterJobHook(hook JobHook) {
initOnce.Do(func() {
hooks = make([]JobHook, 0)
})
@@ -30,6 +65,12 @@ func RegisterJobJook(hook JobHook) {
}
}
// CallJobStartHooks invokes all registered JobHook.JobStartCallback methods
// for each job in the provided slice. This is called internally by the repository
// when jobs are started (e.g., via StartJob or batch job imports).
//
// Hooks are called synchronously in registration order. If a hook panics,
// the panic will propagate to the caller.
func CallJobStartHooks(jobs []*schema.Job) {
if hooks == nil {
return
@@ -44,6 +85,12 @@ func CallJobStartHooks(jobs []*schema.Job) {
}
}
// CallJobStopHooks invokes all registered JobHook.JobStopCallback methods
// for the provided job. This is called internally by the repository when a
// job completes (e.g., via StopJob or job state updates).
//
// Hooks are called synchronously in registration order. If a hook panics,
// the panic will propagate to the caller.
func CallJobStopHooks(job *schema.Job) {
if hooks == nil {
return

View File

@@ -90,13 +90,13 @@ func TestFindJobsBetween(t *testing.T) {
// 2. Create a tag
tagName := fmt.Sprintf("testtag_%d", time.Now().UnixNano())
tagId, err := r.CreateTag("testtype", tagName, "global")
tagID, err := r.CreateTag("testtype", tagName, "global")
if err != nil {
t.Fatal(err)
}
// 3. Link Tag (Manually to avoid archive dependency side-effects in unit test)
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, tagId)
_, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, tagID)
if err != nil {
t.Fatal(err)
}

View File

@@ -16,11 +16,29 @@ import (
"github.com/golang-migrate/migrate/v4/source/iofs"
)
// Version is the current database schema version required by this version of cc-backend.
// When the database schema changes, this version is incremented and a new migration file
// is added to internal/repository/migrations/sqlite3/.
//
// Version history:
// - Version 10: Current version
//
// Migration files are embedded at build time from the migrations directory.
const Version uint = 10
//go:embed migrations/*
var migrationFiles embed.FS
// checkDBVersion verifies that the database schema version matches the expected version.
// This is called automatically during Connect() to ensure schema compatibility.
//
// Returns an error if:
// - Database version is older than expected (needs migration)
// - Database version is newer than expected (needs app upgrade)
// - Database is in a dirty state (failed migration)
//
// A "dirty" database indicates a migration was started but not completed successfully.
// This requires manual intervention to fix the database and force the version.
func checkDBVersion(db *sql.DB) error {
driver, err := sqlite3.WithInstance(db, &sqlite3.Config{})
if err != nil {
@@ -58,6 +76,8 @@ func checkDBVersion(db *sql.DB) error {
return nil
}
// getMigrateInstance creates a new migration instance for the given database file.
// This is used internally by MigrateDB, RevertDB, and ForceDB.
func getMigrateInstance(db string) (m *migrate.Migrate, err error) {
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
if err != nil {
@@ -72,6 +92,23 @@ func getMigrateInstance(db string) (m *migrate.Migrate, err error) {
return m, nil
}
// MigrateDB applies all pending database migrations to bring the schema up to date.
// This should be run with the -migrate-db flag before starting the application
// after upgrading to a new version that requires schema changes.
//
// Process:
// 1. Checks current database version
// 2. Applies all migrations from current version to target Version
// 3. Updates schema_migrations table to track applied migrations
//
// Important:
// - Always backup your database before running migrations
// - Migrations are irreversible without manual intervention
// - If a migration fails, the database is marked "dirty" and requires manual fix
//
// Usage:
//
// cc-backend -migrate-db
func MigrateDB(db string) error {
m, err := getMigrateInstance(db)
if err != nil {
@@ -107,6 +144,17 @@ func MigrateDB(db string) error {
return nil
}
// RevertDB rolls back the database schema to the previous version (Version - 1).
// This is primarily used for testing or emergency rollback scenarios.
//
// Warning:
// - This may cause data loss if newer schema added columns/tables
// - Always backup before reverting
// - Not all migrations are safely reversible
//
// Usage:
//
// cc-backend -revert-db
func RevertDB(db string) error {
m, err := getMigrateInstance(db)
if err != nil {
@@ -125,6 +173,21 @@ func RevertDB(db string) error {
return nil
}
// ForceDB forces the database schema version to the current Version without running migrations.
// This is only used to recover from failed migrations that left the database in a "dirty" state.
//
// When to use:
// - After manually fixing a failed migration
// - When you've manually applied schema changes and need to update the version marker
//
// Warning:
// - This does NOT apply any schema changes
// - Only use after manually verifying the schema is correct
// - Improper use can cause schema/version mismatch
//
// Usage:
//
// cc-backend -force-db
func ForceDB(db string) error {
m, err := getMigrateInstance(db)
if err != nil {

View File

@@ -579,7 +579,7 @@ func (r *NodeRepository) GetNodesForList(
queryFilters = append(queryFilters, &model.NodeFilter{Hostname: &model.StringInput{Contains: &nodeFilter}})
}
if stateFilter != "all" && stateFilter != "notindb" {
var queryState schema.SchedulerState = schema.SchedulerState(stateFilter)
queryState := schema.SchedulerState(stateFilter)
queryFilters = append(queryFilters, &model.NodeFilter{SchedulerState: &queryState})
}
// if healthFilter != "all" {

View File

@@ -46,7 +46,7 @@ func BenchmarkSelect1(b *testing.B) {
}
func BenchmarkDB_FindJobById(b *testing.B) {
var jobId int64 = 1677322
var jobID int64 = 1677322
b.Run("FindJobById", func(b *testing.B) {
db := setup(b)
@@ -55,7 +55,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, err := db.FindByID(getContext(b), jobId)
_, err := db.FindByID(getContext(b), jobID)
noErr(b, err)
}
})
@@ -63,7 +63,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
}
func BenchmarkDB_FindJob(b *testing.B) {
var jobId int64 = 107266
var jobID int64 = 107266
var startTime int64 = 1657557241
cluster := "fritz"
@@ -74,7 +74,7 @@ func BenchmarkDB_FindJob(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, err := db.Find(&jobId, &cluster, &startTime)
_, err := db.Find(&jobID, &cluster, &startTime)
noErr(b, err)
}
})

View File

@@ -2,6 +2,44 @@
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// This file contains job statistics and histogram generation functionality for the JobRepository.
//
// # Job Statistics
//
// The statistics methods provide aggregated metrics about jobs including total jobs, users,
// walltime, and resource usage (nodes, cores, accelerators). Statistics can be computed:
// - Overall (JobsStats): Single aggregate across all matching jobs
// - Grouped (JobsStatsGrouped): Aggregated by user, project, cluster, or subcluster
// - Counts (JobCountGrouped, AddJobCount): Simple job counts with optional filtering
//
// All statistics methods support filtering via JobFilter and respect security contexts.
//
// # Histograms
//
// Histogram methods generate distribution data for visualization:
// - Duration, nodes, cores, accelerators (AddHistograms)
// - Job metrics like CPU load, memory usage (AddMetricHistograms)
//
// Histograms use intelligent binning:
// - Duration: Variable bin sizes (1m, 10m, 1h, 6h, 12h, 24h) with zero-padding
// - Resources: Natural value-based bins
// - Metrics: Normalized to peak values with configurable bin counts
//
// # Running vs. Completed Jobs
//
// Statistics handle running jobs specially:
// - Duration calculated as (now - start_time) for running jobs
// - Metric histograms for running jobs load data from metric backend instead of footprint
// - Job state filtering distinguishes running/completed jobs
//
// # Performance Considerations
//
// - All queries use prepared statements via stmtCache
// - Complex aggregations use SQL for efficiency
// - Histogram pre-initialization ensures consistent bin ranges
// - Metric histogram queries limited to 500 jobs for running job analysis
package repository
import (
@@ -19,7 +57,9 @@ import (
sq "github.com/Masterminds/squirrel"
)
// GraphQL validation should make sure that no unkown values can be specified.
// groupBy2column maps GraphQL Aggregate enum values to their corresponding database column names.
// Used by JobsStatsGrouped and JobCountGrouped to translate user-facing grouping dimensions
// into SQL GROUP BY clauses. GraphQL validation ensures only valid enum values are accepted.
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project",
@@ -27,6 +67,9 @@ var groupBy2column = map[model.Aggregate]string{
model.AggregateSubcluster: "job.subcluster",
}
// sortBy2column maps GraphQL SortByAggregate enum values to their corresponding computed column names.
// Used by JobsStatsGrouped to translate sort preferences into SQL ORDER BY clauses.
// Column names match the AS aliases used in buildStatsQuery.
var sortBy2column = map[model.SortByAggregate]string{
model.SortByAggregateTotaljobs: "totalJobs",
model.SortByAggregateTotalusers: "totalUsers",
@@ -39,6 +82,21 @@ var sortBy2column = map[model.SortByAggregate]string{
model.SortByAggregateTotalacchours: "totalAccHours",
}
// buildCountQuery constructs a SQL query to count jobs with optional grouping and filtering.
//
// Parameters:
// - filter: Job filters to apply (cluster, user, time range, etc.)
// - kind: Special filter - "running" for running jobs only, "short" for jobs under threshold
// - col: Column name to GROUP BY; empty string for total count without grouping
//
// Returns a SelectBuilder that produces either:
// - Single count: COUNT(job.id) when col is empty
// - Grouped counts: col, COUNT(job.id) when col is specified
//
// The kind parameter enables counting specific job categories:
// - "running": Only jobs with job_state = 'running'
// - "short": Only jobs with duration < ShortRunningJobsDuration config value
// - empty: All jobs matching filters
func (r *JobRepository) buildCountQuery(
filter []*model.JobFilter,
kind string,
@@ -47,10 +105,8 @@ func (r *JobRepository) buildCountQuery(
var query sq.SelectBuilder
if col != "" {
// Scan columns: id, cnt
query = sq.Select(col, "COUNT(job.id)").From("job").GroupBy(col)
} else {
// Scan columns: cnt
query = sq.Select("COUNT(job.id)").From("job")
}
@@ -68,6 +124,27 @@ func (r *JobRepository) buildCountQuery(
return query
}
// buildStatsQuery constructs a SQL query to compute comprehensive job statistics with optional grouping.
//
// Parameters:
// - filter: Job filters to apply (cluster, user, time range, etc.)
// - col: Column name to GROUP BY; empty string for overall statistics without grouping
//
// Returns a SelectBuilder that produces comprehensive statistics:
// - totalJobs: Count of jobs
// - totalUsers: Count of distinct users (always 0 when grouping by user)
// - totalWalltime: Sum of job durations in hours
// - totalNodes: Sum of nodes used across all jobs
// - totalNodeHours: Sum of (duration × num_nodes) in hours
// - totalCores: Sum of hardware threads used across all jobs
// - totalCoreHours: Sum of (duration × num_hwthreads) in hours
// - totalAccs: Sum of accelerators used across all jobs
// - totalAccHours: Sum of (duration × num_acc) in hours
//
// Special handling:
// - Running jobs: Duration calculated as (now - start_time) instead of stored duration
// - Grouped queries: Also select grouping column and user's display name from hpc_user table
// - All time values converted from seconds to hours (÷ 3600) and rounded
func (r *JobRepository) buildStatsQuery(
filter []*model.JobFilter,
col string,
@@ -75,31 +152,29 @@ func (r *JobRepository) buildStatsQuery(
var query sq.SelectBuilder
if col != "" {
// Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(
col,
"name",
"COUNT(job.id) as totalJobs",
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int) as totalWalltime`, time.Now().Unix()),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as int) as totalNodes`),
`CAST(SUM(job.num_nodes) as int) as totalNodes`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int) as totalNodeHours`, time.Now().Unix()),
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as int) as totalCores`),
`CAST(SUM(job.num_hwthreads) as int) as totalCores`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int) as totalCoreHours`, time.Now().Unix()),
fmt.Sprintf(`CAST(SUM(job.num_acc) as int) as totalAccs`),
`CAST(SUM(job.num_acc) as int) as totalAccs`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int) as totalAccHours`, time.Now().Unix()),
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
} else {
// Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(
"COUNT(job.id) as totalJobs",
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as int)`, time.Now().Unix()),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as int)`),
`CAST(SUM(job.num_nodes) as int)`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as int)`, time.Now().Unix()),
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as int)`),
`CAST(SUM(job.num_hwthreads) as int)`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as int)`, time.Now().Unix()),
fmt.Sprintf(`CAST(SUM(job.num_acc) as int)`),
`CAST(SUM(job.num_acc) as int)`,
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as int)`, time.Now().Unix()),
).From("job")
}
@@ -111,6 +186,25 @@ func (r *JobRepository) buildStatsQuery(
return query
}
// JobsStatsGrouped computes comprehensive job statistics grouped by a dimension (user, project, cluster, or subcluster).
//
// This is the primary method for generating aggregated statistics views in the UI, providing
// metrics like total jobs, walltime, and resource usage broken down by the specified grouping.
//
// Parameters:
// - ctx: Context for security checks and cancellation
// - filter: Filters to apply (time range, cluster, job state, etc.)
// - page: Optional pagination (ItemsPerPage: -1 disables pagination)
// - sortBy: Optional sort column (totalJobs, totalWalltime, totalCoreHours, etc.)
// - groupBy: Required grouping dimension (User, Project, Cluster, or Subcluster)
//
// Returns a slice of JobsStatistics, one per group, with:
// - ID: The group identifier (username, project name, cluster name, etc.)
// - Name: Display name (for users, from hpc_user.name; empty for other groups)
// - Statistics: totalJobs, totalUsers, totalWalltime, resource usage metrics
//
// Security: Respects user roles via SecurityCheck - users see only their own data unless admin/support.
// Performance: Results are sorted in SQL and pagination applied before scanning rows.
func (r *JobRepository) JobsStatsGrouped(
ctx context.Context,
filter []*model.JobFilter,
@@ -230,6 +324,21 @@ func (r *JobRepository) JobsStatsGrouped(
return stats, nil
}
// JobsStats computes overall job statistics across all matching jobs without grouping.
//
// This method provides a single aggregate view of job metrics, useful for dashboard
// summaries and overall system utilization reports.
//
// Parameters:
// - ctx: Context for security checks and cancellation
// - filter: Filters to apply (time range, cluster, job state, etc.)
//
// Returns a single-element slice containing aggregate statistics:
// - totalJobs, totalUsers, totalWalltime
// - totalNodeHours, totalCoreHours, totalAccHours
//
// Unlike JobsStatsGrouped, this returns overall totals without breaking down by dimension.
// Security checks are applied via SecurityCheck to respect user access levels.
func (r *JobRepository) JobsStats(
ctx context.Context,
filter []*model.JobFilter,
@@ -277,6 +386,15 @@ func (r *JobRepository) JobsStats(
return stats, nil
}
// LoadJobStat retrieves a specific statistic for a metric from a job's statistics.
// Returns 0.0 if the metric is not found or statType is invalid.
//
// Parameters:
// - job: Job struct with populated Statistics field
// - metric: Name of the metric to query (e.g., "cpu_load", "mem_used")
// - statType: Type of statistic: "avg", "min", or "max"
//
// Returns the requested statistic value or 0.0 if not found.
func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
if stats, ok := job.Statistics[metric]; ok {
switch statType {
@@ -294,6 +412,17 @@ func LoadJobStat(job *schema.Job, metric string, statType string) float64 {
return 0.0
}
// JobCountGrouped counts jobs grouped by a dimension without computing detailed statistics.
//
// This is a lightweight alternative to JobsStatsGrouped when only job counts are needed,
// avoiding the overhead of calculating walltime and resource usage metrics.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply
// - groupBy: Grouping dimension (User, Project, Cluster, or Subcluster)
//
// Returns JobsStatistics with only ID and TotalJobs populated for each group.
func (r *JobRepository) JobCountGrouped(
ctx context.Context,
filter []*model.JobFilter,
@@ -334,6 +463,20 @@ func (r *JobRepository) JobCountGrouped(
return stats, nil
}
// AddJobCountGrouped augments existing statistics with additional job counts by category.
//
// This method enriches JobsStatistics returned by JobsStatsGrouped or JobCountGrouped
// with counts of running or short-running jobs, matched by group ID.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply
// - groupBy: Grouping dimension (must match the dimension used for stats parameter)
// - stats: Existing statistics to augment (modified in-place by ID matching)
// - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
//
// Returns the same stats slice with RunningJobs or ShortJobs fields populated per group.
// Groups without matching jobs will have 0 for the added field.
func (r *JobRepository) AddJobCountGrouped(
ctx context.Context,
filter []*model.JobFilter,
@@ -383,6 +526,18 @@ func (r *JobRepository) AddJobCountGrouped(
return stats, nil
}
// AddJobCount augments existing overall statistics with additional job counts by category.
//
// Similar to AddJobCountGrouped but for ungrouped statistics. Applies the same count
// to all statistics entries (typically just one).
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply
// - stats: Existing statistics to augment (modified in-place)
// - kind: "running" to add RunningJobs count, "short" to add ShortJobs count
//
// Returns the same stats slice with RunningJobs or ShortJobs fields set to the total count.
func (r *JobRepository) AddJobCount(
ctx context.Context,
filter []*model.JobFilter,
@@ -428,6 +583,26 @@ func (r *JobRepository) AddJobCount(
return stats, nil
}
// AddHistograms augments statistics with distribution histograms for job properties.
//
// Generates histogram data for visualization of job duration, node count, core count,
// and accelerator count distributions. Duration histogram uses intelligent binning based
// on the requested resolution.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply to jobs included in histograms
// - stat: Statistics struct to augment (modified in-place)
// - durationBins: Bin size - "1m", "10m", "1h", "6h", "12h", or "24h" (default)
//
// Populates these fields in stat:
// - HistDuration: Job duration distribution (zero-padded bins)
// - HistNumNodes: Node count distribution
// - HistNumCores: Core (hwthread) count distribution
// - HistNumAccs: Accelerator count distribution
//
// Duration bins are pre-initialized with zeros to ensure consistent ranges for visualization.
// Bin size determines both the width and maximum duration displayed (e.g., "1h" = 48 bins × 1h = 48h max).
func (r *JobRepository) AddHistograms(
ctx context.Context,
filter []*model.JobFilter,
@@ -438,20 +613,20 @@ func (r *JobRepository) AddHistograms(
var targetBinCount int
var targetBinSize int
switch {
case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
switch *durationBins {
case "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
targetBinCount = 60
targetBinSize = 60
case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
case "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
targetBinCount = 72
targetBinSize = 600
case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
case "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
targetBinCount = 48
targetBinSize = 3600
case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
case "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
targetBinCount = 12
targetBinSize = 21600
case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
case "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
targetBinCount = 14
targetBinSize = 43200
default: // 24h
@@ -490,7 +665,30 @@ func (r *JobRepository) AddHistograms(
return stat, nil
}
// Requires thresholds for metric from config for cluster? Of all clusters and use largest? split to 10 + 1 for artifacts?
// AddMetricHistograms augments statistics with distribution histograms for job metrics.
//
// Generates histogram data for metrics like CPU load, memory usage, etc. Handles running
// and completed jobs differently: running jobs load data from metric backend, completed jobs
// use footprint data from database.
//
// Parameters:
// - ctx: Context for security checks
// - filter: Filters to apply (MUST contain State filter for running jobs)
// - metrics: List of metric names to histogram (e.g., ["cpu_load", "mem_used"])
// - stat: Statistics struct to augment (modified in-place)
// - targetBinCount: Number of histogram bins (default: 10)
//
// Populates HistMetrics field in stat with MetricHistoPoints for each metric.
//
// Binning algorithm:
// - Values normalized to metric's peak value from cluster configuration
// - Bins evenly distributed from 0 to peak
// - Pre-initialized with zeros for consistent visualization
//
// Limitations:
// - Running jobs: Limited to 500 jobs for performance
// - Requires valid cluster configuration with metric peak values
// - Uses footprint statistic (avg/max/min) configured per metric
func (r *JobRepository) AddMetricHistograms(
ctx context.Context,
filter []*model.JobFilter,
@@ -525,7 +723,16 @@ func (r *JobRepository) AddMetricHistograms(
return stat, nil
}
// `value` must be the column grouped by, but renamed to "value"
// jobsStatisticsHistogram generates a simple histogram by grouping on a column value.
//
// Used for histograms where the column value directly represents the bin (e.g., node count, core count).
// Unlike duration/metric histograms, this doesn't pre-initialize bins with zeros.
//
// Parameters:
// - value: SQL expression that produces the histogram value, aliased as "value"
// - filters: Job filters to apply
//
// Returns histogram points with Value (from column) and Count (number of jobs).
func (r *JobRepository) jobsStatisticsHistogram(
ctx context.Context,
value string,
@@ -564,6 +771,26 @@ func (r *JobRepository) jobsStatisticsHistogram(
return points, nil
}
// jobsDurationStatisticsHistogram generates a duration histogram with pre-initialized bins.
//
// Bins are zero-padded to provide consistent ranges for visualization, unlike simple
// histograms which only return bins with data. The value parameter should compute
// the bin number from job duration.
//
// Parameters:
// - value: SQL expression computing bin number from duration, aliased as "value"
// - filters: Job filters to apply
// - binSizeSeconds: Width of each bin in seconds
// - targetBinCount: Number of bins to pre-initialize
//
// Returns histogram points with Value (bin_number × binSizeSeconds) and Count.
// All bins from 1 to targetBinCount are returned, with Count=0 for empty bins.
//
// Algorithm:
// 1. Pre-initialize targetBinCount bins with zero counts
// 2. Query database for actual counts per bin
// 3. Match query results to pre-initialized bins by value
// 4. Bins without matches remain at zero
func (r *JobRepository) jobsDurationStatisticsHistogram(
ctx context.Context,
value string,
@@ -579,7 +806,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
return nil, qerr
}
// Setup Array
// Each bin represents a duration range: bin N = [N*binSizeSeconds, (N+1)*binSizeSeconds)
// Example: binSizeSeconds=3600 (1 hour), bin 1 = 0-1h, bin 2 = 1-2h, etc.
points := make([]*model.HistoPoint, 0)
for i := 1; i <= *targetBinCount; i++ {
point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
@@ -596,7 +824,8 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
return nil, err
}
// Fill Array at matching $Value
// Match query results to pre-initialized bins.
// point.Value from query is the bin number; multiply by binSizeSeconds to match bin.Value.
for rows.Next() {
point := model.HistoPoint{}
if err := rows.Scan(&point.Value, &point.Count); err != nil {
@@ -606,9 +835,6 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
for _, e := range points {
if e.Value == (point.Value * binSizeSeconds) {
// Note:
// Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
// causes frontend to loop into highest targetBinCount, due to zoom condition instantly being fullfilled (cause unknown)
e.Count = point.Count
break
}
@@ -619,18 +845,43 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
return points, nil
}
// jobsMetricStatisticsHistogram generates a metric histogram using footprint data from completed jobs.
//
// Values are normalized to the metric's peak value and distributed into bins. The algorithm
// is based on SQL histogram generation techniques, extracting metric values from JSON footprint
// and computing bin assignments in SQL.
//
// Parameters:
// - metric: Metric name (e.g., "cpu_load", "mem_used")
// - filters: Job filters to apply
// - bins: Number of bins to generate
//
// Returns MetricHistoPoints with metric name, unit, footprint stat type, and binned data.
//
// Algorithm:
// 1. Determine peak value from cluster configuration (filtered cluster or max across all)
// 2. Generate SQL that extracts footprint value, normalizes to [0,1], multiplies by bin count
// 3. Pre-initialize bins with min/max ranges based on peak value
// 4. Query database for counts per bin
// 5. Match results to pre-initialized bins
//
// Special handling: Values exactly equal to peak are forced into the last bin by multiplying
// peak by 0.999999999 to avoid creating an extra bin.
func (r *JobRepository) jobsMetricStatisticsHistogram(
ctx context.Context,
metric string,
filters []*model.JobFilter,
bins *int,
) (*model.MetricHistoPoints, error) {
// Get specific Peak or largest Peak
// Peak value defines the upper bound for binning: values are distributed across
// bins from 0 to peak. First try to get peak from filtered cluster, otherwise
// scan all clusters to find the maximum peak value.
var metricConfig *schema.MetricConfig
var peak float64
var unit string
var footprintStat string
// Try to get metric config from filtered cluster
for _, f := range filters {
if f.Cluster != nil {
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
@@ -641,6 +892,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
}
}
// If no cluster filter or peak not found, find largest peak across all clusters
// This ensures histogram can accommodate all possible values
if peak == 0.0 {
for _, c := range archive.Clusters {
for _, m := range c.MetricConfig {
@@ -659,11 +912,14 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
}
}
// cclog.Debugf("Metric %s, Peak %f, Unit %s", metric, peak, unit)
// Make bins, see https://jereze.com/code/sql-histogram/ (Modified here)
// Construct SQL histogram bins using normalized values.
// Algorithm based on: https://jereze.com/code/sql-histogram/ (modified)
start := time.Now()
// Find Jobs' Value Bin Number: Divide Value by Peak, Multiply by RequestedBins, then CAST to INT: Gets Bin-Number of Job
// Bin calculation formula:
// bin_number = CAST( (value / peak) * num_bins AS INTEGER ) + 1
// Special case: value == peak would create bin N+1, so we test for equality
// and multiply peak by 0.999999999 to force it into bin N.
binQuery := fmt.Sprintf(`CAST(
((case when json_extract(footprint, "$.%s") = %f then %f*0.999999999 else json_extract(footprint, "$.%s") end) / %f)
* %v as INTEGER )`,
@@ -672,24 +928,19 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
mainQuery := sq.Select(
fmt.Sprintf(`%s + 1 as bin`, binQuery),
`count(*) as count`,
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * %s as min`, peak, *bins, binQuery),
// For Debug: // fmt.Sprintf(`CAST((%f / %d) as INTEGER ) * (%s + 1) as max`, peak, *bins, binQuery),
).From("job").Where(
"JSON_VALID(footprint)",
).Where(fmt.Sprintf(`json_extract(footprint, "$.%s") is not null and json_extract(footprint, "$.%s") <= %f`, (metric + "_" + footprintStat), (metric + "_" + footprintStat), peak))
// Only accessible Jobs...
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
if qerr != nil {
return nil, qerr
}
// Filters...
for _, f := range filters {
mainQuery = BuildWhereClause(f, mainQuery)
}
// Finalize query with Grouping and Ordering
mainQuery = mainQuery.GroupBy("bin").OrderBy("bin")
rows, err := mainQuery.RunWith(r.DB).Query()
@@ -698,7 +949,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
return nil, err
}
// Setup Return Array With Bin-Numbers for Match and Min/Max based on Peak
// Pre-initialize bins with calculated min/max ranges.
// Example: peak=1000, bins=10 -> bin 1=[0,100), bin 2=[100,200), ..., bin 10=[900,1000]
points := make([]*model.MetricHistoPoint, 0)
binStep := int(peak) / *bins
for i := 1; i <= *bins; i++ {
@@ -708,26 +960,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
points = append(points, &epoint)
}
for rows.Next() { // Fill Count if Bin-No. Matches (Not every Bin exists in DB!)
// Match query results to pre-initialized bins.
for rows.Next() {
rpoint := model.MetricHistoPoint{}
if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil { // Required for Debug: &rpoint.Min, &rpoint.Max
if err := rows.Scan(&rpoint.Bin, &rpoint.Count); err != nil {
cclog.Warnf("Error while scanning rows for %s", metric)
return nil, err // FIXME: Totally bricks cc-backend if returned and if all metrics requested?
return nil, err
}
for _, e := range points {
if e.Bin != nil && rpoint.Bin != nil {
if *e.Bin == *rpoint.Bin {
e.Count = rpoint.Count
// Only Required For Debug: Check DB returned Min/Max against Backend Init above
// if rpoint.Min != nil {
// cclog.Warnf(">>>> Bin %d Min Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Min, *e.Min)
// }
// if rpoint.Max != nil {
// cclog.Warnf(">>>> Bin %d Max Set For %s to %d (Init'd with: %d)", *e.Bin, metric, *rpoint.Max, *e.Max)
// }
break
}
if e.Bin != nil && rpoint.Bin != nil && *e.Bin == *rpoint.Bin {
e.Count = rpoint.Count
break
}
}
}
@@ -738,6 +982,28 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
return &result, nil
}
// runningJobsMetricStatisticsHistogram generates metric histograms for running jobs using live data.
//
// Unlike completed jobs which use footprint data from the database, running jobs require
// fetching current metric averages from the metric backend (via metricdispatch).
//
// Parameters:
// - metrics: List of metric names
// - filters: Job filters (should filter to running jobs only)
// - bins: Number of histogram bins
//
// Returns slice of MetricHistoPoints, one per metric.
//
// Limitations:
// - Maximum 500 jobs (returns nil if more jobs match)
// - Requires metric backend availability
// - Bins based on metric peak values from cluster configuration
//
// Algorithm:
// 1. Query first 501 jobs to check count limit
// 2. Load metric averages for all jobs via metricdispatch
// 3. For each metric, create bins based on peak value
// 4. Iterate averages and count jobs per bin
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
ctx context.Context,
metrics []string,

View File

@@ -25,11 +25,20 @@ func TestBuildJobStatsQuery(t *testing.T) {
func TestJobStats(t *testing.T) {
r := setup(t)
// First, count the actual jobs in the database (excluding test jobs)
var expectedCount int
err := r.DB.QueryRow(`SELECT COUNT(*) FROM job WHERE cluster != 'testcluster'`).Scan(&expectedCount)
noErr(t, err)
filter := &model.JobFilter{}
// Exclude test jobs created by other tests
testCluster := "testcluster"
filter.Cluster = &model.StringInput{Neq: &testCluster}
stats, err := r.JobsStats(getContext(t), []*model.JobFilter{filter})
noErr(t, err)
if stats[0].TotalJobs != 544 {
t.Fatalf("Want 544, Got %d", stats[0].TotalJobs)
if stats[0].TotalJobs != expectedCount {
t.Fatalf("Want %d, Got %d", expectedCount, stats[0].TotalJobs)
}
}

View File

@@ -3,6 +3,34 @@
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package repository provides data access and persistence layer for ClusterCockpit.
//
// This file implements tag management functionality for job categorization and classification.
// Tags support both manual assignment (via REST/GraphQL APIs) and automatic detection
// (via tagger plugins). The implementation includes role-based access control through
// tag scopes and maintains bidirectional consistency between the SQL database and
// the file-based job archive.
//
// Database Schema:
//
// CREATE TABLE tag (
// id INTEGER PRIMARY KEY AUTOINCREMENT,
// tag_type VARCHAR(255) NOT NULL,
// tag_name VARCHAR(255) NOT NULL,
// tag_scope VARCHAR(255) NOT NULL DEFAULT "global",
// CONSTRAINT tag_unique UNIQUE (tag_type, tag_name, tag_scope)
// );
//
// CREATE TABLE jobtag (
// job_id INTEGER,
// tag_id INTEGER,
// PRIMARY KEY (job_id, tag_id),
// FOREIGN KEY (job_id) REFERENCES job(id) ON DELETE CASCADE,
// FOREIGN KEY (tag_id) REFERENCES tag(id) ON DELETE CASCADE
// );
//
// The jobtag junction table enables many-to-many relationships between jobs and tags.
// CASCADE deletion ensures referential integrity when jobs or tags are removed.
package repository
import (
@@ -16,8 +44,32 @@ import (
sq "github.com/Masterminds/squirrel"
)
// Tag Scope Rules:
//
// Tags in ClusterCockpit have three visibility scopes that control who can see and use them:
//
// 1. "global" - Visible to all users, can be used by anyone
// Example: System-generated tags like "energy-efficient", "failed", "short"
//
// 2. "private" - Only visible to the creating user
// Example: Personal notes like "needs-review", "interesting-case"
//
// 3. "admin" - Only visible to users with admin or support roles
// Example: Internal notes like "hardware-issue", "billing-problem"
//
// Authorization Rules:
// - Regular users can only create/see "global" and their own "private" tags
// - Admin/Support can create/see all scopes including "admin" tags
// - Users can only add tags to jobs they have permission to view
// - Tag scope is enforced at query time in GetTags() and CountTags()
// AddTag adds the tag with id `tagId` to the job with the database id `jobId`.
// Requires user authentication for security checks.
//
// The user must have permission to view the job. Tag visibility is determined by scope:
// - "global" tags: visible to all users
// - "private" tags: only visible to the tag creator
// - "admin" tags: only visible to admin/support users
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIDWithUser(user, job)
if err != nil {
@@ -49,7 +101,7 @@ func (r *JobRepository) AddTagDirect(job int64, tag int64) ([]*schema.Tag, error
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
j, err := r.FindByIDWithUser(user, job)
if err != nil {
cclog.Warn("Error while finding job by id")
cclog.Warnf("Error while finding job %d for user %s during tag removal: %v", job, user.Username, err)
return nil, err
}
@@ -69,7 +121,7 @@ func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
cclog.Warn("Error while getting tags for job")
cclog.Warnf("Error while getting archive tags for job %d in RemoveTag: %v", job, err)
return nil, err
}
@@ -80,7 +132,7 @@ func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.
// Requires user authentication for security checks. Used by REST API.
func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagType string, tagName string, tagScope string) ([]*schema.Tag, error) {
// Get Tag ID to delete
tagID, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
return nil, fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
@@ -89,7 +141,7 @@ func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagT
// Get Job
j, err := r.FindByIDWithUser(user, job)
if err != nil {
cclog.Warn("Error while finding job by id")
cclog.Warnf("Error while finding job %d for user %s during tag removal by request: %v", job, user.Username, err)
return nil, err
}
@@ -104,19 +156,30 @@ func (r *JobRepository) RemoveJobTagByRequest(user *schema.User, job int64, tagT
tags, err := r.GetTags(user, &job)
if err != nil {
cclog.Warn("Error while getting tags for job")
cclog.Warnf("Error while getting tags for job %d in RemoveJobTagByRequest: %v", job, err)
return nil, err
}
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
cclog.Warn("Error while getting tags for job")
cclog.Warnf("Error while getting archive tags for job %d in RemoveJobTagByRequest: %v", job, err)
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
}
// removeTagFromArchiveJobs updates the job archive for all affected jobs after a tag deletion.
//
// This function is called asynchronously (via goroutine) after removing a tag from the database
// to synchronize the file-based job archive with the database state. Errors are logged but not
// returned since this runs in the background.
//
// Parameters:
// - jobIds: Database IDs of all jobs that had the deleted tag
//
// Implementation note: Each job is processed individually to handle partial failures gracefully.
// If one job fails to update, others will still be processed.
func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
for _, j := range jobIds {
tags, err := r.getArchiveTags(&j)
@@ -139,18 +202,18 @@ func (r *JobRepository) removeTagFromArchiveJobs(jobIds []int64) {
// Used by REST API. Does not update tagged jobs in Job archive.
func (r *JobRepository) RemoveTagByRequest(tagType string, tagName string, tagScope string) error {
// Get Tag ID to delete
tagID, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
cclog.Warnf("Tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
return fmt.Errorf("tag does not exist (name, type, scope): %s, %s, %s", tagName, tagType, tagScope)
}
return r.RemoveTagById(tagID)
return r.RemoveTagByID(tagID)
}
// Removes a tag from db by tag id
// Used by GraphQL API.
func (r *JobRepository) RemoveTagById(tagID int64) error {
func (r *JobRepository) RemoveTagByID(tagID int64) error {
jobIds, err := r.FindJobIdsByTag(tagID)
if err != nil {
return err
@@ -180,8 +243,16 @@ func (r *JobRepository) RemoveTagById(tagID int64) error {
return nil
}
// CreateTag creates a new tag with the specified type and name and returns its database id.
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
// CreateTag creates a new tag with the specified type, name, and scope.
// Returns the database ID of the newly created tag.
//
// Scope defaults to "global" if empty string is provided.
// Valid scopes: "global", "private", "admin"
//
// Example:
//
// tagID, err := repo.CreateTag("performance", "high-memory", "global")
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagID int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
@@ -199,8 +270,14 @@ func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope strin
return res.LastInsertId()
}
// CountTags returns all tags visible to the user and the count of jobs for each tag.
// Applies scope-based filtering to respect tag visibility rules.
//
// Returns:
// - tags: slice of tags the user can see
// - counts: map of tag name to job count
// - err: any error encountered
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
// Fetch all Tags in DB for Display in Frontend Tag-View
tags = make([]schema.Tag, 0, 100)
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
if err != nil {
@@ -262,13 +339,13 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
for rows.Next() {
var tagType string
var tagName string
var tagId int
var tagID int
var count int
if err = rows.Scan(&tagType, &tagName, &tagId, &count); err != nil {
if err = rows.Scan(&tagType, &tagName, &tagID, &count); err != nil {
return nil, nil, err
}
// Use tagId as second Map-Key component to differentiate tags with identical names
counts[fmt.Sprint(tagType, tagName, tagId)] = count
counts[fmt.Sprint(tagType, tagName, tagID)] = count
}
err = rows.Err()
@@ -276,18 +353,44 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
}
var (
ErrTagNotFound = errors.New("the tag does not exist")
ErrJobNotOwned = errors.New("user is not owner of job")
ErrTagNoAccess = errors.New("user not permitted to use that tag")
ErrTagPrivateScope = errors.New("tag is private to another user")
ErrTagAdminScope = errors.New("tag requires admin privileges")
// ErrTagNotFound is returned when a tag ID or tag identifier (type, name, scope) does not exist in the database.
ErrTagNotFound = errors.New("the tag does not exist")
// ErrJobNotOwned is returned when a user attempts to tag a job they do not have permission to access.
ErrJobNotOwned = errors.New("user is not owner of job")
// ErrTagNoAccess is returned when a user attempts to use a tag they cannot access due to scope restrictions.
ErrTagNoAccess = errors.New("user not permitted to use that tag")
// ErrTagPrivateScope is returned when a user attempts to access another user's private tag.
ErrTagPrivateScope = errors.New("tag is private to another user")
// ErrTagAdminScope is returned when a non-admin user attempts to use an admin-scoped tag.
ErrTagAdminScope = errors.New("tag requires admin privileges")
// ErrTagsIncompatScopes is returned when attempting to combine admin and non-admin scoped tags in a single operation.
ErrTagsIncompatScopes = errors.New("combining admin and non-admin scoped tags not allowed")
)
// addJobTag is a helper function that inserts a job-tag association and updates the archive.
// Returns the updated tag list for the job.
func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
//
// This function performs three operations atomically:
// 1. Inserts the job-tag association into the jobtag junction table
// 2. Retrieves the updated tag list for the job (using the provided getTags callback)
// 3. Updates the job archive with the new tags to maintain database-archive consistency
//
// Parameters:
// - jobId: Database ID of the job
// - tagId: Database ID of the tag to associate
// - job: Full job object needed for archive update
// - getTags: Callback function to retrieve updated tags (allows different security contexts)
//
// Returns the complete updated tag list for the job or an error.
//
// Note: This function does NOT validate tag scope permissions - callers must perform
// authorization checks before invoking this helper.
func (r *JobRepository) addJobTag(jobID int64, tagID int64, job *schema.Job, getTags func() ([]*schema.Tag, error)) ([]*schema.Tag, error) {
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
@@ -297,13 +400,13 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get
tags, err := getTags()
if err != nil {
cclog.Warnf("Error getting tags for job %d: %v", jobId, err)
cclog.Warnf("Error getting tags for job %d: %v", jobID, err)
return nil, err
}
archiveTags, err := r.getArchiveTags(&jobId)
archiveTags, err := r.getArchiveTags(&jobID)
if err != nil {
cclog.Warnf("Error getting archive tags for job %d: %v", jobId, err)
cclog.Warnf("Error getting archive tags for job %d: %v", jobID, err)
return nil, err
}
@@ -312,7 +415,7 @@ func (r *JobRepository) addJobTag(jobId int64, tagId int64, job *schema.Job, get
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
// If such a tag does not yet exist, it is created.
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobID int64, tagType string, tagName string, tagScope string) (tagID int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
@@ -326,44 +429,43 @@ func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType s
return 0, fmt.Errorf("cannot write tag scope with current authorization")
}
tagId, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
tagID, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return 0, err
}
}
if _, err := r.AddTag(user, jobId, tagId); err != nil {
if _, err := r.AddTag(user, jobID, tagID); err != nil {
return 0, err
}
return tagId, nil
return tagID, nil
}
// used in auto tagger plugins
func (r *JobRepository) AddTagOrCreateDirect(jobId int64, tagType string, tagName string) (tagId int64, err error) {
func (r *JobRepository) AddTagOrCreateDirect(jobID int64, tagType string, tagName string) (tagID int64, err error) {
tagScope := "global"
tagId, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
tagID, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return 0, err
}
}
if _, err := r.AddTagDirect(jobId, tagId); err != nil {
if _, err := r.AddTagDirect(jobID, tagID); err != nil {
return 0, err
}
return tagId, nil
return tagID, nil
}
func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool {
func (r *JobRepository) HasTag(jobID int64, tagType string, tagName string) bool {
var id int64
q := sq.Select("id").From("tag").Join("jobtag ON jobtag.tag_id = tag.id").
Where("jobtag.job_id = ?", jobId).Where("tag.tag_type = ?", tagType).
Where("jobtag.job_id = ?", jobID).Where("tag.tag_type = ?", tagType).
Where("tag.tag_name = ?", tagName)
err := q.RunWith(r.stmtCache).QueryRow().Scan(&id)
if err != nil {
@@ -373,21 +475,21 @@ func (r *JobRepository) HasTag(jobId int64, tagType string, tagName string) bool
}
}
// TagId returns the database id of the tag with the specified type and name.
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
// TagID returns the database id of the tag with the specified type and name.
func (r *JobRepository) TagID(tagType string, tagName string, tagScope string) (tagID int64, exists bool) {
exists = true
if err := sq.Select("id").From("tag").
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
RunWith(r.stmtCache).QueryRow().Scan(&tagID); err != nil {
exists = false
}
return
}
// TagInfo returns the database infos of the tag with the specified id.
func (r *JobRepository) TagInfo(tagId int64) (tagType string, tagName string, tagScope string, exists bool) {
func (r *JobRepository) TagInfo(tagID int64) (tagType string, tagName string, tagScope string, exists bool) {
exists = true
if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagId).
if err := sq.Select("tag.tag_type", "tag.tag_name", "tag.tag_scope").From("tag").Where("tag.id = ?", tagID).
RunWith(r.stmtCache).QueryRow().Scan(&tagType, &tagName, &tagScope); err != nil {
exists = false
}
@@ -412,7 +514,7 @@ func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, e
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
cclog.Warn("Error while scanning rows")
cclog.Warnf("Error while scanning tag rows in GetTags: %v", err)
return nil, err
}
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
@@ -445,7 +547,7 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
cclog.Warn("Error while scanning rows")
cclog.Warnf("Error while scanning tag rows in GetTagsDirect: %v", err)
return nil, err
}
tags = append(tags, tag)
@@ -454,7 +556,18 @@ func (r *JobRepository) GetTagsDirect(job *int64) ([]*schema.Tag, error) {
return tags, nil
}
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
// getArchiveTags returns all tags for a job WITHOUT applying scope-based filtering.
//
// This internal function is used exclusively for job archive synchronization where we need
// to store all tags regardless of the current user's permissions. Unlike GetTags() which
// filters by scope, this returns the complete unfiltered tag list.
//
// Parameters:
// - job: Pointer to job database ID, or nil to return all tags in the system
//
// Returns all tags without scope filtering, used only for archive operations.
//
// WARNING: Do NOT expose this function to user-facing APIs as it bypasses authorization.
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
if job != nil {
@@ -472,7 +585,7 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
cclog.Warn("Error while scanning rows")
cclog.Warnf("Error while scanning tag rows in getArchiveTags: %v", err)
return nil, err
}
tags = append(tags, tag)
@@ -481,18 +594,18 @@ func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
return tags, nil
}
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
func (r *JobRepository) ImportTag(jobID int64, tagType string, tagName string, tagScope string) (err error) {
// Import has no scope ctx, only import from metafile to DB (No recursive archive update required), only returns err
tagId, exists := r.TagId(tagType, tagName, tagScope)
tagID, exists := r.TagID(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
tagID, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return err
}
}
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobID, tagID)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
@@ -503,6 +616,28 @@ func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, t
return nil
}
// checkScopeAuth validates whether a user is authorized to perform an operation on a tag with the given scope.
//
// This function implements the tag scope authorization matrix:
//
// Scope | Read Access | Write Access
// -------------|----------------------------------|----------------------------------
// "global" | All users | Admin, Support, API-only
// "admin" | Admin, Support | Admin, API-only
// <username> | Owner only | Owner only (private tags)
//
// Parameters:
// - user: User attempting the operation (must not be nil)
// - operation: Either "read" or "write"
// - scope: Tag scope value ("global", "admin", or username for private tags)
//
// Returns:
// - pass: true if authorized, false if denied
// - err: error only if operation is invalid or user is nil
//
// Special cases:
// - API-only users (single role: RoleApi) can write to admin and global scopes for automation
// - Private tags use the username as scope, granting exclusive access to that user
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
if user != nil {
switch {

View File

@@ -0,0 +1,311 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"testing"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestTransactionInit(t *testing.T) {
r := setup(t)
t.Run("successful transaction init", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err, "TransactionInit should succeed")
require.NotNil(t, tx, "Transaction should not be nil")
require.NotNil(t, tx.tx, "Transaction.tx should not be nil")
// Clean up
err = tx.Rollback()
require.NoError(t, err, "Rollback should succeed")
})
}
func TestTransactionCommit(t *testing.T) {
r := setup(t)
t.Run("commit after successful operations", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
// Insert a test tag
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_tag_commit", "global")
require.NoError(t, err, "TransactionAdd should succeed")
// Commit the transaction
err = tx.Commit()
require.NoError(t, err, "Commit should succeed")
// Verify the tag was inserted
var count int
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_commit").Scan(&count)
require.NoError(t, err)
assert.Equal(t, 1, count, "Tag should be committed to database")
// Clean up
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_tag_commit")
require.NoError(t, err)
})
t.Run("commit on already committed transaction", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
err = tx.Commit()
require.NoError(t, err, "First commit should succeed")
err = tx.Commit()
assert.Error(t, err, "Second commit should fail")
assert.Contains(t, err.Error(), "transaction already committed or rolled back")
})
}
func TestTransactionRollback(t *testing.T) {
r := setup(t)
t.Run("rollback after operations", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
// Insert a test tag
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_tag_rollback", "global")
require.NoError(t, err, "TransactionAdd should succeed")
// Rollback the transaction
err = tx.Rollback()
require.NoError(t, err, "Rollback should succeed")
// Verify the tag was NOT inserted
var count int
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_tag_rollback").Scan(&count)
require.NoError(t, err)
assert.Equal(t, 0, count, "Tag should not be in database after rollback")
})
t.Run("rollback on already rolled back transaction", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
err = tx.Rollback()
require.NoError(t, err, "First rollback should succeed")
err = tx.Rollback()
assert.NoError(t, err, "Second rollback should be safe (no-op)")
})
t.Run("rollback on committed transaction", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
err = tx.Commit()
require.NoError(t, err)
err = tx.Rollback()
assert.NoError(t, err, "Rollback after commit should be safe (no-op)")
})
}
func TestTransactionAdd(t *testing.T) {
r := setup(t)
t.Run("insert with TransactionAdd", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
defer tx.Rollback()
id, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_add", "global")
require.NoError(t, err, "TransactionAdd should succeed")
assert.Greater(t, id, int64(0), "Should return valid insert ID")
})
t.Run("error on nil transaction", func(t *testing.T) {
tx := &Transaction{tx: nil}
_, err := r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_nil", "global")
assert.Error(t, err, "Should error on nil transaction")
assert.Contains(t, err.Error(), "transaction is nil or already completed")
})
t.Run("error on invalid SQL", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
defer tx.Rollback()
_, err = r.TransactionAdd(tx, "INVALID SQL STATEMENT")
assert.Error(t, err, "Should error on invalid SQL")
})
t.Run("error after transaction committed", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
err = tx.Commit()
require.NoError(t, err)
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_after_commit", "global")
assert.Error(t, err, "Should error when transaction is already committed")
})
}
func TestTransactionAddNamed(t *testing.T) {
r := setup(t)
t.Run("insert with TransactionAddNamed", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
defer tx.Rollback()
type TagArgs struct {
Type string `db:"type"`
Name string `db:"name"`
Scope string `db:"scope"`
}
args := TagArgs{
Type: "test_type",
Name: "test_named",
Scope: "global",
}
id, err := r.TransactionAddNamed(tx,
"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
args)
require.NoError(t, err, "TransactionAddNamed should succeed")
assert.Greater(t, id, int64(0), "Should return valid insert ID")
})
t.Run("error on nil transaction", func(t *testing.T) {
tx := &Transaction{tx: nil}
_, err := r.TransactionAddNamed(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (:type, :name, :scope)",
map[string]interface{}{"type": "test", "name": "test", "scope": "global"})
assert.Error(t, err, "Should error on nil transaction")
assert.Contains(t, err.Error(), "transaction is nil or already completed")
})
}
func TestTransactionMultipleOperations(t *testing.T) {
r := setup(t)
t.Run("multiple inserts in single transaction", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
defer tx.Rollback()
// Insert multiple tags
for i := 0; i < 5; i++ {
_, err = r.TransactionAdd(tx,
"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_multi_"+string(rune('a'+i)), "global")
require.NoError(t, err, "Insert %d should succeed", i)
}
err = tx.Commit()
require.NoError(t, err, "Commit should succeed")
// Verify all tags were inserted
var count int
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_multi_%'").Scan(&count)
require.NoError(t, err)
assert.Equal(t, 5, count, "All 5 tags should be committed")
// Clean up
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name LIKE 'test_multi_%'")
require.NoError(t, err)
})
t.Run("rollback undoes all operations", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
// Insert multiple tags
for i := 0; i < 3; i++ {
_, err = r.TransactionAdd(tx,
"INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_rollback_"+string(rune('a'+i)), "global")
require.NoError(t, err)
}
err = tx.Rollback()
require.NoError(t, err, "Rollback should succeed")
// Verify no tags were inserted
var count int
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name LIKE 'test_rollback_%'").Scan(&count)
require.NoError(t, err)
assert.Equal(t, 0, count, "No tags should be in database after rollback")
})
}
func TestTransactionEnd(t *testing.T) {
r := setup(t)
t.Run("deprecated TransactionEnd calls Commit", func(t *testing.T) {
tx, err := r.TransactionInit()
require.NoError(t, err)
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_end", "global")
require.NoError(t, err)
// Use deprecated method
err = r.TransactionEnd(tx)
require.NoError(t, err, "TransactionEnd should succeed")
// Verify the tag was committed
var count int
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_end").Scan(&count)
require.NoError(t, err)
assert.Equal(t, 1, count, "Tag should be committed")
// Clean up
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_end")
require.NoError(t, err)
})
}
func TestTransactionDeferPattern(t *testing.T) {
r := setup(t)
t.Run("defer rollback pattern", func(t *testing.T) {
insertTag := func() error {
tx, err := r.TransactionInit()
if err != nil {
return err
}
defer tx.Rollback() // Safe to call even after commit
_, err = r.TransactionAdd(tx, "INSERT INTO tag (tag_type, tag_name, tag_scope) VALUES (?, ?, ?)",
"test_type", "test_defer", "global")
if err != nil {
return err
}
return tx.Commit()
}
err := insertTag()
require.NoError(t, err, "Function should succeed")
// Verify the tag was committed
var count int
err = r.DB.QueryRow("SELECT COUNT(*) FROM tag WHERE tag_name = ?", "test_defer").Scan(&count)
require.NoError(t, err)
assert.Equal(t, 1, count, "Tag should be committed despite defer rollback")
// Clean up
_, err = r.DB.Exec("DELETE FROM tag WHERE tag_name = ?", "test_defer")
require.NoError(t, err)
})
}

View File

@@ -22,6 +22,25 @@ import (
"golang.org/x/crypto/bcrypt"
)
// Authentication and Role System:
//
// ClusterCockpit supports multiple authentication sources:
// - Local: Username/password stored in database (password hashed with bcrypt)
// - LDAP: External LDAP/Active Directory authentication
// - JWT: Token-based authentication for API access
//
// Role Hierarchy (from highest to lowest privilege):
// 1. "admin" - Full system access, can manage all users and jobs
// 2. "support" - Can view all jobs but limited management capabilities
// 3. "manager" - Can manage specific projects and their users
// 4. "api" - Programmatic access for job submission/management
// 5. "user" - Default role, can only view own jobs
//
// Project Association:
// - Managers have a list of projects they oversee
// - Regular users' project membership is determined by job data
// - Managers can view/manage all jobs within their projects
var (
userRepoOnce sync.Once
userRepoInstance *UserRepository
@@ -44,6 +63,9 @@ func GetUserRepository() *UserRepository {
return userRepoInstance
}
// GetUser retrieves a user by username from the database.
// Returns the complete user record including hashed password, roles, and projects.
// Password field contains bcrypt hash for local auth users, empty for LDAP users.
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
user := &schema.User{Username: username}
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
@@ -93,6 +115,12 @@ func (r *UserRepository) GetLdapUsernames() ([]string, error) {
return users, nil
}
// AddUser creates a new user in the database.
// Passwords are automatically hashed with bcrypt before storage.
// Auth source determines authentication method (local, LDAP, etc.).
//
// Required fields: Username, Roles
// Optional fields: Name, Email, Password, Projects, AuthSource
func (r *UserRepository) AddUser(user *schema.User) error {
rolesJson, _ := json.Marshal(user.Roles)
projectsJson, _ := json.Marshal(user.Projects)
@@ -229,6 +257,14 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
return users, nil
}
// AddRole adds a role to a user's role list.
// Role string is automatically lowercased.
// Valid roles: admin, support, manager, api, user
//
// Returns error if:
// - User doesn't exist
// - Role is invalid
// - User already has the role
func (r *UserRepository) AddRole(
ctx context.Context,
username string,
@@ -258,6 +294,11 @@ func (r *UserRepository) AddRole(
return nil
}
// RemoveRole removes a role from a user's role list.
//
// Special rules:
// - Cannot remove "manager" role while user has assigned projects
// - Must remove all projects first before removing manager role
func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryrole string) error {
oldRole := strings.ToLower(queryrole)
user, err := r.GetUser(username)
@@ -294,6 +335,12 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
return nil
}
// AddProject assigns a project to a manager user.
// Only users with the "manager" role can have assigned projects.
//
// Returns error if:
// - User doesn't have manager role
// - User already manages the project
func (r *UserRepository) AddProject(
ctx context.Context,
username string,

View File

@@ -0,0 +1,596 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"testing"
"github.com/ClusterCockpit/cc-lib/v2/schema"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/crypto/bcrypt"
)
func TestAddUser(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
t.Run("add user with all fields", func(t *testing.T) {
user := &schema.User{
Username: "testuser1",
Name: "Test User One",
Email: "test1@example.com",
Password: "testpassword123",
Roles: []string{"user"},
Projects: []string{"project1", "project2"},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
retrievedUser, err := r.GetUser("testuser1")
require.NoError(t, err)
assert.Equal(t, user.Username, retrievedUser.Username)
assert.Equal(t, user.Name, retrievedUser.Name)
assert.Equal(t, user.Email, retrievedUser.Email)
assert.Equal(t, user.Roles, retrievedUser.Roles)
assert.Equal(t, user.Projects, retrievedUser.Projects)
assert.NotEmpty(t, retrievedUser.Password)
err = bcrypt.CompareHashAndPassword([]byte(retrievedUser.Password), []byte("testpassword123"))
assert.NoError(t, err, "Password should be hashed correctly")
err = r.DelUser("testuser1")
require.NoError(t, err)
})
t.Run("add user with minimal fields", func(t *testing.T) {
user := &schema.User{
Username: "testuser2",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLDAP,
}
err := r.AddUser(user)
require.NoError(t, err)
retrievedUser, err := r.GetUser("testuser2")
require.NoError(t, err)
assert.Equal(t, user.Username, retrievedUser.Username)
assert.Equal(t, "", retrievedUser.Name)
assert.Equal(t, "", retrievedUser.Email)
assert.Equal(t, "", retrievedUser.Password)
err = r.DelUser("testuser2")
require.NoError(t, err)
})
t.Run("add duplicate user fails", func(t *testing.T) {
user := &schema.User{
Username: "testuser3",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.AddUser(user)
assert.Error(t, err, "Adding duplicate user should fail")
err = r.DelUser("testuser3")
require.NoError(t, err)
})
}
func TestGetUser(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
t.Run("get existing user", func(t *testing.T) {
user := &schema.User{
Username: "getuser1",
Name: "Get User",
Email: "getuser@example.com",
Roles: []string{"user", "admin"},
Projects: []string{"proj1"},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
retrieved, err := r.GetUser("getuser1")
require.NoError(t, err)
assert.Equal(t, user.Username, retrieved.Username)
assert.Equal(t, user.Name, retrieved.Name)
assert.Equal(t, user.Email, retrieved.Email)
assert.ElementsMatch(t, user.Roles, retrieved.Roles)
assert.ElementsMatch(t, user.Projects, retrieved.Projects)
err = r.DelUser("getuser1")
require.NoError(t, err)
})
t.Run("get non-existent user", func(t *testing.T) {
_, err := r.GetUser("nonexistent")
assert.Error(t, err)
})
}
func TestUpdateUser(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
t.Run("update user name", func(t *testing.T) {
user := &schema.User{
Username: "updateuser1",
Name: "Original Name",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
dbUser, err := r.GetUser("updateuser1")
require.NoError(t, err)
updatedUser := &schema.User{
Username: "updateuser1",
Name: "Updated Name",
}
err = r.UpdateUser(dbUser, updatedUser)
require.NoError(t, err)
retrieved, err := r.GetUser("updateuser1")
require.NoError(t, err)
assert.Equal(t, "Updated Name", retrieved.Name)
err = r.DelUser("updateuser1")
require.NoError(t, err)
})
t.Run("update with no changes", func(t *testing.T) {
user := &schema.User{
Username: "updateuser2",
Name: "Same Name",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
dbUser, err := r.GetUser("updateuser2")
require.NoError(t, err)
err = r.UpdateUser(dbUser, dbUser)
assert.NoError(t, err)
err = r.DelUser("updateuser2")
require.NoError(t, err)
})
}
func TestDelUser(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
t.Run("delete existing user", func(t *testing.T) {
user := &schema.User{
Username: "deluser1",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.DelUser("deluser1")
require.NoError(t, err)
_, err = r.GetUser("deluser1")
assert.Error(t, err, "User should not exist after deletion")
})
t.Run("delete non-existent user", func(t *testing.T) {
err := r.DelUser("nonexistent")
assert.NoError(t, err, "Deleting non-existent user should not error")
})
}
func TestListUsers(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
user1 := &schema.User{
Username: "listuser1",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
user2 := &schema.User{
Username: "listuser2",
Roles: []string{"admin"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
user3 := &schema.User{
Username: "listuser3",
Roles: []string{"manager"},
Projects: []string{"proj1"},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user1)
require.NoError(t, err)
err = r.AddUser(user2)
require.NoError(t, err)
err = r.AddUser(user3)
require.NoError(t, err)
t.Run("list all users", func(t *testing.T) {
users, err := r.ListUsers(false)
require.NoError(t, err)
assert.GreaterOrEqual(t, len(users), 3)
usernames := make([]string, len(users))
for i, u := range users {
usernames[i] = u.Username
}
assert.Contains(t, usernames, "listuser1")
assert.Contains(t, usernames, "listuser2")
assert.Contains(t, usernames, "listuser3")
})
t.Run("list special users only", func(t *testing.T) {
users, err := r.ListUsers(true)
require.NoError(t, err)
usernames := make([]string, len(users))
for i, u := range users {
usernames[i] = u.Username
}
assert.Contains(t, usernames, "listuser2")
assert.Contains(t, usernames, "listuser3")
})
err = r.DelUser("listuser1")
require.NoError(t, err)
err = r.DelUser("listuser2")
require.NoError(t, err)
err = r.DelUser("listuser3")
require.NoError(t, err)
}
func TestGetLdapUsernames(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
ldapUser := &schema.User{
Username: "ldapuser1",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLDAP,
}
localUser := &schema.User{
Username: "localuser1",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(ldapUser)
require.NoError(t, err)
err = r.AddUser(localUser)
require.NoError(t, err)
usernames, err := r.GetLdapUsernames()
require.NoError(t, err)
assert.Contains(t, usernames, "ldapuser1")
assert.NotContains(t, usernames, "localuser1")
err = r.DelUser("ldapuser1")
require.NoError(t, err)
err = r.DelUser("localuser1")
require.NoError(t, err)
}
func TestAddRole(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
ctx := context.Background()
t.Run("add valid role", func(t *testing.T) {
user := &schema.User{
Username: "roleuser1",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.AddRole(ctx, "roleuser1", "admin")
require.NoError(t, err)
retrieved, err := r.GetUser("roleuser1")
require.NoError(t, err)
assert.Contains(t, retrieved.Roles, "admin")
assert.Contains(t, retrieved.Roles, "user")
err = r.DelUser("roleuser1")
require.NoError(t, err)
})
t.Run("add duplicate role", func(t *testing.T) {
user := &schema.User{
Username: "roleuser2",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.AddRole(ctx, "roleuser2", "user")
assert.Error(t, err, "Adding duplicate role should fail")
assert.Contains(t, err.Error(), "already has role")
err = r.DelUser("roleuser2")
require.NoError(t, err)
})
t.Run("add invalid role", func(t *testing.T) {
user := &schema.User{
Username: "roleuser3",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.AddRole(ctx, "roleuser3", "invalidrole")
assert.Error(t, err, "Adding invalid role should fail")
assert.Contains(t, err.Error(), "no valid option")
err = r.DelUser("roleuser3")
require.NoError(t, err)
})
}
func TestRemoveRole(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
ctx := context.Background()
t.Run("remove existing role", func(t *testing.T) {
user := &schema.User{
Username: "rmroleuser1",
Roles: []string{"user", "admin"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.RemoveRole(ctx, "rmroleuser1", "admin")
require.NoError(t, err)
retrieved, err := r.GetUser("rmroleuser1")
require.NoError(t, err)
assert.NotContains(t, retrieved.Roles, "admin")
assert.Contains(t, retrieved.Roles, "user")
err = r.DelUser("rmroleuser1")
require.NoError(t, err)
})
t.Run("remove non-existent role", func(t *testing.T) {
user := &schema.User{
Username: "rmroleuser2",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.RemoveRole(ctx, "rmroleuser2", "admin")
assert.Error(t, err, "Removing non-existent role should fail")
assert.Contains(t, err.Error(), "already deleted")
err = r.DelUser("rmroleuser2")
require.NoError(t, err)
})
t.Run("remove manager role with projects", func(t *testing.T) {
user := &schema.User{
Username: "rmroleuser3",
Roles: []string{"manager"},
Projects: []string{"proj1", "proj2"},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.RemoveRole(ctx, "rmroleuser3", "manager")
assert.Error(t, err, "Removing manager role with projects should fail")
assert.Contains(t, err.Error(), "still has assigned project")
err = r.DelUser("rmroleuser3")
require.NoError(t, err)
})
}
func TestAddProject(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
ctx := context.Background()
t.Run("add project to manager", func(t *testing.T) {
user := &schema.User{
Username: "projuser1",
Roles: []string{"manager"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.AddProject(ctx, "projuser1", "newproject")
require.NoError(t, err)
retrieved, err := r.GetUser("projuser1")
require.NoError(t, err)
assert.Contains(t, retrieved.Projects, "newproject")
err = r.DelUser("projuser1")
require.NoError(t, err)
})
t.Run("add project to non-manager", func(t *testing.T) {
user := &schema.User{
Username: "projuser2",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.AddProject(ctx, "projuser2", "newproject")
assert.Error(t, err, "Adding project to non-manager should fail")
assert.Contains(t, err.Error(), "not a manager")
err = r.DelUser("projuser2")
require.NoError(t, err)
})
t.Run("add duplicate project", func(t *testing.T) {
user := &schema.User{
Username: "projuser3",
Roles: []string{"manager"},
Projects: []string{"existingproject"},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.AddProject(ctx, "projuser3", "existingproject")
assert.Error(t, err, "Adding duplicate project should fail")
assert.Contains(t, err.Error(), "already manages")
err = r.DelUser("projuser3")
require.NoError(t, err)
})
}
func TestRemoveProject(t *testing.T) {
_ = setup(t)
r := GetUserRepository()
ctx := context.Background()
t.Run("remove existing project", func(t *testing.T) {
user := &schema.User{
Username: "rmprojuser1",
Roles: []string{"manager"},
Projects: []string{"proj1", "proj2"},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.RemoveProject(ctx, "rmprojuser1", "proj1")
require.NoError(t, err)
retrieved, err := r.GetUser("rmprojuser1")
require.NoError(t, err)
assert.NotContains(t, retrieved.Projects, "proj1")
assert.Contains(t, retrieved.Projects, "proj2")
err = r.DelUser("rmprojuser1")
require.NoError(t, err)
})
t.Run("remove non-existent project", func(t *testing.T) {
user := &schema.User{
Username: "rmprojuser2",
Roles: []string{"manager"},
Projects: []string{"proj1"},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.RemoveProject(ctx, "rmprojuser2", "nonexistent")
assert.Error(t, err, "Removing non-existent project should fail")
err = r.DelUser("rmprojuser2")
require.NoError(t, err)
})
t.Run("remove project from non-manager", func(t *testing.T) {
user := &schema.User{
Username: "rmprojuser3",
Roles: []string{"user"},
Projects: []string{},
AuthSource: schema.AuthViaLocalPassword,
}
err := r.AddUser(user)
require.NoError(t, err)
err = r.RemoveProject(ctx, "rmprojuser3", "proj1")
assert.Error(t, err, "Removing project from non-manager should fail")
assert.Contains(t, err.Error(), "not a manager")
err = r.DelUser("rmprojuser3")
require.NoError(t, err)
})
}
func TestGetUserFromContext(t *testing.T) {
t.Run("get user from context", func(t *testing.T) {
user := &schema.User{
Username: "contextuser",
Roles: []string{"user"},
}
ctx := context.WithValue(context.Background(), ContextUserKey, user)
retrieved := GetUserFromContext(ctx)
require.NotNil(t, retrieved)
assert.Equal(t, user.Username, retrieved.Username)
})
t.Run("get user from empty context", func(t *testing.T) {
ctx := context.Background()
retrieved := GetUserFromContext(ctx)
assert.Nil(t, retrieved)
})
}

View File

@@ -64,7 +64,7 @@ func newTagger() {
func Init() {
initOnce.Do(func() {
newTagger()
repository.RegisterJobJook(jobTagger)
repository.RegisterJobHook(jobTagger)
})
}