Add sqlite and s3 job archive backends

Add documentation
Extend config
2025-11-19 17:00:11 +01:00
parent 39a2157d46
commit e1c7583670
9 changed files with 2284 additions and 19 deletions

@@ -3,7 +3,79 @@
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package archive implements the job archive interface and various backend implementations
// Package archive implements the job archive interface and various backend implementations.
//
// The archive package provides a pluggable storage backend system for job metadata and performance data.
// It supports three backend types:
//
// - file: Filesystem-based storage with hierarchical directory structure
// - s3: AWS S3 and S3-compatible object storage (MinIO, localstack)
// - sqlite: Single-file SQLite database with BLOB storage
//
// # Backend Selection
//
// Choose a backend based on your deployment requirements:
//
// - File: Best for single-server deployments with local fast storage
// - S3: Best for distributed deployments requiring redundancy and multi-instance access
// - SQLite: Best for portable archives with SQL query capability and transactional integrity
//
// # Configuration
//
// The archive backend is configured via JSON in the application config file:
//
// {
// "archive": {
// "kind": "file", // or "s3" or "sqlite"
// "path": "/var/lib/archive" // for file backend
// }
// }
//
// For S3 backend:
//
// {
// "archive": {
// "kind": "s3",
// "bucket": "my-job-archive",
// "region": "us-east-1",
// "accessKey": "...",
// "secretKey": "..."
// }
// }
//
// For SQLite backend:
//
// {
// "archive": {
// "kind": "sqlite",
// "dbPath": "/var/lib/archive.db"
// }
// }
//
// # Usage
//
// The package is initialized once at application startup:
//
// err := archive.Init(rawConfig, false)
// if err != nil {
// log.Fatal(err)
// }
//
// After initialization, use the global functions to interact with the archive:
//
// // Check if a job exists
// exists := archive.GetHandle().Exists(job)
//
// // Load job metadata
// jobMeta, err := archive.GetHandle().LoadJobMeta(job)
//
// // Store job metadata
// err = archive.GetHandle().StoreJobMeta(job)
//
// # Thread Safety
//
// All backend implementations are safe for concurrent use. The package uses
// internal locking for operations that modify shared state.
package archive
import (
@@ -18,45 +90,88 @@ import (
"github.com/ClusterCockpit/cc-lib/schema"
)
// Version is the current archive schema version.
// The archive backend must match this version for compatibility.
const Version uint64 = 2
// ArchiveBackend defines the interface that all archive storage backends must implement.
// Implementations include FsArchive (filesystem), S3Archive (object storage), and SqliteArchive (database).
//
// All methods are safe for concurrent use unless otherwise noted.
type ArchiveBackend interface {
// Init initializes the archive backend with the provided configuration.
// Returns the archive version found in the backend storage.
// Returns an error if the version is incompatible or initialization fails.
Init(rawConfig json.RawMessage) (uint64, error)
// Info prints archive statistics to stdout, including job counts,
// date ranges, and storage sizes per cluster.
Info()
// Exists checks if a job with the given ID, cluster, and start time
// exists in the archive.
Exists(job *schema.Job) bool
// LoadJobMeta loads job metadata from the archive.
// Returns the complete Job structure including resources, tags, and statistics.
LoadJobMeta(job *schema.Job) (*schema.Job, error)
// LoadJobData loads the complete time-series performance data for a job.
// Returns a map of metric names to their scoped data (node, socket, core, etc.).
LoadJobData(job *schema.Job) (schema.JobData, error)
// LoadJobStats loads pre-computed statistics from the job data.
// Returns scoped statistics (min, max, avg) for all metrics.
LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error)
// LoadClusterCfg loads the cluster configuration.
// Returns the cluster topology, metrics, and hardware specifications.
LoadClusterCfg(name string) (*schema.Cluster, error)
// StoreJobMeta stores job metadata to the archive.
// Overwrites existing metadata for the same job ID, cluster, and start time.
StoreJobMeta(jobMeta *schema.Job) error
// ImportJob stores both job metadata and performance data to the archive.
// This is typically used during initial job archiving.
ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
// GetClusters returns a list of all cluster names found in the archive.
GetClusters() []string
// CleanUp removes the specified jobs from the archive.
// Used by retention policies to delete old jobs.
CleanUp(jobs []*schema.Job)
// Move relocates jobs to a different path within the archive.
// The implementation depends on the backend type.
Move(jobs []*schema.Job, path string)
// Clean removes jobs outside the specified time range.
// Jobs with start_time < before OR start_time > after are deleted.
// Set after=0 to only use the before parameter.
Clean(before int64, after int64)
// Compress compresses job data files to save storage space.
// For filesystem and SQLite backends, this applies gzip compression.
// For S3, this compresses and replaces objects.
Compress(jobs []*schema.Job)
// CompressLast returns the timestamp of the last compression run
// and updates it to the provided starttime.
CompressLast(starttime int64) int64
// Iter returns a channel that yields all jobs in the archive.
// If loadMetricData is true, includes performance data; otherwise only metadata.
// The channel is closed when iteration completes.
Iter(loadMetricData bool) <-chan JobContainer
}
// JobContainer combines job metadata and optional performance data.
// Used by Iter() to yield jobs during archive iteration.
type JobContainer struct {
Meta *schema.Job
Data *schema.JobData
Meta *schema.Job // Job metadata (always present)
Data *schema.JobData // Performance data (nil if not loaded)
}
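// Example (sketch): draining the Iter channel without metric data. This assumes
// the archive was initialized via Init and that schema.Job exposes JobID and
// Cluster fields as in cc-lib; adjust the field access to the actual schema.
//
//	for container := range archive.GetHandle().Iter(false) {
//		// container.Data is nil because loadMetricData is false
//		fmt.Printf("job %d on cluster %s\n", container.Meta.JobID, container.Meta.Cluster)
//	}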
var (
@@ -67,6 +182,15 @@ var (
mutex sync.Mutex
)
// Init initializes the archive backend with the provided configuration.
// Must be called once at application startup before using any archive functions.
//
// Parameters:
// - rawConfig: JSON configuration for the archive backend
// - disableArchive: if true, disables archive functionality
//
// The configuration determines which backend is used (file, s3, or sqlite).
// Returns an error if initialization fails or version is incompatible.
func Init(rawConfig json.RawMessage, disableArchive bool) error {
var err error
@@ -86,8 +210,10 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
case "s3":
ar = &S3Archive{}
case "sqlite":
ar = &SqliteArchive{}
default:
err = fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
}
@@ -106,10 +232,19 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
return err
}
// GetHandle returns the initialized archive backend instance.
// Must be called after Init().
func GetHandle() ArchiveBackend {
return ar
}
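// Example (sketch): selecting the new SQLite backend at startup. This assumes the
// raw configuration passed to Init is the inner "archive" object from the
// application config file; the database path is illustrative.
//
//	rawCfg := json.RawMessage(`{"kind": "sqlite", "dbPath": "/var/lib/archive.db"}`)
//	if err := archive.Init(rawCfg, false); err != nil {
//		log.Fatal(err)
//	}
//	backend := archive.GetHandle() // the initialized SqliteArchive instance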
// LoadAveragesFromArchive loads average metric values for a job from the archive.
// This is a helper function that extracts average values from job statistics.
//
// Parameters:
// - job: Job to load averages for
// - metrics: List of metric names to retrieve
// - data: 2D slice where averages will be appended (one row per metric)
func LoadAveragesFromArchive(
job *schema.Job,
metrics []string,
@@ -132,6 +267,8 @@ func LoadAveragesFromArchive(
return nil
}
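// Example (sketch): collecting averages for two hypothetical metrics. Assumes the
// data parameter is a [][]schema.Float sized to len(metrics), with one row per
// metric as described above; the metric names depend on the cluster configuration.
//
//	metrics := []string{"flops_any", "mem_bw"}
//	data := make([][]schema.Float, len(metrics))
//	if err := archive.LoadAveragesFromArchive(job, metrics, data); err != nil {
//		log.Fatal(err) // or handle the error appropriately
//	}
//	// data[0] now ends with the average "flops_any" value for this job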
// LoadStatsFromArchive loads metric statistics for a job from the archive.
// Returns a map of metric names to their statistics (min, max, avg).
func LoadStatsFromArchive(
job *schema.Job,
metrics []string,
@@ -160,6 +297,8 @@ func LoadStatsFromArchive(
return data, nil
}
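// Example (sketch): reading min/max/avg for a selected metric. Assumes the
// returned statistics expose Avg, Min, and Max fields; "flops_any" is a
// hypothetical metric name.
//
//	stats, err := archive.LoadStatsFromArchive(job, []string{"flops_any"})
//	if err == nil {
//		fmt.Println(stats["flops_any"].Avg, stats["flops_any"].Max)
//	}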
// LoadScopedStatsFromArchive loads scoped statistics for a job from the archive.
// Returns statistics organized by metric scope (node, socket, core, etc.).
func LoadScopedStatsFromArchive(
job *schema.Job,
metrics []string,
@@ -174,6 +313,8 @@ func LoadScopedStatsFromArchive(
return data, nil
}
// GetStatistics returns all metric statistics for a job.
// Returns a map of metric names to their job-level statistics.
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := ar.LoadJobMeta(job)
if err != nil {
@@ -184,8 +325,10 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
return metaFile.Statistics, nil
}
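// Example (sketch): accessing job-level statistics from the archived metadata.
// Assumes schema.JobStatistics carries Avg/Min/Max as in cc-lib; the metric name
// is illustrative.
//
//	jobStats, err := archive.GetStatistics(job)
//	if err == nil {
//		fmt.Printf("avg flops: %.2f\n", jobStats["flops_any"].Avg)
//	}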
// UpdateMetadata checks if the job is archived, find its `meta.json` file and override the Metadata
// in that JSON file. If the job is not archived, nothing is done.
// UpdateMetadata updates the metadata map for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
mutex.Lock()
defer mutex.Unlock()
@@ -205,8 +348,10 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
return ar.StoreJobMeta(jobMeta)
}
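// Example (sketch): attaching an extra metadata entry to an archived job. The key
// and value are hypothetical; the call is a no-op while the job is still running
// or if archiving is disabled.
//
//	err := archive.UpdateMetadata(job, map[string]string{
//		"issues": "node failure during phase 2",
//	})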
// UpdateTags checks if the job is archived, find its `meta.json` file and override the tags list
// in that JSON file. If the job is not archived, nothing is done.
// UpdateTags updates the tag list for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
mutex.Lock()
defer mutex.Unlock()