Add sqlite and s3 job archive backends

Add documentation
Extend config
2025-11-19 17:00:11 +01:00
parent 39a2157d46
commit e1c7583670
9 changed files with 2284 additions and 19 deletions

@@ -3,7 +3,79 @@
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package archive implements the job archive interface and various backend implementations
// Package archive implements the job archive interface and various backend implementations.
//
// The archive package provides a pluggable storage backend system for job metadata and performance data.
// It supports three backend types:
//
// - file: Filesystem-based storage with hierarchical directory structure
// - s3: AWS S3 and S3-compatible object storage (MinIO, localstack)
// - sqlite: Single-file SQLite database with BLOB storage
//
// # Backend Selection
//
// Choose a backend based on your deployment requirements:
//
// - File: Best for single-server deployments with local fast storage
// - S3: Best for distributed deployments requiring redundancy and multi-instance access
// - SQLite: Best for portable archives with SQL query capability and transactional integrity
//
// # Configuration
//
// The archive backend is configured via JSON in the application config file:
//
// {
// "archive": {
// "kind": "file", // or "s3" or "sqlite"
// "path": "/var/lib/archive" // for file backend
// }
// }
//
// For S3 backend:
//
// {
// "archive": {
// "kind": "s3",
// "bucket": "my-job-archive",
// "region": "us-east-1",
// "accessKey": "...",
// "secretKey": "..."
// }
// }
//
// For SQLite backend:
//
// {
// "archive": {
// "kind": "sqlite",
// "dbPath": "/var/lib/archive.db"
// }
// }
//
// # Usage
//
// The package is initialized once at application startup:
//
// err := archive.Init(rawConfig, false)
// if err != nil {
// log.Fatal(err)
// }
//
// After initialization, use the global functions to interact with the archive:
//
// // Check if a job exists
// exists := archive.GetHandle().Exists(job)
//
// // Load job metadata
// jobMeta, err := archive.GetHandle().LoadJobMeta(job)
//
// // Store job metadata
// err = archive.GetHandle().StoreJobMeta(job)
//
// # Thread Safety
//
// All backend implementations are safe for concurrent use. The package uses
// internal locking for operations that modify shared state.
package archive
import (
@@ -18,45 +90,88 @@ import (
"github.com/ClusterCockpit/cc-lib/schema"
)
// Version is the current archive schema version.
// The archive backend must match this version for compatibility.
const Version uint64 = 2
// ArchiveBackend defines the interface that all archive storage backends must implement.
// Implementations include FsArchive (filesystem), S3Archive (object storage), and SqliteArchive (database).
//
// All methods are safe for concurrent use unless otherwise noted.
type ArchiveBackend interface {
// Init initializes the archive backend with the provided configuration.
// Returns the archive version found in the backend storage.
// Returns an error if the version is incompatible or initialization fails.
Init(rawConfig json.RawMessage) (uint64, error)
// Info prints archive statistics to stdout, including job counts,
// date ranges, and storage sizes per cluster.
Info()
// Exists checks if a job with the given ID, cluster, and start time
// exists in the archive.
Exists(job *schema.Job) bool
// LoadJobMeta loads job metadata from the archive.
// Returns the complete Job structure including resources, tags, and statistics.
LoadJobMeta(job *schema.Job) (*schema.Job, error)
// LoadJobData loads the complete time-series performance data for a job.
// Returns a map of metric names to their scoped data (node, socket, core, etc.).
LoadJobData(job *schema.Job) (schema.JobData, error)
// LoadJobStats loads pre-computed statistics from the job data.
// Returns scoped statistics (min, max, avg) for all metrics.
LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error)
// LoadClusterCfg loads the cluster configuration.
// Returns the cluster topology, metrics, and hardware specifications.
LoadClusterCfg(name string) (*schema.Cluster, error)
// StoreJobMeta stores job metadata to the archive.
// Overwrites existing metadata for the same job ID, cluster, and start time.
StoreJobMeta(jobMeta *schema.Job) error
// ImportJob stores both job metadata and performance data to the archive.
// This is typically used during initial job archiving.
ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
// GetClusters returns a list of all cluster names found in the archive.
GetClusters() []string
// CleanUp removes the specified jobs from the archive.
// Used by retention policies to delete old jobs.
CleanUp(jobs []*schema.Job)
// Move relocates jobs to a different path within the archive.
// The implementation depends on the backend type.
Move(jobs []*schema.Job, path string)
// Clean removes jobs outside the specified time range.
// Jobs with start_time < before OR start_time > after are deleted.
// Set after=0 to only use the before parameter.
Clean(before int64, after int64)
// Compress compresses job data files to save storage space.
// For filesystem and SQLite backends, this applies gzip compression.
// For S3, this compresses and replaces objects.
Compress(jobs []*schema.Job)
// CompressLast returns the timestamp of the last compression run
// and updates it to the provided starttime.
CompressLast(starttime int64) int64
// Iter returns a channel that yields all jobs in the archive.
// If loadMetricData is true, includes performance data; otherwise only metadata.
// The channel is closed when iteration completes.
Iter(loadMetricData bool) <-chan JobContainer
}
// JobContainer combines job metadata and optional performance data.
// Used by Iter() to yield jobs during archive iteration.
type JobContainer struct {
Meta *schema.Job
Data *schema.JobData
Meta *schema.Job // Job metadata (always present)
Data *schema.JobData // Performance data (nil if not loaded)
}
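// Example (sketch): draining the Iter channel without metric data. This assumes
// the archive was initialized via Init and that schema.Job exposes JobID and
// Cluster fields as in cc-lib; adjust the field access to the actual schema.
//
//	for container := range archive.GetHandle().Iter(false) {
//		// container.Data is nil because loadMetricData is false
//		fmt.Printf("job %d on cluster %s\n", container.Meta.JobID, container.Meta.Cluster)
//	}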
var (
@@ -67,6 +182,15 @@ var (
mutex sync.Mutex
)
// Init initializes the archive backend with the provided configuration.
// Must be called once at application startup before using any archive functions.
//
// Parameters:
// - rawConfig: JSON configuration for the archive backend
// - disableArchive: if true, disables archive functionality
//
// The configuration determines which backend is used (file, s3, or sqlite).
// Returns an error if initialization fails or version is incompatible.
func Init(rawConfig json.RawMessage, disableArchive bool) error {
var err error
@@ -86,8 +210,10 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
case "s3":
ar = &S3Archive{}
case "sqlite":
ar = &SqliteArchive{}
default:
err = fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
}
@@ -106,10 +232,19 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
return err
}
// GetHandle returns the initialized archive backend instance.
// Must be called after Init().
func GetHandle() ArchiveBackend {
return ar
}
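// Example (sketch): selecting the new SQLite backend at startup. This assumes the
// raw configuration passed to Init is the inner "archive" object from the
// application config file; the database path is illustrative.
//
//	rawCfg := json.RawMessage(`{"kind": "sqlite", "dbPath": "/var/lib/archive.db"}`)
//	if err := archive.Init(rawCfg, false); err != nil {
//		log.Fatal(err)
//	}
//	backend := archive.GetHandle() // the initialized SqliteArchive instance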
// LoadAveragesFromArchive loads average metric values for a job from the archive.
// This is a helper function that extracts average values from job statistics.
//
// Parameters:
// - job: Job to load averages for
// - metrics: List of metric names to retrieve
// - data: 2D slice where averages will be appended (one row per metric)
func LoadAveragesFromArchive(
job *schema.Job,
metrics []string,
@@ -132,6 +267,8 @@ func LoadAveragesFromArchive(
return nil
}
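// Example (sketch): collecting averages for two hypothetical metrics. Assumes the
// data parameter is a [][]schema.Float sized to len(metrics), with one row per
// metric as described above; the metric names depend on the cluster configuration.
//
//	metrics := []string{"flops_any", "mem_bw"}
//	data := make([][]schema.Float, len(metrics))
//	if err := archive.LoadAveragesFromArchive(job, metrics, data); err != nil {
//		log.Fatal(err) // or handle the error appropriately
//	}
//	// data[0] now ends with the average "flops_any" value for this job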
// LoadStatsFromArchive loads metric statistics for a job from the archive.
// Returns a map of metric names to their statistics (min, max, avg).
func LoadStatsFromArchive(
job *schema.Job,
metrics []string,
@@ -160,6 +297,8 @@ func LoadStatsFromArchive(
return data, nil
}
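// Example (sketch): reading min/max/avg for a selected metric. Assumes the
// returned statistics expose Avg, Min, and Max fields; "flops_any" is a
// hypothetical metric name.
//
//	stats, err := archive.LoadStatsFromArchive(job, []string{"flops_any"})
//	if err == nil {
//		fmt.Println(stats["flops_any"].Avg, stats["flops_any"].Max)
//	}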
// LoadScopedStatsFromArchive loads scoped statistics for a job from the archive.
// Returns statistics organized by metric scope (node, socket, core, etc.).
func LoadScopedStatsFromArchive(
job *schema.Job,
metrics []string,
@@ -174,6 +313,8 @@ func LoadScopedStatsFromArchive(
return data, nil
}
// GetStatistics returns all metric statistics for a job.
// Returns a map of metric names to their job-level statistics.
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := ar.LoadJobMeta(job)
if err != nil {
@@ -184,8 +325,10 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
return metaFile.Statistics, nil
}
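// Example (sketch): accessing job-level statistics from the archived metadata.
// Assumes schema.JobStatistics carries Avg/Min/Max as in cc-lib; the metric name
// is illustrative.
//
//	jobStats, err := archive.GetStatistics(job)
//	if err == nil {
//		fmt.Printf("avg flops: %.2f\n", jobStats["flops_any"].Avg)
//	}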
// UpdateMetadata checks if the job is archived, find its `meta.json` file and override the Metadata
// in that JSON file. If the job is not archived, nothing is done.
// UpdateMetadata updates the metadata map for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
mutex.Lock()
defer mutex.Unlock()
@@ -205,8 +348,10 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
return ar.StoreJobMeta(jobMeta)
}
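// Example (sketch): attaching an extra metadata entry to an archived job. The key
// and value are hypothetical; the call is a no-op while the job is still running
// or if archiving is disabled.
//
//	err := archive.UpdateMetadata(job, map[string]string{
//		"issues": "node failure during phase 2",
//	})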
// UpdateTags checks if the job is archived, find its `meta.json` file and override the tags list
// in that JSON file. If the job is not archived, nothing is done.
// UpdateTags updates the tag list for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
mutex.Lock()
defer mutex.Unlock()