// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package archive implements the job archive interface and various backend implementations.
//
// The archive package provides a pluggable storage backend system for job metadata and performance data.
// It supports three backend types:
//
//   - file: Filesystem-based storage with hierarchical directory structure
//   - s3: AWS S3 and S3-compatible object storage (MinIO, localstack)
//   - sqlite: Single-file SQLite database with BLOB storage
//
// # Backend Selection
//
// Choose a backend based on your deployment requirements:
//
//   - File: Best for single-server deployments with fast local storage
//   - S3: Best for distributed deployments requiring redundancy and multi-instance access
//   - SQLite: Best for portable archives with SQL query capability and transactional integrity
//
// # Configuration
//
// The archive backend is configured via JSON in the application config file:
//
//	{
//	  "archive": {
//	    "kind": "file",            // or "s3" or "sqlite"
//	    "path": "/var/lib/archive" // for file backend
//	  }
//	}
//
// For the S3 backend:
//
//	{
//	  "archive": {
//	    "kind": "s3",
//	    "bucket": "my-job-archive",
//	    "region": "us-east-1",
//	    "accessKey": "...",
//	    "secretKey": "..."
//	  }
//	}
//
// For the SQLite backend:
//
//	{
//	  "archive": {
//	    "kind": "sqlite",
//	    "dbPath": "/var/lib/archive.db"
//	  }
//	}
//
// # Usage
//
// The package is initialized once at application startup:
//
//	err := archive.Init(rawConfig, false)
//	if err != nil {
//		log.Fatal(err)
//	}
//
// After initialization, use the global functions to interact with the archive:
//
//	// Check if a job exists
//	exists := archive.GetHandle().Exists(job)
//
//	// Load job metadata
//	jobMeta, err := archive.GetHandle().LoadJobMeta(job)
//
//	// Store job metadata
//	err = archive.GetHandle().StoreJobMeta(job)
//
// # Thread Safety
//
// All backend implementations are safe for concurrent use. The package uses
// internal locking for operations that modify shared state.
package archive

import (
	"encoding/json"
	"fmt"
	"maps"
	"sync"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/lrucache"
	"github.com/ClusterCockpit/cc-lib/schema"
)

// Version is the current archive schema version.
// The archive backend must match this version for compatibility.
const Version uint64 = 2

// ArchiveBackend defines the interface that all archive storage backends must implement.
// Implementations include FsArchive (filesystem), S3Archive (object storage), and SqliteArchive (database).
//
// All methods are safe for concurrent use unless otherwise noted.
type ArchiveBackend interface {
	// Init initializes the archive backend with the provided configuration.
	// Returns the archive version found in the backend storage.
	// Returns an error if the version is incompatible or initialization fails.
	Init(rawConfig json.RawMessage) (uint64, error)

	// Info prints archive statistics to stdout, including job counts,
	// date ranges, and storage sizes per cluster.
	Info()

	// Exists checks if a job with the given ID, cluster, and start time
	// exists in the archive.
	Exists(job *schema.Job) bool

	// LoadJobMeta loads job metadata from the archive.
	// Returns the complete Job structure including resources, tags, and statistics.
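	//
	// A minimal sketch of calling this through the package-level handle; the
	// job value and the surrounding error handling are assumptions of the
	// example, not provided by this package:
	//
	//	meta, err := archive.GetHandle().LoadJobMeta(job)
	//	if err != nil {
	//		return err
	//	}
	//	_ = meta.Statistics // per-metric job statistics, see also GetStatistics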
	LoadJobMeta(job *schema.Job) (*schema.Job, error)

	// LoadJobData loads the complete time-series performance data for a job.
	// Returns a map of metric names to their scoped data (node, socket, core, etc.).
	LoadJobData(job *schema.Job) (schema.JobData, error)

	// LoadJobStats loads pre-computed statistics from the job data.
	// Returns scoped statistics (min, max, avg) for all metrics.
	LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error)

	// LoadClusterCfg loads the cluster configuration.
	// Returns the cluster topology, metrics, and hardware specifications.
	LoadClusterCfg(name string) (*schema.Cluster, error)

	// StoreJobMeta stores job metadata to the archive.
	// Overwrites existing metadata for the same job ID, cluster, and start time.
	StoreJobMeta(jobMeta *schema.Job) error

	// ImportJob stores both job metadata and performance data to the archive.
	// This is typically used during initial job archiving.
	ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error

	// GetClusters returns a list of all cluster names found in the archive.
	GetClusters() []string

	// CleanUp removes the specified jobs from the archive.
	// Used by retention policies to delete old jobs.
	CleanUp(jobs []*schema.Job)

	// Move relocates jobs to a different path within the archive.
	// The implementation depends on the backend type.
	Move(jobs []*schema.Job, path string)

	// Clean removes jobs outside the specified time range.
	// Jobs with start_time < before OR start_time > after are deleted.
	// Set after=0 to only use the before parameter.
	Clean(before int64, after int64)

	// Compress compresses job data files to save storage space.
	// For filesystem and SQLite backends, this applies gzip compression.
	// For S3, this compresses and replaces objects.
	Compress(jobs []*schema.Job)

	// CompressLast returns the timestamp of the last compression run
	// and updates it to the provided starttime.
	CompressLast(starttime int64) int64

	// Iter returns a channel that yields all jobs in the archive.
	// If loadMetricData is true, includes performance data; otherwise only metadata.
	// The channel is closed when iteration completes.
	Iter(loadMetricData bool) <-chan JobContainer
}

// JobContainer combines job metadata and optional performance data.
// Used by Iter() to yield jobs during archive iteration.
type JobContainer struct {
	Meta *schema.Job     // Job metadata (always present)
	Data *schema.JobData // Performance data (nil if not loaded)
}

var (
	initOnce   sync.Once
	cache      *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
	ar         ArchiveBackend
	useArchive bool
	mutex      sync.Mutex
)

// Init initializes the archive backend with the provided configuration.
// Must be called once at application startup before using any archive functions.
//
// Parameters:
//   - rawConfig: JSON configuration for the archive backend
//   - disableArchive: if true, disables archive functionality
//
// The configuration determines which backend is used (file, s3, or sqlite).
// Returns an error if initialization fails or version is incompatible.
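//
// A minimal wiring sketch; the inline JSON literal and the log.Fatal handling
// are illustrative assumptions, in the real application rawConfig comes from
// the parsed config file:
//
//	rawCfg := json.RawMessage(`{"kind": "sqlite", "dbPath": "/var/lib/archive.db"}`)
//	if err := archive.Init(rawCfg, false); err != nil {
//		log.Fatal(err)
//	}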
func Init(rawConfig json.RawMessage, disableArchive bool) error {
	var err error

	initOnce.Do(func() {
		useArchive = !disableArchive

		var cfg struct {
			Kind string `json:"kind"`
		}

		config.Validate(configSchema, rawConfig)
		if err = json.Unmarshal(rawConfig, &cfg); err != nil {
			cclog.Warn("Error while unmarshaling raw config json")
			return
		}

		switch cfg.Kind {
		case "file":
			ar = &FsArchive{}
		case "s3":
			ar = &S3Archive{}
		case "sqlite":
			ar = &SqliteArchive{}
		default:
			err = fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
			// Return early so Init is never called on a nil backend.
			return
		}

		var version uint64
		version, err = ar.Init(rawConfig)
		if err != nil {
			cclog.Errorf("Error while initializing archiveBackend: %s", err.Error())
			return
		}
		cclog.Infof("Load archive version %d", version)

		err = initClusterConfig()
	})

	return err
}

// GetHandle returns the initialized archive backend instance.
// Must be called after Init().
func GetHandle() ArchiveBackend {
	return ar
}

// LoadAveragesFromArchive loads average metric values for a job from the archive.
// This is a helper function that extracts average values from job statistics.
//
// Parameters:
//   - job: Job to load averages for
//   - metrics: List of metric names to retrieve
//   - data: 2D slice where averages will be appended (one row per metric)
func LoadAveragesFromArchive(
	job *schema.Job,
	metrics []string,
	data [][]schema.Float,
) error {
	metaFile, err := ar.LoadJobMeta(job)
	if err != nil {
		cclog.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
		return err
	}

	for i, m := range metrics {
		if stat, ok := metaFile.Statistics[m]; ok {
			data[i] = append(data[i], schema.Float(stat.Avg))
		} else {
			data[i] = append(data[i], schema.NaN)
		}
	}

	return nil
}

// LoadStatsFromArchive loads metric statistics for a job from the archive.
// Returns a map of metric names to their statistics (min, max, avg).
func LoadStatsFromArchive(
	job *schema.Job,
	metrics []string,
) (map[string]schema.MetricStatistics, error) {
	data := make(map[string]schema.MetricStatistics, len(metrics))
	metaFile, err := ar.LoadJobMeta(job)
	if err != nil {
		cclog.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
		return data, err
	}

	for _, m := range metrics {
		stat, ok := metaFile.Statistics[m]
		if !ok {
			data[m] = schema.MetricStatistics{Min: 0.0, Avg: 0.0, Max: 0.0}
			continue
		}

		data[m] = schema.MetricStatistics{
			Avg: stat.Avg,
			Min: stat.Min,
			Max: stat.Max,
		}
	}

	return data, nil
}

// LoadScopedStatsFromArchive loads scoped statistics for a job from the archive.
// Returns statistics organized by metric scope (node, socket, core, etc.).
func LoadScopedStatsFromArchive(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
) (schema.ScopedJobStats, error) {
	data, err := ar.LoadJobStats(job)
	if err != nil {
		cclog.Errorf("Error while loading job stats from archiveBackend: %s", err.Error())
		return nil, err
	}

	return data, nil
}

// GetStatistics returns all metric statistics for a job.
// Returns a map of metric names to their job-level statistics.
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
	metaFile, err := ar.LoadJobMeta(job)
	if err != nil {
		cclog.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
		return nil, err
	}

	return metaFile.Statistics, nil
}

// UpdateMetadata updates the metadata map for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
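//
// A small usage sketch; the metadata key and value are illustrative only:
//
//	err := archive.UpdateMetadata(job, map[string]string{"note": "rerun after node failure"})
//	if err != nil {
//		cclog.Errorf("updating metadata failed: %s", err.Error())
//	}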
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
	mutex.Lock()
	defer mutex.Unlock()

	if job.State == schema.JobStateRunning || !useArchive {
		return nil
	}

	jobMeta, err := ar.LoadJobMeta(job)
	if err != nil {
		cclog.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
		return err
	}

	maps.Copy(jobMeta.MetaData, metadata)

	return ar.StoreJobMeta(jobMeta)
}

// UpdateTags updates the tag list for an archived job.
// If the job is still running or archiving is disabled, this is a no-op.
//
// This function is safe for concurrent use (protected by mutex).
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
	mutex.Lock()
	defer mutex.Unlock()

	if job.State == schema.JobStateRunning || !useArchive {
		return nil
	}

	jobMeta, err := ar.LoadJobMeta(job)
	if err != nil {
		cclog.Errorf("Error while loading job metadata from archiveBackend: %s", err.Error())
		return err
	}

	jobMeta.Tags = make([]*schema.Tag, 0)
	for _, tag := range tags {
		jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
			Name:  tag.Name,
			Type:  tag.Type,
			Scope: tag.Scope,
		})
	}

	return ar.StoreJobMeta(jobMeta)
}
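// A usage sketch for UpdateTags: the archived tag list is replaced wholesale, so
// callers pass the complete desired set. The tag values below are illustrative,
// and the "global" scope is an assumption:
//
//	newTags := []*schema.Tag{{Type: "project", Name: "abc123", Scope: "global"}}
//	if err := archive.UpdateTags(job, newTags); err != nil {
//		cclog.Errorf("updating tags failed: %s", err.Error())
//	}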