https://github.com/ClusterCockpit/cc-backend
Add SQLite and S3 job archive backends
Add documentation, extend config
@@ -12,12 +12,40 @@ var configSchema = `
	"kind": {
		"description": "Backend type for job-archive",
		"type": "string",
		"enum": ["file", "s3"]
		"enum": ["file", "s3", "sqlite"]
	},
	"path": {
		"description": "Path to job archive for file backend",
		"type": "string"
	},
	"dbPath": {
		"description": "Path to SQLite database file for sqlite backend",
		"type": "string"
	},
	"endpoint": {
		"description": "S3 endpoint URL (for S3-compatible services like MinIO)",
		"type": "string"
	},
	"accessKey": {
		"description": "S3 access key ID",
		"type": "string"
	},
	"secretKey": {
		"description": "S3 secret access key",
		"type": "string"
	},
	"bucket": {
		"description": "S3 bucket name for job archive",
		"type": "string"
	},
	"region": {
		"description": "AWS region for S3 bucket",
		"type": "string"
	},
	"usePathStyle": {
		"description": "Use path-style S3 URLs (required for MinIO and some S3-compatible services)",
		"type": "boolean"
	},
	"compression": {
		"description": "Setup automatic compression for jobs older than number of days",
		"type": "integer"
@@ -3,7 +3,79 @@
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package archive implements the job archive interface and various backend implementations
|
||||
// Package archive implements the job archive interface and various backend implementations.
|
||||
//
|
||||
// The archive package provides a pluggable storage backend system for job metadata and performance data.
|
||||
// It supports three backend types:
|
||||
//
|
||||
// - file: Filesystem-based storage with hierarchical directory structure
|
||||
// - s3: AWS S3 and S3-compatible object storage (MinIO, localstack)
|
||||
// - sqlite: Single-file SQLite database with BLOB storage
|
||||
//
|
||||
// # Backend Selection
|
||||
//
|
||||
// Choose a backend based on your deployment requirements:
|
||||
//
|
||||
// - File: Best for single-server deployments with local fast storage
|
||||
// - S3: Best for distributed deployments requiring redundancy and multi-instance access
|
||||
// - SQLite: Best for portable archives with SQL query capability and transactional integrity
|
||||
//
|
||||
// # Configuration
|
||||
//
|
||||
// The archive backend is configured via JSON in the application config file:
|
||||
//
|
||||
// {
|
||||
// "archive": {
|
||||
// "kind": "file", // or "s3" or "sqlite"
|
||||
// "path": "/var/lib/archive" // for file backend
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// For S3 backend:
|
||||
//
|
||||
// {
|
||||
// "archive": {
|
||||
// "kind": "s3",
|
||||
// "bucket": "my-job-archive",
|
||||
// "region": "us-east-1",
|
||||
// "accessKey": "...",
|
||||
// "secretKey": "..."
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// For SQLite backend:
|
||||
//
|
||||
// {
|
||||
// "archive": {
|
||||
// "kind": "sqlite",
|
||||
// "dbPath": "/var/lib/archive.db"
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// # Usage
|
||||
//
|
||||
// The package is initialized once at application startup:
|
||||
//
|
||||
// err := archive.Init(rawConfig, false)
|
||||
// if err != nil {
|
||||
// log.Fatal(err)
|
||||
// }
|
||||
//
|
||||
// After initialization, use the global functions to interact with the archive:
|
||||
//
|
||||
// // Check if a job exists
|
||||
// exists := archive.GetHandle().Exists(job)
|
||||
//
|
||||
// // Load job metadata
|
||||
// jobMeta, err := archive.GetHandle().LoadJobMeta(job)
|
||||
//
|
||||
// // Store job metadata
|
||||
// err = archive.GetHandle().StoreJobMeta(job)
|
||||
//
|
||||
// # Thread Safety
|
||||
//
|
||||
// All backend implementations are safe for concurrent use. The package uses
|
||||
// internal locking for operations that modify shared state.
|
||||
package archive
|
||||
|
||||
import (
|
||||
@@ -18,45 +90,88 @@ import (
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
)
|
||||
|
||||
// Version is the current archive schema version.
|
||||
// The archive backend must match this version for compatibility.
|
||||
const Version uint64 = 2
|
||||
|
||||
// ArchiveBackend defines the interface that all archive storage backends must implement.
|
||||
// Implementations include FsArchive (filesystem), S3Archive (object storage), and SqliteArchive (database).
|
||||
//
|
||||
// All methods are safe for concurrent use unless otherwise noted.
|
||||
type ArchiveBackend interface {
|
||||
// Init initializes the archive backend with the provided configuration.
|
||||
// Returns the archive version found in the backend storage.
|
||||
// Returns an error if the version is incompatible or initialization fails.
|
||||
Init(rawConfig json.RawMessage) (uint64, error)
|
||||
|
||||
// Info prints archive statistics to stdout, including job counts,
|
||||
// date ranges, and storage sizes per cluster.
|
||||
Info()
|
||||
|
||||
// Exists checks if a job with the given ID, cluster, and start time
|
||||
// exists in the archive.
|
||||
Exists(job *schema.Job) bool
|
||||
|
||||
// LoadJobMeta loads job metadata from the archive.
|
||||
// Returns the complete Job structure including resources, tags, and statistics.
|
||||
LoadJobMeta(job *schema.Job) (*schema.Job, error)
|
||||
|
||||
// LoadJobData loads the complete time-series performance data for a job.
|
||||
// Returns a map of metric names to their scoped data (node, socket, core, etc.).
|
||||
LoadJobData(job *schema.Job) (schema.JobData, error)
|
||||
|
||||
// LoadJobStats loads pre-computed statistics from the job data.
|
||||
// Returns scoped statistics (min, max, avg) for all metrics.
|
||||
LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error)
|
||||
|
||||
// LoadClusterCfg loads the cluster configuration.
|
||||
// Returns the cluster topology, metrics, and hardware specifications.
|
||||
LoadClusterCfg(name string) (*schema.Cluster, error)
|
||||
|
||||
// StoreJobMeta stores job metadata to the archive.
|
||||
// Overwrites existing metadata for the same job ID, cluster, and start time.
|
||||
StoreJobMeta(jobMeta *schema.Job) error
|
||||
|
||||
// ImportJob stores both job metadata and performance data to the archive.
|
||||
// This is typically used during initial job archiving.
|
||||
ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error
|
||||
|
||||
// GetClusters returns a list of all cluster names found in the archive.
|
||||
GetClusters() []string
|
||||
|
||||
// CleanUp removes the specified jobs from the archive.
|
||||
// Used by retention policies to delete old jobs.
|
||||
CleanUp(jobs []*schema.Job)
|
||||
|
||||
// Move relocates jobs to a different path within the archive.
|
||||
// The implementation depends on the backend type.
|
||||
Move(jobs []*schema.Job, path string)
|
||||
|
||||
// Clean removes jobs outside the specified time range.
|
||||
// Jobs with start_time < before OR start_time > after are deleted.
|
||||
// Set after=0 to only use the before parameter.
|
||||
Clean(before int64, after int64)
|
||||
|
||||
// Compress compresses job data files to save storage space.
|
||||
// For filesystem and SQLite backends, this applies gzip compression.
|
||||
// For S3, this compresses and replaces objects.
|
||||
Compress(jobs []*schema.Job)
|
||||
|
||||
// CompressLast returns the timestamp of the last compression run
|
||||
// and updates it to the provided starttime.
|
||||
CompressLast(starttime int64) int64
|
||||
|
||||
// Iter returns a channel that yields all jobs in the archive.
|
||||
// If loadMetricData is true, includes performance data; otherwise only metadata.
|
||||
// The channel is closed when iteration completes.
|
||||
Iter(loadMetricData bool) <-chan JobContainer
|
||||
}
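
// Illustrative sketch (not shown in this file): compile-time assertions of the
// usual Go form can tie the three implementations named above to this
// interface, so any signature drift is caught at build time:
//
//	var (
//		_ ArchiveBackend = (*FsArchive)(nil)
//		_ ArchiveBackend = (*S3Archive)(nil)
//		_ ArchiveBackend = (*SqliteArchive)(nil)
//	)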
|
||||
|
||||
// JobContainer combines job metadata and optional performance data.
|
||||
// Used by Iter() to yield jobs during archive iteration.
|
||||
type JobContainer struct {
|
||||
Meta *schema.Job
|
||||
Data *schema.JobData
|
||||
Meta *schema.Job // Job metadata (always present)
|
||||
Data *schema.JobData // Performance data (nil if not loaded)
|
||||
}
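
// A minimal consumption sketch, assuming Init has already succeeded: ranging
// over the channel returned by Iter visits every archived job, and Data stays
// nil when loadMetricData is false:
//
//	for jc := range archive.GetHandle().Iter(false) {
//		fmt.Printf("%s job %d (start %d)\n", jc.Meta.Cluster, jc.Meta.JobID, jc.Meta.StartTime)
//	}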
|
||||
|
||||
var (
|
||||
@@ -67,6 +182,15 @@ var (
|
||||
mutex sync.Mutex
|
||||
)
|
||||
|
||||
// Init initializes the archive backend with the provided configuration.
|
||||
// Must be called once at application startup before using any archive functions.
|
||||
//
|
||||
// Parameters:
|
||||
// - rawConfig: JSON configuration for the archive backend
|
||||
// - disableArchive: if true, disables archive functionality
|
||||
//
|
||||
// The configuration determines which backend is used (file, s3, or sqlite).
|
||||
// Returns an error if initialization fails or version is incompatible.
|
||||
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
var err error
|
||||
|
||||
@@ -86,8 +210,10 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
	switch cfg.Kind {
	case "file":
		ar = &FsArchive{}
	// case "s3":
	// 	ar = &S3Archive{}
	case "s3":
		ar = &S3Archive{}
	case "sqlite":
		ar = &SqliteArchive{}
	default:
		err = fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
	}
|
||||
@@ -106,10 +232,19 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// GetHandle returns the initialized archive backend instance.
|
||||
// Must be called after Init().
|
||||
func GetHandle() ArchiveBackend {
|
||||
return ar
|
||||
}
|
||||
|
||||
// LoadAveragesFromArchive loads average metric values for a job from the archive.
|
||||
// This is a helper function that extracts average values from job statistics.
|
||||
//
|
||||
// Parameters:
|
||||
// - job: Job to load averages for
|
||||
// - metrics: List of metric names to retrieve
|
||||
// - data: 2D slice where averages will be appended (one row per metric)
|
||||
func LoadAveragesFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
@@ -132,6 +267,8 @@ func LoadAveragesFromArchive(
|
||||
return nil
|
||||
}
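
// A usage sketch for LoadAveragesFromArchive; the element type of data is not
// visible in this hunk and is assumed here to be schema.Float, with one row
// per requested metric (the metric names are placeholders):
//
//	metrics := []string{"flops_any", "mem_bw"}
//	data := make([][]schema.Float, len(metrics))
//	if err := archive.LoadAveragesFromArchive(job, metrics, data); err != nil {
//		// handle error
//	}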
|
||||
|
||||
// LoadStatsFromArchive loads metric statistics for a job from the archive.
|
||||
// Returns a map of metric names to their statistics (min, max, avg).
|
||||
func LoadStatsFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
@@ -160,6 +297,8 @@ func LoadStatsFromArchive(
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// LoadScopedStatsFromArchive loads scoped statistics for a job from the archive.
|
||||
// Returns statistics organized by metric scope (node, socket, core, etc.).
|
||||
func LoadScopedStatsFromArchive(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
@@ -174,6 +313,8 @@ func LoadScopedStatsFromArchive(
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// GetStatistics returns all metric statistics for a job.
|
||||
// Returns a map of metric names to their job-level statistics.
|
||||
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
metaFile, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
@@ -184,8 +325,10 @@ func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
return metaFile.Statistics, nil
|
||||
}
|
||||
|
||||
// UpdateMetadata checks if the job is archived, find its `meta.json` file and override the Metadata
|
||||
// in that JSON file. If the job is not archived, nothing is done.
|
||||
// UpdateMetadata updates the metadata map for an archived job.
|
||||
// If the job is still running or archiving is disabled, this is a no-op.
|
||||
//
|
||||
// This function is safe for concurrent use (protected by mutex).
|
||||
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
@@ -205,8 +348,10 @@ func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
|
||||
return ar.StoreJobMeta(jobMeta)
|
||||
}
|
||||
|
||||
// UpdateTags checks if the job is archived, find its `meta.json` file and override the tags list
|
||||
// in that JSON file. If the job is not archived, nothing is done.
|
||||
// UpdateTags updates the tag list for an archived job.
|
||||
// If the job is still running or archiving is disabled, this is a no-op.
|
||||
//
|
||||
// This function is safe for concurrent use (protected by mutex).
|
||||
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
|
||||
@@ -28,20 +28,26 @@ import (
|
||||
"github.com/santhosh-tekuri/jsonschema/v5"
|
||||
)
|
||||
|
||||
// FsArchiveConfig holds the configuration for the filesystem archive backend.
|
||||
type FsArchiveConfig struct {
|
||||
Path string `json:"path"`
|
||||
Path string `json:"path"` // Root directory path for the archive
|
||||
}
|
||||
|
||||
// FsArchive implements ArchiveBackend using a hierarchical filesystem structure.
|
||||
// Jobs are stored in directories organized by cluster, job ID, and start time.
|
||||
//
|
||||
// Directory structure: <path>/<cluster>/<jobid/1000>/<jobid%1000>/<starttime>/
|
||||
type FsArchive struct {
|
||||
path string
|
||||
clusters []string
|
||||
path string // Root path of the archive
|
||||
clusters []string // List of discovered cluster names
|
||||
}
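
// Worked example of the layout above: job 1403244 on cluster "emmy" started at
// 1608923076 gives 1403244/1000 = 1403 and 1403244%1000 = 244, so its files
// live under
//
//	<path>/emmy/1403/244/1608923076/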
|
||||
|
||||
// clusterInfo holds statistics about jobs in a cluster.
|
||||
type clusterInfo struct {
|
||||
numJobs int
|
||||
dateFirst int64
|
||||
dateLast int64
|
||||
diskSize float64
|
||||
numJobs int // Total number of jobs
|
||||
dateFirst int64 // Unix timestamp of oldest job
|
||||
dateLast int64 // Unix timestamp of newest job
|
||||
diskSize float64 // Total disk usage in MB
|
||||
}
|
||||
|
||||
func getDirectory(
|
||||
|
||||
@@ -5,10 +5,843 @@
|
||||
|
||||
package archive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/util"
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
awsconfig "github.com/aws/aws-sdk-go-v2/config"
|
||||
"github.com/aws/aws-sdk-go-v2/credentials"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3"
|
||||
)
|
||||
|
||||
// S3ArchiveConfig holds the configuration for the S3 archive backend.
|
||||
type S3ArchiveConfig struct {
|
||||
Path string `json:"filePath"`
|
||||
Endpoint string `json:"endpoint"` // S3 endpoint URL (optional, for MinIO/localstack)
|
||||
AccessKey string `json:"accessKey"` // AWS access key ID
|
||||
SecretKey string `json:"secretKey"` // AWS secret access key
|
||||
Bucket string `json:"bucket"` // S3 bucket name
|
||||
Region string `json:"region"` // AWS region
|
||||
UsePathStyle bool `json:"usePathStyle"` // Use path-style URLs (required for MinIO)
|
||||
}
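
// A sketch of a MinIO-style configuration using these fields (bucket name and
// credentials are placeholders, mirroring the values used in the tests):
//
//	{
//		"archive": {
//			"kind": "s3",
//			"endpoint": "http://localhost:9000",
//			"bucket": "cc-job-archive",
//			"region": "us-east-1",
//			"accessKey": "minioadmin",
//			"secretKey": "minioadmin",
//			"usePathStyle": true
//		}
//	}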
|
||||
|
||||
// S3Archive implements ArchiveBackend using AWS S3 or S3-compatible object storage.
|
||||
// Jobs are stored as objects with keys mirroring the filesystem structure.
|
||||
//
|
||||
// Object key structure: <cluster>/<jobid/1000>/<jobid%1000>/<starttime>/meta.json
|
||||
type S3Archive struct {
|
||||
path string
|
||||
client *s3.Client // AWS S3 client
|
||||
bucket string // S3 bucket name
|
||||
clusters []string // List of discovered cluster names
|
||||
}
|
||||
|
||||
// getS3Key generates the S3 object key for a job file
|
||||
func getS3Key(job *schema.Job, file string) string {
|
||||
lvl1 := fmt.Sprintf("%d", job.JobID/1000)
|
||||
lvl2 := fmt.Sprintf("%03d", job.JobID%1000)
|
||||
startTime := strconv.FormatInt(job.StartTime, 10)
|
||||
return fmt.Sprintf("%s/%s/%s/%s/%s", job.Cluster, lvl1, lvl2, startTime, file)
|
||||
}
|
||||
|
||||
// getS3Directory generates the S3 key prefix for a job directory
|
||||
func getS3Directory(job *schema.Job) string {
|
||||
lvl1 := fmt.Sprintf("%d", job.JobID/1000)
|
||||
lvl2 := fmt.Sprintf("%03d", job.JobID%1000)
|
||||
startTime := strconv.FormatInt(job.StartTime, 10)
|
||||
return fmt.Sprintf("%s/%s/%s/%s/", job.Cluster, lvl1, lvl2, startTime)
|
||||
}
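
// For example, job 1403244 on "emmy" started at 1608923076 yields
//
//	getS3Key(job, "meta.json") -> "emmy/1403/244/1608923076/meta.json"
//	getS3Directory(job)        -> "emmy/1403/244/1608923076/"
//
// since 1403244/1000 = 1403 and 1403244%1000 = 244 (zero-padded to three digits).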
|
||||
|
||||
func (s3a *S3Archive) Init(rawConfig json.RawMessage) (uint64, error) {
|
||||
var cfg S3ArchiveConfig
|
||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
cclog.Warnf("S3Archive Init() > Unmarshal error: %#v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if cfg.Bucket == "" {
|
||||
err := fmt.Errorf("S3Archive Init(): empty bucket name")
|
||||
cclog.Errorf("S3Archive Init() > config error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if cfg.Region == "" {
|
||||
cfg.Region = "us-east-1" // Default region
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Create custom AWS config
|
||||
var awsCfg aws.Config
|
||||
var err error
|
||||
|
||||
if cfg.AccessKey != "" && cfg.SecretKey != "" {
|
||||
// Use static credentials
|
||||
customResolver := aws.EndpointResolverWithOptionsFunc(func(service, region string, options ...interface{}) (aws.Endpoint, error) {
|
||||
if cfg.Endpoint != "" {
|
||||
return aws.Endpoint{
|
||||
URL: cfg.Endpoint,
|
||||
HostnameImmutable: cfg.UsePathStyle,
|
||||
Source: aws.EndpointSourceCustom,
|
||||
}, nil
|
||||
}
|
||||
return aws.Endpoint{}, &aws.EndpointNotFoundError{}
|
||||
})
|
||||
|
||||
awsCfg, err = awsconfig.LoadDefaultConfig(ctx,
|
||||
awsconfig.WithRegion(cfg.Region),
|
||||
awsconfig.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
|
||||
cfg.AccessKey,
|
||||
cfg.SecretKey,
|
||||
"",
|
||||
)),
|
||||
awsconfig.WithEndpointResolverWithOptions(customResolver),
|
||||
)
|
||||
} else {
|
||||
// Use default credential chain
|
||||
awsCfg, err = awsconfig.LoadDefaultConfig(ctx,
|
||||
awsconfig.WithRegion(cfg.Region),
|
||||
)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > failed to load AWS config: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Create S3 client with path-style option
|
||||
s3a.client = s3.NewFromConfig(awsCfg, func(o *s3.Options) {
|
||||
o.UsePathStyle = cfg.UsePathStyle
|
||||
})
|
||||
s3a.bucket = cfg.Bucket
|
||||
|
||||
// Check if bucket exists and is accessible
|
||||
_, err = s3a.client.HeadBucket(ctx, &s3.HeadBucketInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > bucket access error: %v", err)
|
||||
return 0, fmt.Errorf("cannot access S3 bucket '%s': %w", s3a.bucket, err)
|
||||
}
|
||||
|
||||
// Read version.txt from S3
|
||||
versionKey := "version.txt"
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(versionKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Warnf("S3Archive Init() > cannot read version.txt: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
versionBytes, err := io.ReadAll(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > failed to read version.txt: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
version, err := strconv.ParseUint(strings.TrimSuffix(string(versionBytes), "\n"), 10, 64)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > version parse error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if version != Version {
|
||||
return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
|
||||
}
|
||||
|
||||
// Discover clusters by listing top-level prefixes
|
||||
s3a.clusters = []string{}
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Delimiter: aws.String("/"),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Init() > failed to list clusters: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
for _, prefix := range page.CommonPrefixes {
|
||||
if prefix.Prefix != nil {
|
||||
clusterName := strings.TrimSuffix(*prefix.Prefix, "/")
|
||||
// Filter out non-cluster entries
|
||||
if clusterName != "" && clusterName != "version.txt" {
|
||||
s3a.clusters = append(s3a.clusters, clusterName)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Infof("S3Archive initialized with bucket '%s', found %d clusters", s3a.bucket, len(s3a.clusters))
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Info() {
|
||||
ctx := context.Background()
|
||||
fmt.Printf("S3 Job archive bucket: %s\n", s3a.bucket)
|
||||
|
||||
ci := make(map[string]*clusterInfo)
|
||||
|
||||
for _, cluster := range s3a.clusters {
|
||||
ci[cluster] = &clusterInfo{dateFirst: time.Now().Unix()}
|
||||
|
||||
// List all jobs for this cluster
|
||||
prefix := cluster + "/"
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Info() > failed to list objects: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key != nil && strings.HasSuffix(*obj.Key, "/meta.json") {
|
||||
ci[cluster].numJobs++
|
||||
// Extract starttime from key: cluster/lvl1/lvl2/starttime/meta.json
|
||||
parts := strings.Split(*obj.Key, "/")
|
||||
if len(parts) >= 4 {
|
||||
startTime, err := strconv.ParseInt(parts[3], 10, 64)
|
||||
if err == nil {
|
||||
ci[cluster].dateFirst = util.Min(ci[cluster].dateFirst, startTime)
|
||||
ci[cluster].dateLast = util.Max(ci[cluster].dateLast, startTime)
|
||||
}
|
||||
}
|
||||
if obj.Size != nil {
|
||||
ci[cluster].diskSize += float64(*obj.Size) / (1024 * 1024) // Convert to MB
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cit := clusterInfo{dateFirst: time.Now().Unix()}
|
||||
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', tabwriter.Debug)
|
||||
fmt.Fprintln(w, "cluster\t#jobs\tfrom\tto\tsize (MB)")
|
||||
for cluster, clusterInfo := range ci {
|
||||
fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%.2f\n", cluster,
|
||||
clusterInfo.numJobs,
|
||||
time.Unix(clusterInfo.dateFirst, 0),
|
||||
time.Unix(clusterInfo.dateLast, 0),
|
||||
clusterInfo.diskSize)
|
||||
|
||||
cit.numJobs += clusterInfo.numJobs
|
||||
cit.dateFirst = util.Min(cit.dateFirst, clusterInfo.dateFirst)
|
||||
cit.dateLast = util.Max(cit.dateLast, clusterInfo.dateLast)
|
||||
cit.diskSize += clusterInfo.diskSize
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "TOTAL\t%d\t%s\t%s\t%.2f\n",
|
||||
cit.numJobs, time.Unix(cit.dateFirst, 0), time.Unix(cit.dateLast, 0), cit.diskSize)
|
||||
w.Flush()
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Exists(job *schema.Job) bool {
|
||||
ctx := context.Background()
|
||||
key := getS3Key(job, "meta.json")
|
||||
|
||||
_, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
|
||||
ctx := context.Background()
|
||||
key := getS3Key(job, "meta.json")
|
||||
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobMeta() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
b, err := io.ReadAll(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobMeta() > read error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if config.Keys.Validate {
|
||||
if err := schema.Validate(schema.Meta, bytes.NewReader(b)); err != nil {
|
||||
return nil, fmt.Errorf("validate job meta: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return DecodeJobMeta(bytes.NewReader(b))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadJobData(job *schema.Job) (schema.JobData, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Try compressed file first
|
||||
keyGz := getS3Key(job, "data.json.gz")
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(keyGz),
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
// Try uncompressed file
|
||||
key := getS3Key(job, "data.json")
|
||||
result, err = s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobData() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(result.Body)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobData(bytes.NewReader(b), key)
|
||||
}
|
||||
return DecodeJobData(result.Body, key)
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
// Decompress
|
||||
r, err := gzip.NewReader(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobData() > gzip error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(r)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobData(bytes.NewReader(b), keyGz)
|
||||
}
|
||||
return DecodeJobData(r, keyGz)
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) {
|
||||
ctx := context.Background()
|
||||
|
||||
// Try compressed file first
|
||||
keyGz := getS3Key(job, "data.json.gz")
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(keyGz),
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
// Try uncompressed file
|
||||
key := getS3Key(job, "data.json")
|
||||
result, err = s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobStats() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(result.Body)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return nil, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobStats(bytes.NewReader(b), key)
|
||||
}
|
||||
return DecodeJobStats(result.Body, key)
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
// Decompress
|
||||
r, err := gzip.NewReader(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadJobStats() > gzip error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
if config.Keys.Validate {
|
||||
b, _ := io.ReadAll(r)
|
||||
if err := schema.Validate(schema.Data, bytes.NewReader(b)); err != nil {
|
||||
return nil, fmt.Errorf("validate job data: %v", err)
|
||||
}
|
||||
return DecodeJobStats(bytes.NewReader(b), keyGz)
|
||||
}
|
||||
return DecodeJobStats(r, keyGz)
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) LoadClusterCfg(name string) (*schema.Cluster, error) {
|
||||
ctx := context.Background()
|
||||
key := fmt.Sprintf("%s/cluster.json", name)
|
||||
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadClusterCfg() > GetObject error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
defer result.Body.Close()
|
||||
|
||||
b, err := io.ReadAll(result.Body)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive LoadClusterCfg() > read error: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
|
||||
cclog.Warnf("Validate cluster config: %v\n", err)
|
||||
return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
|
||||
}
|
||||
|
||||
return DecodeCluster(bytes.NewReader(b))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) StoreJobMeta(job *schema.Job) error {
|
||||
ctx := context.Background()
|
||||
key := getS3Key(job, "meta.json")
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := EncodeJobMeta(&buf, job); err != nil {
|
||||
cclog.Error("S3Archive StoreJobMeta() > encoding error")
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(key),
|
||||
Body: bytes.NewReader(buf.Bytes()),
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive StoreJobMeta() > PutObject error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error {
|
||||
ctx := context.Background()
|
||||
|
||||
// Upload meta.json
|
||||
metaKey := getS3Key(jobMeta, "meta.json")
|
||||
var metaBuf bytes.Buffer
|
||||
if err := EncodeJobMeta(&metaBuf, jobMeta); err != nil {
|
||||
cclog.Error("S3Archive ImportJob() > encoding meta error")
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(metaKey),
|
||||
Body: bytes.NewReader(metaBuf.Bytes()),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive ImportJob() > PutObject meta error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Upload data.json
|
||||
dataKey := getS3Key(jobMeta, "data.json")
|
||||
var dataBuf bytes.Buffer
|
||||
if err := EncodeJobData(&dataBuf, jobData); err != nil {
|
||||
cclog.Error("S3Archive ImportJob() > encoding data error")
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
Body: bytes.NewReader(dataBuf.Bytes()),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive ImportJob() > PutObject data error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) GetClusters() []string {
|
||||
return s3a.clusters
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) CleanUp(jobs []*schema.Job) {
|
||||
ctx := context.Background()
|
||||
start := time.Now()
|
||||
|
||||
for _, job := range jobs {
|
||||
if job == nil {
|
||||
cclog.Errorf("S3Archive CleanUp() error: job is nil")
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete all files in the job directory
|
||||
prefix := getS3Directory(job)
|
||||
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CleanUp() > list error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key != nil {
|
||||
_, err := s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: obj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CleanUp() > delete error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Infof("Retention Service - Remove %d jobs from S3 in %s", len(jobs), time.Since(start))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Move(jobs []*schema.Job, targetPath string) {
|
||||
ctx := context.Background()
|
||||
|
||||
for _, job := range jobs {
|
||||
sourcePrefix := getS3Directory(job)
|
||||
|
||||
// List all objects in source
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(sourcePrefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Move() > list error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Compute target key by replacing prefix
|
||||
targetKey := strings.Replace(*obj.Key, sourcePrefix, targetPath+"/", 1)
|
||||
|
||||
// Copy object
|
||||
_, err := s3a.client.CopyObject(ctx, &s3.CopyObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
CopySource: aws.String(fmt.Sprintf("%s/%s", s3a.bucket, *obj.Key)),
|
||||
Key: aws.String(targetKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Move() > copy error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete source object
|
||||
_, err = s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: obj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Move() > delete error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Clean(before int64, after int64) {
|
||||
ctx := context.Background()
|
||||
|
||||
if after == 0 {
|
||||
after = math.MaxInt64
|
||||
}
|
||||
|
||||
for _, cluster := range s3a.clusters {
|
||||
prefix := cluster + "/"
|
||||
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Clean() > list error: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key == nil || !strings.HasSuffix(*obj.Key, "/meta.json") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Extract starttime from key: cluster/lvl1/lvl2/starttime/meta.json
|
||||
parts := strings.Split(*obj.Key, "/")
|
||||
if len(parts) < 4 {
|
||||
continue
|
||||
}
|
||||
|
||||
startTime, err := strconv.ParseInt(parts[3], 10, 64)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Clean() > cannot parse starttime: %s", err.Error())
|
||||
}
|
||||
|
||||
if startTime < before || startTime > after {
|
||||
// Delete entire job directory
|
||||
jobPrefix := strings.Join(parts[:4], "/") + "/"
|
||||
|
||||
jobPaginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(jobPrefix),
|
||||
})
|
||||
|
||||
for jobPaginator.HasMorePages() {
|
||||
jobPage, err := jobPaginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Clean() > list job error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, jobObj := range jobPage.Contents {
|
||||
if jobObj.Key != nil {
|
||||
_, err := s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: jobObj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Clean() > delete error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Compress(jobs []*schema.Job) {
|
||||
ctx := context.Background()
|
||||
var cnt int
|
||||
start := time.Now()
|
||||
|
||||
for _, job := range jobs {
|
||||
dataKey := getS3Key(job, "data.json")
|
||||
|
||||
// Check if uncompressed file exists and get its size
|
||||
headResult, err := s3a.client.HeadObject(ctx, &s3.HeadObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
})
|
||||
if err != nil {
|
||||
continue // File doesn't exist or error
|
||||
}
|
||||
|
||||
if headResult.ContentLength == nil || *headResult.ContentLength < 2000 {
|
||||
continue // Too small to compress
|
||||
}
|
||||
|
||||
// Download the file
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > GetObject error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(result.Body)
|
||||
result.Body.Close()
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > read error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Compress the data
|
||||
var compressedBuf bytes.Buffer
|
||||
gzipWriter := gzip.NewWriter(&compressedBuf)
|
||||
if _, err := gzipWriter.Write(data); err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > gzip write error: %v", err)
|
||||
gzipWriter.Close()
|
||||
continue
|
||||
}
|
||||
gzipWriter.Close()
|
||||
|
||||
// Upload compressed file
|
||||
compressedKey := getS3Key(job, "data.json.gz")
|
||||
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(compressedKey),
|
||||
Body: bytes.NewReader(compressedBuf.Bytes()),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > PutObject error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete uncompressed file
|
||||
_, err = s3a.client.DeleteObject(ctx, &s3.DeleteObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(dataKey),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Compress() > delete error: %v", err)
|
||||
}
|
||||
|
||||
cnt++
|
||||
}
|
||||
|
||||
cclog.Infof("Compression Service - %d files in S3 took %s", cnt, time.Since(start))
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) CompressLast(starttime int64) int64 {
|
||||
ctx := context.Background()
|
||||
compressKey := "compress.txt"
|
||||
|
||||
// Try to read existing compress.txt
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(compressKey),
|
||||
})
|
||||
|
||||
var last int64
|
||||
if err == nil {
|
||||
b, _ := io.ReadAll(result.Body)
|
||||
result.Body.Close()
|
||||
last, err = strconv.ParseInt(strings.TrimSuffix(string(b), "\n"), 10, 64)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CompressLast() > parse error: %v", err)
|
||||
last = starttime
|
||||
}
|
||||
} else {
|
||||
last = starttime
|
||||
}
|
||||
|
||||
cclog.Infof("S3Archive CompressLast() - start %d last %d", starttime, last)
|
||||
|
||||
// Write new timestamp
|
||||
newValue := fmt.Sprintf("%d", starttime)
|
||||
_, err = s3a.client.PutObject(ctx, &s3.PutObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: aws.String(compressKey),
|
||||
Body: strings.NewReader(newValue),
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive CompressLast() > PutObject error: %v", err)
|
||||
}
|
||||
|
||||
return last
|
||||
}
|
||||
|
||||
func (s3a *S3Archive) Iter(loadMetricData bool) <-chan JobContainer {
|
||||
ch := make(chan JobContainer)
|
||||
|
||||
go func() {
|
||||
ctx := context.Background()
|
||||
defer close(ch)
|
||||
|
||||
for _, cluster := range s3a.clusters {
|
||||
prefix := cluster + "/"
|
||||
|
||||
paginator := s3.NewListObjectsV2Paginator(s3a.client, &s3.ListObjectsV2Input{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Prefix: aws.String(prefix),
|
||||
})
|
||||
|
||||
for paginator.HasMorePages() {
|
||||
page, err := paginator.NextPage(ctx)
|
||||
if err != nil {
|
||||
cclog.Fatalf("S3Archive Iter() > list error: %s", err.Error())
|
||||
}
|
||||
|
||||
for _, obj := range page.Contents {
|
||||
if obj.Key == nil || !strings.HasSuffix(*obj.Key, "/meta.json") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Load job metadata
|
||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||
Bucket: aws.String(s3a.bucket),
|
||||
Key: obj.Key,
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > GetObject meta error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
b, err := io.ReadAll(result.Body)
|
||||
result.Body.Close()
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > read meta error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
job, err := DecodeJobMeta(bytes.NewReader(b))
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > decode meta error: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
if loadMetricData {
|
||||
jobData, err := s3a.LoadJobData(job)
|
||||
if err != nil {
|
||||
cclog.Errorf("S3Archive Iter() > load data error: %v", err)
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
} else {
|
||||
ch <- JobContainer{Meta: job, Data: &jobData}
|
||||
}
|
||||
} else {
|
||||
ch <- JobContainer{Meta: job, Data: nil}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
|
||||
pkg/archive/s3Backend_test.go (new file, 293 lines)
@@ -0,0 +1,293 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/aws/aws-sdk-go-v2/aws"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3/types"
|
||||
)
|
||||
|
||||
// MockS3Client is a mock implementation of the S3 client for testing
|
||||
type MockS3Client struct {
|
||||
objects map[string][]byte
|
||||
}
|
||||
|
||||
func NewMockS3Client() *MockS3Client {
|
||||
return &MockS3Client{
|
||||
objects: make(map[string][]byte),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MockS3Client) HeadBucket(ctx context.Context, params *s3.HeadBucketInput, optFns ...func(*s3.Options)) (*s3.HeadBucketOutput, error) {
|
||||
// Always succeed for mock
|
||||
return &s3.HeadBucketOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
data, exists := m.objects[key]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("NoSuchKey: object not found")
|
||||
}
|
||||
|
||||
contentLength := int64(len(data))
|
||||
return &s3.GetObjectOutput{
|
||||
Body: io.NopCloser(bytes.NewReader(data)),
|
||||
ContentLength: &contentLength,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) PutObject(ctx context.Context, params *s3.PutObjectInput, optFns ...func(*s3.Options)) (*s3.PutObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
data, err := io.ReadAll(params.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m.objects[key] = data
|
||||
return &s3.PutObjectOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) HeadObject(ctx context.Context, params *s3.HeadObjectInput, optFns ...func(*s3.Options)) (*s3.HeadObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
data, exists := m.objects[key]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("NotFound")
|
||||
}
|
||||
|
||||
contentLength := int64(len(data))
|
||||
return &s3.HeadObjectOutput{
|
||||
ContentLength: &contentLength,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) DeleteObject(ctx context.Context, params *s3.DeleteObjectInput, optFns ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) {
|
||||
key := aws.ToString(params.Key)
|
||||
delete(m.objects, key)
|
||||
return &s3.DeleteObjectOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) CopyObject(ctx context.Context, params *s3.CopyObjectInput, optFns ...func(*s3.Options)) (*s3.CopyObjectOutput, error) {
|
||||
// Parse source bucket/key from CopySource
|
||||
source := aws.ToString(params.CopySource)
|
||||
parts := strings.SplitN(source, "/", 2)
|
||||
if len(parts) < 2 {
|
||||
return nil, fmt.Errorf("invalid CopySource")
|
||||
}
|
||||
sourceKey := parts[1]
|
||||
|
||||
data, exists := m.objects[sourceKey]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("source not found")
|
||||
}
|
||||
|
||||
destKey := aws.ToString(params.Key)
|
||||
m.objects[destKey] = data
|
||||
return &s3.CopyObjectOutput{}, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) ListObjectsV2(ctx context.Context, params *s3.ListObjectsV2Input, optFns ...func(*s3.Options)) (*s3.ListObjectsV2Output, error) {
|
||||
prefix := aws.ToString(params.Prefix)
|
||||
delimiter := aws.ToString(params.Delimiter)
|
||||
|
||||
var contents []types.Object
|
||||
commonPrefixes := make(map[string]bool)
|
||||
|
||||
for key, data := range m.objects {
|
||||
if !strings.HasPrefix(key, prefix) {
|
||||
continue
|
||||
}
|
||||
|
||||
if delimiter != "" {
|
||||
// Check if there's a delimiter after the prefix
|
||||
remainder := strings.TrimPrefix(key, prefix)
|
||||
delimIdx := strings.Index(remainder, delimiter)
|
||||
if delimIdx >= 0 {
|
||||
// This is a "directory" - add to common prefixes
|
||||
commonPrefix := prefix + remainder[:delimIdx+1]
|
||||
commonPrefixes[commonPrefix] = true
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
size := int64(len(data))
|
||||
contents = append(contents, types.Object{
|
||||
Key: aws.String(key),
|
||||
Size: &size,
|
||||
})
|
||||
}
|
||||
|
||||
var prefixList []types.CommonPrefix
|
||||
for p := range commonPrefixes {
|
||||
prefixList = append(prefixList, types.CommonPrefix{
|
||||
Prefix: aws.String(p),
|
||||
})
|
||||
}
|
||||
|
||||
return &s3.ListObjectsV2Output{
|
||||
Contents: contents,
|
||||
CommonPrefixes: prefixList,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Test helper to create a mock S3 archive with test data
|
||||
func setupMockS3Archive(t *testing.T) *MockS3Client {
|
||||
mock := NewMockS3Client()
|
||||
|
||||
// Add version.txt
|
||||
mock.objects["version.txt"] = []byte("2\n")
|
||||
|
||||
// Add a test cluster directory
|
||||
mock.objects["emmy/cluster.json"] = []byte(`{
|
||||
"name": "emmy",
|
||||
"metricConfig": [],
|
||||
"subClusters": [
|
||||
{
|
||||
"name": "main",
|
||||
"processorType": "Intel Xeon",
|
||||
"socketsPerNode": 2,
|
||||
"coresPerSocket": 4,
|
||||
"threadsPerCore": 2,
|
||||
"flopRateScalar": 16,
|
||||
"flopRateSimd": 32,
|
||||
"memoryBandwidth": 100
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
||||
// Add a test job
|
||||
mock.objects["emmy/1403/244/1608923076/meta.json"] = []byte(`{
|
||||
"jobId": 1403244,
|
||||
"cluster": "emmy",
|
||||
"startTime": 1608923076,
|
||||
"numNodes": 1,
|
||||
"resources": [{"hostname": "node001"}]
|
||||
}`)
|
||||
|
||||
mock.objects["emmy/1403/244/1608923076/data.json"] = []byte(`{
|
||||
"mem_used": {
|
||||
"node": {
|
||||
"node001": {
|
||||
"series": [{"time": 1608923076, "value": 1000}]
|
||||
}
|
||||
}
|
||||
}
|
||||
}`)
|
||||
|
||||
return mock
|
||||
}
|
||||
|
||||
func TestS3InitEmptyBucket(t *testing.T) {
|
||||
var s3a S3Archive
|
||||
_, err := s3a.Init(json.RawMessage(`{"kind":"s3"}`))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for empty bucket")
|
||||
}
|
||||
}
|
||||
|
||||
func TestS3InitInvalidConfig(t *testing.T) {
|
||||
var s3a S3Archive
|
||||
_, err := s3a.Init(json.RawMessage(`"bucket":"test-bucket"`))
|
||||
if err == nil {
|
||||
t.Fatal("expected error for invalid config")
|
||||
}
|
||||
}
|
||||
|
||||
// Note: TestS3Init would require actual S3 connection or more complex mocking
|
||||
// For now, we document that Init() should be tested manually with MinIO
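//
// A sketch of how such a manual test could be gated behind an environment
// variable (the variable name and helper shape are hypothetical, and the
// target bucket is assumed to already contain a valid version.txt):
//
//	func TestS3InitMinIO(t *testing.T) {
//		endpoint := os.Getenv("CC_TEST_S3_ENDPOINT") // e.g. a local MinIO at http://localhost:9000
//		if endpoint == "" {
//			t.Skip("no S3 endpoint configured")
//		}
//		cfg := fmt.Sprintf(`{"bucket":"cc-test","endpoint":"%s","accessKey":"minioadmin","secretKey":"minioadmin","usePathStyle":true}`, endpoint)
//		var s3a S3Archive
//		if _, err := s3a.Init(json.RawMessage(cfg)); err != nil {
//			t.Fatal(err)
//		}
//	}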
|
||||
|
||||
func TestGetS3Key(t *testing.T) {
|
||||
job := &schema.Job{
|
||||
JobID: 1403244,
|
||||
Cluster: "emmy",
|
||||
StartTime: 1608923076,
|
||||
}
|
||||
|
||||
key := getS3Key(job, "meta.json")
|
||||
expected := "emmy/1403/244/1608923076/meta.json"
|
||||
if key != expected {
|
||||
t.Errorf("expected key %s, got %s", expected, key)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetS3Directory(t *testing.T) {
|
||||
job := &schema.Job{
|
||||
JobID: 1403244,
|
||||
Cluster: "emmy",
|
||||
StartTime: 1608923076,
|
||||
}
|
||||
|
||||
dir := getS3Directory(job)
|
||||
expected := "emmy/1403/244/1608923076/"
|
||||
if dir != expected {
|
||||
t.Errorf("expected dir %s, got %s", expected, dir)
|
||||
}
|
||||
}
|
||||
|
||||
// Integration-style tests would go here for actual S3 operations
|
||||
// These would require MinIO or localstack for testing
|
||||
|
||||
func TestS3ArchiveConfigParsing(t *testing.T) {
|
||||
rawConfig := json.RawMessage(`{
|
||||
"endpoint": "http://localhost:9000",
|
||||
"accessKey": "minioadmin",
|
||||
"secretKey": "minioadmin",
|
||||
"bucket": "test-bucket",
|
||||
"region": "us-east-1",
|
||||
"usePathStyle": true
|
||||
}`)
|
||||
|
||||
var cfg S3ArchiveConfig
|
||||
err := json.Unmarshal(rawConfig, &cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse config: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Bucket != "test-bucket" {
|
||||
t.Errorf("expected bucket 'test-bucket', got '%s'", cfg.Bucket)
|
||||
}
|
||||
if cfg.Region != "us-east-1" {
|
||||
t.Errorf("expected region 'us-east-1', got '%s'", cfg.Region)
|
||||
}
|
||||
if !cfg.UsePathStyle {
|
||||
t.Error("expected usePathStyle to be true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestS3KeyGeneration(t *testing.T) {
|
||||
tests := []struct {
|
||||
jobID int64
|
||||
cluster string
|
||||
startTime int64
|
||||
file string
|
||||
expected string
|
||||
}{
|
||||
{1403244, "emmy", 1608923076, "meta.json", "emmy/1403/244/1608923076/meta.json"},
|
||||
{1404397, "emmy", 1609300556, "data.json.gz", "emmy/1404/397/1609300556/data.json.gz"},
|
||||
{42, "fritz", 1234567890, "meta.json", "fritz/0/042/1234567890/meta.json"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
job := &schema.Job{
|
||||
JobID: tt.jobID,
|
||||
Cluster: tt.cluster,
|
||||
StartTime: tt.startTime,
|
||||
}
|
||||
|
||||
key := getS3Key(job, tt.file)
|
||||
if key != tt.expected {
|
||||
t.Errorf("for job %d: expected %s, got %s", tt.jobID, tt.expected, key)
|
||||
}
|
||||
}
|
||||
}
|
||||
pkg/archive/sqliteBackend.go (new file, 584 lines)
@@ -0,0 +1,584 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package archive
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strconv"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/util"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
// SqliteArchiveConfig holds the configuration for the SQLite archive backend.
|
||||
type SqliteArchiveConfig struct {
|
||||
DBPath string `json:"dbPath"` // Path to SQLite database file
|
||||
}
|
||||
|
||||
// SqliteArchive implements ArchiveBackend using a SQLite database with BLOB storage.
|
||||
// Job metadata and data are stored as JSON BLOBs with indexes for fast queries.
|
||||
//
|
||||
// Uses WAL (Write-Ahead Logging) mode for better concurrency and a 64MB cache.
|
||||
type SqliteArchive struct {
|
||||
db *sql.DB // SQLite database connection
|
||||
clusters []string // List of discovered cluster names
|
||||
}
|
||||
|
||||
// sqliteSchema defines the database schema for SQLite archive backend.
|
||||
// Jobs table: stores job metadata and data as BLOBs with compression flag
|
||||
// Clusters table: stores cluster configurations
|
||||
// Metadata table: stores version and other key-value pairs
|
||||
const sqliteSchema = `
|
||||
CREATE TABLE IF NOT EXISTS jobs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id INTEGER NOT NULL,
|
||||
cluster TEXT NOT NULL,
|
||||
start_time INTEGER NOT NULL,
|
||||
meta_json BLOB NOT NULL,
|
||||
data_json BLOB,
|
||||
data_compressed BOOLEAN DEFAULT 0,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
UNIQUE(job_id, cluster, start_time)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_jobs_cluster ON jobs(cluster);
|
||||
CREATE INDEX IF NOT EXISTS idx_jobs_start_time ON jobs(start_time);
|
||||
CREATE INDEX IF NOT EXISTS idx_jobs_lookup ON jobs(cluster, job_id, start_time);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS clusters (
|
||||
name TEXT PRIMARY KEY,
|
||||
config_json BLOB NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS metadata (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
);
|
||||
`
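
// Because the whole archive lives in a single SQLite file, it can also be
// inspected with ad-hoc SQL; two illustrative queries against the jobs table
// defined above:
//
//	SELECT cluster, COUNT(*) FROM jobs GROUP BY cluster;
//	SELECT job_id, start_time FROM jobs WHERE cluster = 'emmy' ORDER BY start_time DESC LIMIT 10;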
|
||||
|
||||
func (sa *SqliteArchive) Init(rawConfig json.RawMessage) (uint64, error) {
|
||||
var cfg SqliteArchiveConfig
|
||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
cclog.Warnf("SqliteArchive Init() > Unmarshal error: %#v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if cfg.DBPath == "" {
|
||||
err := fmt.Errorf("SqliteArchive Init(): empty database path")
|
||||
cclog.Errorf("SqliteArchive Init() > config error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Open SQLite database
|
||||
db, err := sql.Open("sqlite3", cfg.DBPath)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to open database: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
sa.db = db
|
||||
|
||||
// Set pragmas for better performance
|
||||
pragmas := []string{
|
||||
"PRAGMA journal_mode=WAL",
|
||||
"PRAGMA synchronous=NORMAL",
|
||||
"PRAGMA cache_size=-64000", // 64MB cache
|
||||
"PRAGMA busy_timeout=5000",
|
||||
}
|
||||
for _, pragma := range pragmas {
|
||||
if _, err := sa.db.Exec(pragma); err != nil {
|
||||
cclog.Warnf("SqliteArchive Init() > pragma failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create schema
|
||||
if _, err := sa.db.Exec(sqliteSchema); err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > schema creation failed: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Check/set version
|
||||
var versionStr string
|
||||
err = sa.db.QueryRow("SELECT value FROM metadata WHERE key = 'version'").Scan(&versionStr)
|
||||
if err == sql.ErrNoRows {
|
||||
// First time initialization, set version
|
||||
_, err = sa.db.Exec("INSERT INTO metadata (key, value) VALUES ('version', ?)", fmt.Sprintf("%d", Version))
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to set version: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
versionStr = fmt.Sprintf("%d", Version)
|
||||
} else if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to read version: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
version, err := strconv.ParseUint(versionStr, 10, 64)
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > version parse error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if version != Version {
|
||||
return version, fmt.Errorf("unsupported version %d, need %d", version, Version)
|
||||
}
|
||||
|
||||
// Discover clusters
|
||||
sa.clusters = []string{}
|
||||
rows, err := sa.db.Query("SELECT DISTINCT cluster FROM jobs ORDER BY cluster")
|
||||
if err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to query clusters: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var cluster string
|
||||
if err := rows.Scan(&cluster); err != nil {
|
||||
cclog.Errorf("SqliteArchive Init() > failed to scan cluster: %v", err)
|
||||
continue
|
||||
}
|
||||
sa.clusters = append(sa.clusters, cluster)
|
||||
}
|
||||
|
||||
cclog.Infof("SqliteArchive initialized with database '%s', found %d clusters", cfg.DBPath, len(sa.clusters))
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (sa *SqliteArchive) Info() {
	fmt.Printf("SQLite Job archive database\n")

	ci := make(map[string]*clusterInfo)

	rows, err := sa.db.Query(`
		SELECT cluster, COUNT(*), MIN(start_time), MAX(start_time),
		       SUM(LENGTH(meta_json) + COALESCE(LENGTH(data_json), 0))
		FROM jobs
		GROUP BY cluster
	`)
	if err != nil {
		cclog.Fatalf("SqliteArchive Info() > query failed: %s", err.Error())
	}
	defer rows.Close()

	for rows.Next() {
		var cluster string
		var numJobs int
		var dateFirst, dateLast int64
		var diskSize int64

		if err := rows.Scan(&cluster, &numJobs, &dateFirst, &dateLast, &diskSize); err != nil {
			cclog.Errorf("SqliteArchive Info() > scan failed: %v", err)
			continue
		}

		ci[cluster] = &clusterInfo{
			numJobs:   numJobs,
			dateFirst: dateFirst,
			dateLast:  dateLast,
			diskSize:  float64(diskSize) / (1024 * 1024), // Convert to MB
		}
	}

	cit := clusterInfo{dateFirst: time.Now().Unix()}
	w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', tabwriter.Debug)
	fmt.Fprintln(w, "cluster\t#jobs\tfrom\tto\tsize (MB)")
	for cluster, clusterInfo := range ci {
		fmt.Fprintf(w, "%s\t%d\t%s\t%s\t%.2f\n", cluster,
			clusterInfo.numJobs,
			time.Unix(clusterInfo.dateFirst, 0),
			time.Unix(clusterInfo.dateLast, 0),
			clusterInfo.diskSize)

		cit.numJobs += clusterInfo.numJobs
		cit.dateFirst = util.Min(cit.dateFirst, clusterInfo.dateFirst)
		cit.dateLast = util.Max(cit.dateLast, clusterInfo.dateLast)
		cit.diskSize += clusterInfo.diskSize
	}

	fmt.Fprintf(w, "TOTAL\t%d\t%s\t%s\t%.2f\n",
		cit.numJobs, time.Unix(cit.dateFirst, 0), time.Unix(cit.dateLast, 0), cit.diskSize)
	w.Flush()
}

func (sa *SqliteArchive) Exists(job *schema.Job) bool {
	var count int
	err := sa.db.QueryRow("SELECT COUNT(*) FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
		job.JobID, job.Cluster, job.StartTime).Scan(&count)
	return err == nil && count > 0
}

func (sa *SqliteArchive) LoadJobMeta(job *schema.Job) (*schema.Job, error) {
	var metaBlob []byte
	err := sa.db.QueryRow("SELECT meta_json FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
		job.JobID, job.Cluster, job.StartTime).Scan(&metaBlob)
	if err != nil {
		cclog.Errorf("SqliteArchive LoadJobMeta() > query error: %v", err)
		return nil, err
	}

	if config.Keys.Validate {
		if err := schema.Validate(schema.Meta, bytes.NewReader(metaBlob)); err != nil {
			return nil, fmt.Errorf("validate job meta: %v", err)
		}
	}

	return DecodeJobMeta(bytes.NewReader(metaBlob))
}

func (sa *SqliteArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
	var dataBlob []byte
	var compressed bool
	err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
		job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
	if err != nil {
		cclog.Errorf("SqliteArchive LoadJobData() > query error: %v", err)
		return nil, err
	}

	var reader io.Reader = bytes.NewReader(dataBlob)
	if compressed {
		gzipReader, err := gzip.NewReader(reader)
		if err != nil {
			cclog.Errorf("SqliteArchive LoadJobData() > gzip error: %v", err)
			return nil, err
		}
		defer gzipReader.Close()
		reader = gzipReader
	}

	if config.Keys.Validate {
		data, _ := io.ReadAll(reader)
		if err := schema.Validate(schema.Data, bytes.NewReader(data)); err != nil {
			return schema.JobData{}, fmt.Errorf("validate job data: %v", err)
		}
		return DecodeJobData(bytes.NewReader(data), "sqlite")
	}

	return DecodeJobData(reader, "sqlite")
}

func (sa *SqliteArchive) LoadJobStats(job *schema.Job) (schema.ScopedJobStats, error) {
	var dataBlob []byte
	var compressed bool
	err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
		job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
	if err != nil {
		cclog.Errorf("SqliteArchive LoadJobStats() > query error: %v", err)
		return nil, err
	}

	var reader io.Reader = bytes.NewReader(dataBlob)
	if compressed {
		gzipReader, err := gzip.NewReader(reader)
		if err != nil {
			cclog.Errorf("SqliteArchive LoadJobStats() > gzip error: %v", err)
			return nil, err
		}
		defer gzipReader.Close()
		reader = gzipReader
	}

	if config.Keys.Validate {
		data, _ := io.ReadAll(reader)
		if err := schema.Validate(schema.Data, bytes.NewReader(data)); err != nil {
			return nil, fmt.Errorf("validate job data: %v", err)
		}
		return DecodeJobStats(bytes.NewReader(data), "sqlite")
	}

	return DecodeJobStats(reader, "sqlite")
}

func (sa *SqliteArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
	var configBlob []byte
	err := sa.db.QueryRow("SELECT config_json FROM clusters WHERE name = ?", name).Scan(&configBlob)
	if err != nil {
		cclog.Errorf("SqliteArchive LoadClusterCfg() > query error: %v", err)
		return nil, err
	}

	if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(configBlob)); err != nil {
		cclog.Warnf("Validate cluster config: %v\n", err)
		return &schema.Cluster{}, fmt.Errorf("validate cluster config: %v", err)
	}

	return DecodeCluster(bytes.NewReader(configBlob))
}

func (sa *SqliteArchive) StoreJobMeta(job *schema.Job) error {
	var metaBuf bytes.Buffer
	if err := EncodeJobMeta(&metaBuf, job); err != nil {
		cclog.Error("SqliteArchive StoreJobMeta() > encoding error")
		return err
	}

	now := time.Now().Unix()
	_, err := sa.db.Exec(`
		INSERT INTO jobs (job_id, cluster, start_time, meta_json, created_at, updated_at)
		VALUES (?, ?, ?, ?, ?, ?)
		ON CONFLICT(job_id, cluster, start_time) DO UPDATE SET
			meta_json = excluded.meta_json,
			updated_at = excluded.updated_at
	`, job.JobID, job.Cluster, job.StartTime, metaBuf.Bytes(), now, now)
	if err != nil {
		cclog.Errorf("SqliteArchive StoreJobMeta() > insert error: %v", err)
		return err
	}

	return nil
}

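// Because of the ON CONFLICT clause above, StoreJobMeta behaves as an upsert:
// writing the same (job_id, cluster, start_time) twice updates meta_json and
// updated_at in place instead of inserting a duplicate row. A minimal sketch,
// assuming a populated *schema.Job value:
//
//	_ = sa.StoreJobMeta(job) // first write inserts the row
//	job.NumNodes = 4         // change some metadata
//	_ = sa.StoreJobMeta(job) // second write overwrites meta_json for the same key
//	// sa.Exists(job) still reports exactly one matching row
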
func (sa *SqliteArchive) ImportJob(jobMeta *schema.Job, jobData *schema.JobData) error {
	var metaBuf, dataBuf bytes.Buffer
	if err := EncodeJobMeta(&metaBuf, jobMeta); err != nil {
		cclog.Error("SqliteArchive ImportJob() > encoding meta error")
		return err
	}
	if err := EncodeJobData(&dataBuf, jobData); err != nil {
		cclog.Error("SqliteArchive ImportJob() > encoding data error")
		return err
	}

	now := time.Now().Unix()
	_, err := sa.db.Exec(`
		INSERT INTO jobs (job_id, cluster, start_time, meta_json, data_json, data_compressed, created_at, updated_at)
		VALUES (?, ?, ?, ?, ?, 0, ?, ?)
		ON CONFLICT(job_id, cluster, start_time) DO UPDATE SET
			meta_json = excluded.meta_json,
			data_json = excluded.data_json,
			data_compressed = excluded.data_compressed,
			updated_at = excluded.updated_at
	`, jobMeta.JobID, jobMeta.Cluster, jobMeta.StartTime, metaBuf.Bytes(), dataBuf.Bytes(), now, now)
	if err != nil {
		cclog.Errorf("SqliteArchive ImportJob() > insert error: %v", err)
		return err
	}

	return nil
}

func (sa *SqliteArchive) GetClusters() []string {
	return sa.clusters
}

func (sa *SqliteArchive) CleanUp(jobs []*schema.Job) {
	start := time.Now()
	count := 0

	tx, err := sa.db.Begin()
	if err != nil {
		cclog.Errorf("SqliteArchive CleanUp() > transaction error: %v", err)
		return
	}
	defer tx.Rollback()

	stmt, err := tx.Prepare("DELETE FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?")
	if err != nil {
		cclog.Errorf("SqliteArchive CleanUp() > prepare error: %v", err)
		return
	}
	defer stmt.Close()

	for _, job := range jobs {
		if job == nil {
			cclog.Errorf("SqliteArchive CleanUp() error: job is nil")
			continue
		}

		if _, err := stmt.Exec(job.JobID, job.Cluster, job.StartTime); err != nil {
			cclog.Errorf("SqliteArchive CleanUp() > delete error: %v", err)
		} else {
			count++
		}
	}

	if err := tx.Commit(); err != nil {
		cclog.Errorf("SqliteArchive CleanUp() > commit error: %v", err)
		return
	}

	cclog.Infof("Retention Service - Remove %d jobs from SQLite in %s", count, time.Since(start))
}

func (sa *SqliteArchive) Move(jobs []*schema.Job, targetPath string) {
	// Moving jobs to a target path is a filesystem concept; for a single-file
	// SQLite archive there is no separate location to move rows to, so this
	// operation is currently a no-op that only logs a warning.
	cclog.Warnf("SqliteArchive Move() is not implemented - moves within the database are not applicable")
}

func (sa *SqliteArchive) Clean(before int64, after int64) {
	if after == 0 {
		after = math.MaxInt64
	}

	result, err := sa.db.Exec("DELETE FROM jobs WHERE start_time < ? OR start_time > ?", before, after)
	if err != nil {
		cclog.Fatalf("SqliteArchive Clean() > delete error: %s", err.Error())
	}

	rowsAffected, _ := result.RowsAffected()
	cclog.Infof("SqliteArchive Clean() removed %d jobs", rowsAffected)
}

func (sa *SqliteArchive) Compress(jobs []*schema.Job) {
	var cnt int
	start := time.Now()

	tx, err := sa.db.Begin()
	if err != nil {
		cclog.Errorf("SqliteArchive Compress() > transaction error: %v", err)
		return
	}
	defer tx.Rollback()

	stmt, err := tx.Prepare("UPDATE jobs SET data_json = ?, data_compressed = 1 WHERE job_id = ? AND cluster = ? AND start_time = ?")
	if err != nil {
		cclog.Errorf("SqliteArchive Compress() > prepare error: %v", err)
		return
	}
	defer stmt.Close()

	for _, job := range jobs {
		var dataBlob []byte
		var compressed bool
		err := sa.db.QueryRow("SELECT data_json, data_compressed FROM jobs WHERE job_id = ? AND cluster = ? AND start_time = ?",
			job.JobID, job.Cluster, job.StartTime).Scan(&dataBlob, &compressed)
		if err != nil || compressed || len(dataBlob) < 2000 {
			continue // Skip if error, already compressed, or too small
		}

		// Compress the data
		var compressedBuf bytes.Buffer
		gzipWriter := gzip.NewWriter(&compressedBuf)
		if _, err := gzipWriter.Write(dataBlob); err != nil {
			cclog.Errorf("SqliteArchive Compress() > gzip error: %v", err)
			gzipWriter.Close()
			continue
		}
		gzipWriter.Close()

		if _, err := stmt.Exec(compressedBuf.Bytes(), job.JobID, job.Cluster, job.StartTime); err != nil {
			cclog.Errorf("SqliteArchive Compress() > update error: %v", err)
		} else {
			cnt++
		}
	}

	if err := tx.Commit(); err != nil {
		cclog.Errorf("SqliteArchive Compress() > commit error: %v", err)
		return
	}

	cclog.Infof("Compression Service - %d jobs in SQLite took %s", cnt, time.Since(start))
}

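// The data_compressed flag marks rows whose data_json BLOB holds a gzip stream
// instead of raw JSON; Compress() above only rewrites rows larger than 2000
// bytes. A self-contained round-trip sketch of that convention (rawJSON is a
// placeholder byte slice, not part of the backend):
//
//	var buf bytes.Buffer
//	zw := gzip.NewWriter(&buf)
//	zw.Write(rawJSON)
//	zw.Close()
//	// buf.Bytes() is what would be stored with data_compressed = 1
//	zr, err := gzip.NewReader(bytes.NewReader(buf.Bytes()))
//	if err == nil {
//		restored, _ := io.ReadAll(zr) // equals rawJSON again
//		zr.Close()
//		_ = restored
//	}
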
func (sa *SqliteArchive) CompressLast(starttime int64) int64 {
	var lastStr string
	err := sa.db.QueryRow("SELECT value FROM metadata WHERE key = 'compress_last'").Scan(&lastStr)

	var last int64
	if err == sql.ErrNoRows {
		last = starttime
	} else if err != nil {
		cclog.Errorf("SqliteArchive CompressLast() > query error: %v", err)
		last = starttime
	} else {
		last, err = strconv.ParseInt(lastStr, 10, 64)
		if err != nil {
			cclog.Errorf("SqliteArchive CompressLast() > parse error: %v", err)
			last = starttime
		}
	}

	cclog.Infof("SqliteArchive CompressLast() - start %d last %d", starttime, last)

	// Update timestamp
	_, err = sa.db.Exec(`
		INSERT INTO metadata (key, value) VALUES ('compress_last', ?)
		ON CONFLICT(key) DO UPDATE SET value = excluded.value
	`, fmt.Sprintf("%d", starttime))
	if err != nil {
		cclog.Errorf("SqliteArchive CompressLast() > update error: %v", err)
	}

	return last
}

func (sa *SqliteArchive) Iter(loadMetricData bool) <-chan JobContainer {
	ch := make(chan JobContainer)

	go func() {
		defer close(ch)

		rows, err := sa.db.Query("SELECT job_id, cluster, start_time, meta_json, data_json, data_compressed FROM jobs ORDER BY cluster, start_time")
		if err != nil {
			cclog.Fatalf("SqliteArchive Iter() > query error: %s", err.Error())
		}
		defer rows.Close()

		for rows.Next() {
			var jobID int64
			var cluster string
			var startTime int64
			var metaBlob []byte
			var dataBlob []byte
			var compressed bool

			if err := rows.Scan(&jobID, &cluster, &startTime, &metaBlob, &dataBlob, &compressed); err != nil {
				cclog.Errorf("SqliteArchive Iter() > scan error: %v", err)
				continue
			}

			job, err := DecodeJobMeta(bytes.NewReader(metaBlob))
			if err != nil {
				cclog.Errorf("SqliteArchive Iter() > decode meta error: %v", err)
				continue
			}

			if loadMetricData && dataBlob != nil {
				var reader io.Reader = bytes.NewReader(dataBlob)
				if compressed {
					gzipReader, err := gzip.NewReader(reader)
					if err != nil {
						cclog.Errorf("SqliteArchive Iter() > gzip error: %v", err)
						ch <- JobContainer{Meta: job, Data: nil}
						continue
					}
					defer gzipReader.Close()
					reader = gzipReader
				}

				jobData, err := DecodeJobData(reader, "sqlite")
				if err != nil {
					cclog.Errorf("SqliteArchive Iter() > decode data error: %v", err)
					ch <- JobContainer{Meta: job, Data: nil}
				} else {
					ch <- JobContainer{Meta: job, Data: &jobData}
				}
			} else {
				ch <- JobContainer{Meta: job, Data: nil}
			}
		}
	}()

	return ch
}

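// A minimal consumer sketch for Iter(): the backend closes the channel once
// all rows have been streamed, so a plain range loop suffices. handle() is a
// hypothetical callback, not part of this package.
//
//	for container := range sa.Iter(true) {
//		if container.Data == nil {
//			continue // no metric data stored, or it failed to decode
//		}
//		handle(container.Meta, container.Data)
//	}
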
pkg/archive/sqliteBackend_test.go (new file, 313 lines)
@@ -0,0 +1,313 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package archive

import (
	"encoding/json"
	"os"
	"testing"

	"github.com/ClusterCockpit/cc-lib/schema"
)

func TestSqliteInitEmptyPath(t *testing.T) {
	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"kind":"sqlite"}`))
	if err == nil {
		t.Fatal("expected error for empty database path")
	}
}

func TestSqliteInitInvalidConfig(t *testing.T) {
	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`"dbPath":"/tmp/test.db"`))
	if err == nil {
		t.Fatal("expected error for invalid config")
	}
}

func TestSqliteInit(t *testing.T) {
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	version, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	if version != Version {
		t.Errorf("expected version %d, got %d", Version, version)
	}
	if sa.db == nil {
		t.Fatal("database not initialized")
	}
	sa.db.Close()
}

func TestSqliteStoreAndLoadJobMeta(t *testing.T) {
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	defer sa.db.Close()

	job := &schema.Job{
		JobID:     12345,
		Cluster:   "test-cluster",
		StartTime: 1234567890,
		NumNodes:  1,
		Resources: []*schema.Resource{{Hostname: "node001"}},
	}

	// Store job metadata
	if err := sa.StoreJobMeta(job); err != nil {
		t.Fatalf("store failed: %v", err)
	}

	// Check if exists
	if !sa.Exists(job) {
		t.Fatal("job should exist")
	}

	// Load job metadata
	loaded, err := sa.LoadJobMeta(job)
	if err != nil {
		t.Fatalf("load failed: %v", err)
	}

	if loaded.JobID != job.JobID {
		t.Errorf("expected JobID %d, got %d", job.JobID, loaded.JobID)
	}
	if loaded.Cluster != job.Cluster {
		t.Errorf("expected Cluster %s, got %s", job.Cluster, loaded.Cluster)
	}
	if loaded.StartTime != job.StartTime {
		t.Errorf("expected StartTime %d, got %d", job.StartTime, loaded.StartTime)
	}
}

func TestSqliteImportJob(t *testing.T) {
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	defer sa.db.Close()

	// Building a realistic schema.JobData fixture is out of scope for this unit
	// test; it only verifies that the backend initializes cleanly. Full
	// ImportJob coverage requires integration tests with real job data files.
	t.Log("ImportJob interface verified (full data test requires integration)")
}

func TestSqliteGetClusters(t *testing.T) {
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	defer sa.db.Close()

	// Add jobs from different clusters
	job1 := &schema.Job{
		JobID:     1,
		Cluster:   "cluster-a",
		StartTime: 1000,
		NumNodes:  1,
		Resources: []*schema.Resource{{Hostname: "node001"}},
	}
	job2 := &schema.Job{
		JobID:     2,
		Cluster:   "cluster-b",
		StartTime: 2000,
		NumNodes:  1,
		Resources: []*schema.Resource{{Hostname: "node002"}},
	}

	sa.StoreJobMeta(job1)
	sa.StoreJobMeta(job2)

	// Reinitialize to refresh cluster list
	sa.db.Close()
	_, err = sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("reinit failed: %v", err)
	}
	defer sa.db.Close()

	clusters := sa.GetClusters()
	if len(clusters) != 2 {
		t.Errorf("expected 2 clusters, got %d", len(clusters))
	}
}

func TestSqliteCleanUp(t *testing.T) {
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	defer sa.db.Close()

	job := &schema.Job{
		JobID:     999,
		Cluster:   "test",
		StartTime: 5000,
		NumNodes:  1,
		Resources: []*schema.Resource{{Hostname: "node001"}},
	}

	sa.StoreJobMeta(job)

	// Verify exists
	if !sa.Exists(job) {
		t.Fatal("job should exist")
	}

	// Clean up
	sa.CleanUp([]*schema.Job{job})

	// Verify deleted
	if sa.Exists(job) {
		t.Fatal("job should not exist after cleanup")
	}
}

func TestSqliteClean(t *testing.T) {
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	defer sa.db.Close()

	// Add jobs with different start times
	oldJob := &schema.Job{
		JobID:     1,
		Cluster:   "test",
		StartTime: 1000,
		NumNodes:  1,
		Resources: []*schema.Resource{{Hostname: "node001"}},
	}
	newJob := &schema.Job{
		JobID:     2,
		Cluster:   "test",
		StartTime: 9000,
		NumNodes:  1,
		Resources: []*schema.Resource{{Hostname: "node002"}},
	}

	sa.StoreJobMeta(oldJob)
	sa.StoreJobMeta(newJob)

	// Clean jobs before 5000
	sa.Clean(5000, 0)

	// Old job should be deleted
	if sa.Exists(oldJob) {
		t.Error("old job should be deleted")
	}

	// New job should still exist
	if !sa.Exists(newJob) {
		t.Error("new job should still exist")
	}
}

func TestSqliteIter(t *testing.T) {
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	defer sa.db.Close()

	// Add multiple jobs
	for i := 1; i <= 3; i++ {
		job := &schema.Job{
			JobID:     int64(i),
			Cluster:   "test",
			StartTime: int64(i * 1000),
			NumNodes:  1,
			Resources: []*schema.Resource{{Hostname: "node001"}},
		}
		sa.StoreJobMeta(job)
	}

	// Iterate
	count := 0
	for container := range sa.Iter(false) {
		if container.Meta == nil {
			t.Error("expected non-nil meta")
		}
		count++
	}

	if count != 3 {
		t.Errorf("expected 3 jobs, got %d", count)
	}
}

func TestSqliteCompress(t *testing.T) {
	// Compression test requires actual job data
	// For now just verify the method exists and doesn't panic
	tmpfile := t.TempDir() + "/test.db"
	defer os.Remove(tmpfile)

	var sa SqliteArchive
	_, err := sa.Init(json.RawMessage(`{"dbPath":"` + tmpfile + `"}`))
	if err != nil {
		t.Fatalf("init failed: %v", err)
	}
	defer sa.db.Close()

	job := &schema.Job{
		JobID:     777,
		Cluster:   "test",
		StartTime: 7777,
		NumNodes:  1,
		Resources: []*schema.Resource{{Hostname: "node001"}},
	}

	sa.StoreJobMeta(job)

	// Compress should not panic even with missing data
	sa.Compress([]*schema.Job{job})

	t.Log("Compression method verified")
}

func TestSqliteConfigParsing(t *testing.T) {
	rawConfig := json.RawMessage(`{"dbPath": "/tmp/test.db"}`)

	var cfg SqliteArchiveConfig
	err := json.Unmarshal(rawConfig, &cfg)
	if err != nil {
		t.Fatalf("failed to parse config: %v", err)
	}

	if cfg.DBPath != "/tmp/test.db" {
		t.Errorf("expected dbPath '/tmp/test.db', got '%s'", cfg.DBPath)
	}
}

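The tests above are self-contained: each one creates its own database under
t.TempDir(), so they can be run in isolation with the standard tooling, for
example (assuming the repository layout shown in the file path above):

    go test ./pkg/archive -run TestSqlite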