Mirror of https://github.com/ClusterCockpit/cc-backend, synced 2025-12-15 19:56:16 +01:00
Add tools for archive manager and archive-migration
Needs testing and review
BIN  tools/archive-manager/archive-manager  (Executable file)
Binary file not shown.
335  tools/archive-manager/import_test.go  (Normal file)
@@ -0,0 +1,335 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main

import (
    "encoding/json"
    "fmt"
    "os"
    "path/filepath"
    "testing"

    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-lib/schema"
    "github.com/ClusterCockpit/cc-lib/util"
)

// TestImportFileToSqlite tests importing jobs from file backend to SQLite backend
func TestImportFileToSqlite(t *testing.T) {
    // Create temporary directories
    tmpdir := t.TempDir()
    srcArchive := filepath.Join(tmpdir, "src-archive")
    dstDb := filepath.Join(tmpdir, "dst-archive.db")

    // Copy test data to source archive
    testDataPath := "../../pkg/archive/testdata/archive"
    if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
        t.Skip("Test data not found, skipping integration test")
    }

    if err := util.CopyDir(testDataPath, srcArchive); err != nil {
        t.Fatalf("Failed to copy test data: %s", err.Error())
    }

    // Initialize source backend (file)
    srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
    srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
    if err != nil {
        t.Fatalf("Failed to initialize source backend: %s", err.Error())
    }

    // Initialize destination backend (sqlite)
    dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
    dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
    if err != nil {
        t.Fatalf("Failed to initialize destination backend: %s", err.Error())
    }

    // Perform import
    imported, failed, err := importArchive(srcBackend, dstBackend)
    if err != nil {
        t.Errorf("Import failed: %s", err.Error())
    }

    if imported == 0 {
        t.Error("No jobs were imported")
    }

    if failed > 0 {
        t.Errorf("%d jobs failed to import", failed)
    }

    t.Logf("Successfully imported %d jobs", imported)

    // Verify jobs exist in destination
    // Count jobs in source
    srcCount := 0
    for range srcBackend.Iter(false) {
        srcCount++
    }

    // Count jobs in destination
    dstCount := 0
    for range dstBackend.Iter(false) {
        dstCount++
    }

    if srcCount != dstCount {
        t.Errorf("Job count mismatch: source has %d jobs, destination has %d jobs", srcCount, dstCount)
    }
}

// TestImportFileToFile tests importing jobs from one file backend to another
func TestImportFileToFile(t *testing.T) {
    // Create temporary directories
    tmpdir := t.TempDir()
    srcArchive := filepath.Join(tmpdir, "src-archive")
    dstArchive := filepath.Join(tmpdir, "dst-archive")

    // Copy test data to source archive
    testDataPath := "../../pkg/archive/testdata/archive"
    if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
        t.Skip("Test data not found, skipping integration test")
    }

    if err := util.CopyDir(testDataPath, srcArchive); err != nil {
        t.Fatalf("Failed to copy test data: %s", err.Error())
    }

    // Create destination archive directory
    if err := os.MkdirAll(dstArchive, 0755); err != nil {
        t.Fatalf("Failed to create destination directory: %s", err.Error())
    }

    // Initialize source backend
    srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
    srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
    if err != nil {
        t.Fatalf("Failed to initialize source backend: %s", err.Error())
    }

    // Initialize destination backend
    dstConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, dstArchive)
    dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
    if err != nil {
        t.Fatalf("Failed to initialize destination backend: %s", err.Error())
    }

    // Perform import
    imported, failed, err := importArchive(srcBackend, dstBackend)
    if err != nil {
        t.Errorf("Import failed: %s", err.Error())
    }

    if imported == 0 {
        t.Error("No jobs were imported")
    }

    if failed > 0 {
        t.Errorf("%d jobs failed to import", failed)
    }

    t.Logf("Successfully imported %d jobs", imported)
}

// TestImportDataIntegrity verifies that job metadata and data are correctly imported
func TestImportDataIntegrity(t *testing.T) {
    // Create temporary directories
    tmpdir := t.TempDir()
    srcArchive := filepath.Join(tmpdir, "src-archive")
    dstDb := filepath.Join(tmpdir, "dst-archive.db")

    // Copy test data to source archive
    testDataPath := "../../pkg/archive/testdata/archive"
    if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
        t.Skip("Test data not found, skipping integration test")
    }

    if err := util.CopyDir(testDataPath, srcArchive); err != nil {
        t.Fatalf("Failed to copy test data: %s", err.Error())
    }

    // Initialize backends
    srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
    srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
    if err != nil {
        t.Fatalf("Failed to initialize source backend: %s", err.Error())
    }

    dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
    dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
    if err != nil {
        t.Fatalf("Failed to initialize destination backend: %s", err.Error())
    }

    // Perform import
    _, _, err = importArchive(srcBackend, dstBackend)
    if err != nil {
        t.Errorf("Import failed: %s", err.Error())
    }

    // Verify data integrity for each job
    verifiedJobs := 0
    for srcJob := range srcBackend.Iter(false) {
        if srcJob.Meta == nil {
            continue
        }

        // Load job from destination
        dstJobMeta, err := dstBackend.LoadJobMeta(srcJob.Meta)
        if err != nil {
            t.Errorf("Failed to load job %d from destination: %s", srcJob.Meta.JobID, err.Error())
            continue
        }

        // Verify basic metadata
        if dstJobMeta.JobID != srcJob.Meta.JobID {
            t.Errorf("JobID mismatch: expected %d, got %d", srcJob.Meta.JobID, dstJobMeta.JobID)
        }

        if dstJobMeta.Cluster != srcJob.Meta.Cluster {
            t.Errorf("Cluster mismatch for job %d: expected %s, got %s",
                srcJob.Meta.JobID, srcJob.Meta.Cluster, dstJobMeta.Cluster)
        }

        if dstJobMeta.StartTime != srcJob.Meta.StartTime {
            t.Errorf("StartTime mismatch for job %d: expected %d, got %d",
                srcJob.Meta.JobID, srcJob.Meta.StartTime, dstJobMeta.StartTime)
        }

        // Load and verify job data
        srcData, err := srcBackend.LoadJobData(srcJob.Meta)
        if err != nil {
            t.Errorf("Failed to load job data from source: %s", err.Error())
            continue
        }

        dstData, err := dstBackend.LoadJobData(srcJob.Meta)
        if err != nil {
            t.Errorf("Failed to load job data from destination: %s", err.Error())
            continue
        }

        // Verify metric data exists
        if len(srcData) != len(dstData) {
            t.Errorf("Metric count mismatch for job %d: expected %d, got %d",
                srcJob.Meta.JobID, len(srcData), len(dstData))
        }

        verifiedJobs++
    }

    if verifiedJobs == 0 {
        t.Error("No jobs were verified")
    }

    t.Logf("Successfully verified %d jobs", verifiedJobs)
}

// TestImportEmptyArchive tests importing from an empty archive
func TestImportEmptyArchive(t *testing.T) {
    tmpdir := t.TempDir()
    srcArchive := filepath.Join(tmpdir, "empty-archive")
    dstDb := filepath.Join(tmpdir, "dst-archive.db")

    // Create empty source archive
    if err := os.MkdirAll(srcArchive, 0755); err != nil {
        t.Fatalf("Failed to create source directory: %s", err.Error())
    }

    // Write version file
    versionFile := filepath.Join(srcArchive, "version.txt")
    if err := os.WriteFile(versionFile, []byte("3"), 0644); err != nil {
        t.Fatalf("Failed to write version file: %s", err.Error())
    }

    // Initialize backends
    srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
    srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
    if err != nil {
        t.Fatalf("Failed to initialize source backend: %s", err.Error())
    }

    dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
    dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
    if err != nil {
        t.Fatalf("Failed to initialize destination backend: %s", err.Error())
    }

    // Perform import
    imported, failed, err := importArchive(srcBackend, dstBackend)
    if err != nil {
        t.Errorf("Import from empty archive should not fail: %s", err.Error())
    }

    if imported != 0 {
        t.Errorf("Expected 0 imported jobs, got %d", imported)
    }

    if failed != 0 {
        t.Errorf("Expected 0 failed jobs, got %d", failed)
    }
}

// TestImportDuplicateJobs tests that duplicate jobs are skipped
func TestImportDuplicateJobs(t *testing.T) {
    tmpdir := t.TempDir()
    srcArchive := filepath.Join(tmpdir, "src-archive")
    dstDb := filepath.Join(tmpdir, "dst-archive.db")

    // Copy test data
    testDataPath := "../../pkg/archive/testdata/archive"
    if _, err := os.Stat(testDataPath); os.IsNotExist(err) {
        t.Skip("Test data not found, skipping integration test")
    }

    if err := util.CopyDir(testDataPath, srcArchive); err != nil {
        t.Fatalf("Failed to copy test data: %s", err.Error())
    }

    // Initialize backends
    srcConfig := fmt.Sprintf(`{"kind":"file","path":"%s"}`, srcArchive)
    srcBackend, err := archive.InitBackend(json.RawMessage(srcConfig))
    if err != nil {
        t.Fatalf("Failed to initialize source backend: %s", err.Error())
    }

    dstConfig := fmt.Sprintf(`{"kind":"sqlite","dbPath":"%s"}`, dstDb)
    dstBackend, err := archive.InitBackend(json.RawMessage(dstConfig))
    if err != nil {
        t.Fatalf("Failed to initialize destination backend: %s", err.Error())
    }

    // First import
    imported1, _, err := importArchive(srcBackend, dstBackend)
    if err != nil {
        t.Fatalf("First import failed: %s", err.Error())
    }

    // Second import (should skip all jobs)
    imported2, _, err := importArchive(srcBackend, dstBackend)
    if err != nil {
        t.Errorf("Second import failed: %s", err.Error())
    }

    if imported2 != 0 {
        t.Errorf("Second import should skip all jobs, but imported %d", imported2)
    }

    t.Logf("First import: %d jobs, Second import: %d jobs (all skipped as expected)", imported1, imported2)
}

// TestJobStub is a helper test to verify that the job stub used in tests matches the schema
func TestJobStub(t *testing.T) {
    job := &schema.Job{
        JobID:     123,
        Cluster:   "test-cluster",
        StartTime: 1234567890,
    }

    if job.JobID != 123 {
        t.Errorf("Expected JobID 123, got %d", job.JobID)
    }
}
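The suite above covers file-to-SQLite import, file-to-file import, metadata and data integrity, the empty-archive edge case, and duplicate skipping. A quick way to run just these tests locally with standard Go tooling (the integration tests skip themselves if the test data under pkg/archive/testdata/archive is missing):

    cd tools/archive-manager
    go test -v -run 'TestImport|TestJobStub' .

The second part of the diff below adds the importArchive implementation and the new CLI flags to the archive-manager main program.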
@@ -9,6 +9,8 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/ClusterCockpit/cc-backend/internal/config"
@@ -31,9 +33,106 @@ func parseDate(in string) int64 {
 	return 0
 }

+// importArchive imports all jobs from a source archive backend to a destination archive backend.
+// It uses parallel processing with a worker pool to improve performance.
+// Returns the number of successfully imported jobs, the number of failed jobs, and any error encountered.
+func importArchive(srcBackend, dstBackend archive.ArchiveBackend) (int, int, error) {
+	cclog.Info("Starting parallel archive import...")
+
+	// Use atomic counters for thread-safe updates
+	var imported int32
+	var failed int32
+	var skipped int32
+
+	// Number of parallel workers
+	numWorkers := 4
+	cclog.Infof("Using %d parallel workers", numWorkers)
+
+	// Create a buffered channel for job distribution
+	jobs := make(chan archive.JobContainer, numWorkers*2)
+
+	// WaitGroup to track worker completion
+	var wg sync.WaitGroup
+
+	// Start worker goroutines
+	for i := 0; i < numWorkers; i++ {
+		wg.Add(1)
+		go func(workerID int) {
+			defer wg.Done()
+
+			for job := range jobs {
+				// Validate job metadata
+				if job.Meta == nil {
+					cclog.Warn("Skipping job with nil metadata")
+					atomic.AddInt32(&failed, 1)
+					continue
+				}
+
+				// Validate job data
+				if job.Data == nil {
+					cclog.Warnf("Job %d from cluster %s has no metric data, skipping",
+						job.Meta.JobID, job.Meta.Cluster)
+					atomic.AddInt32(&failed, 1)
+					continue
+				}
+
+				// Check if job already exists in destination
+				if dstBackend.Exists(job.Meta) {
+					cclog.Debugf("Job %d (cluster: %s, start: %d) already exists in destination, skipping",
+						job.Meta.JobID, job.Meta.Cluster, job.Meta.StartTime)
+					atomic.AddInt32(&skipped, 1)
+					continue
+				}
+
+				// Import job to destination
+				if err := dstBackend.ImportJob(job.Meta, job.Data); err != nil {
+					cclog.Errorf("Failed to import job %d from cluster %s: %s",
+						job.Meta.JobID, job.Meta.Cluster, err.Error())
+					atomic.AddInt32(&failed, 1)
+					continue
+				}
+
+				// Successfully imported
+				newCount := atomic.AddInt32(&imported, 1)
+				if newCount%100 == 0 {
+					cclog.Infof("Progress: %d jobs imported, %d skipped, %d failed",
+						newCount, atomic.LoadInt32(&skipped), atomic.LoadInt32(&failed))
+				}
+			}
+		}(i)
+	}
+
+	// Feed jobs to workers
+	go func() {
+		for job := range srcBackend.Iter(true) {
+			jobs <- job
+		}
+		close(jobs)
+	}()
+
+	// Wait for all workers to complete
+	wg.Wait()
+
+	finalImported := int(atomic.LoadInt32(&imported))
+	finalFailed := int(atomic.LoadInt32(&failed))
+	finalSkipped := int(atomic.LoadInt32(&skipped))
+
+	cclog.Infof("Import completed: %d jobs imported, %d skipped, %d failed",
+		finalImported, finalSkipped, finalFailed)
+
+	if finalFailed > 0 {
+		return finalImported, finalFailed, fmt.Errorf("%d jobs failed to import", finalFailed)
+	}
+
+	return finalImported, finalFailed, nil
+}
+
 func main() {
 	var srcPath, flagConfigFile, flagLogLevel, flagRemoveCluster, flagRemoveAfter, flagRemoveBefore string
-	var flagLogDateTime, flagValidate bool
+	var flagSrcConfig, flagDstConfig string
+	var flagLogDateTime, flagValidate, flagImport bool

 	flag.StringVar(&srcPath, "s", "./var/job-archive", "Specify the source job archive path. Default is ./var/job-archive")
 	flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
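The contract of importArchive, as the tests above rely on it: the error is non-nil exactly when at least one job failed, and both counts are returned either way. A minimal caller sketch (srcBackend and dstBackend are hypothetical values obtained via archive.InitBackend, as elsewhere in this commit):

    // Sketch only: wire up backends first, as in main() below.
    imported, failed, err := importArchive(srcBackend, dstBackend)
    if err != nil {
        // Per the implementation above, err != nil implies failed > 0;
        // imported still reports the jobs that were written successfully.
        cclog.Errorf("import finished with %d failures (%d jobs imported)", failed, imported)
    }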
@@ -43,14 +142,54 @@ func main() {
 	flag.StringVar(&flagRemoveBefore, "remove-before", "", "Remove all jobs with start time before date (Format: 2006-Jan-04)")
 	flag.StringVar(&flagRemoveAfter, "remove-after", "", "Remove all jobs with start time after date (Format: 2006-Jan-04)")
 	flag.BoolVar(&flagValidate, "validate", false, "Set this flag to validate a job archive against the json schema")
+	flag.BoolVar(&flagImport, "import", false, "Import jobs from source archive to destination archive")
+	flag.StringVar(&flagSrcConfig, "src-config", "", "Source archive backend configuration (JSON), e.g. '{\"kind\":\"file\",\"path\":\"./archive\"}'")
+	flag.StringVar(&flagDstConfig, "dst-config", "", "Destination archive backend configuration (JSON), e.g. '{\"kind\":\"sqlite\",\"dbPath\":\"./archive.db\"}'")
 	flag.Parse()

 	archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", srcPath)

 	cclog.Init(flagLogLevel, flagLogDateTime)

+	// Handle import mode
+	if flagImport {
+		if flagSrcConfig == "" || flagDstConfig == "" {
+			cclog.Fatal("Both --src-config and --dst-config must be specified for import mode")
+		}
+
+		cclog.Info("Import mode: initializing source and destination backends...")
+
+		// Initialize source backend
+		srcBackend, err := archive.InitBackend(json.RawMessage(flagSrcConfig))
+		if err != nil {
+			cclog.Fatalf("Failed to initialize source backend: %s", err.Error())
+		}
+		cclog.Info("Source backend initialized successfully")
+
+		// Initialize destination backend
+		dstBackend, err := archive.InitBackend(json.RawMessage(flagDstConfig))
+		if err != nil {
+			cclog.Fatalf("Failed to initialize destination backend: %s", err.Error())
+		}
+		cclog.Info("Destination backend initialized successfully")
+
+		// Perform import
+		imported, failed, err := importArchive(srcBackend, dstBackend)
+		if err != nil {
+			cclog.Errorf("Import completed with errors: %s", err.Error())
+			if failed > 0 {
+				os.Exit(1)
+			}
+		}
+
+		cclog.Infof("Import finished successfully: %d jobs imported", imported)
+		os.Exit(0)
+	}
+
 	ccconf.Init(flagConfigFile)

 	// Load and check main configuration
 	if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
 		if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
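Taken together, the new flags make the import mode invocable along these lines (binary name and paths are illustrative; the JSON mirrors the examples in the flag help text above):

    ./archive-manager -import \
        -src-config '{"kind":"file","path":"./var/job-archive"}' \
        -dst-config '{"kind":"sqlite","dbPath":"./var/archive.db"}'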