Add tools for archive manager and archive-migration

Needs testing and review
2025-12-03 14:55:22 +01:00
parent 78530029ef
commit 2333068de7
7 changed files with 907 additions and 1 deletions


@@ -0,0 +1,133 @@
# Archive Migration Tool
## Overview
The `archive-migration` tool migrates job archives from old schema versions to the current schema version. It handles schema changes such as the `exclusive` → `shared` field transformation and adds or removes fields as needed.
## Features
- **Parallel Processing**: Uses worker pool for fast migration
- **Dry-Run Mode**: Preview changes without modifying files
- **Safe Transformations**: Applies well-defined schema transformations
- **Progress Reporting**: Shows real-time migration progress
- **Error Handling**: Continues on individual failures, reports at end
## Schema Transformations
### Exclusive → Shared
Converts the old `exclusive` integer field to the new `shared` string field:
- `0``"multi_user"`
- `1``"none"`
- `2``"single_user"`
### Missing Fields
Adds fields required by the current schema:
- `submitTime`: Defaults to `startTime` if missing
- `energy`: Defaults to `0.0`
- `requestedMemory`: Defaults to `0`
- `shared`: Defaults to `"none"` if still missing after transformation
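A condensed sketch of this defaulting logic, assuming `jobData` is the decoded `meta.json` map (this mirrors `addMissingFields` in `migrate.go`):

```go
// Sketch: add defaults for fields the current schema requires.
if _, ok := jobData["submitTime"]; !ok {
	if startTime, ok := jobData["startTime"]; ok {
		jobData["submitTime"] = startTime // default to startTime
	}
}
if _, ok := jobData["energy"]; !ok {
	jobData["energy"] = 0.0
}
if _, ok := jobData["requestedMemory"]; !ok {
	jobData["requestedMemory"] = 0
}
if _, ok := jobData["shared"]; !ok {
	jobData["shared"] = "none"
}
```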
### Deprecated Fields
Removes fields no longer in the schema:
- `mem_used_max`, `flops_any_avg`, `mem_bw_avg`
- `load_avg`, `net_bw_avg`, `net_data_vol_total`
- `file_bw_avg`, `file_data_vol_total`
## Usage
### Build
```bash
# from the cc-backend repository root
cd tools/archive-migration
go build
```
### Dry Run (Preview Changes)
```bash
./archive-migration --archive /path/to/archive --dry-run
```
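Judging from the logging in `migrate.go`, a dry run announces `DRY RUN MODE - no files will be modified`, prints one `Would migrate: <path>` line per affected job, and finishes with a summary count; no files are written.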
### Migrate Archive
```bash
# IMPORTANT: Backup your archive first!
cp -r /path/to/archive /path/to/archive-backup
# Run migration
./archive-migration --archive /path/to/archive
```
### Command-Line Options
- `--archive <path>`: Path to job archive (required)
- `--dry-run`: Preview changes without modifying files
- `--workers <n>`: Number of parallel workers (default: 4)
- `--loglevel <level>`: Logging level: debug, info, warn, err, fatal, crit (default: info)
- `--logdate`: Add timestamps to log messages
## Examples
```bash
# Preview what would change
./archive-migration --archive ./var/job-archive --dry-run
# Migrate with verbose logging
./archive-migration --archive ./var/job-archive --loglevel debug
# Migrate with 8 workers for faster processing
./archive-migration --archive ./var/job-archive --workers 8
```
## Safety
> [!CAUTION]
> **Always backup your archive before running migration!**

The tool modifies `meta.json` files in place. While transformations are designed to be safe, unexpected issues could occur. Follow these safety practices:
1. **Always run with `--dry-run` first** to preview changes
2. **Backup your archive** before migration
3. **Test on a copy** of your archive first
4. **Verify results** after migration
## Verification
After migration, verify the archive:
```bash
# Use archive-manager to check the archive
cd ../archive-manager
./archive-manager -s /path/to/migrated-archive
# Or validate specific jobs
./archive-manager -s /path/to/migrated-archive --validate
```
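For an extra structural spot-check, a short standalone Go program (a hypothetical helper, not part of this commit) can confirm that a migrated `meta.json` has gained `shared` and lost `exclusive`:

```go
// check_meta.go - hypothetical spot-check for one migrated meta.json.
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

func main() {
	data, err := os.ReadFile(os.Args[1]) // path to a meta.json
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	var job map[string]interface{}
	if err := json.Unmarshal(data, &job); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	_, hasShared := job["shared"]
	_, hasExclusive := job["exclusive"]
	fmt.Printf("shared present: %v, exclusive removed: %v\n", hasShared, !hasExclusive)
}
```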
## Troubleshooting
### Migration Failures
If individual jobs fail to migrate:
- Check the error messages for specific files
- Examine the failing `meta.json` files manually
- Fix invalid JSON or unexpected field types
- Re-run the migration; transformations are idempotent, so already-migrated jobs are processed again without harm
### Performance
For large archives:
- Increase `--workers` for more parallelism
- Use `--loglevel warn` to reduce log output
- Monitor disk I/O if migration is slow
## Technical Details
The migration process:
1. Walks archive directory recursively
2. Finds all `meta.json` files
3. Distributes jobs to worker pool
4. For each job:
- Reads JSON file
- Applies transformations in order
- Writes back migrated data (if not dry-run)
5. Reports statistics and errors
Transformations are idempotent: running the migration multiple times is safe, although each extra run needlessly re-reads and re-writes every file.
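The idempotency claim follows from the structure of the transformations: after the first pass `exclusive` is gone, so the mapping step becomes a no-op, and every default is guarded by an existence check. A minimal test-style sketch (hypothetical; assumes `migrateJobMetadata` from `migrate.go` plus the `fmt` and `testing` imports):

```go
// Hypothetical idempotency test for migrateJobMetadata.
func TestMigrationIdempotent(t *testing.T) {
	job := map[string]interface{}{
		"exclusive": float64(1),
		"startTime": float64(1700000000),
	}
	if err := migrateJobMetadata(job); err != nil {
		t.Fatal(err)
	}
	first := fmt.Sprintf("%v", job)
	if err := migrateJobMetadata(job); err != nil {
		t.Fatal(err)
	}
	if second := fmt.Sprintf("%v", job); second != first {
		t.Errorf("second run changed data:\n%s\n%s", first, second)
	}
}
```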

Binary file not shown.


@@ -0,0 +1,67 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package main

import (
"flag"
"fmt"
"os"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

func main() {
var archivePath string
var dryRun bool
var numWorkers int
var flagLogLevel string
var flagLogDateTime bool
flag.StringVar(&archivePath, "archive", "", "Path to job archive to migrate (required)")
flag.BoolVar(&dryRun, "dry-run", false, "Preview changes without modifying files")
flag.IntVar(&numWorkers, "workers", 4, "Number of parallel workers")
flag.StringVar(&flagLogLevel, "loglevel", "info", "Sets the logging level: `[debug,info (default),warn,err,fatal,crit]`")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Add date and time to log messages")
flag.Parse()
// Initialize logger
cclog.Init(flagLogLevel, flagLogDateTime)
// Validate inputs
if archivePath == "" {
fmt.Fprintf(os.Stderr, "Error: --archive flag is required\n\n")
flag.Usage()
os.Exit(1)
}
// Check if archive path exists
if _, err := os.Stat(archivePath); os.IsNotExist(err) {
cclog.Fatalf("Archive path does not exist: %s", archivePath)
}
// Display warning for non-dry-run mode
if !dryRun {
cclog.Warn("WARNING: This will modify files in the archive!")
cclog.Warn("It is strongly recommended to backup your archive first.")
cclog.Warn("Run with --dry-run first to preview changes.")
cclog.Info("")
}
// Run migration
migrated, failed, err := migrateArchive(archivePath, dryRun, numWorkers)
if err != nil {
cclog.Errorf("Migration completed with errors: %s", err.Error())
if failed > 0 {
os.Exit(1)
}
}
if dryRun {
cclog.Infof("Dry run completed: %d jobs would be migrated", migrated)
} else {
cclog.Infof("Migration completed successfully: %d jobs migrated", migrated)
}
}


@@ -0,0 +1,232 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package main

import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"sync/atomic"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

// transformExclusiveToShared converts the old 'exclusive' field to the new 'shared' field
// Mapping: 0 -> "multi_user", 1 -> "none", 2 -> "single_user"
func transformExclusiveToShared(jobData map[string]interface{}) error {
// Check if 'exclusive' field exists
if exclusive, ok := jobData["exclusive"]; ok {
var exclusiveVal int
// Handle both int and float64 (JSON unmarshaling can produce float64)
switch v := exclusive.(type) {
case float64:
exclusiveVal = int(v)
case int:
exclusiveVal = v
default:
return fmt.Errorf("exclusive field has unexpected type: %T", exclusive)
}
// Map exclusive to shared
var shared string
switch exclusiveVal {
case 0:
shared = "multi_user"
case 1:
shared = "none"
case 2:
shared = "single_user"
default:
return fmt.Errorf("invalid exclusive value: %d", exclusiveVal)
}
// Add shared field and remove exclusive
jobData["shared"] = shared
delete(jobData, "exclusive")
cclog.Debugf("Transformed exclusive=%d to shared=%s", exclusiveVal, shared)
}
return nil
}

// addMissingFields adds fields that are required in the current schema but might be missing in old archives
func addMissingFields(jobData map[string]interface{}) error {
// Add submitTime if missing (default to startTime)
if _, ok := jobData["submitTime"]; !ok {
if startTime, ok := jobData["startTime"]; ok {
jobData["submitTime"] = startTime
cclog.Debug("Added submitTime (defaulted to startTime)")
}
}
// Add energy if missing (default to 0.0)
if _, ok := jobData["energy"]; !ok {
jobData["energy"] = 0.0
}
// Add requestedMemory if missing (default to 0)
if _, ok := jobData["requestedMemory"]; !ok {
jobData["requestedMemory"] = 0
}
// Ensure shared field exists (if still missing, default to "none")
if _, ok := jobData["shared"]; !ok {
jobData["shared"] = "none"
cclog.Debug("Added default shared field: none")
}
return nil
}

// removeDeprecatedFields removes fields that are no longer in the current schema
func removeDeprecatedFields(jobData map[string]interface{}) error {
// List of deprecated fields to remove
deprecatedFields := []string{
"mem_used_max",
"flops_any_avg",
"mem_bw_avg",
"load_avg",
"net_bw_avg",
"net_data_vol_total",
"file_bw_avg",
"file_data_vol_total",
}
for _, field := range deprecatedFields {
if _, ok := jobData[field]; ok {
delete(jobData, field)
cclog.Debugf("Removed deprecated field: %s", field)
}
}
return nil
}

// migrateJobMetadata applies all transformations to a job metadata map
func migrateJobMetadata(jobData map[string]interface{}) error {
// Apply transformations in order
if err := transformExclusiveToShared(jobData); err != nil {
return fmt.Errorf("transformExclusiveToShared failed: %w", err)
}
if err := addMissingFields(jobData); err != nil {
return fmt.Errorf("addMissingFields failed: %w", err)
}
if err := removeDeprecatedFields(jobData); err != nil {
return fmt.Errorf("removeDeprecatedFields failed: %w", err)
}
return nil
}

// processJob reads, migrates, and writes a job metadata file
func processJob(metaPath string, dryRun bool) error {
// Read the meta.json file
data, err := os.ReadFile(metaPath)
if err != nil {
return fmt.Errorf("failed to read %s: %w", metaPath, err)
}
// Parse JSON
var jobData map[string]interface{}
if err := json.Unmarshal(data, &jobData); err != nil {
return fmt.Errorf("failed to parse JSON from %s: %w", metaPath, err)
}
// Apply migrations
if err := migrateJobMetadata(jobData); err != nil {
return fmt.Errorf("migration failed for %s: %w", metaPath, err)
}
// If dry-run, just report what would change
if dryRun {
cclog.Infof("Would migrate: %s", metaPath)
return nil
}
// Write back the migrated data
migratedData, err := json.MarshalIndent(jobData, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal migrated data: %w", err)
}
if err := os.WriteFile(metaPath, migratedData, 0644); err != nil {
return fmt.Errorf("failed to write %s: %w", metaPath, err)
}
return nil
}

// migrateArchive walks through an archive directory and migrates all meta.json files
func migrateArchive(archivePath string, dryRun bool, numWorkers int) (int, int, error) {
cclog.Infof("Starting archive migration at %s", archivePath)
if dryRun {
cclog.Info("DRY RUN MODE - no files will be modified")
}
var migrated int32
var failed int32
// Channel for job paths
jobs := make(chan string, numWorkers*2)
var wg sync.WaitGroup
// Start worker goroutines
for i := 0; i < numWorkers; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
for metaPath := range jobs {
if err := processJob(metaPath, dryRun); err != nil {
cclog.Errorf("Failed to migrate %s: %s", metaPath, err.Error())
atomic.AddInt32(&failed, 1)
continue
}
newCount := atomic.AddInt32(&migrated, 1)
if newCount%100 == 0 {
cclog.Infof("Progress: %d jobs migrated, %d failed", newCount, atomic.LoadInt32(&failed))
}
}
}(i)
}
// Walk the archive directory and find all meta.json files
go func() {
filepath.Walk(archivePath, func(path string, info os.FileInfo, err error) error {
if err != nil {
cclog.Errorf("Error accessing path %s: %s", path, err.Error())
return nil // Continue walking
}
if !info.IsDir() && info.Name() == "meta.json" {
jobs <- path
}
return nil
})
close(jobs)
}()
// Wait for all workers to complete
wg.Wait()
finalMigrated := int(atomic.LoadInt32(&migrated))
finalFailed := int(atomic.LoadInt32(&failed))
cclog.Infof("Migration completed: %d jobs migrated, %d failed", finalMigrated, finalFailed)
if finalFailed > 0 {
return finalMigrated, finalFailed, fmt.Errorf("%d jobs failed to migrate", finalFailed)
}
return finalMigrated, finalFailed, nil
}