Mirror of https://github.com/ClusterCockpit/cc-backend (synced 2026-02-11 13:31:45 +01:00)
Add parquet file job archiving target
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/ClusterCockpit/cc-backend/pkg/archive"
+	pqarchive "github.com/ClusterCockpit/cc-backend/pkg/archive/parquet"
 	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/go-co-op/gocron/v2"
 )
@@ -66,3 +67,96 @@ func RegisterRetentionMoveService(age int, includeDB bool, location string, omit
 		}
 	}))
 }
+
+func RegisterRetentionParquetService(retention Retention) {
+	cclog.Info("Register retention parquet service")
+
+	maxFileSizeMB := retention.MaxFileSizeMB
+	if maxFileSizeMB <= 0 {
+		maxFileSizeMB = 512
+	}
+
+	var target pqarchive.ParquetTarget
+	var err error
+
+	switch retention.TargetKind {
+	case "s3":
+		target, err = pqarchive.NewS3Target(pqarchive.S3TargetConfig{
+			Endpoint:     retention.TargetEndpoint,
+			Bucket:       retention.TargetBucket,
+			AccessKey:    retention.TargetAccessKey,
+			SecretKey:    retention.TargetSecretKey,
+			Region:       retention.TargetRegion,
+			UsePathStyle: retention.TargetUsePathStyle,
+		})
+	default:
+		target, err = pqarchive.NewFileTarget(retention.TargetPath)
+	}
+
+	if err != nil {
+		cclog.Errorf("Parquet retention: failed to create target: %v", err)
+		return
+	}
+
+	s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(5, 0, 0))),
+		gocron.NewTask(
+			func() {
+				startTime := time.Now().Unix() - int64(retention.Age*24*3600)
+				jobs, err := jobRepo.FindJobsBetween(0, startTime, retention.OmitTagged)
+				if err != nil {
+					cclog.Warnf("Parquet retention: error finding jobs: %v", err)
+					return
+				}
+				if len(jobs) == 0 {
+					return
+				}
+
+				cclog.Infof("Parquet retention: processing %d jobs", len(jobs))
+				ar := archive.GetHandle()
+				pw := pqarchive.NewParquetWriter(target, maxFileSizeMB)
+
+				for _, job := range jobs {
+					meta, err := ar.LoadJobMeta(job)
+					if err != nil {
+						cclog.Warnf("Parquet retention: load meta for job %d: %v", job.JobID, err)
+						continue
+					}
+
+					data, err := ar.LoadJobData(job)
+					if err != nil {
+						cclog.Warnf("Parquet retention: load data for job %d: %v", job.JobID, err)
+						continue
+					}
+
+					row, err := pqarchive.JobToParquetRow(meta, &data)
+					if err != nil {
+						cclog.Warnf("Parquet retention: convert job %d: %v", job.JobID, err)
+						continue
+					}
+
+					if err := pw.AddJob(*row); err != nil {
+						cclog.Errorf("Parquet retention: add job %d to writer: %v", job.JobID, err)
+						continue
+					}
+				}
+
+				if err := pw.Close(); err != nil {
+					cclog.Errorf("Parquet retention: close writer: %v", err)
+					return
+				}
+
+				ar.CleanUp(jobs)
+
+				if retention.IncludeDB {
+					cnt, err := jobRepo.DeleteJobsBefore(startTime, retention.OmitTagged)
+					if err != nil {
+						cclog.Errorf("Parquet retention: delete jobs from db: %v", err)
+					} else {
+						cclog.Infof("Parquet retention: removed %d jobs from db", cnt)
+					}
+					if err = jobRepo.Optimize(); err != nil {
+						cclog.Errorf("Parquet retention: db optimization error: %v", err)
+					}
+				}
+			}))
+}
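The new service follows the existing retention workers: once a day at 05:00 it selects all jobs that finished more than retention.Age days ago, loads their metadata and metric data from the job archive, converts each job into a parquet row, and hands the rows to a writer that fills files on either a local file target or an S3 target. Afterwards the jobs are removed from the file archive and, if includeDB is set, from the database. The pkg/archive/parquet package itself is not part of this excerpt; the sketch below shows what its target abstraction might look like, inferred from the call sites above. The method name Store and its signature are assumptions, not the actual API.

// Sketch only: pqarchive is not shown in this diff; this interface is an
// assumption derived from how NewFileTarget, NewS3Target and NewParquetWriter
// are used in RegisterRetentionParquetService above.
package pqarchive

// ParquetTarget abstracts the destination for finished parquet files,
// implemented by the file target (local directory) and the S3 target (bucket).
type ParquetTarget interface {
	// Store persists one completed parquet file under the given name.
	Store(name string, data []byte) error
}

NewParquetWriter(target, maxFileSizeMB) then presumably buffers rows and rotates to a new file on the target whenever the configured size limit is reached; AddJob appends one job row and Close flushes the remaining data.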
@@ -23,11 +23,20 @@ const (
 
 // Retention defines the configuration for job retention policies.
 type Retention struct {
-	Policy     string `json:"policy"`
-	Location   string `json:"location"`
-	Age        int    `json:"age"`
-	IncludeDB  bool   `json:"includeDB"`
-	OmitTagged bool   `json:"omitTagged"`
+	Policy             string `json:"policy"`
+	Location           string `json:"location"`
+	Age                int    `json:"age"`
+	IncludeDB          bool   `json:"includeDB"`
+	OmitTagged         bool   `json:"omitTagged"`
+	TargetKind         string `json:"target-kind"`
+	TargetPath         string `json:"target-path"`
+	TargetEndpoint     string `json:"target-endpoint"`
+	TargetBucket       string `json:"target-bucket"`
+	TargetAccessKey    string `json:"target-access-key"`
+	TargetSecretKey    string `json:"target-secret-key"`
+	TargetRegion       string `json:"target-region"`
+	TargetUsePathStyle bool   `json:"target-use-path-style"`
+	MaxFileSizeMB      int    `json:"max-file-size-mb"`
 }
 
 // CronFrequency defines the execution intervals for various background workers.
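With these fields, a parquet retention block in the archive configuration could look like the example below. The values are illustrative only; the key names follow the JSON tags introduced above, and how this block nests inside the overall archive configuration is not shown in this excerpt.

{
  "policy": "parquet",
  "age": 180,
  "includeDB": true,
  "omitTagged": true,
  "target-kind": "s3",
  "target-endpoint": "https://s3.example.com",
  "target-bucket": "cc-job-archive",
  "target-access-key": "<access-key>",
  "target-secret-key": "<secret-key>",
  "target-region": "us-east-1",
  "target-use-path-style": true,
  "max-file-size-mb": 512
}

For a local file target, any target-kind other than "s3" falls through to the default case in RegisterRetentionParquetService and target-path selects the output directory; max-file-size-mb falls back to 512 when unset or non-positive.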
@@ -87,6 +96,8 @@ func initArchiveServices(config json.RawMessage) {
 			cfg.Retention.IncludeDB,
 			cfg.Retention.Location,
 			cfg.Retention.OmitTagged)
+	case "parquet":
+		RegisterRetentionParquetService(cfg.Retention)
 	}
 
 	if cfg.Compression > 0 {