mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-11 13:31:45 +01:00
Add parquet file job archiving target
This commit is contained in:
@@ -9,6 +9,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
pqarchive "github.com/ClusterCockpit/cc-backend/pkg/archive/parquet"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
@@ -66,3 +67,96 @@ func RegisterRetentionMoveService(age int, includeDB bool, location string, omit
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
func RegisterRetentionParquetService(retention Retention) {
|
||||
cclog.Info("Register retention parquet service")
|
||||
|
||||
maxFileSizeMB := retention.MaxFileSizeMB
|
||||
if maxFileSizeMB <= 0 {
|
||||
maxFileSizeMB = 512
|
||||
}
|
||||
|
||||
var target pqarchive.ParquetTarget
|
||||
var err error
|
||||
|
||||
switch retention.TargetKind {
|
||||
case "s3":
|
||||
target, err = pqarchive.NewS3Target(pqarchive.S3TargetConfig{
|
||||
Endpoint: retention.TargetEndpoint,
|
||||
Bucket: retention.TargetBucket,
|
||||
AccessKey: retention.TargetAccessKey,
|
||||
SecretKey: retention.TargetSecretKey,
|
||||
Region: retention.TargetRegion,
|
||||
UsePathStyle: retention.TargetUsePathStyle,
|
||||
})
|
||||
default:
|
||||
target, err = pqarchive.NewFileTarget(retention.TargetPath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
cclog.Errorf("Parquet retention: failed to create target: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(5, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
startTime := time.Now().Unix() - int64(retention.Age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime, retention.OmitTagged)
|
||||
if err != nil {
|
||||
cclog.Warnf("Parquet retention: error finding jobs: %v", err)
|
||||
return
|
||||
}
|
||||
if len(jobs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Infof("Parquet retention: processing %d jobs", len(jobs))
|
||||
ar := archive.GetHandle()
|
||||
pw := pqarchive.NewParquetWriter(target, maxFileSizeMB)
|
||||
|
||||
for _, job := range jobs {
|
||||
meta, err := ar.LoadJobMeta(job)
|
||||
if err != nil {
|
||||
cclog.Warnf("Parquet retention: load meta for job %d: %v", job.JobID, err)
|
||||
continue
|
||||
}
|
||||
|
||||
data, err := ar.LoadJobData(job)
|
||||
if err != nil {
|
||||
cclog.Warnf("Parquet retention: load data for job %d: %v", job.JobID, err)
|
||||
continue
|
||||
}
|
||||
|
||||
row, err := pqarchive.JobToParquetRow(meta, &data)
|
||||
if err != nil {
|
||||
cclog.Warnf("Parquet retention: convert job %d: %v", job.JobID, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if err := pw.AddJob(*row); err != nil {
|
||||
cclog.Errorf("Parquet retention: add job %d to writer: %v", job.JobID, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if err := pw.Close(); err != nil {
|
||||
cclog.Errorf("Parquet retention: close writer: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
ar.CleanUp(jobs)
|
||||
|
||||
if retention.IncludeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime, retention.OmitTagged)
|
||||
if err != nil {
|
||||
cclog.Errorf("Parquet retention: delete jobs from db: %v", err)
|
||||
} else {
|
||||
cclog.Infof("Parquet retention: removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
cclog.Errorf("Parquet retention: db optimization error: %v", err)
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user