Restructure metricstore cleanup archiving to stay within 32k parquet-go limit

Entire-Checkpoint: 1660b8cf2571
This commit is contained in:
2026-03-23 06:32:24 +01:00
parent 01ec70baa8
commit 586c902044
2 changed files with 16 additions and 5 deletions

View File

@@ -22,6 +22,7 @@ import (
func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
if Keys.Cleanup.Mode == "archive" {
cclog.Info("[METRICSTORE]> enable archive cleanup to parquet")
// Run as Archiver
cleanUpWorker(wg, ctx,
Keys.RetentionInMemory,
@@ -43,7 +44,6 @@ func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
// cleanUpWorker takes simple values to configure what it does
func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {
wg.Go(func() {
d, err := time.ParseDuration(interval)
if err != nil {
cclog.Fatalf("[METRICSTORE]> error parsing %s interval duration: %v\n", mode, err)
@@ -99,8 +99,8 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
}
type workItem struct {
dir string
cluster, host string
dir string
cluster, host string
}
var wg sync.WaitGroup
@@ -275,6 +275,12 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
break
}
}
// Flush once per host to keep row group count within parquet limits.
if writeErr == nil {
if err := writer.FlushRowGroup(); err != nil {
writeErr = err
}
}
}
// Always track files for deletion (even if write failed, we still drain)
toDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})