mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-23 08:07:29 +01:00
Restructure metricstore cleanup archiving to stay within 32k parquet-go limit
Entire-Checkpoint: 1660b8cf2571
This commit is contained in:
@@ -22,6 +22,7 @@ import (
|
|||||||
|
|
||||||
func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
||||||
if Keys.Cleanup.Mode == "archive" {
|
if Keys.Cleanup.Mode == "archive" {
|
||||||
|
cclog.Info("[METRICSTORE]> enable archive cleanup to parquet")
|
||||||
// Run as Archiver
|
// Run as Archiver
|
||||||
cleanUpWorker(wg, ctx,
|
cleanUpWorker(wg, ctx,
|
||||||
Keys.RetentionInMemory,
|
Keys.RetentionInMemory,
|
||||||
@@ -43,7 +44,6 @@ func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
// cleanUpWorker takes simple values to configure what it does
|
// cleanUpWorker takes simple values to configure what it does
|
||||||
func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {
|
func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {
|
||||||
wg.Go(func() {
|
wg.Go(func() {
|
||||||
|
|
||||||
d, err := time.ParseDuration(interval)
|
d, err := time.ParseDuration(interval)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Fatalf("[METRICSTORE]> error parsing %s interval duration: %v\n", mode, err)
|
cclog.Fatalf("[METRICSTORE]> error parsing %s interval duration: %v\n", mode, err)
|
||||||
@@ -99,8 +99,8 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type workItem struct {
|
type workItem struct {
|
||||||
dir string
|
dir string
|
||||||
cluster, host string
|
cluster, host string
|
||||||
}
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
@@ -275,6 +275,12 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Flush once per host to keep row group count within parquet limits.
|
||||||
|
if writeErr == nil {
|
||||||
|
if err := writer.FlushRowGroup(); err != nil {
|
||||||
|
writeErr = err
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Always track files for deletion (even if write failed, we still drain)
|
// Always track files for deletion (even if write failed, we still drain)
|
||||||
toDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})
|
toDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ func newParquetArchiveWriter(filename string) (*parquetArchiveWriter, error) {
|
|||||||
|
|
||||||
// WriteCheckpointFile streams a CheckpointFile tree directly to Parquet rows,
|
// WriteCheckpointFile streams a CheckpointFile tree directly to Parquet rows,
|
||||||
// writing metrics in sorted order without materializing all rows in memory.
|
// writing metrics in sorted order without materializing all rows in memory.
|
||||||
// Produces one row group per call (typically one host's data).
|
// Call FlushRowGroup() after writing all checkpoint files for a host.
|
||||||
func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster, hostname, scope, scopeID string) error {
|
func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster, hostname, scope, scopeID string) error {
|
||||||
w.writeLevel(cf, cluster, hostname, scope, scopeID)
|
w.writeLevel(cf, cluster, hostname, scope, scopeID)
|
||||||
|
|
||||||
@@ -112,10 +112,15 @@ func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster,
|
|||||||
w.batch = w.batch[:0]
|
w.batch = w.batch[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FlushRowGroup flushes the current row group to the Parquet file.
|
||||||
|
// Should be called once per host after all checkpoint files for that host are written.
|
||||||
|
func (w *parquetArchiveWriter) FlushRowGroup() error {
|
||||||
if err := w.writer.Flush(); err != nil {
|
if err := w.writer.Flush(); err != nil {
|
||||||
return fmt.Errorf("flushing parquet row group: %w", err)
|
return fmt.Errorf("flushing parquet row group: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user