Patch parquet archive writer to reduce high memory usage

Aditya Ujeniya
2026-03-17 18:41:47 +01:00
parent 032d1e0692
commit 0a4c4d8e57
3 changed files with 112 additions and 52 deletions
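
The hunks below show only the test-side changes: instead of collecting a full slice of rows in memory, rows are now produced into a buffered channel by a goroutine and consumed by a streaming writer (writeParquetArchiveStream). The writer implementation itself lives in the other changed files and is not shown in this excerpt. As a rough illustration of the channel-draining pattern the tests exercise, here is a minimal sketch; ParquetMetricRow comes from the diff, while batchParquetWriter, writeRowsStreaming, and batchSize are hypothetical stand-ins for the commit's actual writer code.

// Minimal sketch of the channel-draining pattern the updated tests exercise.
// Not the implementation from this commit: batchParquetWriter, writeRowsStreaming,
// and batchSize are hypothetical stand-ins for the real Parquet writer code.

// ParquetMetricRow is the row type used in the diff below; fields omitted here.
type ParquetMetricRow struct{ /* ... */ }

// batchParquetWriter abstracts whatever Parquet library writer is actually used.
type batchParquetWriter interface {
	WriteRows(rows []ParquetMetricRow) error
	Close() error
}

// writeRowsStreaming consumes rows from the channel in bounded batches, so peak
// memory is proportional to batchSize rather than to the total number of rows.
func writeRowsStreaming(w batchParquetWriter, rows <-chan *ParquetMetricRow, batchSize int) (int, error) {
	batch := make([]ParquetMetricRow, 0, batchSize)
	total := 0
	flush := func() error {
		if len(batch) == 0 {
			return nil
		}
		if err := w.WriteRows(batch); err != nil {
			return err
		}
		total += len(batch)
		batch = batch[:0]
		return nil
	}
	for r := range rows {
		batch = append(batch, *r)
		if len(batch) == batchSize {
			if err := flush(); err != nil {
				return total, err
			}
		}
	}
	if err := flush(); err != nil {
		return total, err
	}
	return total, w.Close()
}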


@@ -69,7 +69,16 @@ func TestFlattenCheckpointFile(t *testing.T) {
 		},
 	}
-	rows := flattenCheckpointFile(cf, "fritz", "node001", "node", "", nil)
+	rowChan := make(chan *ParquetMetricRow, 100)
+	go func() {
+		flattenCheckpointFile(cf, "fritz", "node001", "node", "", rowChan)
+		close(rowChan)
+	}()
+	var rows []ParquetMetricRow
+	for r := range rowChan {
+		rows = append(rows, *r)
+	}
 	// cpu_load: 2 non-NaN values at node scope
 	// mem_bw: 2 non-NaN values at socket0 scope
@@ -153,17 +162,28 @@ func TestParquetArchiveRoundtrip(t *testing.T) {
 	// Archive to Parquet
 	archiveDir := filepath.Join(tmpDir, "archive")
-	rows, files, err := archiveCheckpointsToParquet(cpDir, "testcluster", "node001", 2000)
+	rowChan := make(chan *ParquetMetricRow, 100)
+	var files []string
+	var archiveErr error
+	done := make(chan struct{})
+	go func() {
+		files, archiveErr = archiveCheckpointsToParquet(cpDir, "testcluster", "node001", 2000, rowChan)
+		close(rowChan)
+		close(done)
+	}()
+	parquetFile := filepath.Join(archiveDir, "testcluster", "1000.parquet")
+	_, err = writeParquetArchiveStream(parquetFile, rowChan)
 	if err != nil {
 		t.Fatal(err)
 	}
-	if len(files) != 1 || files[0] != "1000.json" {
-		t.Fatalf("expected 1 file, got %v", files)
+	<-done
+	if archiveErr != nil {
+		t.Fatal(archiveErr)
 	}
-	parquetFile := filepath.Join(archiveDir, "testcluster", "1000.parquet")
-	if err := writeParquetArchive(parquetFile, rows); err != nil {
-		t.Fatal(err)
+	if len(files) != 1 || len(files) > 0 && files[0] != "1000.json" {
+		t.Fatalf("expected 1 file (1000.json), got %v", files)
 	}
 	// Read back and verify