From 2e24fde4306b7ecf01570ea79a3c1387874c4054 Mon Sep 17 00:00:00 2001
From: Jan Eitzinger
Date: Wed, 18 Feb 2026 08:06:00 +0100
Subject: [PATCH] Optimize sort order in nodestate parquet files

---
 internal/repository/node.go             | 2 +-
 pkg/archive/parquet/nodestate_writer.go | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/internal/repository/node.go b/internal/repository/node.go
index 36dd2141..2e3c6746 100644
--- a/internal/repository/node.go
+++ b/internal/repository/node.go
@@ -283,7 +283,7 @@ func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode
 		Join("node ON node_state.node_id = node.id").
 		Where(sq.Lt{"node_state.time_stamp": cutoff}).
 		Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))").
-		OrderBy("node_state.time_stamp ASC").
+		OrderBy("node.cluster ASC", "node.subcluster ASC", "node.hostname ASC", "node_state.time_stamp ASC").
 		RunWith(r.DB).Query()
 	if err != nil {
 		return nil, err
diff --git a/pkg/archive/parquet/nodestate_writer.go b/pkg/archive/parquet/nodestate_writer.go
index e8487556..074e02e4 100644
--- a/pkg/archive/parquet/nodestate_writer.go
+++ b/pkg/archive/parquet/nodestate_writer.go
@@ -84,6 +84,12 @@ func writeNodeStateParquetBytes(rows []ParquetNodeStateRow) ([]byte, error) {
 
 	writer := pq.NewGenericWriter[ParquetNodeStateRow](&buf,
 		pq.Compression(&pq.Zstd),
+		pq.SortingWriterConfig(pq.SortingColumns(
+			pq.Ascending("cluster"),
+			pq.Ascending("subcluster"),
+			pq.Ascending("hostname"),
+			pq.Ascending("time_stamp"),
+		)),
 	)
 
 	if _, err := writer.Write(rows); err != nil {