Optimize sort order in nodestate parquet files

This commit is contained in:
2026-02-18 08:06:00 +01:00
parent 757be60b22
commit 2e24fde430
2 changed files with 7 additions and 1 deletion

View File

@@ -283,7 +283,7 @@ func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode
Join("node ON node_state.node_id = node.id"). Join("node ON node_state.node_id = node.id").
Where(sq.Lt{"node_state.time_stamp": cutoff}). Where(sq.Lt{"node_state.time_stamp": cutoff}).
Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))"). Where("node_state.id NOT IN (SELECT ns2.id FROM node_state ns2 WHERE ns2.time_stamp = (SELECT MAX(ns3.time_stamp) FROM node_state ns3 WHERE ns3.node_id = ns2.node_id))").
OrderBy("node_state.time_stamp ASC"). OrderBy("node.cluster ASC", "node.subcluster ASC", "node.hostname ASC", "node_state.time_stamp ASC").
RunWith(r.DB).Query() RunWith(r.DB).Query()
if err != nil { if err != nil {
return nil, err return nil, err

View File

@@ -84,6 +84,12 @@ func writeNodeStateParquetBytes(rows []ParquetNodeStateRow) ([]byte, error) {
writer := pq.NewGenericWriter[ParquetNodeStateRow](&buf, writer := pq.NewGenericWriter[ParquetNodeStateRow](&buf,
pq.Compression(&pq.Zstd), pq.Compression(&pq.Zstd),
pq.SortingWriterConfig(pq.SortingColumns(
pq.Ascending("cluster"),
pq.Ascending("subcluster"),
pq.Ascending("hostname"),
pq.Ascending("time_stamp"),
)),
) )
if _, err := writer.Write(rows); err != nil { if _, err := writer.Write(rows); err != nil {