From 2da35909c1b68c8caa5f0cfd1aac851e35464038 Mon Sep 17 00:00:00 2001
From: Jan Eitzinger
Date: Wed, 18 Feb 2026 08:13:00 +0100
Subject: [PATCH] Optimize sort order in job parquet files

---
 internal/repository/job.go    | 2 ++
 pkg/archive/parquet/writer.go | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/internal/repository/job.go b/internal/repository/job.go
index 6b0b2b12..a1cd9719 100644
--- a/internal/repository/job.go
+++ b/internal/repository/job.go
@@ -844,6 +844,8 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
 		query = query.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)")
 	}
 
+	query = query.OrderBy("job.cluster ASC", "job.subcluster ASC", "job.project ASC", "job.start_time ASC")
+
 	rows, err := query.RunWith(r.stmtCache).Query()
 	if err != nil {
 		cclog.Errorf("Error while running FindJobsBetween query: %v", err)
diff --git a/pkg/archive/parquet/writer.go b/pkg/archive/parquet/writer.go
index e5dc0d9d..bfe4490f 100644
--- a/pkg/archive/parquet/writer.go
+++ b/pkg/archive/parquet/writer.go
@@ -88,6 +88,11 @@ func writeParquetBytes(rows []ParquetJobRow) ([]byte, error) {
 
 	writer := pq.NewGenericWriter[ParquetJobRow](&buf,
 		pq.Compression(&pq.Zstd),
+		pq.SortingWriterConfig(pq.SortingColumns(
+			pq.Ascending("sub_cluster"),
+			pq.Ascending("project"),
+			pq.Ascending("start_time"),
+		)),
 	)
 
 	if _, err := writer.Write(rows); err != nil {