Optimize sort order in job parquet files

This commit is contained in:
2026-02-18 08:13:00 +01:00
parent 2e24fde430
commit 2da35909c1
2 changed files with 7 additions and 0 deletions

View File

@@ -844,6 +844,8 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
query = query.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)") query = query.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)")
} }
query = query.OrderBy("job.cluster ASC", "job.subcluster ASC", "job.project ASC", "job.start_time ASC")
rows, err := query.RunWith(r.stmtCache).Query() rows, err := query.RunWith(r.stmtCache).Query()
if err != nil { if err != nil {
cclog.Errorf("Error while running FindJobsBetween query: %v", err) cclog.Errorf("Error while running FindJobsBetween query: %v", err)

View File

@@ -88,6 +88,11 @@ func writeParquetBytes(rows []ParquetJobRow) ([]byte, error) {
writer := pq.NewGenericWriter[ParquetJobRow](&buf, writer := pq.NewGenericWriter[ParquetJobRow](&buf,
pq.Compression(&pq.Zstd), pq.Compression(&pq.Zstd),
pq.SortingWriterConfig(pq.SortingColumns(
pq.Ascending("sub_cluster"),
pq.Ascending("project"),
pq.Ascending("start_time"),
)),
) )
if _, err := writer.Write(rows); err != nil { if _, err := writer.Write(rows); err != nil {