Optimize usage dashboard: partial indexes, request cache, parallel histograms

- Add migration 14: partial covering indexes WHERE job_state='running' for user/project/subcluster groupings (tiny B-tree vs full table)
- Inline the literal state value in BuildWhereClause (instead of a parameterized placeholder) so SQLite can match the partial indexes
- Add per-request statsGroupCache (sync.Once per filter+groupBy key) so identical grouped stats queries execute only once per GQL operation
- Parallelize 4 histogram queries in AddHistograms using errgroup
- Consolidate frontend from 6 GQL aliases to 2, sort+slice top-10 client-side via $derived

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Entire-Checkpoint: 5b26a6e5ff10
2026-05-01 19:27:30 +02:00 · 2026-03-13 14:31:37 +01:00
parent cbe46c3524
commit d586fe4b43
10 changed files with 277 additions and 87 deletions
--- a/internal/repository/jobQuery.go
+++ b/internal/repository/jobQuery.go
@@ -198,8 +198,10 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
 	}
 	if filter.State != nil {
 		if len(filter.State) == 1 {
+			// Inline literal value so SQLite can match partial indexes (WHERE job_state = 'running').
+			// Safe: values come from validated GraphQL enum (model.JobState).
 			singleStat := string(filter.State[0])
-			query = query.Where("job.job_state = ?", singleStat)
+			query = query.Where(fmt.Sprintf("job.job_state = '%s'", singleStat))
 		} else {
 			states := make([]string, len(filter.State))
 			for i, val := range filter.State {
--- a/internal/repository/migration.go
+++ b/internal/repository/migration.go
@@ -21,13 +21,14 @@ import (
 // is added to internal/repository/migrations/sqlite3/.
 //
 // Version history:
+//   - Version 14: Partial covering indexes for running jobs (tiny B-tree vs full-table indexes over millions of jobs)
 //   - Version 13: Add covering indexes for status/dashboard queries (cluster, job_state, ...)
 //   - Version 12: Add covering index for stats queries (cluster, start_time, hpc_user, ...)
 //   - Version 11: Optimize job table indexes (reduce from ~78 to 48)
 //   - Version 10: Node table
 //
 // Migration files are embedded at build time from the migrations directory.
-const Version uint = 13
+const Version uint = 14

 //go:embed migrations/*
 var migrationFiles embed.FS
--- a/internal/repository/migrations/sqlite3/14_running-partial-indexes.down.sql
+++ b/internal/repository/migrations/sqlite3/14_running-partial-indexes.down.sql
@@ -0,0 +1,5 @@
+-- Reverse migration 14: Drop partial indexes for running jobs
+
+DROP INDEX IF EXISTS jobs_running_user_stats;
+DROP INDEX IF EXISTS jobs_running_project_stats;
+DROP INDEX IF EXISTS jobs_running_subcluster_stats;
--- a/internal/repository/migrations/sqlite3/14_running-partial-indexes.up.sql
+++ b/internal/repository/migrations/sqlite3/14_running-partial-indexes.up.sql
@@ -0,0 +1,18 @@
+-- Migration 14: Partial covering indexes for running jobs
+-- Only running jobs are in the B-tree, so these indexes are tiny compared to
+-- the full-table indexes from migration 13. SQLite uses them when the query
+-- contains the literal `job_state = 'running'` (not a parameter placeholder).
+
+CREATE INDEX IF NOT EXISTS jobs_running_user_stats
+  ON job (cluster, hpc_user, num_nodes, num_hwthreads, num_acc, duration, start_time)
+  WHERE job_state = 'running';
+
+CREATE INDEX IF NOT EXISTS jobs_running_project_stats
+  ON job (cluster, project, num_nodes, num_hwthreads, num_acc, duration, start_time)
+  WHERE job_state = 'running';
+
+CREATE INDEX IF NOT EXISTS jobs_running_subcluster_stats
+  ON job (cluster, subcluster, num_nodes, num_hwthreads, num_acc, duration, start_time)
+  WHERE job_state = 'running';
+
+PRAGMA optimize;
--- a/internal/repository/stats.go
+++ b/internal/repository/stats.go
@@ -55,6 +55,7 @@ import (
 	cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
 	"github.com/ClusterCockpit/cc-lib/v2/schema"
 	sq "github.com/Masterminds/squirrel"
+	"golang.org/x/sync/errgroup"
 )

 // groupBy2column maps GraphQL Aggregate enum values to their corresponding database column names.
@@ -640,30 +641,45 @@ func (r *JobRepository) AddHistograms(
 		targetBinSize = 3600
 	}

-	var err error
-	// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
+	// Run all 4 histogram queries in parallel — each writes a distinct struct field.
+	g, gctx := errgroup.WithContext(ctx)
+
 	value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = 'running' THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as int) as value`, time.Now().Unix(), targetBinSize)
-	stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
-	if err != nil {
-		cclog.Warn("Error while loading job statistics histogram: job duration")
-		return nil, err
-	}

-	stat.HistNumNodes, err = r.jobsStatisticsHistogram(ctx, "job.num_nodes as value", filter)
-	if err != nil {
-		cclog.Warn("Error while loading job statistics histogram: num nodes")
-		return nil, err
-	}
+	g.Go(func() error {
+		var err error
+		stat.HistDuration, err = r.jobsDurationStatisticsHistogram(gctx, value, filter, targetBinSize, &targetBinCount)
+		if err != nil {
+			cclog.Warn("Error while loading job statistics histogram: job duration")
+		}
+		return err
+	})
+	g.Go(func() error {
+		var err error
+		stat.HistNumNodes, err = r.jobsStatisticsHistogram(gctx, "job.num_nodes as value", filter)
+		if err != nil {
+			cclog.Warn("Error while loading job statistics histogram: num nodes")
+		}
+		return err
+	})
+	g.Go(func() error {
+		var err error
+		stat.HistNumCores, err = r.jobsStatisticsHistogram(gctx, "job.num_hwthreads as value", filter)
+		if err != nil {
+			cclog.Warn("Error while loading job statistics histogram: num hwthreads")
+		}
+		return err
+	})
+	g.Go(func() error {
+		var err error
+		stat.HistNumAccs, err = r.jobsStatisticsHistogram(gctx, "job.num_acc as value", filter)
+		if err != nil {
+			cclog.Warn("Error while loading job statistics histogram: num acc")
+		}
+		return err
+	})

-	stat.HistNumCores, err = r.jobsStatisticsHistogram(ctx, "job.num_hwthreads as value", filter)
-	if err != nil {
-		cclog.Warn("Error while loading job statistics histogram: num hwthreads")
-		return nil, err
-	}
-
-	stat.HistNumAccs, err = r.jobsStatisticsHistogram(ctx, "job.num_acc as value", filter)
-	if err != nil {
-		cclog.Warn("Error while loading job statistics histogram: num acc")
+	if err := g.Wait(); err != nil {
 		return nil, err
 	}