mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-15 12:27:30 +01:00
Compare commits
5 Commits
hotfix
...
optimize-d
| Author | SHA1 | Date | |
|---|---|---|---|
|
dc8e8b9095
|
|||
|
2080e268ce
|
|||
|
e8204ff1a2
|
|||
|
09fa239e8b
|
|||
|
301e590580
|
@@ -1,24 +0,0 @@
|
||||
{"time":"2026-03-10T21:13:26.857514+01:00","level":"INFO","msg":"session-start","component":"lifecycle","agent":"claude-code","event":"SessionStart","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","session_ref":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/28147033-ddc8-4056-b064-e0558fbc614e.jsonl"}
|
||||
{"time":"2026-03-10T21:15:58.745665+01:00","level":"INFO","msg":"turn-start","component":"lifecycle","agent":"claude-code","event":"TurnStart","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","session_ref":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/28147033-ddc8-4056-b064-e0558fbc614e.jsonl"}
|
||||
{"time":"2026-03-10T21:15:59.052384+01:00","level":"INFO","msg":"phase transition","component":"session","agent":"claude-code","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","event":"TurnStart","from":"","to":"active"}
|
||||
{"time":"2026-03-10T21:15:59.247089+01:00","level":"INFO","msg":"initialized shadow session","component":"hooks","agent":"claude-code","session_id":"28147033-ddc8-4056-b064-e0558fbc614e"}
|
||||
{"time":"2026-03-10T21:16:11.273515+01:00","level":"INFO","msg":"subagent started","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"lifecycle","agent":"claude-code","event":"SubagentStart","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","tool_use_id":"toolu_019bWLMJ1nzVAT8Fg2W5qRmi","transcript":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/28147033-ddc8-4056-b064-e0558fbc614e.jsonl"}
|
||||
{"time":"2026-03-10T21:17:50.542621+01:00","level":"INFO","msg":"subagent completed","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"lifecycle","agent":"claude-code","event":"SubagentEnd","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","tool_use_id":"toolu_019bWLMJ1nzVAT8Fg2W5qRmi","agent_id":"a03483f2df2e8cef0"}
|
||||
{"time":"2026-03-10T21:17:50.755441+01:00","level":"INFO","msg":"no file changes detected, skipping task checkpoint","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"lifecycle","agent":"claude-code"}
|
||||
{"time":"2026-03-10T21:28:03.798482+01:00","level":"INFO","msg":"turn-end","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"lifecycle","agent":"claude-code","event":"TurnEnd","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","session_ref":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/28147033-ddc8-4056-b064-e0558fbc614e.jsonl"}
|
||||
{"time":"2026-03-10T21:28:04.579301+01:00","level":"INFO","msg":"created shadow branch and committed changes","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"checkpoint","agent":"claude-code","shadow_branch":"entire/70fea39-e3b0c4"}
|
||||
{"time":"2026-03-10T21:28:04.579308+01:00","level":"INFO","msg":"checkpoint saved","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"checkpoint","agent":"claude-code","strategy":"manual-commit","checkpoint_type":"session","checkpoint_count":1,"modified_files":3,"new_files":0,"deleted_files":0,"shadow_branch":"entire/70fea39-e3b0c4","branch_created":true}
|
||||
{"time":"2026-03-10T21:28:04.579367+01:00","level":"INFO","msg":"phase transition","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"session","agent":"claude-code","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","event":"TurnEnd","from":"active","to":"idle"}
|
||||
{"time":"2026-03-10T21:28:04.656228+01:00","level":"INFO","msg":"session-end","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"lifecycle","agent":"claude-code","event":"SessionEnd","session_id":"28147033-ddc8-4056-b064-e0558fbc614e"}
|
||||
{"time":"2026-03-10T21:28:04.656276+01:00","level":"INFO","msg":"phase transition","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"session","agent":"claude-code","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","event":"SessionStop","from":"idle","to":"ended"}
|
||||
{"time":"2026-03-10T21:28:04.7209+01:00","level":"INFO","msg":"session-start","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"lifecycle","agent":"claude-code","event":"SessionStart","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","session_ref":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/42401d2e-7d1c-4c0e-abe6-356cb2d48747.jsonl"}
|
||||
{"time":"2026-03-10T21:28:44.262904+01:00","level":"INFO","msg":"turn-end","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"lifecycle","agent":"claude-code","event":"TurnEnd","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","session_ref":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/42401d2e-7d1c-4c0e-abe6-356cb2d48747.jsonl"}
|
||||
{"time":"2026-03-10T21:28:44.697919+01:00","level":"INFO","msg":"committed changes to shadow branch","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"checkpoint","agent":"claude-code","shadow_branch":"entire/70fea39-e3b0c4"}
|
||||
{"time":"2026-03-10T21:28:44.697926+01:00","level":"INFO","msg":"checkpoint saved","session_id":"28147033-ddc8-4056-b064-e0558fbc614e","component":"checkpoint","agent":"claude-code","strategy":"manual-commit","checkpoint_type":"session","checkpoint_count":1,"modified_files":3,"new_files":1,"deleted_files":0,"shadow_branch":"entire/70fea39-e3b0c4","branch_created":false}
|
||||
{"time":"2026-03-11T05:07:15.22488+01:00","level":"INFO","msg":"turn-start","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","component":"lifecycle","agent":"claude-code","event":"TurnStart","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","session_ref":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/42401d2e-7d1c-4c0e-abe6-356cb2d48747.jsonl"}
|
||||
{"time":"2026-03-11T05:07:15.492241+01:00","level":"INFO","msg":"phase transition","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","component":"session","agent":"claude-code","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","event":"TurnStart","from":"idle","to":"active"}
|
||||
{"time":"2026-03-11T05:07:15.7169+01:00","level":"INFO","msg":"moved shadow branch (HEAD changed during session)","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","component":"migration","agent":"claude-code","from":"entire/70fea39-e3b0c4","to":"entire/1cf9920-e3b0c4"}
|
||||
{"time":"2026-03-11T05:08:52.386135+01:00","level":"INFO","msg":"turn-end","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","component":"lifecycle","agent":"claude-code","event":"TurnEnd","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","session_ref":"/Users/jan/.claude/projects/-Users-jan-prg-CC-cc-backend/42401d2e-7d1c-4c0e-abe6-356cb2d48747.jsonl"}
|
||||
{"time":"2026-03-11T05:08:52.825155+01:00","level":"INFO","msg":"committed changes to shadow branch","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","component":"checkpoint","agent":"claude-code","shadow_branch":"entire/1cf9920-e3b0c4"}
|
||||
{"time":"2026-03-11T05:08:52.825161+01:00","level":"INFO","msg":"checkpoint saved","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","component":"checkpoint","agent":"claude-code","strategy":"manual-commit","checkpoint_type":"session","checkpoint_count":2,"modified_files":1,"new_files":0,"deleted_files":0,"shadow_branch":"entire/1cf9920-e3b0c4","branch_created":false}
|
||||
{"time":"2026-03-11T05:08:52.82522+01:00","level":"INFO","msg":"phase transition","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","component":"session","agent":"claude-code","session_id":"42401d2e-7d1c-4c0e-abe6-356cb2d48747","event":"TurnEnd","from":"active","to":"idle"}
|
||||
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
|
||||
There also have to be bugs in jobQuery.go . Especially the following query triggers the memory leak: SELECT * FROM job WHERE job.job_state IN ("completed", "running", "failed") ORDER BY job.start_time DESC LIMIT 1 OFFSET 10; Dig deeper to find the cause. Also investigate why no existing index is used for this query.
|
||||
What optimal index selection do you suggest? The job table contains almost 20 million jobs in production, causing timeouts on any filter query that does not use an index.
|
||||
@@ -1,27 +1,9 @@
|
||||
All passes. Here's a summary of findings and fixes:
|
||||
|
||||
---
|
||||
|
||||
## Findings
|
||||
The plan reduces 79 indexes to 20 by:
|
||||
|
||||
### Bug 1: Missing `defer rows.Close()` in `user.go:GetLdapUsernames()` (line 100)
|
||||
**Fixed.** Every call leaked a database connection and its buffers.
|
||||
- **Eliminating entire categories**: all `shared`-prefixed (16), all user/project sort variants (11), all standalone single-filter sorts (11)
|
||||
- **Consolidating**: cluster+jobstate sort variants replaced by `(cluster, sort_col)` which works for any state combo with LIMIT
|
||||
- **Keeping what matters**: the indexes SQLite actually picks with ANALYZE — `jobs_starttime`, `jobs_cluster_starttime_duration`, `jobs_user_starttime_duration`, `jobs_cluster_jobstate_duration_starttime`, and a few secondary sort indexes
|
||||
|
||||
### Bug 2: SQLite never runs `ANALYZE` — wrong index selection
|
||||
**Fixed.** Added `ANALYZE` to `setupSqlite()` in `dbConnection.go`.
|
||||
|
||||
Without statistics in `sqlite_stat1`, SQLite's query planner **guessed** and chose `jobs_jobstate_energy` (a terrible index for this query) instead of scanning `jobs_starttime` in reverse. The result:
|
||||
|
||||
| | Before ANALYZE | After ANALYZE |
|
||||
|---|---|---|
|
||||
| Index used | `jobs_jobstate_energy` | `jobs_starttime` |
|
||||
| Sort | **TEMP B-TREE** (materializes ALL rows) | None (index order) |
|
||||
| Memory | Proportional to total matching rows | Constant |
|
||||
| I/O | Full scan of all matching rows | Stops at OFFSET+LIMIT |
|
||||
|
||||
### Bug 3: `IN` clause + `ORDER BY` is fundamentally incompatible with composite indexes
|
||||
|
||||
Even with the "correct" index `(job_state, start_time)`, SQLite **cannot** merge-sort across 3 separate index range scans for `IN ('completed','running','failed')`. It always falls back to a temp B-tree sort. The only efficient plan is to use the standalone `jobs_starttime` index — which SQLite does automatically **after ANALYZE** because it realizes the 3 states cover virtually all rows, making the WHERE clause nearly a no-op.
|
||||
|
||||
### Observation: 79 indexes on the `job` table
|
||||
This is excessive and actively harmful — it confuses the query planner (especially without ANALYZE) and slows writes. The `jobs_jobstate_starttime` index from migration 08 is also missing from the actual DB (only the 3-column `jobs_jobstate_starttime_duration` exists). This is worth investigating separately but is a schema/migration concern, not a code bug.
|
||||
Key trade-off: ~20% of queries that sort by rare columns (num_hwthreads, num_acc, energy) with a state filter will now do a cheap per-row state check instead of using a 3-column composite. With LIMIT this is negligible.
|
||||
86
README.md
86
README.md
@@ -133,6 +133,92 @@ ln -s <your-existing-job-archive> ./var/job-archive
|
||||
./cc-backend -help
|
||||
```
|
||||
|
||||
## Database Configuration
|
||||
|
||||
cc-backend uses SQLite as its database. For large installations, SQLite memory
|
||||
usage can be tuned via the optional `db-config` section in config.json under
|
||||
`main`:
|
||||
|
||||
```json
|
||||
{
|
||||
"main": {
|
||||
"db": "./var/job.db",
|
||||
"db-config": {
|
||||
"cache-size-mb": 2048,
|
||||
"soft-heap-limit-mb": 16384,
|
||||
"max-open-connections": 4,
|
||||
"max-idle-connections": 4,
|
||||
"max-idle-time-minutes": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
All fields are optional. If `db-config` is omitted entirely, built-in defaults
|
||||
are used.
|
||||
|
||||
### Options
|
||||
|
||||
| Option | Default | Description |
|
||||
| ----------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `cache-size-mb` | 2048 | SQLite page cache size per connection in MB. Maps to `PRAGMA cache_size`. Total cache memory is up to `cache-size-mb × max-open-connections`. |
|
||||
| `soft-heap-limit-mb` | 16384 | Process-wide SQLite soft heap limit in MB. SQLite will try to release cache pages to stay under this limit. Queries won't fail if exceeded, but cache eviction becomes more aggressive. |
|
||||
| `max-open-connections` | 4 | Maximum number of open database connections. |
|
||||
| `max-idle-connections` | 4 | Maximum number of idle database connections kept in the pool. |
|
||||
| `max-idle-time-minutes` | 10 | Maximum time in minutes a connection can sit idle before being closed. |
|
||||
|
||||
### Sizing Guidelines
|
||||
|
||||
SQLite's `cache_size` is a **per-connection** setting — each connection
|
||||
maintains its own independent page cache. With multiple connections, the total
|
||||
memory available for caching is the sum across all connections.
|
||||
|
||||
In practice, different connections tend to cache **different pages** (e.g., one
|
||||
handles a job listing query while another runs a statistics aggregation), so
|
||||
their caches naturally spread across the database. The formula
|
||||
`DB_size / max-open-connections` gives enough per-connection cache that the
|
||||
combined caches can cover the entire database.
|
||||
|
||||
However, this is a best-case estimate. Connections running similar queries will
|
||||
cache the same pages redundantly. In the worst case (all connections caching
|
||||
identical pages), only `cache-size-mb` worth of unique data is cached rather
|
||||
than `cache-size-mb × max-open-connections`. For workloads with diverse
|
||||
concurrent queries, cache overlap is typically low.
|
||||
|
||||
**Rules of thumb:**
|
||||
|
||||
- **cache-size-mb**: Set to `DB_size_in_MB / max-open-connections` to allow the
|
||||
entire database to be cached in memory. For example, an 80GB database with 8
|
||||
connections needs at least 10240 MB (10GB) per connection. If your workload
|
||||
has many similar concurrent queries, consider setting it higher to account for
|
||||
cache overlap between connections.
|
||||
|
||||
- **soft-heap-limit-mb**: Should be >= `cache-size-mb × max-open-connections` to
|
||||
avoid cache thrashing. This is the total SQLite memory budget for the process.
|
||||
- On small installations the defaults work well. On servers with large databases
|
||||
(tens of GB) and plenty of RAM, increasing these values significantly improves
|
||||
query performance by reducing disk I/O.
|
||||
|
||||
### Example: Large Server (512GB RAM, 80GB database)
|
||||
|
||||
```json
|
||||
{
|
||||
"main": {
|
||||
"db-config": {
|
||||
"cache-size-mb": 16384,
|
||||
"soft-heap-limit-mb": 131072,
|
||||
"max-open-connections": 8,
|
||||
"max-idle-time-minutes": 30
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This allows the entire 80GB database to be cached (8 × 16GB = 128GB page cache)
|
||||
with a 128GB soft heap limit, using about 25% of available RAM.
|
||||
|
||||
The effective configuration is logged at startup for verification.
|
||||
|
||||
## Project file structure
|
||||
|
||||
- [`.github/`](https://github.com/ClusterCockpit/cc-backend/tree/master/.github)
|
||||
|
||||
@@ -108,6 +108,26 @@ func initConfiguration() error {
|
||||
}
|
||||
|
||||
func initDatabase() error {
|
||||
if config.Keys.DbConfig != nil {
|
||||
cfg := repository.DefaultConfig()
|
||||
dc := config.Keys.DbConfig
|
||||
if dc.CacheSizeMB > 0 {
|
||||
cfg.DbCacheSizeMB = dc.CacheSizeMB
|
||||
}
|
||||
if dc.SoftHeapLimitMB > 0 {
|
||||
cfg.DbSoftHeapLimitMB = dc.SoftHeapLimitMB
|
||||
}
|
||||
if dc.MaxOpenConnections > 0 {
|
||||
cfg.MaxOpenConnections = dc.MaxOpenConnections
|
||||
}
|
||||
if dc.MaxIdleConnections > 0 {
|
||||
cfg.MaxIdleConnections = dc.MaxIdleConnections
|
||||
}
|
||||
if dc.ConnectionMaxIdleTimeMins > 0 {
|
||||
cfg.ConnectionMaxIdleTime = time.Duration(dc.ConnectionMaxIdleTimeMins) * time.Minute
|
||||
}
|
||||
repository.SetConfig(cfg)
|
||||
}
|
||||
repository.Connect(config.Keys.DB)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -77,6 +77,17 @@ type ProgramConfig struct {
|
||||
|
||||
// Node state retention configuration
|
||||
NodeStateRetention *NodeStateRetention `json:"nodestate-retention"`
|
||||
|
||||
// Database tuning configuration
|
||||
DbConfig *DbConfig `json:"db-config"`
|
||||
}
|
||||
|
||||
type DbConfig struct {
|
||||
CacheSizeMB int `json:"cache-size-mb"`
|
||||
SoftHeapLimitMB int `json:"soft-heap-limit-mb"`
|
||||
MaxOpenConnections int `json:"max-open-connections"`
|
||||
MaxIdleConnections int `json:"max-idle-connections"`
|
||||
ConnectionMaxIdleTimeMins int `json:"max-idle-time-minutes"`
|
||||
}
|
||||
|
||||
type NodeStateRetention struct {
|
||||
|
||||
@@ -177,6 +177,32 @@ var configSchema = `
|
||||
}
|
||||
},
|
||||
"required": ["policy"]
|
||||
},
|
||||
"db-config": {
|
||||
"description": "SQLite database tuning configuration.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cache-size-mb": {
|
||||
"description": "SQLite page cache size per connection in MB (default: 2048).",
|
||||
"type": "integer"
|
||||
},
|
||||
"soft-heap-limit-mb": {
|
||||
"description": "Process-wide SQLite soft heap limit in MB (default: 16384).",
|
||||
"type": "integer"
|
||||
},
|
||||
"max-open-connections": {
|
||||
"description": "Maximum number of open database connections (default: 4).",
|
||||
"type": "integer"
|
||||
},
|
||||
"max-idle-connections": {
|
||||
"description": "Maximum number of idle database connections (default: 4).",
|
||||
"type": "integer"
|
||||
},
|
||||
"max-idle-time-minutes": {
|
||||
"description": "Maximum idle time for a connection in minutes (default: 10).",
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}`
|
||||
|
||||
@@ -27,13 +27,25 @@ type RepositoryConfig struct {
|
||||
ConnectionMaxLifetime time.Duration
|
||||
|
||||
// ConnectionMaxIdleTime is the maximum amount of time a connection may be idle.
|
||||
// Default: 1 hour
|
||||
// Default: 10 minutes
|
||||
ConnectionMaxIdleTime time.Duration
|
||||
|
||||
// MinRunningJobDuration is the minimum duration in seconds for a job to be
|
||||
// considered in "running jobs" queries. This filters out very short jobs.
|
||||
// Default: 600 seconds (10 minutes)
|
||||
MinRunningJobDuration int
|
||||
|
||||
// DbCacheSizeMB is the SQLite page cache size per connection in MB.
|
||||
// Uses negative PRAGMA cache_size notation (KiB). With MaxOpenConnections=4
|
||||
// and DbCacheSizeMB=2048, total page cache is up to 8GB.
|
||||
// Default: 2048 (2GB)
|
||||
DbCacheSizeMB int
|
||||
|
||||
// DbSoftHeapLimitMB is the process-wide SQLite soft heap limit in MB.
|
||||
// SQLite will try to release cache pages to stay under this limit.
|
||||
// It's a soft limit — queries won't fail, but cache eviction becomes more aggressive.
|
||||
// Default: 16384 (16GB)
|
||||
DbSoftHeapLimitMB int
|
||||
}
|
||||
|
||||
// DefaultConfig returns the default repository configuration.
|
||||
@@ -44,8 +56,10 @@ func DefaultConfig() *RepositoryConfig {
|
||||
MaxOpenConnections: 4,
|
||||
MaxIdleConnections: 4,
|
||||
ConnectionMaxLifetime: time.Hour,
|
||||
ConnectionMaxIdleTime: time.Hour,
|
||||
MinRunningJobDuration: 600, // 10 minutes
|
||||
ConnectionMaxIdleTime: 10 * time.Minute,
|
||||
MinRunningJobDuration: 600, // 10 minutes
|
||||
DbCacheSizeMB: 2048, // 2GB per connection
|
||||
DbSoftHeapLimitMB: 16384, // 16GB process-wide
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,9 +36,10 @@ type DatabaseOptions struct {
|
||||
ConnectionMaxIdleTime time.Duration
|
||||
}
|
||||
|
||||
func setupSqlite(db *sql.DB) error {
|
||||
func setupSqlite(db *sql.DB, cfg *RepositoryConfig) error {
|
||||
pragmas := []string{
|
||||
"temp_store = memory",
|
||||
fmt.Sprintf("soft_heap_limit = %d", int64(cfg.DbSoftHeapLimitMB)*1024*1024),
|
||||
}
|
||||
|
||||
for _, pragma := range pragmas {
|
||||
@@ -79,7 +80,8 @@ func Connect(db string) {
|
||||
connectionURLParams.Add("_journal_mode", "WAL")
|
||||
connectionURLParams.Add("_busy_timeout", "5000")
|
||||
connectionURLParams.Add("_synchronous", "NORMAL")
|
||||
connectionURLParams.Add("_cache_size", "1000000000")
|
||||
cacheSizeKiB := repoConfig.DbCacheSizeMB * 1024 // Convert MB to KiB
|
||||
connectionURLParams.Add("_cache_size", fmt.Sprintf("-%d", cacheSizeKiB))
|
||||
connectionURLParams.Add("_foreign_keys", "true")
|
||||
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())
|
||||
|
||||
@@ -94,11 +96,14 @@ func Connect(db string) {
|
||||
cclog.Abortf("DB Connection: Could not connect to SQLite database with sqlx.Open().\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
err = setupSqlite(dbHandle.DB)
|
||||
err = setupSqlite(dbHandle.DB, repoConfig)
|
||||
if err != nil {
|
||||
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
cclog.Infof("SQLite config: cache_size=%dMB/conn, soft_heap_limit=%dMB, max_conns=%d",
|
||||
repoConfig.DbCacheSizeMB, repoConfig.DbSoftHeapLimitMB, repoConfig.MaxOpenConnections)
|
||||
|
||||
dbHandle.SetMaxOpenConns(opts.MaxOpenConnections)
|
||||
dbHandle.SetMaxIdleConns(opts.MaxIdleConnections)
|
||||
dbHandle.SetConnMaxLifetime(opts.ConnectionMaxLifetime)
|
||||
|
||||
@@ -171,7 +171,7 @@ func (r *JobRepository) FindByID(ctx context.Context, jobID int64) (*schema.Job,
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRowContext(ctx))
|
||||
}
|
||||
|
||||
// FindByIDWithUser executes a SQL query to find a specific batch job.
|
||||
@@ -217,7 +217,7 @@ func (r *JobRepository) FindByJobID(ctx context.Context, jobID int64, startTime
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRowContext(ctx))
|
||||
}
|
||||
|
||||
// IsJobOwner checks if the specified user owns the batch job identified by jobID,
|
||||
|
||||
@@ -84,7 +84,7 @@ func (r *JobRepository) QueryJobs(
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
rows, err := query.RunWith(r.stmtCache).QueryContext(ctx)
|
||||
if err != nil {
|
||||
queryString, queryVars, _ := query.ToSql()
|
||||
return nil, fmt.Errorf("query failed [%s] %v: %w", queryString, queryVars, err)
|
||||
@@ -126,7 +126,7 @@ func (r *JobRepository) CountJobs(
|
||||
}
|
||||
|
||||
var count int
|
||||
if err := query.RunWith(r.DB).Scan(&count); err != nil {
|
||||
if err := query.RunWith(r.DB).QueryRowContext(ctx).Scan(&count); err != nil {
|
||||
return 0, fmt.Errorf("failed to count jobs: %w", err)
|
||||
}
|
||||
|
||||
|
||||
@@ -21,10 +21,11 @@ import (
|
||||
// is added to internal/repository/migrations/sqlite3/.
|
||||
//
|
||||
// Version history:
|
||||
// - Version 10: Current version
|
||||
// - Version 11: Optimize job table indexes (reduce from ~78 to 20)
|
||||
// - Version 10: Node table
|
||||
//
|
||||
// Migration files are embedded at build time from the migrations directory.
|
||||
const Version uint = 10
|
||||
const Version uint = 11
|
||||
|
||||
//go:embed migrations/*
|
||||
var migrationFiles embed.FS
|
||||
|
||||
@@ -0,0 +1,161 @@
|
||||
-- Migration 11 DOWN: Restore all indexes from migration 09
|
||||
-- Reverts the index optimization by dropping the 20 optimized indexes
|
||||
-- and recreating the original full set.
|
||||
|
||||
-- ============================================================
|
||||
-- Drop optimized indexes
|
||||
-- ============================================================
|
||||
|
||||
DROP INDEX IF EXISTS jobs_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_subcluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_user_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_project_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_arrayjobid;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_energy;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
|
||||
|
||||
-- ============================================================
|
||||
-- Recreate all indexes from migration 09
|
||||
-- ============================================================
|
||||
|
||||
-- Cluster Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
|
||||
|
||||
-- Cluster Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration_starttime ON job (cluster, duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime_duration ON job (cluster, start_time, duration);
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_user ON job (cluster, cluster_partition, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_project ON job (cluster, cluster_partition, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_shared ON job (cluster, cluster_partition, shared);
|
||||
|
||||
-- Cluster+Partition Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
|
||||
|
||||
-- Cluster+Partition Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration_starttime ON job (cluster, cluster_partition, duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime_duration ON job (cluster, cluster_partition, start_time, duration);
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||
-- Cluster+JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
|
||||
|
||||
-- Cluster+JobState Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime_duration ON job (cluster, job_state, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration_starttime ON job (cluster, job_state, duration, start_time);
|
||||
|
||||
-- Cluster+Shared Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_user ON job (cluster, shared, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_project ON job (cluster, shared, project);
|
||||
-- Cluster+Shared Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numnodes ON job (cluster, shared, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numhwthreads ON job (cluster, shared, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_numacc ON job (cluster, shared, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_energy ON job (cluster, shared, energy);
|
||||
|
||||
-- Cluster+Shared Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_starttime_duration ON job (cluster, shared, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_shared_duration_starttime ON job (cluster, shared, duration, start_time);
|
||||
|
||||
-- User Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
|
||||
|
||||
-- User Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime_duration ON job (hpc_user, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration_starttime ON job (hpc_user, duration, start_time);
|
||||
|
||||
-- Project Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||
-- Project Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
|
||||
|
||||
-- Project Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime_duration ON job (project, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration_starttime ON job (project, duration, start_time);
|
||||
|
||||
-- JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||
-- JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
|
||||
|
||||
-- JobState Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime_duration ON job (job_state, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration_starttime ON job (job_state, duration, start_time);
|
||||
|
||||
-- Shared Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_user ON job (shared, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_project ON job (shared, project);
|
||||
-- Shared Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_numnodes ON job (shared, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_numhwthreads ON job (shared, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_numacc ON job (shared, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_energy ON job (shared, energy);
|
||||
|
||||
-- Shared Time Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_starttime_duration ON job (shared, start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_shared_duration_starttime ON job (shared, duration, start_time);
|
||||
|
||||
-- ArrayJob Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||
|
||||
-- Single filters with duration sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime_duration ON job (start_time, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_duration ON job (num_nodes, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_duration ON job (num_hwthreads, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_duration ON job (num_acc, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_duration ON job (energy, duration);
|
||||
|
||||
-- Backup Indices For High Variety Columns
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
|
||||
-- Optimize DB index usage
|
||||
PRAGMA optimize;
|
||||
@@ -0,0 +1,221 @@
|
||||
-- Migration 11: Optimize job table indexes
|
||||
-- Reduces from ~78 indexes to 20 for better write performance,
|
||||
-- reduced disk usage, and more reliable query planner decisions.
|
||||
-- Requires ANALYZE to be run after migration (done automatically on startup).
|
||||
|
||||
-- ============================================================
|
||||
-- Drop ALL existing job indexes (from migrations 08/09)
|
||||
-- sqlite_autoindex_job_1 (UNIQUE constraint) is kept automatically
|
||||
-- ============================================================
|
||||
|
||||
-- Cluster Filter
|
||||
DROP INDEX IF EXISTS jobs_cluster_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_subcluster;
|
||||
-- Cluster Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_energy;
|
||||
-- Cluster Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_starttime_duration;
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_shared;
|
||||
-- Cluster+Partition Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
|
||||
-- Cluster+Partition Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_starttime_duration;
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
|
||||
-- Cluster+JobState Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;
|
||||
-- Cluster+JobState Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration_starttime;
|
||||
|
||||
-- Cluster+Shared Filter
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_project;
|
||||
-- Cluster+Shared Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_energy;
|
||||
-- Cluster+Shared Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_shared_duration_starttime;
|
||||
|
||||
-- User Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_user_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_user_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_user_numacc;
|
||||
DROP INDEX IF EXISTS jobs_user_energy;
|
||||
-- User Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_user_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_user_duration_starttime;
|
||||
|
||||
-- Project Filter
|
||||
DROP INDEX IF EXISTS jobs_project_user;
|
||||
-- Project Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_project_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_project_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_project_numacc;
|
||||
DROP INDEX IF EXISTS jobs_project_energy;
|
||||
-- Project Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_project_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_project_duration_starttime;
|
||||
|
||||
-- JobState Filter
|
||||
DROP INDEX IF EXISTS jobs_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_project;
|
||||
-- JobState Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numacc;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_energy;
|
||||
-- JobState Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_jobstate_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_duration_starttime;
|
||||
|
||||
-- Shared Filter
|
||||
DROP INDEX IF EXISTS jobs_shared_user;
|
||||
DROP INDEX IF EXISTS jobs_shared_project;
|
||||
-- Shared Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_shared_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_shared_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_shared_numacc;
|
||||
DROP INDEX IF EXISTS jobs_shared_energy;
|
||||
-- Shared Time Filter Sorting
|
||||
DROP INDEX IF EXISTS jobs_shared_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_shared_duration_starttime;
|
||||
|
||||
-- ArrayJob Filter
|
||||
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
DROP INDEX IF EXISTS jobs_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numnodes_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numhwthreads_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numacc_starttime;
|
||||
DROP INDEX IF EXISTS jobs_energy_starttime;
|
||||
|
||||
-- Single filters with duration sorting
|
||||
DROP INDEX IF EXISTS jobs_starttime_duration;
|
||||
DROP INDEX IF EXISTS jobs_numnodes_duration;
|
||||
DROP INDEX IF EXISTS jobs_numhwthreads_duration;
|
||||
DROP INDEX IF EXISTS jobs_numacc_duration;
|
||||
DROP INDEX IF EXISTS jobs_energy_duration;
|
||||
|
||||
-- Backup Indices
|
||||
DROP INDEX IF EXISTS jobs_starttime;
|
||||
DROP INDEX IF EXISTS jobs_duration;
|
||||
|
||||
-- Legacy indexes from migration 08 (may exist on older DBs)
|
||||
DROP INDEX IF EXISTS jobs_cluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_user;
|
||||
DROP INDEX IF EXISTS jobs_user_starttime;
|
||||
DROP INDEX IF EXISTS jobs_user_duration;
|
||||
DROP INDEX IF EXISTS jobs_project;
|
||||
DROP INDEX IF EXISTS jobs_project_starttime;
|
||||
DROP INDEX IF EXISTS jobs_project_duration;
|
||||
DROP INDEX IF EXISTS jobs_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_cluster;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_numacc;
|
||||
DROP INDEX IF EXISTS jobs_energy;
|
||||
|
||||
-- ============================================================
|
||||
-- Create optimized set of 20 indexes
|
||||
-- ============================================================
|
||||
|
||||
-- GROUP 1: Global sort (1 index)
|
||||
-- Default sort for unfiltered/multi-state IN queries, time range, delete-before
|
||||
CREATE INDEX jobs_starttime ON job (start_time);
|
||||
|
||||
-- GROUP 2: Cluster-prefixed (8 indexes)
|
||||
-- Cluster + default sort, concurrent jobs, time range within cluster
|
||||
CREATE INDEX jobs_cluster_starttime_duration ON job (cluster, start_time, duration);
|
||||
-- Cluster + sort by duration
|
||||
CREATE INDEX jobs_cluster_duration_starttime ON job (cluster, duration, start_time);
|
||||
-- COVERING for cluster+state aggregation; running jobs (cluster, state, duration>?)
|
||||
CREATE INDEX jobs_cluster_jobstate_duration_starttime ON job (cluster, job_state, duration, start_time);
|
||||
-- Cluster+state+sort start_time (single state equality)
|
||||
CREATE INDEX jobs_cluster_jobstate_starttime_duration ON job (cluster, job_state, start_time, duration);
|
||||
-- COVERING for GROUP BY user with cluster filter
|
||||
CREATE INDEX jobs_cluster_user ON job (cluster, hpc_user);
|
||||
-- GROUP BY project with cluster filter
|
||||
CREATE INDEX jobs_cluster_project ON job (cluster, project);
|
||||
-- GROUP BY subcluster with cluster filter
|
||||
CREATE INDEX jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster + sort by num_nodes (state filtered per-row, fast with LIMIT)
|
||||
CREATE INDEX jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
|
||||
-- GROUP 3: User-prefixed (1 index)
|
||||
-- Security filter (user role) + default sort
|
||||
CREATE INDEX jobs_user_starttime_duration ON job (hpc_user, start_time, duration);
|
||||
|
||||
-- GROUP 4: Project-prefixed (1 index)
|
||||
-- Security filter (manager role) + default sort
|
||||
CREATE INDEX jobs_project_starttime_duration ON job (project, start_time, duration);
|
||||
|
||||
-- GROUP 5: JobState-prefixed (3 indexes)
|
||||
-- State + project filter (for manager security within state query)
|
||||
CREATE INDEX jobs_jobstate_project ON job (job_state, project);
|
||||
-- State + user filter/aggregation
|
||||
CREATE INDEX jobs_jobstate_user ON job (job_state, hpc_user);
|
||||
-- COVERING for non-running jobs scan, state + sort duration
|
||||
CREATE INDEX jobs_jobstate_duration_starttime ON job (job_state, duration, start_time);
|
||||
|
||||
-- GROUP 6: Rare filters (1 index)
|
||||
-- Array job lookup
|
||||
CREATE INDEX jobs_arrayjobid ON job (array_job_id);
|
||||
|
||||
-- GROUP 7: Secondary sort columns (5 indexes)
|
||||
CREATE INDEX jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
|
||||
CREATE INDEX jobs_cluster_numacc ON job (cluster, num_acc);
|
||||
CREATE INDEX jobs_cluster_energy ON job (cluster, energy);
|
||||
-- Cluster+partition + sort start_time
|
||||
CREATE INDEX jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||
-- Cluster+partition+state filter
|
||||
CREATE INDEX jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
|
||||
-- Optimize DB index usage
|
||||
PRAGMA optimize;
|
||||
@@ -230,7 +230,7 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
|
||||
}
|
||||
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
rows, err := query.RunWith(r.DB).QueryContext(ctx)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while querying DB for job statistics")
|
||||
return nil, err
|
||||
@@ -355,7 +355,7 @@ func (r *JobRepository) JobsStats(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
row := query.RunWith(r.DB).QueryRow()
|
||||
row := query.RunWith(r.DB).QueryRowContext(ctx)
|
||||
stats := make([]*model.JobsStatistics, 0, 1)
|
||||
|
||||
var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
||||
@@ -440,7 +440,7 @@ func (r *JobRepository) JobCountGrouped(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
rows, err := query.RunWith(r.DB).QueryContext(ctx)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while querying DB for job statistics")
|
||||
return nil, err
|
||||
@@ -501,7 +501,7 @@ func (r *JobRepository) AddJobCountGrouped(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rows, err := query.RunWith(r.DB).Query()
|
||||
rows, err := query.RunWith(r.DB).QueryContext(ctx)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while querying DB for job statistics")
|
||||
return nil, err
|
||||
@@ -566,7 +566,7 @@ func (r *JobRepository) AddJobCount(
|
||||
return nil, err
|
||||
}
|
||||
var cnt sql.NullInt64
|
||||
if err := query.RunWith(r.DB).QueryRow().Scan(&cnt); err != nil {
|
||||
if err := query.RunWith(r.DB).QueryRowContext(ctx).Scan(&cnt); err != nil {
|
||||
cclog.Warn("Error while querying DB for job count")
|
||||
return nil, err
|
||||
}
|
||||
@@ -755,7 +755,7 @@ func (r *JobRepository) jobsStatisticsHistogram(
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
|
||||
rows, err := query.GroupBy("value").RunWith(r.DB).QueryContext(ctx)
|
||||
if err != nil {
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
@@ -829,7 +829,7 @@ func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
|
||||
rows, err := query.GroupBy("value").RunWith(r.DB).QueryContext(ctx)
|
||||
if err != nil {
|
||||
cclog.Error("Error while running query")
|
||||
return nil, err
|
||||
@@ -959,7 +959,7 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
|
||||
mainQuery = mainQuery.GroupBy("bin").OrderBy("bin")
|
||||
|
||||
rows, err := mainQuery.RunWith(r.DB).Query()
|
||||
rows, err := mainQuery.RunWith(r.DB).QueryContext(ctx)
|
||||
if err != nil {
|
||||
cclog.Errorf("Error while running mainQuery: %s", err)
|
||||
return nil, err
|
||||
|
||||
Reference in New Issue
Block a user