Revert hpc_cluster to cluster. Refactor.

This commit is contained in:
2025-10-16 14:32:06 +02:00
parent eefb6f6265
commit 440cd59e50
13 changed files with 96 additions and 157 deletions

View File

@@ -52,7 +52,7 @@ func GetJobRepository() *JobRepository {
}
var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.hpc_cluster", "job.subcluster",
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
"job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status",
"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
@@ -60,7 +60,7 @@ var jobColumns []string = []string{
}
var jobCacheColumns []string = []string{
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.hpc_cluster",
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
"job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt",
@@ -73,7 +73,7 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster,
&job.StartTime, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads,
&job.StartTime, &job.Partition, &job.ArrayJobID, &job.NumNodes, &job.NumHWThreads,
&job.NumAcc, &job.Shared, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
cclog.Warnf("Error while scanning rows (Job): %v", err)
@@ -390,7 +390,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.hpc_cluster = ?;`, cluster); err != nil {
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
return nil, 0, 1000
}
@@ -410,7 +410,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'").
Where("job.hpc_cluster = ?", cluster).
Where("job.cluster = ?", cluster).
RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
@@ -505,7 +505,7 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
// FIXME: Reconsider filtering short jobs with harcoded threshold
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.hpc_cluster = '%s'", cluster)).
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where("job.job_state = 'running'").
Where("job.duration > 600")
@@ -587,7 +587,7 @@ func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32
Where("job.id = ?", job)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
return err
}
func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {

View File

@@ -14,18 +14,18 @@ import (
)
const NamedJobCacheInsert string = `INSERT INTO job_cache (
job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
const NamedJobInsert string = `INSERT INTO job (
job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
@@ -70,7 +70,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
}
_, err = r.DB.Exec(
"INSERT INTO job (job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
if err != nil {
cclog.Warnf("Error while Job sync: %v", err)
return nil, err
@@ -120,7 +120,7 @@ func (r *JobRepository) Stop(
Where("job.id = ?", jobId)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
return err
}
func (r *JobRepository) StopCached(
@@ -136,5 +136,5 @@ func (r *JobRepository) StopCached(
Where("job.id = ?", jobId)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
return err
}

View File

@@ -31,7 +31,7 @@ func (r *JobRepository) Find(
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.hpc_cluster = ?", *cluster)
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
@@ -52,7 +52,7 @@ func (r *JobRepository) FindCached(
Where("job_cache.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job_cache.hpc_cluster = ?", *cluster)
q = q.Where("job_cache.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job_cache.start_time = ?", *startTime)
@@ -78,7 +78,7 @@ func (r *JobRepository) FindAll(
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.hpc_cluster = ?", *cluster)
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
@@ -183,7 +183,7 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
q := sq.Select(jobColumns...).
From("job").
Where("job.job_id = ?", jobId).
Where("job.hpc_cluster = ?", cluster).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
q, qerr := SecurityCheck(ctx, q)
@@ -203,7 +203,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl
From("job").
Where("job.job_id = ?", jobId).
Where("job.hpc_user = ?", user).
Where("job.hpc_cluster = ?", cluster).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())

View File

@@ -168,7 +168,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = buildMetaJsonCondition("jobName", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.hpc_cluster", filter.Cluster, query)
query = buildStringCondition("job.cluster", filter.Cluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.cluster_partition", filter.Partition, query)

View File

@@ -1,7 +1,7 @@
CREATE TABLE "job_cache" (
id INTEGER PRIMARY KEY,
job_id BIGINT NOT NULL,
hpc_cluster VARCHAR(255) NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
@@ -30,13 +30,13 @@ CREATE TABLE "job_cache" (
energy REAL NOT NULL DEFAULT 0.0,
energy_footprint TEXT DEFAULT NULL,
footprint TEXT DEFAULT NULL,
UNIQUE (job_id, hpc_cluster, start_time)
UNIQUE (job_id, cluster, start_time)
);
CREATE TABLE "job_new" (
id INTEGER PRIMARY KEY,
job_id BIGINT NOT NULL,
hpc_cluster TEXT NOT NULL,
cluster TEXT NOT NULL,
subcluster TEXT NOT NULL,
submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
@@ -65,10 +65,9 @@ CREATE TABLE "job_new" (
energy REAL NOT NULL DEFAULT 0.0,
energy_footprint TEXT DEFAULT NULL,
footprint TEXT DEFAULT NULL,
UNIQUE (job_id, hpc_cluster, start_time)
UNIQUE (job_id, cluster, start_time)
);
ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster;
CREATE TABLE IF NOT EXISTS lookup_exclusive (
id INTEGER PRIMARY KEY,
@@ -76,20 +75,43 @@ CREATE TABLE IF NOT EXISTS lookup_exclusive (
);
INSERT INTO lookup_exclusive (id, name) VALUES
(0, 'multi_user'),
(1, 'none'),
(2, 'single_user');
(0, 'multi_user'),
(1, 'none'),
(2, 'single_user');
INSERT INTO job_new (
id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project,
id, job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy,
energy_footprint, footprint
) SELECT
id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
num_nodes, num_hwthreads, num_acc, smt, (SELECT name FROM lookup_exclusive WHERE id=job.exclusive), monitoring_status, energy,
energy_footprint, footprint
id,
job_id,
cluster,
subcluster,
0,
start_time,
hpc_user,
project,
cluster_partition,
array_job_id,
duration,
walltime,
job_state,
meta_data,
resources,
num_nodes,
num_hwthreads,
num_acc,
smt,
(
SELECT name FROM lookup_exclusive
WHERE id = job.exclusive
),
monitoring_status,
energy,
energy_footprint,
footprint
FROM job;
DROP TABLE lookup_exclusive;

View File

@@ -27,104 +27,23 @@ CREATE TABLE "node_state" (
FOREIGN KEY (node_id) REFERENCES node (id)
);
-- DROP indices using old column name "cluster"
DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_numacc;
DROP INDEX IF EXISTS jobs_cluster_energy;
DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;
DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;
-- -- CREATE UPDATED indices with new column names
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (hpc_cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (hpc_cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (hpc_cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (hpc_cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (hpc_cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (hpc_cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (hpc_cluster, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (hpc_cluster, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (hpc_cluster, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (hpc_cluster, energy);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (hpc_cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (hpc_cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (hpc_cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (hpc_cluster, cluster_partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (hpc_cluster, cluster_partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (hpc_cluster, cluster_partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (hpc_cluster, cluster_partition, energy);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (hpc_cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (hpc_cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (hpc_cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (hpc_cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (hpc_cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (hpc_cluster, cluster_partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (hpc_cluster, cluster_partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (hpc_cluster, cluster_partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (hpc_cluster, cluster_partition, job_state, energy);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (hpc_cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (hpc_cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (hpc_cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (hpc_cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (hpc_cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (hpc_cluster, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (hpc_cluster, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (hpc_cluster, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (hpc_cluster, job_state, energy);
--- --- END UPDATE existing indices
-- Add NEW Indices For New Job Table Columns
CREATE INDEX IF NOT EXISTS jobs_cluster_submittime ON job (hpc_cluster, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_submittime ON job (hpc_cluster, cluster_partition, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_submittime ON job (hpc_cluster, cluster_partition, job_state, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_submittime ON job (hpc_cluster, job_state, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_submittime ON job (cluster, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_submittime ON job (cluster, cluster_partition, submit_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_submittime ON job (
cluster, cluster_partition, job_state, submit_time
);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_submittime ON job (cluster, job_state, submit_time);
-- Add NEW Indices For New Node Table VARCHAR Fields
CREATE INDEX IF NOT EXISTS nodes_cluster ON node (cluster);
CREATE INDEX IF NOT EXISTS nodes_cluster_subcluster ON node (cluster, subcluster);
-- Add NEW Indices For New Node_State Table Fields
CREATE INDEX IF NOT EXISTS nodeStates_state ON node_state (node_state);
CREATE INDEX IF NOT EXISTS nodeStates_health ON node_state (health_state);
CREATE INDEX IF NOT EXISTS nodeStates_nodeid_state ON node (node_id, node_state);
CREATE INDEX IF NOT EXISTS nodeStates_nodeid_health ON node (node_id, health_state);
CREATE INDEX IF NOT EXISTS nodestates_state ON node_state (node_state);
CREATE INDEX IF NOT EXISTS nodestates_health ON node_state (health_state);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_state ON node_state (node_id, node_state);
CREATE INDEX IF NOT EXISTS nodestates_nodeid_health ON node_state (node_id, health_state);
-- Add NEW Indices For Increased Amounts of Tags
CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id);

View File

@@ -23,7 +23,7 @@ import (
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.hpc_cluster",
model.AggregateCluster: "job.cluster",
model.AggregateSubcluster: "job.subcluster",
}

View File

@@ -79,15 +79,15 @@ func TestGetUIConfig(t *testing.T) {
t.Fatal("No config")
}
for key := range cfg {
print("%s\n", key)
}
// t.Fatal("No config")
tmp, exists := cfg["metricConfig_jobListMetrics"]
if exists {
// tmp := cfg["plot_list_selectedMetrics"]
// metrics := tmp.([]string)
// str := metrics[2]
// if str != "flops_any" {
// t.Errorf("wrong config\ngot: %s \nwant: flops_any", str)
// }
metrics := tmp.([]string)
str := metrics[2]
if str != "flops_any" {
t.Errorf("wrong config\ngot: %s \nwant: flops_any", str)
}
} else {
t.Fatal("Key metricConfig_jobListMetrics is missing")
}
}