cc-backend/internal/repository/init.go

// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

import (
	"encoding/json"
	"fmt"
	"time"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// JobsDBSchema is the SQL script that drops any existing "jobtag", "job" and
// "tag" tables and recreates them empty. The "partition" column name is
// spliced in with backticks because it has to be quoted for MySQL.
//
// `AUTO_INCREMENT` is in a comment because of this hack:
// https://stackoverflow.com/a/41028314 (sqlite creates unique ids automatically)
const JobsDBSchema string = `
	DROP TABLE IF EXISTS jobtag;
	DROP TABLE IF EXISTS job;
	DROP TABLE IF EXISTS tag;

	CREATE TABLE job (
		id                INTEGER PRIMARY KEY /*!40101 AUTO_INCREMENT */,
		job_id            BIGINT NOT NULL,
		cluster           VARCHAR(255) NOT NULL,
		subcluster        VARCHAR(255) NOT NULL,
		start_time        BIGINT NOT NULL, -- Unix timestamp

		user              VARCHAR(255) NOT NULL,
		project           VARCHAR(255) NOT NULL,
		` + "`partition`" + ` VARCHAR(255) NOT NULL, -- partition is a keyword in mysql -.-
		array_job_id      BIGINT NOT NULL,
		duration          INT NOT NULL DEFAULT 0,
		walltime          INT NOT NULL DEFAULT 0,
		job_state         VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),
		meta_data         TEXT,          -- JSON
		resources         TEXT NOT NULL, -- JSON

		num_nodes         INT NOT NULL,
		num_hwthreads     INT NOT NULL,
		num_acc           INT NOT NULL,
		smt               TINYINT NOT NULL DEFAULT 1 CHECK(smt               IN (0, 1   )),
		exclusive         TINYINT NOT NULL DEFAULT 1 CHECK(exclusive         IN (0, 1, 2)),
		monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),

		mem_used_max        REAL NOT NULL DEFAULT 0.0,
		flops_any_avg       REAL NOT NULL DEFAULT 0.0,
		mem_bw_avg          REAL NOT NULL DEFAULT 0.0,
		load_avg            REAL NOT NULL DEFAULT 0.0,
		net_bw_avg          REAL NOT NULL DEFAULT 0.0,
		net_data_vol_total  REAL NOT NULL DEFAULT 0.0,
		file_bw_avg         REAL NOT NULL DEFAULT 0.0,
		file_data_vol_total REAL NOT NULL DEFAULT 0.0);

	CREATE TABLE tag (
		id       INTEGER PRIMARY KEY,
		tag_type VARCHAR(255) NOT NULL,
		tag_name VARCHAR(255) NOT NULL,
		CONSTRAINT be_unique UNIQUE (tag_type, tag_name));

	CREATE TABLE jobtag (
		job_id INTEGER,
		tag_id INTEGER,
		PRIMARY KEY (job_id, tag_id),
		FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
		FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);
`

// JobsDbIndexes is the SQL script that creates the secondary indexes on the
// job table (on user, start_time, job_id and job_state).
//
// Indexes are created after the job-archive is traversed for faster inserts.
const JobsDbIndexes string = `
	CREATE INDEX job_by_user      ON job (user);
	CREATE INDEX job_by_starttime ON job (start_time);
	CREATE INDEX job_by_job_id    ON job (job_id);
	CREATE INDEX job_by_state     ON job (job_state);
`
// NamedJobInsert is the INSERT statement for a single row of the job table.
// Every listed column is bound to a named parameter of the same name
// (":job_id", ":user", ...); the "id" column is not set by this statement.
// InitDB prepares it with PrepareNamed and executes it once per job.
const NamedJobInsert string = `INSERT INTO job (
	job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
	exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
	mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
	:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
	:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
	:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`

// InitDB deletes the tables "job", "tag" and "jobtag" from the database,
// recreates them from JobsDBSchema and repopulates them using the jobs found
// in the job archive (archive.GetHandle).
//
// A job that cannot be serialized, fails SanityChecks or cannot be inserted
// is logged and skipped; it does not abort the import. Errors from schema
// creation, transaction handling, tag inserts or index creation abort the
// import and are returned; in that case the transaction that is still open
// is rolled back. The indexes in JobsDbIndexes are created only after all
// jobs have been inserted.
func InitDB() error {
	// Number of successful job inserts bundled into one transaction.
	const batchSize = 10

	db := GetConnection()
	starttime := time.Now()
	log.Print("Building job table...")

	// Basic database structure:
	if _, err := db.DB.Exec(JobsDBSchema); err != nil {
		return fmt.Errorf("creating database schema: %w", err)
	}

	// Inserts are bundled into transactions because in sqlite,
	// that speeds up inserts A LOT.
	tx, err := db.DB.Beginx()
	if err != nil {
		return fmt.Errorf("beginning transaction: %w", err)
	}

	// tx is replaced at every batch boundary below, so the closure reads the
	// variable at return time and rolls back whichever transaction is still
	// open. After a successful Commit, Rollback only reports sql.ErrTxDone,
	// which is why its result is deliberately ignored.
	defer func() {
		if tx != nil {
			_ = tx.Rollback()
		}
	}()

	stmt, err := tx.PrepareNamed(NamedJobInsert)
	if err != nil {
		return fmt.Errorf("preparing job insert: %w", err)
	}
	tags := make(map[string]int64)

	// Not using log.Print because we want the line to end with `\r` and
	// this function is only ever called when a special command line flag
	// is passed anyways.
	fmt.Printf("%d jobs inserted...\r", 0)

	ar := archive.GetHandle()
	i := 0       // number of jobs inserted so far
	skipped := 0 // number of jobs that were logged and left out

	for jobMeta := range ar.Iter() {

		fmt.Printf("Import job %d\n", jobMeta.JobID)

		// Bundle batchSize inserts into one transaction for better
		// performance. i only advances on a successful insert, so after a
		// skipped job at a batch boundary this branch simply runs again.
		if i%batchSize == 0 {
			if err := tx.Commit(); err != nil {
				return fmt.Errorf("committing transaction: %w", err)
			}

			tx, err = db.DB.Beginx()
			if err != nil {
				return fmt.Errorf("beginning transaction: %w", err)
			}

			// Re-bind the prepared insert to the new transaction.
			stmt = tx.NamedStmt(stmt)
			fmt.Printf("%d jobs inserted...\r", i)
		}

		jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
		job := schema.Job{
			BaseJob:       jobMeta.BaseJob,
			StartTime:     time.Unix(jobMeta.StartTime, 0),
			StartTimeUnix: jobMeta.StartTime,
		}

		// TODO: Other metrics...
		job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
		job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
		job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
		job.FileBwAvg = loadJobStat(jobMeta, "file_bw")

		job.RawResources, err = json.Marshal(job.Resources)
		if err != nil {
			log.Errorf("repository InitDB(): job %d: marshaling resources: %v", jobMeta.JobID, err)
			skipped++
			continue
		}

		job.RawMetaData, err = json.Marshal(job.MetaData)
		if err != nil {
			log.Errorf("repository InitDB(): job %d: marshaling meta data: %v", jobMeta.JobID, err)
			skipped++
			continue
		}

		if err := SanityChecks(&job.BaseJob); err != nil {
			log.Errorf("repository InitDB(): job %d: sanity checks: %v", jobMeta.JobID, err)
			skipped++
			continue
		}

		res, err := stmt.Exec(job)
		if err != nil {
			log.Errorf("repository InitDB(): job %d: inserting job: %v", jobMeta.JobID, err)
			skipped++
			continue
		}

		id, err := res.LastInsertId()
		if err != nil {
			log.Errorf("repository InitDB(): job %d: reading inserted row id: %v", jobMeta.JobID, err)
			skipped++
			continue
		}

		for _, tag := range job.Tags {
			// tags caches the database id of every tag created so far, so
			// each distinct tag is inserted only once.
			tagstr := tag.Name + ":" + tag.Type
			tagId, ok := tags[tagstr]
			if !ok {
				res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
				if err != nil {
					return fmt.Errorf("inserting tag: %w", err)
				}
				tagId, err = res.LastInsertId()
				if err != nil {
					return fmt.Errorf("reading inserted tag id: %w", err)
				}
				tags[tagstr] = tagId
			}

			if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
				return fmt.Errorf("linking job to tag: %w", err)
			}
		}

		i++
	}

	if skipped > 0 {
		log.Errorf("repository InitDB(): %d jobs could not be imported", skipped)
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("committing transaction: %w", err)
	}

	// Create indexes after inserts so that they do not
	// need to be continually updated.
	if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
		return fmt.Errorf("creating indexes: %w", err)
	}

	log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
	return nil
}

// SanityChecks validates the given job and returns an error describing the
// first check that fails, or nil if all checks pass. The checks run in this
// order: the cluster must be known to the archive, a subcluster must be
// assignable, the job state must be valid, 'resources' and 'user' must be
// non-empty, the node/accelerator/hwthread counts must be in range, and the
// number of resources must equal numNodes.
//
// Note that this function also sets the subcluster if necessary (through
// archive.AssignSubCluster), so it may modify the job it is given.
func SanityChecks(job *schema.BaseJob) error {
	if archive.GetCluster(job.Cluster) == nil {
		return fmt.Errorf("no such cluster: %#v", job.Cluster)
	}

	if assignErr := archive.AssignSubCluster(job); assignErr != nil {
		return assignErr
	}

	// Cases are evaluated top to bottom; the first one that matches wins.
	switch numResources := len(job.Resources); {
	case !job.State.Valid():
		return fmt.Errorf("not a valid job state: %#v", job.State)
	case numResources == 0 || len(job.User) == 0:
		return fmt.Errorf("'resources' and 'user' should not be empty")
	case job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1:
		return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
	case numResources != int(job.NumNodes):
		return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", numResources, job.NumNodes)
	}

	return nil
}

// loadJobStat returns the Avg value stored in job.Statistics for the given
// metric, or 0.0 when the job has no entry for that metric.
func loadJobStat(job *schema.JobMeta, metric string) float64 {
	entry, found := job.Statistics[metric]
	if !found {
		return 0.0
	}

	return entry.Avg
}
Add copyright and license header. Update license year 2022-07-29 06:29:21 +02:00			`// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.`
			`// All rights reserved.`
			`// Use of this source code is governed by a MIT-style`
			`// license that can be found in the LICENSE file.`
cleanup and comments 2022-03-15 08:29:29 +01:00			`package repository`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00
			`import (`
			`"encoding/json"`
Massive speedup in job table initialization It went from taking like 15 minutes on a ramdisk to taking 430 seconds on my SSD (~900000 jobs inserted). - Create indexes after inserts so that they do not need to be updated continually - Use prepared statements for the job insert - Bundle 200 job inserts into a single transaction 2021-10-20 09:30:50 +02:00			`"fmt"`
			`"time"`

Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`"github.com/ClusterCockpit/cc-backend/pkg/archive"`
Refactor directory structure 2022-06-21 17:52:36 +02:00			`"github.com/ClusterCockpit/cc-backend/pkg/log"`
			`"github.com/ClusterCockpit/cc-backend/pkg/schema"`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`)`

make database schema mysql compatible; use prepared statements 2022-01-20 10:00:55 +01:00			// `AUTO_INCREMENT` is in a comment because of this hack:
			`// https://stackoverflow.com/a/41028314 (sqlite creates unique ids automatically)`
cleanup and comments 2022-03-15 08:29:29 +01:00			const JobsDBSchema string = `
make database schema mysql compatible; use prepared statements 2022-01-20 10:00:55 +01:00			`DROP TABLE IF EXISTS jobtag;`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`DROP TABLE IF EXISTS job;`
			`DROP TABLE IF EXISTS tag;`

			`CREATE TABLE job (`
make database schema mysql compatible; use prepared statements 2022-01-20 10:00:55 +01:00			`id INTEGER PRIMARY KEY /!40101 AUTO_INCREMENT /,`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00			`job_id BIGINT NOT NULL,`
			`cluster VARCHAR(255) NOT NULL,`
Add subcluster and walltime to Job types 2022-03-14 09:08:02 +01:00			`subcluster VARCHAR(255) NOT NULL,`
make database schema mysql compatible; use prepared statements 2022-01-20 10:00:55 +01:00			`start_time BIGINT NOT NULL, -- Unix timestamp`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00
			`user VARCHAR(255) NOT NULL,`
			`project VARCHAR(255) NOT NULL,`
make database schema mysql compatible; use prepared statements 2022-01-20 10:00:55 +01:00			` + "`partition`" + ` VARCHAR(255) NOT NULL, -- partition is a keyword in mysql -.-
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00			`array_job_id BIGINT NOT NULL,`
cleanup and comments 2022-03-15 08:29:29 +01:00			`duration INT NOT NULL DEFAULT 0,`
			`walltime INT NOT NULL DEFAULT 0,`
Add and correct job states 2022-02-17 08:56:37 +01:00			`job_state VARCHAR(255) NOT NULL CHECK(job_state IN ('running', 'completed', 'failed', 'cancelled', 'stopped', 'timeout', 'preempted', 'out_of_memory')),`
make database schema mysql compatible; use prepared statements 2022-01-20 10:00:55 +01:00			`meta_data TEXT, -- JSON`
			`resources TEXT NOT NULL, -- JSON`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00
			`num_nodes INT NOT NULL,`
			`num_hwthreads INT NOT NULL,`
			`num_acc INT NOT NULL,`
make database schema mysql compatible; use prepared statements 2022-01-20 10:00:55 +01:00			`smt TINYINT NOT NULL DEFAULT 1 CHECK(smt IN (0, 1 )),`
			`exclusive TINYINT NOT NULL DEFAULT 1 CHECK(exclusive IN (0, 1, 2)),`
fix stop_job returned state; handle monitoring status 2022-02-15 14:25:39 +01:00			`monitoring_status TINYINT NOT NULL DEFAULT 1 CHECK(monitoring_status IN (0, 1, 2, 3)),`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00
			`mem_used_max REAL NOT NULL DEFAULT 0.0,`
			`flops_any_avg REAL NOT NULL DEFAULT 0.0,`
			`mem_bw_avg REAL NOT NULL DEFAULT 0.0,`
			`load_avg REAL NOT NULL DEFAULT 0.0,`
			`net_bw_avg REAL NOT NULL DEFAULT 0.0,`
			`net_data_vol_total REAL NOT NULL DEFAULT 0.0,`
			`file_bw_avg REAL NOT NULL DEFAULT 0.0,`
			`file_data_vol_total REAL NOT NULL DEFAULT 0.0);`

Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`CREATE TABLE tag (`
			`id INTEGER PRIMARY KEY,`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00			`tag_type VARCHAR(255) NOT NULL,`
Create tags if needed 2022-02-08 12:49:28 +01:00			`tag_name VARCHAR(255) NOT NULL,`
			`CONSTRAINT be_unique UNIQUE (tag_type, tag_name));`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`CREATE TABLE jobtag (`
			`job_id INTEGER,`
			`tag_id INTEGER,`
			`PRIMARY KEY (job_id, tag_id),`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00			`FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,`
			`FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);`
			`

cleanup and comments 2022-03-15 08:29:29 +01:00			`// Indexes are created after the job-archive is traversed for faster inserts.`
			const JobsDbIndexes string = `
Add /search endpoint which redirects to user/job 2022-02-09 15:03:12 +01:00			`CREATE INDEX job_by_user ON job (user);`
			`CREATE INDEX job_by_starttime ON job (start_time);`
			`CREATE INDEX job_by_job_id ON job (job_id);`
Minor fixes; Update frontend 2022-03-08 10:33:56 +01:00			`CREATE INDEX job_by_state ON job (job_state);`
Add /search endpoint which redirects to user/job 2022-02-09 15:03:12 +01:00			`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			const NamedJobInsert string = `INSERT INTO job (
			job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
			`exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,`
			`mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total`
			`) VALUES (`
			`:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,`
			`:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,`
			`:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total`
			);`
Add /search endpoint which redirects to user/job 2022-02-09 15:03:12 +01:00
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00			`// Delete the tables "job", "tag" and "jobtag" from the database and`
			// repopulate them using the jobs found in `archive`.
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`func InitDB() error {`
			`db := GetConnection()`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00			`starttime := time.Now()`
better logging 2022-01-31 15:14:37 +01:00			`log.Print("Building job table...")`
BC: new schemas for basically everything 2021-12-16 13:17:48 +01:00
			`// Basic database structure:`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`_, err := db.DB.Exec(JobsDBSchema)`
Massive speedup in job table initialization It went from taking like 15 minutes on a ramdisk to taking 430 seconds on my SSD (~900000 jobs inserted). - Create indexes after inserts so that they do not need to be updated continually - Use prepared statements for the job insert - Bundle 200 job inserts into a single transaction 2021-10-20 09:30:50 +02:00			`if err != nil {`
			`return err`
			`}`

cleanup and comments 2022-03-15 08:29:29 +01:00			`// Inserts are bundled into transactions because in sqlite,`
			`// that speeds up inserts A LOT.`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`tx, err := db.DB.Beginx()`
support the new job archive directory structure 2021-12-08 10:08:41 +01:00			`if err != nil {`
			`return err`
			`}`
all schemas new 2021-12-17 15:49:22 +01:00
cleanup and comments 2022-03-15 08:29:29 +01:00			`stmt, err := tx.PrepareNamed(NamedJobInsert)`
all schemas new 2021-12-17 15:49:22 +01:00			`if err != nil {`
			`return err`
			`}`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`tags := make(map[string]int64)`
all schemas new 2021-12-17 15:49:22 +01:00
cleanup and comments 2022-03-15 08:29:29 +01:00			// Not using log.Print because we want the line to end with `\r` and
			`// this function is only ever called when a special command line flag`
			`// is passed anyways.`
better logging 2022-01-31 15:14:37 +01:00			`fmt.Printf("%d jobs inserted...\r", 0)`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00
			`ar := archive.GetHandle()`
			`i := 0`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`errorOccured := false`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00
			`for jobMeta := range ar.Iter() {`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00
			`fmt.Printf("Import job %d\n", jobMeta.JobID)`
			`// // Bundle 100 inserts into one transaction for better performance:`
			`if i%10 == 0 {`
support the new job archive directory structure 2021-12-08 10:08:41 +01:00			`if tx != nil {`
			`if err := tx.Commit(); err != nil {`
			`return err`
			`}`
			`}`

Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`tx, err = db.DB.Beginx()`
support the new job archive directory structure 2021-12-08 10:08:41 +01:00			`if err != nil {`
			`return err`
			`}`

all schemas new 2021-12-17 15:49:22 +01:00			`stmt = tx.NamedStmt(stmt)`
support the new job archive directory structure 2021-12-08 10:08:41 +01:00			`fmt.Printf("%d jobs inserted...\r", i)`
			`}`

Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful`
			`job := schema.Job{`
			`BaseJob: jobMeta.BaseJob,`
			`StartTime: time.Unix(jobMeta.StartTime, 0),`
			`StartTimeUnix: jobMeta.StartTime,`
support the new job archive directory structure 2021-12-08 10:08:41 +01:00			`}`

Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`// TODO: Other metrics...`
			`job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")`
			`job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")`
			`job.NetBwAvg = loadJobStat(jobMeta, "net_bw")`
			`job.FileBwAvg = loadJobStat(jobMeta, "file_bw")`
support the new job archive directory structure 2021-12-08 10:08:41 +01:00
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`job.RawResources, err = json.Marshal(job.Resources)`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`if err != nil {`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`log.Errorf("fsBackend LoadClusterCfg()- %v", err)`
			`errorOccured = true`
			`continue`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`}`

Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`job.RawMetaData, err = json.Marshal(job.MetaData)`
			`if err != nil {`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`log.Errorf("fsBackend LoadClusterCfg()- %v", err)`
			`errorOccured = true`
			`continue`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`}`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`if err := SanityChecks(&job.BaseJob); err != nil {`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`log.Errorf("fsBackend LoadClusterCfg()- %v", err)`
			`errorOccured = true`
			`continue`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`}`

Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`res, err := stmt.Exec(job)`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`if err != nil {`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`log.Errorf("fsBackend LoadClusterCfg()- %v", err)`
			`errorOccured = true`
			`continue`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`}`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`id, err := res.LastInsertId()`
			`if err != nil {`
Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`log.Errorf("fsBackend LoadClusterCfg()- %v", err)`
			`errorOccured = true`
			`continue`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`}`

			`for _, tag := range job.Tags {`
			`tagstr := tag.Name + ":" + tag.Type`
			`tagId, ok := tags[tagstr]`
			`if !ok {`
			res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
support the new job archive directory structure 2021-12-08 10:08:41 +01:00			`if err != nil {`
			`return err`
Massive speedup in job table initialization It went from taking like 15 minutes on a ramdisk to taking 430 seconds on my SSD (~900000 jobs inserted). - Create indexes after inserts so that they do not need to be updated continually - Use prepared statements for the job insert - Bundle 200 job inserts into a single transaction 2021-10-20 09:30:50 +02:00			`}`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`tagId, err = res.LastInsertId()`
			`if err != nil {`
			`return err`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`}`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`tags[tagstr] = tagId`
			`}`

			if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
			`return err`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`}`
			`}`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00
			`if err == nil {`
			`i += 1`
			`}`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`}`

Cleanup and improve error handling 2022-09-11 07:13:08 +02:00			`if errorOccured {`
			`log.Errorf("An error occured!")`
			`}`

Massive speedup in job table initialization It went from taking like 15 minutes on a ramdisk to taking 430 seconds on my SSD (~900000 jobs inserted). - Create indexes after inserts so that they do not need to be updated continually - Use prepared statements for the job insert - Bundle 200 job inserts into a single transaction 2021-10-20 09:30:50 +02:00			`if err := tx.Commit(); err != nil {`
			`return err`
			`}`

			`// Create indexes after inserts so that they do not`
			`// need to be continually updated.`
Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`if _, err := db.DB.Exec(JobsDbIndexes); err != nil {`
Massive speedup in job table initialization It went from taking like 15 minutes on a ramdisk to taking 430 seconds on my SSD (~900000 jobs inserted). - Create indexes after inserts so that they do not need to be updated continually - Use prepared statements for the job insert - Bundle 200 job inserts into a single transaction 2021-10-20 09:30:50 +02:00			`return err`
			`}`

support the new job archive directory structure 2021-12-08 10:08:41 +01:00			`log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())`
Optionally initialise new db from JSON files 2021-10-11 11:11:14 +02:00			`return nil`
			`}`

Refactor package structure Builds but not tested 2022-09-05 17:46:38 +02:00			`// This function also sets the subcluster if necessary!`
			`func SanityChecks(job *schema.BaseJob) error {`
			`if c := archive.GetCluster(job.Cluster); c == nil {`
			`return fmt.Errorf("no such cluster: %#v", job.Cluster)`
			`}`
			`if err := archive.AssignSubCluster(job); err != nil {`
			`return err`
			`}`
			`if !job.State.Valid() {`
			`return fmt.Errorf("not a valid job state: %#v", job.State)`
			`}`
			`if len(job.Resources) == 0 \|\| len(job.User) == 0 {`
			`return fmt.Errorf("'resources' and 'user' should not be empty")`
			`}`
			`if job.NumAcc < 0 \|\| job.NumHWThreads < 0 \|\| job.NumNodes < 1 {`
			`return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")`
			`}`
			`if len(job.Resources) != int(job.NumNodes) {`
			`return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)`
			`}`

			`return nil`
			`}`

			`func loadJobStat(job *schema.JobMeta, metric string) float64 {`
			`if stats, ok := job.Statistics[metric]; ok {`
			`return stats.Avg`
			`}`

			`return 0.0`
			`}`