package main

import (
	"bufio"
	"database/sql"
	"encoding/json"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"time"

	"github.com/ClusterCockpit/cc-jobarchive/schema"
	"github.com/jmoiron/sqlx"
)

const JOBS_DB_SCHEMA string = `
	DROP TABLE IF EXISTS job;
	DROP TABLE IF EXISTS tag;
	DROP TABLE IF EXISTS jobtag;

	CREATE TABLE job (
		id                INTEGER PRIMARY KEY AUTOINCREMENT, -- Not needed in sqlite
		job_id            BIGINT NOT NULL,
		cluster           VARCHAR(255) NOT NULL,
		start_time        BIGINT NOT NULL,

		user              VARCHAR(255) NOT NULL,
		project           VARCHAR(255) NOT NULL,
		partition         VARCHAR(255) NOT NULL,
		array_job_id      BIGINT NOT NULL,
		duration          INT,
		job_state         VARCHAR(255) CHECK(job_state IN ('running', 'completed', 'failed', 'canceled', 'stopped', 'timeout')) NOT NULL,
		meta_data         TEXT,          -- JSON, but sqlite has no JSON type
		resources         TEXT NOT NULL, -- JSON, but sqlite has no JSON type

		num_nodes         INT NOT NULL,
		num_hwthreads     INT NOT NULL,
		num_acc           INT NOT NULL,
		smt               TINYINT CHECK(smt               IN (0, 1   )) NOT NULL DEFAULT 1,
		exclusive         TINYINT CHECK(exclusive         IN (0, 1, 2)) NOT NULL DEFAULT 1,
		monitoring_status TINYINT CHECK(monitoring_status IN (0, 1   )) NOT NULL DEFAULT 1,

		mem_used_max        REAL NOT NULL DEFAULT 0.0,
		flops_any_avg       REAL NOT NULL DEFAULT 0.0,
		mem_bw_avg          REAL NOT NULL DEFAULT 0.0,
		load_avg            REAL NOT NULL DEFAULT 0.0,
		net_bw_avg          REAL NOT NULL DEFAULT 0.0,
		net_data_vol_total  REAL NOT NULL DEFAULT 0.0,
		file_bw_avg         REAL NOT NULL DEFAULT 0.0,
		file_data_vol_total REAL NOT NULL DEFAULT 0.0);

	CREATE TABLE tag (
		id       INTEGER PRIMARY KEY,
		tag_type VARCHAR(255) NOT NULL,
		tag_name VARCHAR(255) NOT NULL);

	CREATE TABLE jobtag (
		job_id INTEGER,
		tag_id INTEGER,
		PRIMARY KEY (job_id, tag_id),
		FOREIGN KEY (job_id) REFERENCES job (id) ON DELETE CASCADE,
		FOREIGN KEY (tag_id) REFERENCES tag (id) ON DELETE CASCADE);`
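// NOTE: SQLite enforces the ON DELETE CASCADE clauses in the schema above
// only if foreign key support is switched on for the connection. A minimal
// sketch of how the database could be opened (the "sqlite3" driver name and
// the `_foreign_keys` DSN flag assume the mattn/go-sqlite3 driver; adjust
// for whichever driver is actually registered):
//
//	db, err := sqlx.Open("sqlite3", "./var/job.db?_foreign_keys=on")
//	if err != nil {
//		log.Fatal(err)
//	}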
// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
func initDB(db *sqlx.DB, archive string) error {
	starttime := time.Now()
	fmt.Println("Building database...")

	// Basic database structure:
	_, err := db.Exec(JOBS_DB_SCHEMA)
	if err != nil {
		return err
	}

	clustersDir, err := os.ReadDir(archive)
	if err != nil {
		return err
	}

	insertstmt, err := db.Prepare(`INSERT INTO job (
		job_id, cluster, start_time,
		user, project, partition, array_job_id, duration, job_state, meta_data, resources,
		num_nodes, num_hwthreads, num_acc, smt, exclusive, monitoring_status,
		flops_any_avg, mem_bw_avg
	) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);`)
	if err != nil {
		return err
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}

	i := 0
	tags := make(map[string]int64)
	handleDirectory := func(filename string) error {
		// Bundle 100 inserts into one transaction for better performance:
		if i%100 == 0 {
			if tx != nil {
				if err := tx.Commit(); err != nil {
					return err
				}
			}

			tx, err = db.Begin()
			if err != nil {
				return err
			}

			insertstmt = tx.Stmt(insertstmt)
			fmt.Printf("%d jobs inserted...\r", i)
		}

		err := loadJob(tx, insertstmt, tags, filename)
		if err == nil {
			i += 1
		}

		return err
	}

	for _, clusterDir := range clustersDir {
		lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
		if err != nil {
			return err
		}

		for _, lvl1Dir := range lvl1Dirs {
			if !lvl1Dir.IsDir() {
				// Could be the cluster.json file
				continue
			}

			lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
			if err != nil {
				return err
			}

			for _, lvl2Dir := range lvl2Dirs {
				dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
				startTimeDirs, err := os.ReadDir(dirpath)
				if err != nil {
					return err
				}

				for _, startTimeDir := range startTimeDirs {
					if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
						if err := handleDirectory(dirpath); err != nil {
							log.Printf("in %s: %s\n", dirpath, err.Error())
						}
					} else if startTimeDir.IsDir() {
						if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
							log.Printf("in %s: %s\n", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
						}
					}
				}
			}
		}
	}

	if err := tx.Commit(); err != nil {
		return err
	}

	// Create indexes after the inserts so that they do not
	// need to be continually updated while loading:
	if _, err := db.Exec(`
		CREATE INDEX job_by_user ON job (user);
		CREATE INDEX job_by_starttime ON job (start_time);`); err != nil {
		return err
	}

	log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
	return nil
}
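// Example usage of initDB (a sketch; the driver name and both paths are
// illustrative placeholders, not taken from this repository's actual wiring):
//
//	db, err := sqlx.Open("sqlite3", "./var/job.db")
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer db.Close()
//
//	if err := initDB(db, "./var/job-archive"); err != nil {
//		log.Fatal(err)
//	}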
// Read the `meta.json` file at `path` and insert the job into the database
// using the prepared insert statement `stmt`. `tags` maps all existing tags
// to their database ID.
func loadJob(tx *sql.Tx, stmt *sql.Stmt, tags map[string]int64, path string) error {
	f, err := os.Open(filepath.Join(path, "meta.json"))
	if err != nil {
		return err
	}
	defer f.Close()

	var job schema.JobMeta = schema.JobMeta{
		Exclusive: 1,
	}
	if err := json.NewDecoder(bufio.NewReader(f)).Decode(&job); err != nil {
		return err
	}

	// TODO: Other metrics...
	flopsAnyAvg := loadJobStat(&job, "flops_any")
	memBwAvg := loadJobStat(&job, "mem_bw")

	resources, err := json.Marshal(job.Resources)
	if err != nil {
		return err
	}

	res, err := stmt.Exec(
		job.JobId, job.Cluster, job.StartTime,
		job.User, job.Project, job.Partition, job.ArrayJobId, job.Duration, job.JobState, job.MetaData, string(resources),
		job.NumNodes, job.NumHWThreads, job.NumAcc, job.SMT, job.Exclusive, job.MonitoringStatus,
		flopsAnyAvg, memBwAvg)
	if err != nil {
		return err
	}

	id, err := res.LastInsertId()
	if err != nil {
		return err
	}

	for _, tag := range job.Tags {
		tagstr := tag.Name + ":" + tag.Type
		tagId, ok := tags[tagstr]
		if !ok {
			res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
			if err != nil {
				return err
			}

			tagId, err = res.LastInsertId()
			if err != nil {
				return err
			}

			tags[tagstr] = tagId
		}

		if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
			return err
		}
	}

	return nil
}

// Return the average of the given metric's job statistics as a nullable
// float, so that jobs missing a metric store NULL instead of a fake zero.
func loadJobStat(job *schema.JobMeta, metric string) sql.NullFloat64 {
	val := sql.NullFloat64{Valid: false}
	if stats, ok := job.Statistics[metric]; ok {
		val.Valid = true
		val.Float64 = stats.Avg
	}

	return val
}
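// Once the database is populated, the indexed columns can be queried
// efficiently. A small sqlx sketch (the user name and time window below are
// placeholders):
//
//	var jobIds []int64
//	err := db.Select(&jobIds,
//		`SELECT job_id FROM job WHERE user = ? AND start_time > ?`,
//		"alice", time.Now().Add(-24*time.Hour).Unix())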