Refactor package structure

Builds but not tested
This commit is contained in:
Jan Eitzinger
2022-09-05 17:46:38 +02:00
parent 26df1e7c14
commit fc76eed899
30 changed files with 1426 additions and 1027 deletions

View File

@@ -1,159 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// NamedJobInsert is the INSERT statement for the job table, written with
// named (`:column`) placeholders. ImportJob executes it via NamedExec with a
// schema.Job as the argument.
// NOTE(review): `partition` is backtick-quoted in the column list, presumably
// because it is a reserved word in some SQL dialects -- confirm.
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
// HandleImportFlag imports all jobs specified as
// `<path-to-meta.json>:<path-to-data.json>,...`.
//
// Every comma-separated entry must consist of exactly two colon-separated
// paths: the first is decoded into a schema.JobMeta (pre-filled with
// schema.JobDefaults), the second into a schema.JobData. Both files are
// decoded strictly, i.e. unknown JSON fields are rejected. Each decoded pair
// is passed to ImportJob.
//
// Processing stops at the first failure. The returned error wraps the
// underlying cause and names the offending entry or file.
func (r *JobRepository) HandleImportFlag(flag string) error {
	// decodeFile reads path and strictly decodes its JSON content into dst.
	decodeFile := func(path string, dst interface{}) error {
		raw, err := os.ReadFile(path)
		if err != nil {
			// os.ReadFile errors already carry the path.
			return err
		}

		dec := json.NewDecoder(bytes.NewReader(raw))
		dec.DisallowUnknownFields()
		if err := dec.Decode(dst); err != nil {
			// JSON errors do not mention the file, so add it here.
			return fmt.Errorf("decoding %q: %w", path, err)
		}
		return nil
	}

	for _, pair := range strings.Split(flag, ",") {
		files := strings.Split(pair, ":")
		if len(files) != 2 {
			return fmt.Errorf("invalid import flag format: %q", pair)
		}

		jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
		if err := decodeFile(files[0], &jobMeta); err != nil {
			return err
		}

		jobData := schema.JobData{}
		if err := decodeFile(files[1], &jobData); err != nil {
			return err
		}

		if err := r.ImportJob(&jobMeta, &jobData); err != nil {
			return fmt.Errorf("importing job from %q: %w", files[0], err)
		}
	}

	return nil
}
// ImportJob imports a single job described by jobMeta and jobData.
//
// The steps, in order:
//  1. mark the job as successfully archived and hand it to metricdata.ImportJob,
//  2. refuse the import if r.Find already returns a job for the same
//     jobId, cluster and startTime,
//  3. build the schema.Job row (including the averages taken from the job
//     statistics), validate it with SanityChecks and insert it,
//  4. attach every tag of the job via AddTagOrCreate.
//
// The first error encountered is returned unchanged.
func (r *JobRepository) ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) (err error) {
	jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
	if importErr := metricdata.ImportJob(jobMeta, jobData); importErr != nil {
		return importErr
	}

	// Only sql.ErrNoRows means "not in the database yet"; a nil error means
	// the job already exists and any other error is a lookup failure.
	existing, findErr := r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime)
	if findErr == nil {
		return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", existing.ID)
	}
	if findErr != sql.ErrNoRows {
		return findErr
	}

	job := schema.Job{
		BaseJob:       jobMeta.BaseJob,
		StartTime:     time.Unix(jobMeta.StartTime, 0),
		StartTimeUnix: jobMeta.StartTime,
	}

	// TODO: Other metrics...
	job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
	job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
	job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
	job.FileBwAvg = loadJobStat(jobMeta, "file_bw")

	if job.RawResources, err = json.Marshal(job.Resources); err != nil {
		return err
	}
	if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
		return err
	}

	if err = SanityChecks(&job.BaseJob); err != nil {
		return err
	}

	result, err := r.DB.NamedExec(NamedJobInsert, job)
	if err != nil {
		return err
	}

	dbID, err := result.LastInsertId()
	if err != nil {
		return err
	}

	for _, t := range job.Tags {
		if _, tagErr := r.AddTagOrCreate(dbID, t.Type, t.Name); tagErr != nil {
			return tagErr
		}
	}

	log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, dbID)
	return nil
}
// SanityChecks validates the basic fields of a job before it is stored.
// It also sets the subcluster if necessary (via config.AssignSubCluster)!
//
// An error is returned when the cluster is unknown, the subcluster cannot be
// assigned, the job state is invalid, 'resources' or 'user' are empty, one of
// the node/accelerator/hwthread counts is out of range, or the number of
// resources differs from numNodes.
func SanityChecks(job *schema.BaseJob) error {
	if config.GetCluster(job.Cluster) == nil {
		return fmt.Errorf("no such cluster: %#v", job.Cluster)
	}

	// Must run before the remaining checks, exactly as in the original order.
	if assignErr := config.AssignSubCluster(job); assignErr != nil {
		return assignErr
	}

	numResources := len(job.Resources)
	switch {
	case !job.State.Valid():
		return fmt.Errorf("not a valid job state: %#v", job.State)
	case numResources == 0 || len(job.User) == 0:
		return fmt.Errorf("'resources' and 'user' should not be empty")
	case job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1:
		return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
	case numResources != int(job.NumNodes):
		return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
	}

	return nil
}
// loadJobStat returns the average recorded for metric in the job's
// statistics, or 0.0 when the job has no entry for that metric.
func loadJobStat(job *schema.JobMeta, metric string) float64 {
	entry, found := job.Statistics[metric]
	if !found {
		return 0.0
	}
	return entry.Avg
}

View File

@@ -12,6 +12,7 @@ import (
"path/filepath"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/jmoiron/sqlx"
@@ -78,31 +79,32 @@ const JobsDbIndexes string = `
CREATE INDEX job_by_job_id ON job (job_id);
CREATE INDEX job_by_state ON job (job_state);
`
// NamedJobInsert is the INSERT statement for the job table, written with
// named (`:column`) placeholders; it is executed via NamedExec with a
// schema.Job as the argument.
// NOTE(review): `partition` is backtick-quoted in the column list, presumably
// because it is a reserved word in some SQL dialects -- confirm.
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
func InitDB(db *sqlx.DB, archive string) error {
func InitDB() error {
db := GetConnection()
starttime := time.Now()
log.Print("Building job table...")
// Basic database structure:
_, err := db.Exec(JobsDBSchema)
if err != nil {
return err
}
clustersDir, err := os.ReadDir(archive)
if err != nil {
return err
}
_, err := db.DB.Exec(JobsDBSchema)
if err != nil {
return err
}
// Inserts are bundled into transactions because in sqlite,
// that speeds up inserts A LOT.
tx, err := db.Beginx()
tx, err := db.DB.Beginx()
if err != nil {
return err
}
@@ -116,9 +118,12 @@ func InitDB(db *sqlx.DB, archive string) error {
// this function is only ever called when a special command line flag
// is passed anyways.
fmt.Printf("%d jobs inserted...\r", 0)
i := 0
tags := make(map[string]int64)
handleDirectory := func(filename string) error {
ar := archive.GetHandle()
i := 0
for jobMeta := range ar.Iter() {
// Bundle 100 inserts into one transaction for better performance:
if i%100 == 0 {
if tx != nil {
@@ -127,7 +132,7 @@ func InitDB(db *sqlx.DB, archive string) error {
}
}
tx, err = db.Beginx()
tx, err = db.DB.Beginx()
if err != nil {
return err
}
@@ -136,52 +141,65 @@ func InitDB(db *sqlx.DB, archive string) error {
fmt.Printf("%d jobs inserted...\r", i)
}
err := loadJob(tx, stmt, tags, filename)
if err == nil {
i += 1
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
return err
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
for _, clusterDir := range clustersDir {
lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return err
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
// Could be the cluster.json file
continue
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return err
}
lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
if err != nil {
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
return err
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
res, err := db.DB.NamedExec(NamedJobInsert, job)
if err != nil {
return err
}
id, err := res.LastInsertId()
if err != nil {
return err
}
for _, tag := range job.Tags {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
if !ok {
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil {
return err
}
// For compatibility with the old job-archive directory structure where
// there was no start time directory.
for _, startTimeDir := range startTimeDirs {
if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
if err := handleDirectory(dirpath); err != nil {
log.Errorf("in %s: %s", dirpath, err.Error())
}
} else if startTimeDir.IsDir() {
if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
}
}
tagId, err = res.LastInsertId()
if err != nil {
return err
}
tags[tagstr] = tagId
}
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
return err
}
}
if err == nil {
i += 1
}
}
@@ -191,7 +209,7 @@ func InitDB(db *sqlx.DB, archive string) error {
// Create indexes after inserts so that they do not
// need to be continually updated.
if _, err := db.Exec(JobsDbIndexes); err != nil {
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
return err
}
@@ -202,7 +220,10 @@ func InitDB(db *sqlx.DB, archive string) error {
// TODO: Remove double logic, use repository/import.go!
// Read the `meta.json` file at `path` and insert it into the database using the prepared
// insert statement `stmt`. `tags` maps all existing tags to their database ID.
func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path string) error {
func loadJob(tx *sqlx.Tx,
stmt *sqlx.NamedStmt,
tags map[string]int64,
path string) error {
f, err := os.Open(filepath.Join(path, "meta.json"))
if err != nil {
return err
@@ -273,3 +294,35 @@ func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path stri
return nil
}
// SanityChecks validates the basic fields of a job before it is stored.
// It also sets the subcluster if necessary (via archive.AssignSubCluster)!
//
// An error is returned when the cluster is unknown, the subcluster cannot be
// assigned, the job state is invalid, 'resources' or 'user' are empty, one of
// the node/accelerator/hwthread counts is out of range, or the number of
// resources differs from numNodes.
func SanityChecks(job *schema.BaseJob) error {
	if archive.GetCluster(job.Cluster) == nil {
		return fmt.Errorf("no such cluster: %#v", job.Cluster)
	}

	// Must run before the remaining checks, exactly as in the original order.
	if assignErr := archive.AssignSubCluster(job); assignErr != nil {
		return assignErr
	}

	numResources := len(job.Resources)
	switch {
	case !job.State.Valid():
		return fmt.Errorf("not a valid job state: %#v", job.State)
	case numResources == 0 || len(job.User) == 0:
		return fmt.Errorf("'resources' and 'user' should not be empty")
	case job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1:
		return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
	case numResources != int(job.NumNodes):
		return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
	}

	return nil
}
// loadJobStat returns the average recorded for metric in the job's
// statistics, or 0.0 when the job has no entry for that metric.
func loadJobStat(job *schema.JobMeta, metric string) float64 {
	entry, found := job.Statistics[metric]
	if !found {
		return 0.0
	}
	return entry.Avg
}

View File

@@ -35,7 +35,7 @@ type JobRepository struct {
cache *lrucache.Cache
}
func GetRepository() *JobRepository {
func GetJobRepository() *JobRepository {
jobRepoOnce.Do(func() {
db := GetConnection()

View File

@@ -19,7 +19,7 @@ func init() {
}
func setup(t *testing.T) *JobRepository {
return GetRepository()
return GetJobRepository()
}
func TestFind(t *testing.T) {

View File

@@ -5,7 +5,7 @@
package repository
import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
@@ -26,7 +26,7 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
return nil, err
}
return tags, metricdata.UpdateTags(j, tags)
return tags, archive.UpdateTags(j, tags)
}
// Removes a tag from a job
@@ -45,7 +45,7 @@ func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
return nil, err
}
return tags, metricdata.UpdateTags(j, tags)
return tags, archive.UpdateTags(j, tags)
}
// CreateTag creates a new tag with the specified type and name and returns its database id.

140
internal/repository/user.go Normal file
View File

@@ -0,0 +1,140 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"encoding/json"
"log"
"net/http"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/jmoiron/sqlx"
)
var (
// userCfgRepoOnce makes sure the setup in GetUserCfgRepo runs only once.
userCfgRepoOnce sync.Once
// userCfgRepoInstance is the shared instance returned by GetUserCfgRepo.
userCfgRepoInstance *UserCfgRepo
)
// UserCfgRepo provides access to the UI configuration: process-wide defaults
// held in memory plus per-user overrides stored in the `configuration` table.
type UserCfgRepo struct {
// DB is the database handle; UpdateConfig writes per-user values through it.
DB *sqlx.DB
// Lookup is the prepared statement selecting all (confkey, value) rows of
// one username.
Lookup *sqlx.Stmt
// lock is taken around accesses to uiDefaults in GetUIConfig and
// UpdateConfig.
lock sync.RWMutex
// uiDefaults is the default UI config; it is returned (as a copy) when no
// user is authenticated and is the base for every per-user config.
uiDefaults map[string]interface{}
// cache holds the merged per-user configs, keyed by username.
cache *lrucache.Cache
}
// GetUserCfgRepo returns the shared UserCfgRepo instance.
//
// On the first call it makes sure the `configuration` table exists, prepares
// the per-user lookup statement and creates the cache. Any database error
// during this setup terminates the process via log.Fatal.
func GetUserCfgRepo() *UserCfgRepo {
	userCfgRepoOnce.Do(func() {
		const createTable = `
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`
		const lookupQuery = `SELECT confkey, value FROM configuration WHERE configuration.username = ?`

		conn := GetConnection()

		if _, execErr := conn.DB.Exec(createTable); execErr != nil {
			log.Fatal(execErr)
		}

		stmt, prepErr := conn.DB.Preparex(lookupQuery)
		if prepErr != nil {
			log.Fatal(prepErr)
		}

		repo := new(UserCfgRepo)
		repo.DB = conn.DB
		repo.Lookup = stmt
		repo.cache = lrucache.New(1024)
		userCfgRepoInstance = repo
	})

	return userCfgRepoInstance
}
// GetUIConfig returns the personalised UI config for the currently
// authenticated user, or a copy of the plain default config when the request
// carries no user.
//
// For an authenticated user the defaults are overlaid with the rows stored
// for that username in the configuration table; every stored value is
// JSON-decoded. The merged config is handed to the cache under the username
// with a lifetime of 24 hours and a size estimate of the summed key and raw
// value lengths.
//
// The map returned for an authenticated user is whatever the cache hands
// back, so it may be shared between callers -- treat it as read-only.
//
// Any query, scan, iteration or JSON error is returned to the caller.
func (uCfg *UserCfgRepo) GetUIConfig(r *http.Request) (map[string]interface{}, error) {
	// snapshotDefaults copies uiDefaults under the read lock so that a
	// concurrent UpdateConfig cannot race with the iteration.
	snapshotDefaults := func() map[string]interface{} {
		uCfg.lock.RLock()
		defer uCfg.lock.RUnlock()

		snapshot := make(map[string]interface{}, len(uCfg.uiDefaults))
		for k, v := range uCfg.uiDefaults {
			snapshot[k] = v
		}
		return snapshot
	}

	user := auth.GetUser(r.Context())
	if user == nil {
		return snapshotDefaults(), nil
	}

	data := uCfg.cache.Get(user.Username, func() (interface{}, time.Duration, int) {
		config := snapshotDefaults()

		rows, err := uCfg.Lookup.Query(user.Username)
		if err != nil {
			return err, 0, 0
		}
		defer rows.Close()

		size := 0
		for rows.Next() {
			var key, rawval string
			if err := rows.Scan(&key, &rawval); err != nil {
				return err, 0, 0
			}

			var val interface{}
			if err := json.Unmarshal([]byte(rawval), &val); err != nil {
				return err, 0, 0
			}

			size += len(key)
			size += len(rawval)
			config[key] = val
		}
		// rows.Next returning false can also mean the iteration failed.
		if err := rows.Err(); err != nil {
			return err, 0, 0
		}

		return config, 24 * time.Hour, size
	})
	// Errors travel through the cache callback as the value itself.
	if err, ok := data.(error); ok {
		return nil, err
	}

	return data.(map[string]interface{}), nil
}
// UpdateConfig stores a single UI configuration entry.
//
// If the context does not have a user, the global (default) UI configuration
// is updated in memory only, without persisting it; value must then be valid
// JSON and is stored in decoded form. If there is an (authenticated) user,
// only that user's configuration row is written to the database (value is
// stored as given) and the user's cached config is dropped so the next
// GetUIConfig call reloads it.
//
// It returns the JSON decoding error or the database error, if any.
func (uCfg *UserCfgRepo) UpdateConfig(key, value string, ctx context.Context) error {
	user := auth.GetUser(ctx)
	if user == nil {
		var val interface{}
		if err := json.Unmarshal([]byte(value), &val); err != nil {
			return err
		}

		uCfg.lock.Lock()
		defer uCfg.lock.Unlock()
		// The defaults map may never have been initialised; writing to a nil
		// map would panic.
		if uCfg.uiDefaults == nil {
			uCfg.uiDefaults = make(map[string]interface{})
		}
		uCfg.uiDefaults[key] = val
		return nil
	}

	// The username column takes the user's name, not the user struct itself.
	if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
		user.Username, key, value); err != nil {
		return err
	}

	uCfg.cache.Del(user.Username)
	return nil
}