mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-07-23 04:51:39 +02:00
Refactor package structure
Builds but not tested
This commit is contained in:
@@ -21,11 +21,11 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/gorilla/mux"
|
||||
@@ -46,12 +46,11 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/stop_job/", api.stopJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/stop_job/{id}", api.stopJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
|
||||
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
|
||||
|
||||
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||
// r.HandleFunc("/jobs/{id}", api.getJob).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
|
||||
|
||||
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
|
||||
|
||||
if api.Authentication != nil {
|
||||
@@ -198,7 +197,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
|
||||
res.Statistics, err = metricdata.GetStatistics(job)
|
||||
res.Statistics, err = archive.GetStatistics(job)
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
@@ -425,28 +424,28 @@ func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
|
||||
}()
|
||||
}
|
||||
|
||||
func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
// func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
|
||||
// handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
|
||||
// return
|
||||
// }
|
||||
|
||||
var body struct {
|
||||
Meta *schema.JobMeta `json:"meta"`
|
||||
Data *schema.JobData `json:"data"`
|
||||
}
|
||||
if err := decode(r.Body, &body); err != nil {
|
||||
handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
// var body struct {
|
||||
// Meta *schema.JobMeta `json:"meta"`
|
||||
// Data *schema.JobData `json:"data"`
|
||||
// }
|
||||
// if err := decode(r.Body, &body); err != nil {
|
||||
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusBadRequest, rw)
|
||||
// return
|
||||
// }
|
||||
|
||||
if err := api.JobRepository.ImportJob(body.Meta, body.Data); err != nil {
|
||||
handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
// if err := api.JobRepository.ImportJob(body.Meta, body.Data); err != nil {
|
||||
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusUnprocessableEntity, rw)
|
||||
// return
|
||||
// }
|
||||
|
||||
rw.Write([]byte(`{ "status": "OK" }`))
|
||||
}
|
||||
// rw.Write([]byte(`{ "status": "OK" }`))
|
||||
// }
|
||||
|
||||
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
id := mux.Vars(r)["id"]
|
||||
@@ -596,7 +595,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
|
||||
|
||||
fmt.Printf("KEY: %#v\nVALUE: %#v\n", key, value)
|
||||
|
||||
if err := config.UpdateConfig(key, value, r.Context()); err != nil {
|
||||
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, r.Context()); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
|
@@ -5,298 +5,123 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
var db *sqlx.DB
|
||||
var lookupConfigStmt *sqlx.Stmt
|
||||
type Cluster struct {
|
||||
Name string `json:"name"`
|
||||
FilterRanges *model.FilterRanges `json:"filterRanges"`
|
||||
MetricDataRepository json.RawMessage `json:"metricDataRepository"`
|
||||
}
|
||||
|
||||
var lock sync.RWMutex
|
||||
var uiDefaults map[string]interface{}
|
||||
// Format of the configuration (file). See below for the defaults.
|
||||
type ProgramConfig struct {
|
||||
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||
Addr string `json:"addr"`
|
||||
|
||||
var cache *lrucache.Cache = lrucache.New(1024)
|
||||
// Drop root permissions once .env was read and the port was taken.
|
||||
User string `json:"user"`
|
||||
Group string `json:"group"`
|
||||
|
||||
var Clusters []*model.Cluster
|
||||
var nodeLists map[string]map[string]NodeList
|
||||
// Disable authentication (for everything: API, Web-UI, ...)
|
||||
DisableAuthentication bool `json:"disable-authentication"`
|
||||
|
||||
func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, jobArchive string) error {
|
||||
db = usersdb
|
||||
uiDefaults = uiConfig
|
||||
entries, err := os.ReadDir(jobArchive)
|
||||
// If `embed-static-files` is true (default), the frontend files are directly
|
||||
// embeded into the go binary and expected to be in web/frontend. Only if
|
||||
// it is false the files in `static-files` are served instead.
|
||||
EmbedStaticFiles bool `json:"embed-static-files"`
|
||||
StaticFiles string `json:"static-files"`
|
||||
|
||||
// 'sqlite3' or 'mysql' (mysql will work for mariadb as well)
|
||||
DBDriver string `json:"db-driver"`
|
||||
|
||||
// For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).
|
||||
DB string `json:"db"`
|
||||
|
||||
// Config for job archive
|
||||
Archive json.RawMessage `json:"archive"`
|
||||
|
||||
// Keep all metric data in the metric data repositories,
|
||||
// do not write to the job-archive.
|
||||
DisableArchive bool `json:"disable-archive"`
|
||||
|
||||
// For LDAP Authentication and user synchronisation.
|
||||
LdapConfig *auth.LdapConfig `json:"ldap"`
|
||||
JwtConfig *auth.JWTAuthConfig `json:"jwts"`
|
||||
|
||||
// If 0 or empty, the session/token does not expire!
|
||||
SessionMaxAge string `json:"session-max-age"`
|
||||
|
||||
// If both those options are not empty, use HTTPS using those certificates.
|
||||
HttpsCertFile string `json:"https-cert-file"`
|
||||
HttpsKeyFile string `json:"https-key-file"`
|
||||
|
||||
// If not the empty string and `addr` does not end in ":80",
|
||||
// redirect every request incoming at port 80 to that url.
|
||||
RedirectHttpTo string `json:"redirect-http-to"`
|
||||
|
||||
// If overwriten, at least all the options in the defaults below must
|
||||
// be provided! Most options here can be overwritten by the user.
|
||||
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
||||
|
||||
// Where to store MachineState files
|
||||
MachineStateDir string `json:"machine-state-dir"`
|
||||
|
||||
// If not zero, automatically mark jobs as stopped running X seconds longer than their walltime.
|
||||
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`
|
||||
|
||||
// Array of Clusters
|
||||
Clusters []*Cluster `json:"Clusters"`
|
||||
}
|
||||
|
||||
var Keys ProgramConfig = ProgramConfig{
|
||||
Addr: ":8080",
|
||||
DisableAuthentication: false,
|
||||
EmbedStaticFiles: true,
|
||||
DBDriver: "sqlite3",
|
||||
DB: "./var/job.db",
|
||||
Archive: []byte(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`),
|
||||
DisableArchive: false,
|
||||
LdapConfig: nil,
|
||||
SessionMaxAge: "168h",
|
||||
UiDefaults: map[string]interface{}{
|
||||
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
||||
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"},
|
||||
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"plot_general_colorBackground": true,
|
||||
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
||||
"plot_general_lineWidth": 3,
|
||||
"plot_list_hideShortRunningJobs": 5 * 60,
|
||||
"plot_list_jobsPerPage": 50,
|
||||
"plot_list_selectedMetrics": []string{"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"},
|
||||
"plot_view_plotsPerRow": 3,
|
||||
"plot_view_showPolarplot": true,
|
||||
"plot_view_showRoofline": true,
|
||||
"plot_view_showStatTable": true,
|
||||
"system_view_selectedMetric": "cpu_load",
|
||||
},
|
||||
StopJobsExceedingWalltime: 0,
|
||||
}
|
||||
|
||||
func Init(flagConfigFile string) {
|
||||
f, err := os.Open(flagConfigFile)
|
||||
if err != nil {
|
||||
return err
|
||||
if !os.IsNotExist(err) || flagConfigFile != "./config.json" {
|
||||
log.Fatal(err)
|
||||
}
|
||||
} else {
|
||||
dec := json.NewDecoder(f)
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(&Keys); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
f.Close()
|
||||
}
|
||||
|
||||
Clusters = []*model.Cluster{}
|
||||
nodeLists = map[string]map[string]NodeList{}
|
||||
for _, de := range entries {
|
||||
raw, err := os.ReadFile(filepath.Join(jobArchive, de.Name(), "cluster.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var cluster model.Cluster
|
||||
|
||||
// Disabled because of the historic 'measurement' field.
|
||||
// dec := json.NewDecoder(bytes.NewBuffer(raw))
|
||||
// dec.DisallowUnknownFields()
|
||||
// if err := dec.Decode(&cluster); err != nil {
|
||||
// return err
|
||||
// }
|
||||
|
||||
if err := json.Unmarshal(raw, &cluster); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(cluster.Name) == 0 || len(cluster.MetricConfig) == 0 || len(cluster.SubClusters) == 0 {
|
||||
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
|
||||
}
|
||||
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
if len(mc.Name) == 0 {
|
||||
return errors.New("cluster.metricConfig.name should not be empty")
|
||||
}
|
||||
if mc.Timestep < 1 {
|
||||
return errors.New("cluster.metricConfig.timestep should not be smaller than one")
|
||||
}
|
||||
|
||||
// For backwards compability...
|
||||
if mc.Scope == "" {
|
||||
mc.Scope = schema.MetricScopeNode
|
||||
}
|
||||
if !mc.Scope.Valid() {
|
||||
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
|
||||
}
|
||||
}
|
||||
|
||||
if cluster.FilterRanges.StartTime.To.IsZero() {
|
||||
cluster.FilterRanges.StartTime.To = time.Unix(0, 0)
|
||||
}
|
||||
|
||||
if cluster.Name != de.Name() {
|
||||
return fmt.Errorf("the file '.../%s/cluster.json' contains the clusterId '%s'", de.Name(), cluster.Name)
|
||||
}
|
||||
|
||||
Clusters = append(Clusters, &cluster)
|
||||
|
||||
nodeLists[cluster.Name] = make(map[string]NodeList)
|
||||
for _, sc := range cluster.SubClusters {
|
||||
if sc.Nodes == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
nl, err := ParseNodeList(sc.Nodes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
|
||||
}
|
||||
nodeLists[cluster.Name][sc.Name] = nl
|
||||
}
|
||||
}
|
||||
|
||||
if authEnabled {
|
||||
_, err := db.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS configuration (
|
||||
username varchar(255),
|
||||
confkey varchar(255),
|
||||
value varchar(255),
|
||||
PRIMARY KEY (username, confkey),
|
||||
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
lookupConfigStmt, err = db.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Return the personalised UI config for the currently authenticated
|
||||
// user or return the plain default config.
|
||||
func GetUIConfig(r *http.Request) (map[string]interface{}, error) {
|
||||
user := auth.GetUser(r.Context())
|
||||
if user == nil {
|
||||
lock.RLock()
|
||||
copy := make(map[string]interface{}, len(uiDefaults))
|
||||
for k, v := range uiDefaults {
|
||||
copy[k] = v
|
||||
}
|
||||
lock.RUnlock()
|
||||
return copy, nil
|
||||
}
|
||||
|
||||
data := cache.Get(user.Username, func() (interface{}, time.Duration, int) {
|
||||
config := make(map[string]interface{}, len(uiDefaults))
|
||||
for k, v := range uiDefaults {
|
||||
config[k] = v
|
||||
}
|
||||
|
||||
rows, err := lookupConfigStmt.Query(user.Username)
|
||||
if err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
size := 0
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var key, rawval string
|
||||
if err := rows.Scan(&key, &rawval); err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
size += len(key)
|
||||
size += len(rawval)
|
||||
config[key] = val
|
||||
}
|
||||
|
||||
return config, 24 * time.Hour, size
|
||||
})
|
||||
if err, ok := data.(error); ok {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(map[string]interface{}), nil
|
||||
}
|
||||
|
||||
// If the context does not have a user, update the global ui configuration without persisting it!
|
||||
// If there is a (authenticated) user, update only his configuration.
|
||||
func UpdateConfig(key, value string, ctx context.Context) error {
|
||||
user := auth.GetUser(ctx)
|
||||
if user == nil {
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
lock.Lock()
|
||||
defer lock.Unlock()
|
||||
uiDefaults[key] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
// Disabled because now `plot_list_selectedMetrics:<cluster>` is possible.
|
||||
// if _, ok := uiDefaults[key]; !ok {
|
||||
// return errors.New("this configuration key does not exist")
|
||||
// }
|
||||
|
||||
if _, err := db.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
|
||||
user.Username, key, value); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cache.Del(user.Username)
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetCluster(cluster string) *model.Cluster {
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
return c
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetSubCluster(cluster, subcluster string) *model.SubCluster {
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
for _, p := range c.SubClusters {
|
||||
if p.Name == subcluster {
|
||||
return p
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetMetricConfig(cluster, metric string) *model.MetricConfig {
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
for _, m := range c.MetricConfig {
|
||||
if m.Name == metric {
|
||||
return m
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// AssignSubCluster sets the `job.subcluster` property of the job based
|
||||
// on its cluster and resources.
|
||||
func AssignSubCluster(job *schema.BaseJob) error {
|
||||
cluster := GetCluster(job.Cluster)
|
||||
if cluster == nil {
|
||||
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
|
||||
}
|
||||
|
||||
if job.SubCluster != "" {
|
||||
for _, sc := range cluster.SubClusters {
|
||||
if sc.Name == job.SubCluster {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
|
||||
}
|
||||
|
||||
if len(job.Resources) == 0 {
|
||||
return fmt.Errorf("job without any resources/hosts")
|
||||
}
|
||||
|
||||
host0 := job.Resources[0].Hostname
|
||||
for sc, nl := range nodeLists[job.Cluster] {
|
||||
if nl != nil && nl.Contains(host0) {
|
||||
job.SubCluster = sc
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if cluster.SubClusters[0].Nodes == "" {
|
||||
job.SubCluster = cluster.SubClusters[0].Name
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
|
||||
}
|
||||
|
||||
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||
for sc, nl := range nodeLists[cluster] {
|
||||
if nl != nil && nl.Contains(hostname) {
|
||||
return sc, nil
|
||||
}
|
||||
}
|
||||
|
||||
c := GetCluster(cluster)
|
||||
if c == nil {
|
||||
return "", fmt.Errorf("unkown cluster: %#v", cluster)
|
||||
}
|
||||
|
||||
if c.SubClusters[0].Nodes == "" {
|
||||
return c.SubClusters[0].Name, nil
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
|
||||
}
|
||||
|
@@ -1,175 +0,0 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
type NodeList [][]interface {
|
||||
consume(input string) (next string, ok bool)
|
||||
}
|
||||
|
||||
func (nl *NodeList) Contains(name string) bool {
|
||||
var ok bool
|
||||
for _, term := range *nl {
|
||||
str := name
|
||||
for _, expr := range term {
|
||||
str, ok = expr.consume(str)
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if ok && str == "" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type NLExprString string
|
||||
|
||||
func (nle NLExprString) consume(input string) (next string, ok bool) {
|
||||
str := string(nle)
|
||||
if strings.HasPrefix(input, str) {
|
||||
return strings.TrimPrefix(input, str), true
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
type NLExprIntRanges []NLExprIntRange
|
||||
|
||||
func (nles NLExprIntRanges) consume(input string) (next string, ok bool) {
|
||||
for _, nle := range nles {
|
||||
if next, ok := nle.consume(input); ok {
|
||||
return next, ok
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
type NLExprIntRange struct {
|
||||
start, end int64
|
||||
zeroPadded bool
|
||||
digits int
|
||||
}
|
||||
|
||||
func (nle NLExprIntRange) consume(input string) (next string, ok bool) {
|
||||
if !nle.zeroPadded || nle.digits < 1 {
|
||||
log.Error("node list: only zero-padded ranges are allowed")
|
||||
return "", false
|
||||
}
|
||||
|
||||
if len(input) < nle.digits {
|
||||
return "", false
|
||||
}
|
||||
|
||||
numerals, rest := input[:nle.digits], input[nle.digits:]
|
||||
for len(numerals) > 1 && numerals[0] == '0' {
|
||||
numerals = numerals[1:]
|
||||
}
|
||||
|
||||
x, err := strconv.ParseInt(numerals, 10, 32)
|
||||
if err != nil {
|
||||
return "", false
|
||||
}
|
||||
|
||||
if nle.start <= x && x <= nle.end {
|
||||
return rest, true
|
||||
}
|
||||
|
||||
return "", false
|
||||
}
|
||||
|
||||
func ParseNodeList(raw string) (NodeList, error) {
|
||||
isLetter := func(r byte) bool { return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') }
|
||||
isDigit := func(r byte) bool { return '0' <= r && r <= '9' }
|
||||
|
||||
rawterms := []string{}
|
||||
prevterm := 0
|
||||
for i := 0; i < len(raw); i++ {
|
||||
if raw[i] == '[' {
|
||||
for i < len(raw) && raw[i] != ']' {
|
||||
i++
|
||||
}
|
||||
if i == len(raw) {
|
||||
return nil, fmt.Errorf("node list: unclosed '['")
|
||||
}
|
||||
} else if raw[i] == ',' {
|
||||
rawterms = append(rawterms, raw[prevterm:i])
|
||||
prevterm = i + 1
|
||||
}
|
||||
}
|
||||
if prevterm != len(raw) {
|
||||
rawterms = append(rawterms, raw[prevterm:])
|
||||
}
|
||||
|
||||
nl := NodeList{}
|
||||
for _, rawterm := range rawterms {
|
||||
exprs := []interface {
|
||||
consume(input string) (next string, ok bool)
|
||||
}{}
|
||||
for i := 0; i < len(rawterm); i++ {
|
||||
c := rawterm[i]
|
||||
if isLetter(c) || isDigit(c) {
|
||||
j := i
|
||||
for j < len(rawterm) && (isLetter(rawterm[j]) || isDigit(rawterm[j])) {
|
||||
j++
|
||||
}
|
||||
exprs = append(exprs, NLExprString(rawterm[i:j]))
|
||||
i = j - 1
|
||||
} else if c == '[' {
|
||||
end := strings.Index(rawterm[i:], "]")
|
||||
if end == -1 {
|
||||
return nil, fmt.Errorf("node list: unclosed '['")
|
||||
}
|
||||
|
||||
parts := strings.Split(rawterm[i+1:i+end], ",")
|
||||
nles := NLExprIntRanges{}
|
||||
for _, part := range parts {
|
||||
minus := strings.Index(part, "-")
|
||||
if minus == -1 {
|
||||
return nil, fmt.Errorf("node list: no '-' found inside '[...]'")
|
||||
}
|
||||
|
||||
s1, s2 := part[0:minus], part[minus+1:]
|
||||
if len(s1) != len(s2) || len(s1) == 0 {
|
||||
return nil, fmt.Errorf("node list: %#v and %#v are not of equal length or of length zero", s1, s2)
|
||||
}
|
||||
|
||||
x1, err := strconv.ParseInt(s1, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("node list: %w", err)
|
||||
}
|
||||
x2, err := strconv.ParseInt(s2, 10, 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("node list: %w", err)
|
||||
}
|
||||
|
||||
nles = append(nles, NLExprIntRange{
|
||||
start: x1,
|
||||
end: x2,
|
||||
digits: len(s1),
|
||||
zeroPadded: true,
|
||||
})
|
||||
}
|
||||
|
||||
exprs = append(exprs, nles)
|
||||
i += end
|
||||
} else {
|
||||
return nil, fmt.Errorf("node list: invalid character: %#v", rune(c))
|
||||
}
|
||||
}
|
||||
nl = append(nl, exprs)
|
||||
}
|
||||
|
||||
return nl, nil
|
||||
}
|
@@ -1,59 +0,0 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestNodeList(t *testing.T) {
|
||||
nl, err := ParseNodeList("hallo,wel123t,emmy[01-99],fritz[005-500],woody[100-200]")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if nl.Contains("hello") || nl.Contains("woody") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if nl.Contains("fritz1") || nl.Contains("fritz9") || nl.Contains("fritz004") || nl.Contains("woody201") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if !nl.Contains("hallo") || !nl.Contains("wel123t") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if !nl.Contains("emmy01") || !nl.Contains("emmy42") || !nl.Contains("emmy99") {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if !nl.Contains("woody100") || !nl.Contains("woody199") {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodeListCommasInBrackets(t *testing.T) {
|
||||
nl, err := ParseNodeList("a[1000-2000,2010-2090,3000-5000]")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if nl.Contains("hello") || nl.Contains("woody") {
|
||||
t.Fatal("1")
|
||||
}
|
||||
|
||||
if nl.Contains("a0") || nl.Contains("a0000") || nl.Contains("a5001") || nl.Contains("a2005") {
|
||||
t.Fatal("2")
|
||||
}
|
||||
|
||||
if !nl.Contains("a1001") || !nl.Contains("a2000") {
|
||||
t.Fatal("3")
|
||||
}
|
||||
|
||||
if !nl.Contains("a2042") || !nl.Contains("a4321") || !nl.Contains("a3000") {
|
||||
t.Fatal("4")
|
||||
}
|
||||
}
|
@@ -15,10 +15,11 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
@@ -95,7 +96,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
||||
}
|
||||
|
||||
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
|
||||
if err := config.UpdateConfig(name, value, ctx); err != nil {
|
||||
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -103,7 +104,7 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string,
|
||||
}
|
||||
|
||||
func (r *queryResolver) Clusters(ctx context.Context) ([]*model.Cluster, error) {
|
||||
return config.Clusters, nil
|
||||
return archive.Clusters, nil
|
||||
}
|
||||
|
||||
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||
@@ -233,7 +234,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, mc := range config.GetCluster(cluster).MetricConfig {
|
||||
for _, mc := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
@@ -249,7 +250,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
Host: hostname,
|
||||
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
||||
}
|
||||
host.SubCluster, _ = config.GetSubClusterByNode(cluster, hostname)
|
||||
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
|
||||
|
||||
for metric, scopedMetrics := range metrics {
|
||||
for _, scopedMetric := range scopedMetrics {
|
||||
|
@@ -13,10 +13,10 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
@@ -36,7 +36,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
stats := map[string]*model.JobsStatistics{}
|
||||
|
||||
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
|
||||
for _, cluster := range config.Clusters {
|
||||
for _, cluster := range archive.Clusters {
|
||||
for _, subcluster := range cluster.SubClusters {
|
||||
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
|
||||
var query sq.SelectBuilder
|
||||
|
@@ -1,261 +0,0 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricdata
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
// For a given job, return the path of the `data.json`/`meta.json` file.
|
||||
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
|
||||
func getPath(job *schema.Job, file string, checkLegacy bool) (string, error) {
|
||||
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
|
||||
if !checkLegacy {
|
||||
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
}
|
||||
|
||||
legacyPath := filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, file)
|
||||
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
|
||||
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
|
||||
}
|
||||
|
||||
return legacyPath, nil
|
||||
}
|
||||
|
||||
// Assuming job is completed/archived, return the jobs metric data.
|
||||
func loadFromArchive(job *schema.Job) (schema.JobData, error) {
|
||||
filename, err := getPath(job, "data.json", true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data := cache.Get(filename, func() (value interface{}, ttl time.Duration, size int) {
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return err, 0, 1000
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var data schema.JobData
|
||||
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&data); err != nil {
|
||||
return err, 0, 1000
|
||||
}
|
||||
|
||||
return data, 1 * time.Hour, data.Size()
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(schema.JobData), nil
|
||||
}
|
||||
|
||||
func loadMetaJson(job *schema.Job) (*schema.JobMeta, error) {
|
||||
filename, err := getPath(job, "meta.json", true)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
bytes, err := os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var metaFile schema.JobMeta = schema.JobMeta{
|
||||
BaseJob: schema.JobDefaults,
|
||||
}
|
||||
if err := json.Unmarshal(bytes, &metaFile); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &metaFile, nil
|
||||
}
|
||||
|
||||
// If the job is archived, find its `meta.json` file and override the tags list
|
||||
// in that JSON file. If the job is not archived, nothing is done.
|
||||
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||
if job.State == schema.JobStateRunning {
|
||||
return nil
|
||||
}
|
||||
|
||||
filename, err := getPath(job, "meta.json", true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Open(filename)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var metaFile schema.JobMeta = schema.JobMeta{
|
||||
BaseJob: schema.JobDefaults,
|
||||
}
|
||||
if err := json.NewDecoder(f).Decode(&metaFile); err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
|
||||
metaFile.Tags = make([]*schema.Tag, 0)
|
||||
for _, tag := range tags {
|
||||
metaFile.Tags = append(metaFile.Tags, &schema.Tag{
|
||||
Name: tag.Name,
|
||||
Type: tag.Type,
|
||||
})
|
||||
}
|
||||
|
||||
bytes, err := json.Marshal(metaFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(filename, bytes, 0644)
|
||||
}
|
||||
|
||||
// Helper to metricdata.LoadAverages().
|
||||
func loadAveragesFromArchive(job *schema.Job, metrics []string, data [][]schema.Float) error {
|
||||
metaFile, err := loadMetaJson(job)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i, m := range metrics {
|
||||
if stat, ok := metaFile.Statistics[m]; ok {
|
||||
data[i] = append(data[i], schema.Float(stat.Avg))
|
||||
} else {
|
||||
data[i] = append(data[i], schema.NaN)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
|
||||
metaFile, err := loadMetaJson(job)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return metaFile.Statistics, nil
|
||||
}
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := config.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
// TODO: Talk about this! What resolutions to store data at...
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
if job.NumNodes <= 8 {
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
|
||||
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// TODO/FIXME: Calc average for non-node metrics as well!
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: config.GetMetricConfig(job.Cluster, metric).Unit,
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
// If the file based archive is disabled,
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if !useArchive {
|
||||
return jobMeta, nil
|
||||
}
|
||||
|
||||
dir, err := getPath(job, "", false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return jobMeta, writeFiles(dir, jobMeta, &jobData)
|
||||
}
|
||||
|
||||
func writeFiles(dir string, jobMeta *schema.JobMeta, jobData *schema.JobData) error {
|
||||
if err := os.MkdirAll(dir, 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Create(path.Join(dir, "meta.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := json.NewEncoder(f).Encode(jobMeta); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := f.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err = os.Create(path.Join(dir, "data.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := json.NewEncoder(f).Encode(jobData); err != nil {
|
||||
return err
|
||||
}
|
||||
return f.Close()
|
||||
}
|
||||
|
||||
// Used to import a non-running job into the job-archive.
|
||||
func ImportJob(job *schema.JobMeta, jobData *schema.JobData) error {
|
||||
dir, err := getPath(&schema.Job{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTimeUnix: job.StartTime,
|
||||
StartTime: time.Unix(job.StartTime, 0),
|
||||
}, "", false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return writeFiles(dir, job, jobData)
|
||||
}
|
@@ -15,7 +15,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
@@ -149,7 +149,7 @@ func (ccms *CCMetricStore) doRequest(ctx context.Context, body *ApiQueryRequest)
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
topology := config.GetSubCluster(job.Cluster, job.SubCluster).Topology
|
||||
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
|
||||
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -175,7 +175,7 @@ func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []
|
||||
query := req.Queries[i]
|
||||
metric := ccms.toLocalName(query.Metric)
|
||||
scope := assignedScope[i]
|
||||
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
||||
if _, ok := jobData[metric]; !ok {
|
||||
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
|
||||
}
|
||||
@@ -252,12 +252,12 @@ var (
|
||||
|
||||
func (ccms *CCMetricStore) buildQueries(job *schema.Job, metrics []string, scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) {
|
||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
|
||||
topology := config.GetSubCluster(job.Cluster, job.SubCluster).Topology
|
||||
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
|
||||
assignedScope := []schema.MetricScope{}
|
||||
|
||||
for _, metric := range metrics {
|
||||
remoteName := ccms.toRemoteName(metric)
|
||||
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
||||
if mc == nil {
|
||||
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||
@@ -584,7 +584,7 @@ func (ccms *CCMetricStore) LoadNodeData(cluster string, metrics, nodes []string,
|
||||
data[query.Hostname] = hostdata
|
||||
}
|
||||
|
||||
mc := config.GetMetricConfig(cluster, metric)
|
||||
mc := archive.GetMetricConfig(cluster, metric)
|
||||
hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
|
||||
Unit: mc.Unit,
|
||||
Scope: schema.MetricScopeNode,
|
||||
|
@@ -14,7 +14,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
|
||||
@@ -124,7 +124,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *schema.Job, metrics []string,
|
||||
for _, metric := range metrics {
|
||||
jobMetric, ok := jobData[metric]
|
||||
if !ok {
|
||||
mc := config.GetMetricConfig(job.Cluster, metric)
|
||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
||||
jobMetric = map[schema.MetricScope]*schema.JobMetric{
|
||||
scope: { // uses scope var from above!
|
||||
Unit: mc.Unit,
|
||||
|
@@ -8,9 +8,11 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
@@ -33,14 +35,11 @@ type MetricDataRepository interface {
|
||||
|
||||
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
||||
|
||||
var JobArchivePath string
|
||||
|
||||
var useArchive bool
|
||||
|
||||
func Init(jobArchivePath string, disableArchive bool) error {
|
||||
func Init(disableArchive bool) error {
|
||||
useArchive = !disableArchive
|
||||
JobArchivePath = jobArchivePath
|
||||
for _, cluster := range config.Clusters {
|
||||
for _, cluster := range config.Keys.Clusters {
|
||||
if cluster.MetricDataRepository != nil {
|
||||
var kind struct {
|
||||
Kind string `json:"kind"`
|
||||
@@ -73,97 +72,88 @@ func Init(jobArchivePath string, disableArchive bool) error {
|
||||
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||
|
||||
// Fetches the metric data for a job.
|
||||
func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
if job.State == schema.JobStateRunning ||
|
||||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
||||
!useArchive {
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
}
|
||||
func LoadData(job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
|
||||
if scopes == nil {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
}
|
||||
if job.State == schema.JobStateRunning ||
|
||||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
||||
!useArchive {
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
if scopes == nil {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
cluster := archive.GetCluster(job.Cluster)
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
jd, err = repo.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
if len(jd) != 0 {
|
||||
log.Errorf("partial error: %s", err.Error())
|
||||
} else {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
jd, err = archive.GetHandle().LoadJobData(job)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Avoid sending unrequested data to the client:
|
||||
if metrics != nil || scopes != nil {
|
||||
if metrics == nil {
|
||||
cluster := config.GetCluster(job.Cluster)
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
metrics = make([]string, 0, len(jd))
|
||||
for k := range jd {
|
||||
metrics = append(metrics, k)
|
||||
}
|
||||
}
|
||||
|
||||
jd, err = repo.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
if len(jd) != 0 {
|
||||
log.Errorf("partial error: %s", err.Error())
|
||||
} else {
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
size = jd.Size()
|
||||
} else {
|
||||
jd, err = loadFromArchive(job)
|
||||
if err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
// Avoid sending unrequested data to the client:
|
||||
if metrics != nil || scopes != nil {
|
||||
if metrics == nil {
|
||||
metrics = make([]string, 0, len(jd))
|
||||
for k := range jd {
|
||||
metrics = append(metrics, k)
|
||||
}
|
||||
}
|
||||
|
||||
res := schema.JobData{}
|
||||
for _, metric := range metrics {
|
||||
if perscope, ok := jd[metric]; ok {
|
||||
if len(perscope) > 1 {
|
||||
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
||||
for _, scope := range scopes {
|
||||
if jm, ok := perscope[scope]; ok {
|
||||
subset[scope] = jm
|
||||
}
|
||||
}
|
||||
|
||||
if len(subset) > 0 {
|
||||
perscope = subset
|
||||
res := schema.JobData{}
|
||||
for _, metric := range metrics {
|
||||
if perscope, ok := jd[metric]; ok {
|
||||
if len(perscope) > 1 {
|
||||
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
||||
for _, scope := range scopes {
|
||||
if jm, ok := perscope[scope]; ok {
|
||||
subset[scope] = jm
|
||||
}
|
||||
}
|
||||
|
||||
res[metric] = perscope
|
||||
if len(subset) > 0 {
|
||||
perscope = subset
|
||||
}
|
||||
}
|
||||
|
||||
res[metric] = perscope
|
||||
}
|
||||
jd = res
|
||||
}
|
||||
size = 1 // loadFromArchive() caches in the same cache.
|
||||
jd = res
|
||||
}
|
||||
|
||||
ttl = 5 * time.Hour
|
||||
if job.State == schema.JobStateRunning {
|
||||
ttl = 2 * time.Minute
|
||||
}
|
||||
|
||||
prepareJobData(job, jd, scopes)
|
||||
return jd, ttl, size
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(schema.JobData), nil
|
||||
prepareJobData(job, jd, scopes)
|
||||
|
||||
return jd, nil
|
||||
}
|
||||
|
||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||
func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
|
||||
if job.State != schema.JobStateRunning && useArchive {
|
||||
return loadAveragesFromArchive(job, metrics, data)
|
||||
return archive.LoadAveragesFromArchive(job, metrics, data)
|
||||
}
|
||||
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
@@ -201,7 +191,7 @@ func LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.Metri
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range config.GetCluster(cluster).MetricConfig {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
@@ -256,3 +246,60 @@ func prepareJobData(job *schema.Job, jobData schema.JobData, scopes []schema.Met
|
||||
jobData.AddNodeScope("mem_bw")
|
||||
}
|
||||
}
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
// TODO: Talk about this! What resolutions to store data at...
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
if job.NumNodes <= 8 {
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
|
||||
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// TODO/FIXME: Calc average for non-node metrics as well!
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: archive.GetMetricConfig(job.Cluster, metric).Unit,
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
// If the file based archive is disabled,
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if !useArchive {
|
||||
return jobMeta, nil
|
||||
}
|
||||
|
||||
return jobMeta, archive.Import(jobMeta, &jobData)
|
||||
}
|
||||
|
@@ -1,159 +0,0 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
|
||||
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
|
||||
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
|
||||
);`
|
||||
|
||||
// Import all jobs specified as `<path-to-meta.json>:<path-to-data.json>,...`
|
||||
func (r *JobRepository) HandleImportFlag(flag string) error {
|
||||
for _, pair := range strings.Split(flag, ",") {
|
||||
files := strings.Split(pair, ":")
|
||||
if len(files) != 2 {
|
||||
return fmt.Errorf("invalid import flag format")
|
||||
}
|
||||
|
||||
raw, err := os.ReadFile(files[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err := dec.Decode(&jobMeta); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
raw, err = os.ReadFile(files[1])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dec = json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
jobData := schema.JobData{}
|
||||
if err := dec.Decode(&jobData); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := r.ImportJob(&jobMeta, &jobData); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) (err error) {
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
if err := metricdata.ImportJob(jobMeta, jobData); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if job, err := r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
|
||||
}
|
||||
|
||||
job := schema.Job{
|
||||
BaseJob: jobMeta.BaseJob,
|
||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
||||
StartTimeUnix: jobMeta.StartTime,
|
||||
}
|
||||
|
||||
// TODO: Other metrics...
|
||||
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
|
||||
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
|
||||
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
|
||||
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, tag := range job.Tags {
|
||||
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
|
||||
return nil
|
||||
}
|
||||
|
||||
// This function also sets the subcluster if necessary!
|
||||
func SanityChecks(job *schema.BaseJob) error {
|
||||
if c := config.GetCluster(job.Cluster); c == nil {
|
||||
return fmt.Errorf("no such cluster: %#v", job.Cluster)
|
||||
}
|
||||
if err := config.AssignSubCluster(job); err != nil {
|
||||
return err
|
||||
}
|
||||
if !job.State.Valid() {
|
||||
return fmt.Errorf("not a valid job state: %#v", job.State)
|
||||
}
|
||||
if len(job.Resources) == 0 || len(job.User) == 0 {
|
||||
return fmt.Errorf("'resources' and 'user' should not be empty")
|
||||
}
|
||||
if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 {
|
||||
return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
|
||||
}
|
||||
if len(job.Resources) != int(job.NumNodes) {
|
||||
return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
return stats.Avg
|
||||
}
|
||||
|
||||
return 0.0
|
||||
}
|
@@ -12,6 +12,7 @@ import (
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/jmoiron/sqlx"
|
||||
@@ -78,31 +79,32 @@ const JobsDbIndexes string = `
|
||||
CREATE INDEX job_by_job_id ON job (job_id);
|
||||
CREATE INDEX job_by_state ON job (job_state);
|
||||
`
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
|
||||
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
|
||||
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
|
||||
);`
|
||||
|
||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||
// repopulate them using the jobs found in `archive`.
|
||||
func InitDB(db *sqlx.DB, archive string) error {
|
||||
func InitDB() error {
|
||||
db := GetConnection()
|
||||
starttime := time.Now()
|
||||
log.Print("Building job table...")
|
||||
|
||||
// Basic database structure:
|
||||
_, err := db.Exec(JobsDBSchema)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
clustersDir, err := os.ReadDir(archive)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err := db.DB.Exec(JobsDBSchema)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Inserts are bundled into transactions because in sqlite,
|
||||
// that speeds up inserts A LOT.
|
||||
tx, err := db.Beginx()
|
||||
tx, err := db.DB.Beginx()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -116,9 +118,12 @@ func InitDB(db *sqlx.DB, archive string) error {
|
||||
// this function is only ever called when a special command line flag
|
||||
// is passed anyways.
|
||||
fmt.Printf("%d jobs inserted...\r", 0)
|
||||
i := 0
|
||||
tags := make(map[string]int64)
|
||||
handleDirectory := func(filename string) error {
|
||||
|
||||
ar := archive.GetHandle()
|
||||
i := 0
|
||||
|
||||
for jobMeta := range ar.Iter() {
|
||||
// Bundle 100 inserts into one transaction for better performance:
|
||||
if i%100 == 0 {
|
||||
if tx != nil {
|
||||
@@ -127,7 +132,7 @@ func InitDB(db *sqlx.DB, archive string) error {
|
||||
}
|
||||
}
|
||||
|
||||
tx, err = db.Beginx()
|
||||
tx, err = db.DB.Beginx()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -136,52 +141,65 @@ func InitDB(db *sqlx.DB, archive string) error {
|
||||
fmt.Printf("%d jobs inserted...\r", i)
|
||||
}
|
||||
|
||||
err := loadJob(tx, stmt, tags, filename)
|
||||
if err == nil {
|
||||
i += 1
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
job := schema.Job{
|
||||
BaseJob: jobMeta.BaseJob,
|
||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
||||
StartTimeUnix: jobMeta.StartTime,
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
// TODO: Other metrics...
|
||||
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
|
||||
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
|
||||
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
|
||||
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
|
||||
|
||||
for _, clusterDir := range clustersDir {
|
||||
lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, lvl1Dir := range lvl1Dirs {
|
||||
if !lvl1Dir.IsDir() {
|
||||
// Could be the cluster.json file
|
||||
continue
|
||||
}
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, lvl2Dir := range lvl2Dirs {
|
||||
dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
|
||||
startTimeDirs, err := os.ReadDir(dirpath)
|
||||
res, err := db.DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, tag := range job.Tags {
|
||||
tagstr := tag.Name + ":" + tag.Type
|
||||
tagId, ok := tags[tagstr]
|
||||
if !ok {
|
||||
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// For compability with the old job-archive directory structure where
|
||||
// there was no start time directory.
|
||||
for _, startTimeDir := range startTimeDirs {
|
||||
if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
|
||||
if err := handleDirectory(dirpath); err != nil {
|
||||
log.Errorf("in %s: %s", dirpath, err.Error())
|
||||
}
|
||||
} else if startTimeDir.IsDir() {
|
||||
if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
|
||||
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
||||
}
|
||||
}
|
||||
tagId, err = res.LastInsertId()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tags[tagstr] = tagId
|
||||
}
|
||||
|
||||
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
i += 1
|
||||
}
|
||||
}
|
||||
|
||||
@@ -191,7 +209,7 @@ func InitDB(db *sqlx.DB, archive string) error {
|
||||
|
||||
// Create indexes after inserts so that they do not
|
||||
// need to be continually updated.
|
||||
if _, err := db.Exec(JobsDbIndexes); err != nil {
|
||||
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -202,7 +220,10 @@ func InitDB(db *sqlx.DB, archive string) error {
|
||||
// TODO: Remove double logic, use repository/import.go!
|
||||
// Read the `meta.json` file at `path` and insert it to the database using the prepared
|
||||
// insert statement `stmt`. `tags` maps all existing tags to their database ID.
|
||||
func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path string) error {
|
||||
func loadJob(tx *sqlx.Tx,
|
||||
stmt *sqlx.NamedStmt,
|
||||
tags map[string]int64,
|
||||
path string) error {
|
||||
f, err := os.Open(filepath.Join(path, "meta.json"))
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -273,3 +294,35 @@ func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path stri
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// This function also sets the subcluster if necessary!
|
||||
func SanityChecks(job *schema.BaseJob) error {
|
||||
if c := archive.GetCluster(job.Cluster); c == nil {
|
||||
return fmt.Errorf("no such cluster: %#v", job.Cluster)
|
||||
}
|
||||
if err := archive.AssignSubCluster(job); err != nil {
|
||||
return err
|
||||
}
|
||||
if !job.State.Valid() {
|
||||
return fmt.Errorf("not a valid job state: %#v", job.State)
|
||||
}
|
||||
if len(job.Resources) == 0 || len(job.User) == 0 {
|
||||
return fmt.Errorf("'resources' and 'user' should not be empty")
|
||||
}
|
||||
if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 {
|
||||
return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
|
||||
}
|
||||
if len(job.Resources) != int(job.NumNodes) {
|
||||
return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
return stats.Avg
|
||||
}
|
||||
|
||||
return 0.0
|
||||
}
|
||||
|
@@ -35,7 +35,7 @@ type JobRepository struct {
|
||||
cache *lrucache.Cache
|
||||
}
|
||||
|
||||
func GetRepository() *JobRepository {
|
||||
func GetJobRepository() *JobRepository {
|
||||
jobRepoOnce.Do(func() {
|
||||
db := GetConnection()
|
||||
|
||||
|
@@ -19,7 +19,7 @@ func init() {
|
||||
}
|
||||
|
||||
func setup(t *testing.T) *JobRepository {
|
||||
return GetRepository()
|
||||
return GetJobRepository()
|
||||
}
|
||||
|
||||
func TestFind(t *testing.T) {
|
||||
|
@@ -5,7 +5,7 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
@@ -26,7 +26,7 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, metricdata.UpdateTags(j, tags)
|
||||
return tags, archive.UpdateTags(j, tags)
|
||||
}
|
||||
|
||||
// Removes a tag from a job
|
||||
@@ -45,7 +45,7 @@ func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, metricdata.UpdateTags(j, tags)
|
||||
return tags, archive.UpdateTags(j, tags)
|
||||
}
|
||||
|
||||
// CreateTag creates a new tag with the specified type and name and returns its database id.
|
||||
|
140
internal/repository/user.go
Normal file
140
internal/repository/user.go
Normal file
@@ -0,0 +1,140 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
var (
|
||||
userCfgRepoOnce sync.Once
|
||||
userCfgRepoInstance *UserCfgRepo
|
||||
)
|
||||
|
||||
type UserCfgRepo struct {
|
||||
DB *sqlx.DB
|
||||
Lookup *sqlx.Stmt
|
||||
lock sync.RWMutex
|
||||
uiDefaults map[string]interface{}
|
||||
cache *lrucache.Cache
|
||||
}
|
||||
|
||||
func GetUserCfgRepo() *UserCfgRepo {
|
||||
userCfgRepoOnce.Do(func() {
|
||||
db := GetConnection()
|
||||
|
||||
_, err := db.DB.Exec(`
|
||||
CREATE TABLE IF NOT EXISTS configuration (
|
||||
username varchar(255),
|
||||
confkey varchar(255),
|
||||
value varchar(255),
|
||||
PRIMARY KEY (username, confkey),
|
||||
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
|
||||
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
userCfgRepoInstance = &UserCfgRepo{
|
||||
DB: db.DB,
|
||||
Lookup: lookupConfigStmt,
|
||||
cache: lrucache.New(1024),
|
||||
}
|
||||
})
|
||||
|
||||
return userCfgRepoInstance
|
||||
}
|
||||
|
||||
// Return the personalised UI config for the currently authenticated
|
||||
// user or return the plain default config.
|
||||
func (uCfg *UserCfgRepo) GetUIConfig(r *http.Request) (map[string]interface{}, error) {
|
||||
user := auth.GetUser(r.Context())
|
||||
if user == nil {
|
||||
uCfg.lock.RLock()
|
||||
copy := make(map[string]interface{}, len(uCfg.uiDefaults))
|
||||
for k, v := range uCfg.uiDefaults {
|
||||
copy[k] = v
|
||||
}
|
||||
uCfg.lock.RUnlock()
|
||||
return copy, nil
|
||||
}
|
||||
|
||||
data := uCfg.cache.Get(user.Username, func() (interface{}, time.Duration, int) {
|
||||
config := make(map[string]interface{}, len(uCfg.uiDefaults))
|
||||
for k, v := range uCfg.uiDefaults {
|
||||
config[k] = v
|
||||
}
|
||||
|
||||
rows, err := uCfg.Lookup.Query(user.Username)
|
||||
if err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
size := 0
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
var key, rawval string
|
||||
if err := rows.Scan(&key, &rawval); err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
size += len(key)
|
||||
size += len(rawval)
|
||||
config[key] = val
|
||||
}
|
||||
|
||||
return config, 24 * time.Hour, size
|
||||
})
|
||||
if err, ok := data.(error); ok {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(map[string]interface{}), nil
|
||||
}
|
||||
|
||||
// If the context does not have a user, update the global ui configuration
|
||||
// without persisting it! If there is a (authenticated) user, update only his
|
||||
// configuration.
|
||||
func (uCfg *UserCfgRepo) UpdateConfig(key, value string, ctx context.Context) error {
|
||||
user := auth.GetUser(ctx)
|
||||
if user == nil {
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
uCfg.lock.Lock()
|
||||
defer uCfg.lock.Unlock()
|
||||
uCfg.uiDefaults[key] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
|
||||
user, key, value); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
uCfg.cache.Del(user.Username)
|
||||
return nil
|
||||
}
|
@@ -13,10 +13,10 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
@@ -55,7 +55,7 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
||||
TotalJobs int
|
||||
RecentShortJobs int
|
||||
}
|
||||
jobRepo := repository.GetRepository()
|
||||
jobRepo := repository.GetJobRepository()
|
||||
|
||||
runningJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
|
||||
State: []schema.JobState{schema.JobStateRunning},
|
||||
@@ -80,7 +80,7 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
||||
}
|
||||
|
||||
clusters := make([]cluster, 0)
|
||||
for _, c := range config.Clusters {
|
||||
for _, c := range archive.Clusters {
|
||||
clusters = append(clusters, cluster{
|
||||
Name: c.Name,
|
||||
RunningJobs: runningJobs[c.Name],
|
||||
@@ -99,7 +99,7 @@ func setupJobRoute(i InfoType, r *http.Request) InfoType {
|
||||
}
|
||||
|
||||
func setupUserRoute(i InfoType, r *http.Request) InfoType {
|
||||
jobRepo := repository.GetRepository()
|
||||
jobRepo := repository.GetJobRepository()
|
||||
username := mux.Vars(r)["id"]
|
||||
i["id"] = username
|
||||
i["username"] = username
|
||||
@@ -142,7 +142,7 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
|
||||
|
||||
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
|
||||
var username *string = nil
|
||||
jobRepo := repository.GetRepository()
|
||||
jobRepo := repository.GetJobRepository()
|
||||
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleAdmin) {
|
||||
username = &user.Username
|
||||
}
|
||||
@@ -254,10 +254,11 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
}
|
||||
|
||||
func SetupRoutes(router *mux.Router) {
|
||||
userCfgRepo := repository.GetUserCfgRepo()
|
||||
for _, route := range routes {
|
||||
route := route
|
||||
router.HandleFunc(route.Route, func(rw http.ResponseWriter, r *http.Request) {
|
||||
conf, err := config.GetUIConfig(r)
|
||||
conf, err := userCfgRepo.GetUIConfig(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
|
Reference in New Issue
Block a user