Refactor package structure

Builds but not tested
Jan Eitzinger
2022-09-05 17:46:38 +02:00
parent 26df1e7c14
commit fc76eed899
30 changed files with 1426 additions and 1027 deletions


@@ -21,11 +21,11 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/gorilla/mux"
@@ -46,12 +46,11 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/", api.stopJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/{id}", api.stopJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
// r.HandleFunc("/jobs/{id}", api.getJob).Methods(http.MethodGet)
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
if api.Authentication != nil {
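
For orientation, a minimal client sketch against the stop_job route registered above might look like this; the `/api` prefix, the JWT bearer token, and the payload field names are assumptions based on the handler names, not confirmed by this diff:

    package main

    import (
        "bytes"
        "fmt"
        "net/http"
        "os"
    )

    func main() {
        // Hypothetical stop_job payload; adjust field names to the actual API schema.
        body := []byte(`{"jobId": 123, "cluster": "emmy", "stopTime": 1662392000, "jobState": "completed"}`)
        req, err := http.NewRequest(http.MethodPost,
            "http://localhost:8080/api/jobs/stop_job/", bytes.NewReader(body))
        if err != nil {
            panic(err)
        }
        req.Header.Set("Authorization", "Bearer "+os.Getenv("JWT"))
        req.Header.Set("Content-Type", "application/json")
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        fmt.Println(resp.Status)
    }
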
@@ -198,7 +197,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
}
if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
res.Statistics, err = metricdata.GetStatistics(job)
res.Statistics, err = archive.GetStatistics(job)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
@@ -425,28 +424,28 @@ func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
}()
}
func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
return
}
// func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
// handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
// return
// }
var body struct {
Meta *schema.JobMeta `json:"meta"`
Data *schema.JobData `json:"data"`
}
if err := decode(r.Body, &body); err != nil {
handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusBadRequest, rw)
return
}
// var body struct {
// Meta *schema.JobMeta `json:"meta"`
// Data *schema.JobData `json:"data"`
// }
// if err := decode(r.Body, &body); err != nil {
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusBadRequest, rw)
// return
// }
if err := api.JobRepository.ImportJob(body.Meta, body.Data); err != nil {
handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusUnprocessableEntity, rw)
return
}
// if err := api.JobRepository.ImportJob(body.Meta, body.Data); err != nil {
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusUnprocessableEntity, rw)
// return
// }
rw.Write([]byte(`{ "status": "OK" }`))
}
// rw.Write([]byte(`{ "status": "OK" }`))
// }
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
id := mux.Vars(r)["id"]
@@ -596,7 +595,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
fmt.Printf("KEY: %#v\nVALUE: %#v\n", key, value)
if err := config.UpdateConfig(key, value, r.Context()); err != nil {
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, r.Context()); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
}


@@ -5,298 +5,123 @@
package config
import (
"context"
"encoding/json"
"errors"
"fmt"
"net/http"
"log"
"os"
"path/filepath"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/jmoiron/sqlx"
)
var db *sqlx.DB
var lookupConfigStmt *sqlx.Stmt
type Cluster struct {
Name string `json:"name"`
FilterRanges *model.FilterRanges `json:"filterRanges"`
MetricDataRepository json.RawMessage `json:"metricDataRepository"`
}
var lock sync.RWMutex
var uiDefaults map[string]interface{}
// Format of the configuration (file). See below for the defaults.
type ProgramConfig struct {
// Address where the http (or https) server will listen on (for example: 'localhost:80').
Addr string `json:"addr"`
var cache *lrucache.Cache = lrucache.New(1024)
// Drop root permissions once .env was read and the port was taken.
User string `json:"user"`
Group string `json:"group"`
var Clusters []*model.Cluster
var nodeLists map[string]map[string]NodeList
// Disable authentication (for everything: API, Web-UI, ...)
DisableAuthentication bool `json:"disable-authentication"`
func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, jobArchive string) error {
db = usersdb
uiDefaults = uiConfig
entries, err := os.ReadDir(jobArchive)
// If `embed-static-files` is true (default), the frontend files are directly
// embedded into the go binary and expected to be in web/frontend. Only if
// it is false are the files in `static-files` served instead.
EmbedStaticFiles bool `json:"embed-static-files"`
StaticFiles string `json:"static-files"`
// 'sqlite3' or 'mysql' (mysql will work for mariadb as well)
DBDriver string `json:"db-driver"`
// For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).
DB string `json:"db"`
// Config for job archive
Archive json.RawMessage `json:"archive"`
// Keep all metric data in the metric data repositories,
// do not write to the job-archive.
DisableArchive bool `json:"disable-archive"`
// For LDAP Authentication and user synchronisation.
LdapConfig *auth.LdapConfig `json:"ldap"`
JwtConfig *auth.JWTAuthConfig `json:"jwts"`
// If 0 or empty, the session/token does not expire!
SessionMaxAge string `json:"session-max-age"`
// If both those options are not empty, use HTTPS using those certificates.
HttpsCertFile string `json:"https-cert-file"`
HttpsKeyFile string `json:"https-key-file"`
// If not the empty string and `addr` does not end in ":80",
// redirect every request arriving at port 80 to that URL.
RedirectHttpTo string `json:"redirect-http-to"`
// If overwritten, at least all the options in the defaults below must
// be provided! Most options here can be overwritten by the user.
UiDefaults map[string]interface{} `json:"ui-defaults"`
// Where to store MachineState files
MachineStateDir string `json:"machine-state-dir"`
// If not zero, automatically mark jobs as stopped that have been running X seconds longer than their walltime.
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`
// Array of Clusters
Clusters []*Cluster `json:"Clusters"`
}
var Keys ProgramConfig = ProgramConfig{
Addr: ":8080",
DisableAuthentication: false,
EmbedStaticFiles: true,
DBDriver: "sqlite3",
DB: "./var/job.db",
Archive: []byte(`{"kind":"file","path":"./var/job-archive"}`),
DisableArchive: false,
LdapConfig: nil,
SessionMaxAge: "168h",
UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
"plot_list_hideShortRunningJobs": 5 * 60,
"plot_list_jobsPerPage": 50,
"plot_list_selectedMetrics": []string{"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"},
"plot_view_plotsPerRow": 3,
"plot_view_showPolarplot": true,
"plot_view_showRoofline": true,
"plot_view_showStatTable": true,
"system_view_selectedMetric": "cpu_load",
},
StopJobsExceedingWalltime: 0,
}
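
To make the format concrete, a minimal config.json overriding some of these defaults could look like the following; all values are illustrative, and any key that is omitted keeps the default from `Keys` above:

    {
      "addr": "0.0.0.0:443",
      "db-driver": "mysql",
      "db": "clustercockpit:password@tcp(localhost:3306)/clustercockpit",
      "archive": { "kind": "file", "path": "./var/job-archive" },
      "https-cert-file": "/etc/letsencrypt/live/example.org/fullchain.pem",
      "https-key-file": "/etc/letsencrypt/live/example.org/privkey.pem",
      "session-max-age": "168h",
      "stop-jobs-exceeding-walltime": 86400
    }

Note that Init below decodes with DisallowUnknownFields, so a misspelled key makes startup fail rather than being silently ignored.
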
func Init(flagConfigFile string) {
f, err := os.Open(flagConfigFile)
if err != nil {
return err
if !os.IsNotExist(err) || flagConfigFile != "./config.json" {
log.Fatal(err)
}
} else {
dec := json.NewDecoder(f)
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
log.Fatal(err)
}
f.Close()
}
Clusters = []*model.Cluster{}
nodeLists = map[string]map[string]NodeList{}
for _, de := range entries {
raw, err := os.ReadFile(filepath.Join(jobArchive, de.Name(), "cluster.json"))
if err != nil {
return err
}
var cluster model.Cluster
// Disabled because of the historic 'measurement' field.
// dec := json.NewDecoder(bytes.NewBuffer(raw))
// dec.DisallowUnknownFields()
// if err := dec.Decode(&cluster); err != nil {
// return err
// }
if err := json.Unmarshal(raw, &cluster); err != nil {
return err
}
if len(cluster.Name) == 0 || len(cluster.MetricConfig) == 0 || len(cluster.SubClusters) == 0 {
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
}
for _, mc := range cluster.MetricConfig {
if len(mc.Name) == 0 {
return errors.New("cluster.metricConfig.name should not be empty")
}
if mc.Timestep < 1 {
return errors.New("cluster.metricConfig.timestep should not be smaller than one")
}
// For backwards compatibility...
if mc.Scope == "" {
mc.Scope = schema.MetricScopeNode
}
if !mc.Scope.Valid() {
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
}
}
if cluster.FilterRanges.StartTime.To.IsZero() {
cluster.FilterRanges.StartTime.To = time.Unix(0, 0)
}
if cluster.Name != de.Name() {
return fmt.Errorf("the file '.../%s/cluster.json' contains the clusterId '%s'", de.Name(), cluster.Name)
}
Clusters = append(Clusters, &cluster)
nodeLists[cluster.Name] = make(map[string]NodeList)
for _, sc := range cluster.SubClusters {
if sc.Nodes == "" {
continue
}
nl, err := ParseNodeList(sc.Nodes)
if err != nil {
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
}
nodeLists[cluster.Name][sc.Name] = nl
}
}
if authEnabled {
_, err := db.Exec(`
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
if err != nil {
return err
}
lookupConfigStmt, err = db.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
if err != nil {
return err
}
}
return nil
}
// Return the personalised UI config for the currently authenticated
// user or return the plain default config.
func GetUIConfig(r *http.Request) (map[string]interface{}, error) {
user := auth.GetUser(r.Context())
if user == nil {
lock.RLock()
copy := make(map[string]interface{}, len(uiDefaults))
for k, v := range uiDefaults {
copy[k] = v
}
lock.RUnlock()
return copy, nil
}
data := cache.Get(user.Username, func() (interface{}, time.Duration, int) {
config := make(map[string]interface{}, len(uiDefaults))
for k, v := range uiDefaults {
config[k] = v
}
rows, err := lookupConfigStmt.Query(user.Username)
if err != nil {
return err, 0, 0
}
size := 0
defer rows.Close()
for rows.Next() {
var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil {
return err, 0, 0
}
var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
return err, 0, 0
}
size += len(key)
size += len(rawval)
config[key] = val
}
return config, 24 * time.Hour, size
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(map[string]interface{}), nil
}
// If the context does not have a user, update the global ui configuration without persisting it!
// If there is an (authenticated) user, update only their configuration.
func UpdateConfig(key, value string, ctx context.Context) error {
user := auth.GetUser(ctx)
if user == nil {
var val interface{}
if err := json.Unmarshal([]byte(value), &val); err != nil {
return err
}
lock.Lock()
defer lock.Unlock()
uiDefaults[key] = val
return nil
}
// Disabled because now `plot_list_selectedMetrics:<cluster>` is possible.
// if _, ok := uiDefaults[key]; !ok {
// return errors.New("this configuration key does not exist")
// }
if _, err := db.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
user.Username, key, value); err != nil {
return err
}
cache.Del(user.Username)
return nil
}
func GetCluster(cluster string) *model.Cluster {
for _, c := range Clusters {
if c.Name == cluster {
return c
}
}
return nil
}
func GetSubCluster(cluster, subcluster string) *model.SubCluster {
for _, c := range Clusters {
if c.Name == cluster {
for _, p := range c.SubClusters {
if p.Name == subcluster {
return p
}
}
}
}
return nil
}
func GetMetricConfig(cluster, metric string) *model.MetricConfig {
for _, c := range Clusters {
if c.Name == cluster {
for _, m := range c.MetricConfig {
if m.Name == metric {
return m
}
}
}
}
return nil
}
// AssignSubCluster sets the `job.subcluster` property of the job based
// on its cluster and resources.
func AssignSubCluster(job *schema.BaseJob) error {
cluster := GetCluster(job.Cluster)
if cluster == nil {
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
}
if job.SubCluster != "" {
for _, sc := range cluster.SubClusters {
if sc.Name == job.SubCluster {
return nil
}
}
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
}
if len(job.Resources) == 0 {
return fmt.Errorf("job without any resources/hosts")
}
host0 := job.Resources[0].Hostname
for sc, nl := range nodeLists[job.Cluster] {
if nl != nil && nl.Contains(host0) {
job.SubCluster = sc
return nil
}
}
if cluster.SubClusters[0].Nodes == "" {
job.SubCluster = cluster.SubClusters[0].Name
return nil
}
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
}
func GetSubClusterByNode(cluster, hostname string) (string, error) {
for sc, nl := range nodeLists[cluster] {
if nl != nil && nl.Contains(hostname) {
return sc, nil
}
}
c := GetCluster(cluster)
if c == nil {
return "", fmt.Errorf("unkown cluster: %#v", cluster)
}
if c.SubClusters[0].Nodes == "" {
return c.SubClusters[0].Name, nil
}
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
}


@@ -1,175 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package config
import (
"fmt"
"strconv"
"strings"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
type NodeList [][]interface {
consume(input string) (next string, ok bool)
}
func (nl *NodeList) Contains(name string) bool {
var ok bool
for _, term := range *nl {
str := name
for _, expr := range term {
str, ok = expr.consume(str)
if !ok {
break
}
}
if ok && str == "" {
return true
}
}
return false
}
type NLExprString string
func (nle NLExprString) consume(input string) (next string, ok bool) {
str := string(nle)
if strings.HasPrefix(input, str) {
return strings.TrimPrefix(input, str), true
}
return "", false
}
type NLExprIntRanges []NLExprIntRange
func (nles NLExprIntRanges) consume(input string) (next string, ok bool) {
for _, nle := range nles {
if next, ok := nle.consume(input); ok {
return next, ok
}
}
return "", false
}
type NLExprIntRange struct {
start, end int64
zeroPadded bool
digits int
}
func (nle NLExprIntRange) consume(input string) (next string, ok bool) {
if !nle.zeroPadded || nle.digits < 1 {
log.Error("node list: only zero-padded ranges are allowed")
return "", false
}
if len(input) < nle.digits {
return "", false
}
numerals, rest := input[:nle.digits], input[nle.digits:]
for len(numerals) > 1 && numerals[0] == '0' {
numerals = numerals[1:]
}
x, err := strconv.ParseInt(numerals, 10, 32)
if err != nil {
return "", false
}
if nle.start <= x && x <= nle.end {
return rest, true
}
return "", false
}
func ParseNodeList(raw string) (NodeList, error) {
isLetter := func(r byte) bool { return ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') }
isDigit := func(r byte) bool { return '0' <= r && r <= '9' }
rawterms := []string{}
prevterm := 0
for i := 0; i < len(raw); i++ {
if raw[i] == '[' {
for i < len(raw) && raw[i] != ']' {
i++
}
if i == len(raw) {
return nil, fmt.Errorf("node list: unclosed '['")
}
} else if raw[i] == ',' {
rawterms = append(rawterms, raw[prevterm:i])
prevterm = i + 1
}
}
if prevterm != len(raw) {
rawterms = append(rawterms, raw[prevterm:])
}
nl := NodeList{}
for _, rawterm := range rawterms {
exprs := []interface {
consume(input string) (next string, ok bool)
}{}
for i := 0; i < len(rawterm); i++ {
c := rawterm[i]
if isLetter(c) || isDigit(c) {
j := i
for j < len(rawterm) && (isLetter(rawterm[j]) || isDigit(rawterm[j])) {
j++
}
exprs = append(exprs, NLExprString(rawterm[i:j]))
i = j - 1
} else if c == '[' {
end := strings.Index(rawterm[i:], "]")
if end == -1 {
return nil, fmt.Errorf("node list: unclosed '['")
}
parts := strings.Split(rawterm[i+1:i+end], ",")
nles := NLExprIntRanges{}
for _, part := range parts {
minus := strings.Index(part, "-")
if minus == -1 {
return nil, fmt.Errorf("node list: no '-' found inside '[...]'")
}
s1, s2 := part[0:minus], part[minus+1:]
if len(s1) != len(s2) || len(s1) == 0 {
return nil, fmt.Errorf("node list: %#v and %#v are not of equal length or of length zero", s1, s2)
}
x1, err := strconv.ParseInt(s1, 10, 32)
if err != nil {
return nil, fmt.Errorf("node list: %w", err)
}
x2, err := strconv.ParseInt(s2, 10, 32)
if err != nil {
return nil, fmt.Errorf("node list: %w", err)
}
nles = append(nles, NLExprIntRange{
start: x1,
end: x2,
digits: len(s1),
zeroPadded: true,
})
}
exprs = append(exprs, nles)
i += end
} else {
return nil, fmt.Errorf("node list: invalid character: %#v", rune(c))
}
}
nl = append(nl, exprs)
}
return nl, nil
}


@@ -1,59 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package config
import (
"testing"
)
func TestNodeList(t *testing.T) {
nl, err := ParseNodeList("hallo,wel123t,emmy[01-99],fritz[005-500],woody[100-200]")
if err != nil {
t.Fatal(err)
}
if nl.Contains("hello") || nl.Contains("woody") {
t.Fail()
}
if nl.Contains("fritz1") || nl.Contains("fritz9") || nl.Contains("fritz004") || nl.Contains("woody201") {
t.Fail()
}
if !nl.Contains("hallo") || !nl.Contains("wel123t") {
t.Fail()
}
if !nl.Contains("emmy01") || !nl.Contains("emmy42") || !nl.Contains("emmy99") {
t.Fail()
}
if !nl.Contains("woody100") || !nl.Contains("woody199") {
t.Fail()
}
}
func TestNodeListCommasInBrackets(t *testing.T) {
nl, err := ParseNodeList("a[1000-2000,2010-2090,3000-5000]")
if err != nil {
t.Fatal(err)
}
if nl.Contains("hello") || nl.Contains("woody") {
t.Fatal("1")
}
if nl.Contains("a0") || nl.Contains("a0000") || nl.Contains("a5001") || nl.Contains("a2005") {
t.Fatal("2")
}
if !nl.Contains("a1001") || !nl.Contains("a2000") {
t.Fatal("3")
}
if !nl.Contains("a2042") || !nl.Contains("a4321") || !nl.Contains("a3000") {
t.Fatal("4")
}
}


@@ -15,10 +15,11 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -95,7 +96,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
}
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
if err := config.UpdateConfig(name, value, ctx); err != nil {
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, ctx); err != nil {
return nil, err
}
@@ -103,7 +104,7 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string,
}
func (r *queryResolver) Clusters(ctx context.Context) ([]*model.Cluster, error) {
return config.Clusters, nil
return archive.Clusters, nil
}
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
@@ -233,7 +234,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
}
if metrics == nil {
for _, mc := range config.GetCluster(cluster).MetricConfig {
for _, mc := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, mc.Name)
}
}
@@ -249,7 +250,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
Host: hostname,
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, _ = config.GetSubClusterByNode(cluster, hostname)
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
for metric, scopedMetrics := range metrics {
for _, scopedMetric := range scopedMetrics {


@@ -13,10 +13,10 @@ import (
"time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
@@ -36,7 +36,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
stats := map[string]*model.JobsStatistics{}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range config.Clusters {
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
var query sq.SelectBuilder


@@ -1,261 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricdata
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"math"
"os"
"path"
"path/filepath"
"strconv"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// For a given job, return the path of the `data.json`/`meta.json` file.
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
func getPath(job *schema.Job, file string, checkLegacy bool) (string, error) {
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
if !checkLegacy {
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
}
legacyPath := filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, file)
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
}
return legacyPath, nil
}
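
A worked example of the layout above, with illustrative values: a job with JobID 1234567 on cluster "emmy" started at Unix time 1662392400 resolves to

    <JobArchivePath>/emmy/1234/567/1662392400/data.json

while the legacy layout (checked first when checkLegacy is set, and returned if the file exists) omits the start-time directory:

    <JobArchivePath>/emmy/1234/567/data.json
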
// Assuming job is completed/archived, return the jobs metric data.
func loadFromArchive(job *schema.Job) (schema.JobData, error) {
filename, err := getPath(job, "data.json", true)
if err != nil {
return nil, err
}
data := cache.Get(filename, func() (value interface{}, ttl time.Duration, size int) {
f, err := os.Open(filename)
if err != nil {
return err, 0, 1000
}
defer f.Close()
var data schema.JobData
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&data); err != nil {
return err, 0, 1000
}
return data, 1 * time.Hour, data.Size()
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(schema.JobData), nil
}
func loadMetaJson(job *schema.Job) (*schema.JobMeta, error) {
filename, err := getPath(job, "meta.json", true)
if err != nil {
return nil, err
}
bytes, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
var metaFile schema.JobMeta = schema.JobMeta{
BaseJob: schema.JobDefaults,
}
if err := json.Unmarshal(bytes, &metaFile); err != nil {
return nil, err
}
return &metaFile, nil
}
// If the job is archived, find its `meta.json` file and override the tags list
// in that JSON file. If the job is not archived, nothing is done.
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
if job.State == schema.JobStateRunning {
return nil
}
filename, err := getPath(job, "meta.json", true)
if err != nil {
return err
}
f, err := os.Open(filename)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
var metaFile schema.JobMeta = schema.JobMeta{
BaseJob: schema.JobDefaults,
}
if err := json.NewDecoder(f).Decode(&metaFile); err != nil {
return err
}
f.Close()
metaFile.Tags = make([]*schema.Tag, 0)
for _, tag := range tags {
metaFile.Tags = append(metaFile.Tags, &schema.Tag{
Name: tag.Name,
Type: tag.Type,
})
}
bytes, err := json.Marshal(metaFile)
if err != nil {
return err
}
return os.WriteFile(filename, bytes, 0644)
}
// Helper to metricdata.LoadAverages().
func loadAveragesFromArchive(job *schema.Job, metrics []string, data [][]schema.Float) error {
metaFile, err := loadMetaJson(job)
if err != nil {
return err
}
for i, m := range metrics {
if stat, ok := metaFile.Statistics[m]; ok {
data[i] = append(data[i], schema.Float(stat.Avg))
} else {
data[i] = append(data[i], schema.NaN)
}
}
return nil
}
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := loadMetaJson(job)
if err != nil {
return nil, err
}
return metaFile.Statistics, nil
}
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := config.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
// TODO: Talk about this! What resolutions to store data at...
scopes := []schema.MetricScope{schema.MetricScopeNode}
if job.NumNodes <= 8 {
scopes = append(scopes, schema.MetricScopeCore)
}
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// TODO/FIXME: Calc average for non-node metrics as well!
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: config.GetMetricConfig(job.Cluster, metric).Unit,
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file-based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if !useArchive {
return jobMeta, nil
}
dir, err := getPath(job, "", false)
if err != nil {
return nil, err
}
return jobMeta, writeFiles(dir, jobMeta, &jobData)
}
func writeFiles(dir string, jobMeta *schema.JobMeta, jobData *schema.JobData) error {
if err := os.MkdirAll(dir, 0777); err != nil {
return err
}
f, err := os.Create(path.Join(dir, "meta.json"))
if err != nil {
return err
}
if err := json.NewEncoder(f).Encode(jobMeta); err != nil {
return err
}
if err := f.Close(); err != nil {
return err
}
f, err = os.Create(path.Join(dir, "data.json"))
if err != nil {
return err
}
if err := json.NewEncoder(f).Encode(jobData); err != nil {
return err
}
return f.Close()
}
// Used to import a non-running job into the job-archive.
func ImportJob(job *schema.JobMeta, jobData *schema.JobData) error {
dir, err := getPath(&schema.Job{
BaseJob: job.BaseJob,
StartTimeUnix: job.StartTime,
StartTime: time.Unix(job.StartTime, 0),
}, "", false)
if err != nil {
return err
}
return writeFiles(dir, job, jobData)
}


@@ -15,7 +15,7 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -149,7 +149,7 @@ func (ccms *CCMetricStore) doRequest(ctx context.Context, body *ApiQueryRequest)
}
func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
topology := config.GetSubCluster(job.Cluster, job.SubCluster).Topology
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
if err != nil {
return nil, err
@@ -175,7 +175,7 @@ func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []
query := req.Queries[i]
metric := ccms.toLocalName(query.Metric)
scope := assignedScope[i]
mc := config.GetMetricConfig(job.Cluster, metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if _, ok := jobData[metric]; !ok {
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
}
@@ -252,12 +252,12 @@ var (
func (ccms *CCMetricStore) buildQueries(job *schema.Job, metrics []string, scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
topology := config.GetSubCluster(job.Cluster, job.SubCluster).Topology
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
assignedScope := []schema.MetricScope{}
for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric)
mc := config.GetMetricConfig(job.Cluster, metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
@@ -584,7 +584,7 @@ func (ccms *CCMetricStore) LoadNodeData(cluster string, metrics, nodes []string,
data[query.Hostname] = hostdata
}
mc := config.GetMetricConfig(cluster, metric)
mc := archive.GetMetricConfig(cluster, metric)
hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
Unit: mc.Unit,
Scope: schema.MetricScopeNode,


@@ -14,7 +14,7 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
@@ -124,7 +124,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *schema.Job, metrics []string,
for _, metric := range metrics {
jobMetric, ok := jobData[metric]
if !ok {
mc := config.GetMetricConfig(job.Cluster, metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
jobMetric = map[schema.MetricScope]*schema.JobMetric{
scope: { // uses scope var from above!
Unit: mc.Unit,


@@ -8,9 +8,11 @@ import (
"context"
"encoding/json"
"fmt"
"math"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -33,14 +35,11 @@ type MetricDataRepository interface {
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
var JobArchivePath string
var useArchive bool
func Init(jobArchivePath string, disableArchive bool) error {
func Init(disableArchive bool) error {
useArchive = !disableArchive
JobArchivePath = jobArchivePath
for _, cluster := range config.Clusters {
for _, cluster := range config.Keys.Clusters {
if cluster.MetricDataRepository != nil {
var kind struct {
Kind string `json:"kind"`
@@ -73,97 +72,88 @@ func Init(jobArchivePath string, disableArchive bool) error {
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
// Fetches the metric data for a job.
func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
!useArchive {
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
}
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
var jd schema.JobData
var err error
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
!useArchive {
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
}
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
if metrics == nil {
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
if len(jd) != 0 {
log.Errorf("partial error: %s", err.Error())
} else {
return nil, err
}
}
} else {
jd, err = archive.GetHandle().LoadJobData(job)
if err != nil {
return nil, err
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
if metrics == nil {
cluster := config.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
if len(jd) != 0 {
log.Errorf("partial error: %s", err.Error())
} else {
return err, 0, 0
}
}
size = jd.Size()
} else {
jd, err = loadFromArchive(job)
if err != nil {
return err, 0, 0
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
if metrics == nil {
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
if len(subset) > 0 {
perscope = subset
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
res[metric] = perscope
if len(subset) > 0 {
perscope = subset
}
}
res[metric] = perscope
}
jd = res
}
size = 1 // loadFromArchive() caches in the same cache.
jd = res
}
ttl = 5 * time.Hour
if job.State == schema.JobStateRunning {
ttl = 2 * time.Minute
}
prepareJobData(job, jd, scopes)
return jd, ttl, size
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(schema.JobData), nil
prepareJobData(job, jd, scopes)
return jd, nil
}
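
A minimal caller sketch for the refactored LoadData; the job value is assumed to come from the job repository, the metric names are illustrative, and imports mirror this file's. Note that, unlike the old version, the refactored function returns the data directly instead of going through the lrucache wrapper:

    func printJobMetrics(job *schema.Job) error {
        data, err := metricdata.LoadData(job,
            []string{"flops_any", "mem_bw"},              // illustrative metric names
            []schema.MetricScope{schema.MetricScopeNode}, // node scope only
            context.Background())
        if err != nil {
            return err
        }
        // data maps metric name -> scope -> *schema.JobMetric
        for name, perScope := range data {
            log.Infof("%s: %d scope(s)", name, len(perScope))
        }
        return nil
    }
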
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
if job.State != schema.JobStateRunning && useArchive {
return loadAveragesFromArchive(job, metrics, data)
return archive.LoadAveragesFromArchive(job, metrics, data)
}
repo, ok := metricDataRepos[job.Cluster]
@@ -201,7 +191,7 @@ func LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.Metri
}
if metrics == nil {
for _, m := range config.GetCluster(cluster).MetricConfig {
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}
@@ -256,3 +246,60 @@ func prepareJobData(job *schema.Job, jobData schema.JobData, scopes []schema.Met
jobData.AddNodeScope("mem_bw")
}
}
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
// TODO: Talk about this! What resolutions to store data at...
scopes := []schema.MetricScope{schema.MetricScopeNode}
if job.NumNodes <= 8 {
scopes = append(scopes, schema.MetricScopeCore)
}
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// TODO/FIXME: Calc average for non-node metrics as well!
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: archive.GetMetricConfig(job.Cluster, metric).Unit,
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file-based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if !useArchive {
return jobMeta, nil
}
return jobMeta, archive.Import(jobMeta, &jobData)
}


@@ -1,159 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
// Import all jobs specified as `<path-to-meta.json>:<path-to-data.json>,...`
func (r *JobRepository) HandleImportFlag(flag string) error {
for _, pair := range strings.Split(flag, ",") {
files := strings.Split(pair, ":")
if len(files) != 2 {
return fmt.Errorf("invalid import flag format")
}
raw, err := os.ReadFile(files[0])
if err != nil {
return err
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := dec.Decode(&jobMeta); err != nil {
return err
}
raw, err = os.ReadFile(files[1])
if err != nil {
return err
}
dec = json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobData := schema.JobData{}
if err := dec.Decode(&jobData); err != nil {
return err
}
if err := r.ImportJob(&jobMeta, &jobData); err != nil {
return err
}
}
return nil
}
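
A sketch of driving the importer directly, assuming HandleImportFlag remains available on *JobRepository after this refactor; the file paths are illustrative:

    func importPairs() {
        repo := repository.GetJobRepository()
        // Comma-separated <meta.json>:<data.json> pairs, as described above.
        if err := repo.HandleImportFlag("./job1-meta.json:./job1-data.json"); err != nil {
            log.Fatal(err) // stdlib log assumed for the sketch
        }
    }
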
func (r *JobRepository) ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) (err error) {
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
if err := metricdata.ImportJob(jobMeta, jobData); err != nil {
return err
}
if job, err := r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
if err != nil {
return err
}
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
}
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return err
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
return err
}
res, err := r.DB.NamedExec(NamedJobInsert, job)
if err != nil {
return err
}
id, err := res.LastInsertId()
if err != nil {
return err
}
for _, tag := range job.Tags {
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
return err
}
}
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
return nil
}
// This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error {
if c := config.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %#v", job.Cluster)
}
if err := config.AssignSubCluster(job); err != nil {
return err
}
if !job.State.Valid() {
return fmt.Errorf("not a valid job state: %#v", job.State)
}
if len(job.Resources) == 0 || len(job.User) == 0 {
return fmt.Errorf("'resources' and 'user' should not be empty")
}
if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 {
return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
}
if len(job.Resources) != int(job.NumNodes) {
return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
}
return nil
}
func loadJobStat(job *schema.JobMeta, metric string) float64 {
if stats, ok := job.Statistics[metric]; ok {
return stats.Avg
}
return 0.0
}


@@ -12,6 +12,7 @@ import (
"path/filepath"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/jmoiron/sqlx"
@@ -78,31 +79,32 @@ const JobsDbIndexes string = `
CREATE INDEX job_by_job_id ON job (job_id);
CREATE INDEX job_by_state ON job (job_state);
`
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
func InitDB(db *sqlx.DB, archive string) error {
func InitDB() error {
db := GetConnection()
starttime := time.Now()
log.Print("Building job table...")
// Basic database structure:
_, err := db.Exec(JobsDBSchema)
if err != nil {
return err
}
clustersDir, err := os.ReadDir(archive)
if err != nil {
return err
}
_, err := db.DB.Exec(JobsDBSchema)
if err != nil {
return err
}
// Inserts are bundled into transactions because in sqlite,
// that speeds up inserts A LOT.
tx, err := db.Beginx()
tx, err := db.DB.Beginx()
if err != nil {
return err
}
@@ -116,9 +118,12 @@ func InitDB(db *sqlx.DB, archive string) error {
// this function is only ever called when a special command line flag
// is passed anyways.
fmt.Printf("%d jobs inserted...\r", 0)
i := 0
tags := make(map[string]int64)
handleDirectory := func(filename string) error {
ar := archive.GetHandle()
i := 0
for jobMeta := range ar.Iter() {
// Bundle 100 inserts into one transaction for better performance:
if i%100 == 0 {
if tx != nil {
@@ -127,7 +132,7 @@ func InitDB(db *sqlx.DB, archive string) error {
}
}
tx, err = db.Beginx()
tx, err = db.DB.Beginx()
if err != nil {
return err
}
@@ -136,52 +141,65 @@ func InitDB(db *sqlx.DB, archive string) error {
fmt.Printf("%d jobs inserted...\r", i)
}
err := loadJob(tx, stmt, tags, filename)
if err == nil {
i += 1
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
return err
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
for _, clusterDir := range clustersDir {
lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return err
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
// Could be the cluster.json file
continue
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return err
}
lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
if err != nil {
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
return err
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
res, err := db.DB.NamedExec(NamedJobInsert, job)
if err != nil {
return err
}
id, err := res.LastInsertId()
if err != nil {
return err
}
for _, tag := range job.Tags {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
if !ok {
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil {
return err
}
// For compatibility with the old job-archive directory structure where
// there was no start time directory.
for _, startTimeDir := range startTimeDirs {
if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
if err := handleDirectory(dirpath); err != nil {
log.Errorf("in %s: %s", dirpath, err.Error())
}
} else if startTimeDir.IsDir() {
if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
}
}
tagId, err = res.LastInsertId()
if err != nil {
return err
}
tags[tagstr] = tagId
}
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
return err
}
}
if err == nil {
i += 1
}
}
@@ -191,7 +209,7 @@ func InitDB(db *sqlx.DB, archive string) error {
// Create indexes after inserts so that they do not
// need to be continually updated.
if _, err := db.Exec(JobsDbIndexes); err != nil {
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
return err
}
@@ -202,7 +220,10 @@ func InitDB(db *sqlx.DB, archive string) error {
// TODO: Remove double logic, use repository/import.go!
// Read the `meta.json` file at `path` and insert it to the database using the prepared
// insert statement `stmt`. `tags` maps all existing tags to their database ID.
func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path string) error {
func loadJob(tx *sqlx.Tx,
stmt *sqlx.NamedStmt,
tags map[string]int64,
path string) error {
f, err := os.Open(filepath.Join(path, "meta.json"))
if err != nil {
return err
@@ -273,3 +294,35 @@ func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path stri
return nil
}
// This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error {
if c := archive.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %#v", job.Cluster)
}
if err := archive.AssignSubCluster(job); err != nil {
return err
}
if !job.State.Valid() {
return fmt.Errorf("not a valid job state: %#v", job.State)
}
if len(job.Resources) == 0 || len(job.User) == 0 {
return fmt.Errorf("'resources' and 'user' should not be empty")
}
if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 {
return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
}
if len(job.Resources) != int(job.NumNodes) {
return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
}
return nil
}
func loadJobStat(job *schema.JobMeta, metric string) float64 {
if stats, ok := job.Statistics[metric]; ok {
return stats.Avg
}
return 0.0
}


@@ -35,7 +35,7 @@ type JobRepository struct {
cache *lrucache.Cache
}
func GetRepository() *JobRepository {
func GetJobRepository() *JobRepository {
jobRepoOnce.Do(func() {
db := GetConnection()


@@ -19,7 +19,7 @@ func init() {
}
func setup(t *testing.T) *JobRepository {
return GetRepository()
return GetJobRepository()
}
func TestFind(t *testing.T) {


@@ -5,7 +5,7 @@
package repository
import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
@@ -26,7 +26,7 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
return nil, err
}
return tags, metricdata.UpdateTags(j, tags)
return tags, archive.UpdateTags(j, tags)
}
// Removes a tag from a job
@@ -45,7 +45,7 @@ func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
return nil, err
}
return tags, metricdata.UpdateTags(j, tags)
return tags, archive.UpdateTags(j, tags)
}
// CreateTag creates a new tag with the specified type and name and returns its database id.

internal/repository/user.go (new file, 140 lines)

@@ -0,0 +1,140 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"encoding/json"
"log"
"net/http"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/jmoiron/sqlx"
)
var (
userCfgRepoOnce sync.Once
userCfgRepoInstance *UserCfgRepo
)
type UserCfgRepo struct {
DB *sqlx.DB
Lookup *sqlx.Stmt
lock sync.RWMutex
uiDefaults map[string]interface{}
cache *lrucache.Cache
}
func GetUserCfgRepo() *UserCfgRepo {
userCfgRepoOnce.Do(func() {
db := GetConnection()
_, err := db.DB.Exec(`
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
if err != nil {
log.Fatal(err)
}
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
if err != nil {
log.Fatal(err)
}
userCfgRepoInstance = &UserCfgRepo{
DB: db.DB,
Lookup: lookupConfigStmt,
cache: lrucache.New(1024),
}
})
return userCfgRepoInstance
}
// Return the personalised UI config for the currently authenticated
// user or return the plain default config.
func (uCfg *UserCfgRepo) GetUIConfig(r *http.Request) (map[string]interface{}, error) {
user := auth.GetUser(r.Context())
if user == nil {
uCfg.lock.RLock()
copy := make(map[string]interface{}, len(uCfg.uiDefaults))
for k, v := range uCfg.uiDefaults {
copy[k] = v
}
uCfg.lock.RUnlock()
return copy, nil
}
data := uCfg.cache.Get(user.Username, func() (interface{}, time.Duration, int) {
config := make(map[string]interface{}, len(uCfg.uiDefaults))
for k, v := range uCfg.uiDefaults {
config[k] = v
}
rows, err := uCfg.Lookup.Query(user.Username)
if err != nil {
return err, 0, 0
}
size := 0
defer rows.Close()
for rows.Next() {
var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil {
return err, 0, 0
}
var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
return err, 0, 0
}
size += len(key)
size += len(rawval)
config[key] = val
}
return config, 24 * time.Hour, size
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(map[string]interface{}), nil
}
// If the context does not have a user, update the global ui configuration
// without persisting it! If there is an (authenticated) user, update only
// their configuration.
func (uCfg *UserCfgRepo) UpdateConfig(key, value string, ctx context.Context) error {
user := auth.GetUser(ctx)
if user == nil {
var val interface{}
if err := json.Unmarshal([]byte(value), &val); err != nil {
return err
}
uCfg.lock.Lock()
defer uCfg.lock.Unlock()
uCfg.uiDefaults[key] = val
return nil
}
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
user.Username, key, value); err != nil {
return err
}
uCfg.cache.Del(user.Username)
return nil
}
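
A short usage sketch for the new repository from inside an HTTP handler; the handler itself is hypothetical and assumes the usual net/http and encoding/json imports:

    func uiConfigHandler(rw http.ResponseWriter, r *http.Request) {
        uCfg := repository.GetUserCfgRepo()

        // Per-user (or default) UI settings for the incoming request:
        conf, err := uCfg.GetUIConfig(r)
        if err != nil {
            http.Error(rw, err.Error(), http.StatusInternalServerError)
            return
        }
        json.NewEncoder(rw).Encode(conf)

        // Persist one key for the authenticated user in the request context;
        // the value must be valid JSON ("100" decodes to the number 100).
        if err := uCfg.UpdateConfig("plot_list_jobsPerPage", "100", r.Context()); err != nil {
            http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
            return
        }
    }
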


@@ -13,10 +13,10 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/web"
@@ -55,7 +55,7 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
TotalJobs int
RecentShortJobs int
}
jobRepo := repository.GetRepository()
jobRepo := repository.GetJobRepository()
runningJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
State: []schema.JobState{schema.JobStateRunning},
@@ -80,7 +80,7 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
}
clusters := make([]cluster, 0)
for _, c := range config.Clusters {
for _, c := range archive.Clusters {
clusters = append(clusters, cluster{
Name: c.Name,
RunningJobs: runningJobs[c.Name],
@@ -99,7 +99,7 @@ func setupJobRoute(i InfoType, r *http.Request) InfoType {
}
func setupUserRoute(i InfoType, r *http.Request) InfoType {
jobRepo := repository.GetRepository()
jobRepo := repository.GetJobRepository()
username := mux.Vars(r)["id"]
i["id"] = username
i["username"] = username
@@ -142,7 +142,7 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
var username *string = nil
jobRepo := repository.GetRepository()
jobRepo := repository.GetJobRepository()
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleAdmin) {
username = &user.Username
}
@@ -254,10 +254,11 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
}
func SetupRoutes(router *mux.Router) {
userCfgRepo := repository.GetUserCfgRepo()
for _, route := range routes {
route := route
router.HandleFunc(route.Route, func(rw http.ResponseWriter, r *http.Request) {
conf, err := config.GetUIConfig(r)
conf, err := userCfgRepo.GetUIConfig(r)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return