Merge pull request #289 from ClusterCockpit/dev

Update Q4 2024
This commit is contained in:
Jan Eitzinger
2024-11-09 09:05:25 +01:00
committed by GitHub
161 changed files with 18303 additions and 9125 deletions

View File

@@ -7,7 +7,7 @@ jobs:
- name: Install Go
uses: actions/setup-go@v4
with:
go-version: 1.20.x
go-version: 1.22.x
- name: Checkout code
uses: actions/checkout@v3
- name: Build, Vet & Test

View File

@@ -2,7 +2,7 @@ TARGET = ./cc-backend
VAR = ./var
CFG = config.json .env
FRONTEND = ./web/frontend
VERSION = 1.3.1
VERSION = 1.4.0
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
@@ -22,11 +22,21 @@ SVELTE_COMPONENTS = status \
header
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/*.js) \
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/*.js) \
$(wildcard $(FRONTEND)/src/analysis/*.svelte) \
$(wildcard $(FRONTEND)/src/config/*.svelte) \
$(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/*.js) \
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
$(wildcard $(FRONTEND)/src/header/*.svelte) \
$(wildcard $(FRONTEND)/src/job/*.svelte)
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)

View File

@@ -18,6 +18,7 @@ type Job {
numNodes: Int!
numHWThreads: Int!
numAcc: Int!
energy: Float!
SMT: Int!
exclusive: Int!
partition: String!
@@ -27,12 +28,8 @@ type Job {
tags: [Tag!]!
resources: [Resource!]!
concurrentJobs: JobLinkResultList
memUsedMax: Float
flopsAnyAvg: Float
memBwAvg: Float
loadAvg: Float
footprint: [FootprintValue]
energyFootprint: [EnergyFootprintValue]
metaData: Any
userData: User
}
@@ -45,7 +42,6 @@ type JobLink {
type Cluster {
name: String!
partitions: [String!]! # Slurm partitions
metricConfig: [MetricConfig!]!
subClusters: [SubCluster!]! # Hardware partitions/subclusters
}
@@ -61,9 +57,24 @@ type SubCluster {
flopRateSimd: MetricValue!
memoryBandwidth: MetricValue!
topology: Topology!
metricConfig: [MetricConfig!]!
footprint: [String!]!
}
type FootprintValue {
name: String!
stat: String!
value: Float!
}
type EnergyFootprintValue {
hardware: String!
metric: String!
value: Float!
}
type MetricValue {
name: String
unit: Unit!
value: Float!
}
@@ -102,6 +113,7 @@ type MetricConfig {
normal: Float
caution: Float!
alert: Float!
lowerIsBetter: Boolean
subClusters: [SubClusterConfig!]!
}
@@ -109,6 +121,7 @@ type Tag {
id: ID!
type: String!
name: String!
scope: String!
}
type Resource {
@@ -150,9 +163,10 @@ type MetricStatistics {
}
type StatsSeries {
mean: [NullableFloat!]!
min: [NullableFloat!]!
max: [NullableFloat!]!
mean: [NullableFloat!]!
median: [NullableFloat!]!
min: [NullableFloat!]!
max: [NullableFloat!]!
}
type MetricFootprints {
@@ -180,6 +194,19 @@ type NodeMetrics {
metrics: [JobMetricWithName!]!
}
type ClusterSupport {
cluster: String!
subClusters: [String!]!
}
type GlobalMetricListItem {
name: String!
unit: Unit!
scope: MetricScope!
footprint: String
availability: [ClusterSupport!]!
}
type Count {
name: String!
count: Int!
@@ -191,15 +218,21 @@ type User {
email: String!
}
input MetricStatItem {
metricName: String!
range: FloatRange!
}
type Query {
clusters: [Cluster!]! # List of all clusters
tags: [Tag!]! # List of all tags
globalMetrics: [GlobalMetricListItem!]!
user(username: String!): User
allocatedNodes(cluster: String!): [Count!]!
job(id: ID!): Job
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
@@ -211,7 +244,7 @@ type Query {
}
type Mutation {
createTag(type: String!, name: String!): Tag!
createTag(type: String!, name: String!, scope: String!): Tag!
deleteTag(id: ID!): ID!
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
@@ -220,7 +253,7 @@ type Mutation {
}
type IntRangeOutput { from: Int!, to: Int! }
type TimeRangeOutput { from: Time!, to: Time! }
type TimeRangeOutput { range: String, from: Time!, to: Time! }
input JobFilter {
tags: [ID!]
@@ -232,6 +265,7 @@ input JobFilter {
cluster: StringInput
partition: StringInput
duration: IntRange
energy: FloatRange
minRunningFor: Int
@@ -241,17 +275,14 @@ input JobFilter {
startTime: TimeRange
state: [JobState!]
flopsAnyAvg: FloatRange
memBwAvg: FloatRange
loadAvg: FloatRange
memUsedMax: FloatRange
metricStats: [MetricStatItem!]
exclusive: Int
node: StringInput
}
input OrderByInput {
field: String!
type: String!,
order: SortDirectionEnum! = ASC
}
@@ -269,9 +300,13 @@ input StringInput {
in: [String!]
}
input IntRange { from: Int!, to: Int! }
input FloatRange { from: Float!, to: Float! }
input TimeRange { from: Time, to: Time }
input IntRange { from: Int!, to: Int! }
input TimeRange { range: String, from: Time, to: Time }
input FloatRange {
from: Float!
to: Float!
}
type JobResultList {
items: [Job!]!
@@ -295,6 +330,7 @@ type HistoPoint {
type MetricHistoPoints {
metric: String!
unit: String!
stat: String
data: [MetricHistoPoint!]
}

33
cmd/cc-backend/cli.go Normal file
View File

@@ -0,0 +1,33 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import "flag"
var (
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
)
func cliInit() {
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
flag.Parse()
}

85
cmd/cc-backend/init.go Normal file
View File

@@ -0,0 +1,85 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
"os"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
const envString = `
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using the gen-keypair tool
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
`
const configString = `
{
"addr": "127.0.0.1:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2000h"
},
"clusters": [
{
"name": "name",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2023-01-01T00:00:00Z",
"to": null
}
}
}
]
}
`
func initEnv() {
if util.CheckFileExists("var") {
fmt.Print("Directory ./var already exists. Exiting!\n")
os.Exit(0)
}
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
log.Fatalf("Writing config.json failed: %s", err.Error())
}
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
log.Fatalf("Writing .env failed: %s", err.Error())
}
if err := os.Mkdir("var", 0o777); err != nil {
log.Fatalf("Mkdir var failed: %s", err.Error())
}
err := repository.MigrateDB("sqlite3", "./var/job.db")
if err != nil {
log.Fatalf("Initialize job.db failed: %s", err.Error())
}
}

View File

@@ -5,158 +5,48 @@
package main
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"net"
"net/http"
"os"
"os/signal"
"runtime"
"runtime/debug"
"strings"
"sync"
"syscall"
"time"
"github.com/99designs/gqlgen/graphql/handler"
"github.com/99designs/gqlgen/graphql/playground"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/internal/taskManager"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/web"
"github.com/go-co-op/gocron"
"github.com/google/gops/agent"
"github.com/gorilla/handlers"
"github.com/gorilla/mux"
httpSwagger "github.com/swaggo/http-swagger"
_ "github.com/go-sql-driver/mysql"
_ "github.com/mattn/go-sqlite3"
)
const logoString = `
____ _ _ ____ _ _ _
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
_____ _ _ ____ _ _ _
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
\_____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|_|
`
const envString = `
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using the gen-keypair tool
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
`
const configString = `
{
"addr": "127.0.0.1:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2000h"
},
"clusters": [
{
"name": "name",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2023-01-01T00:00:00Z",
"to": null
}
}
}
]
}
`
var (
date string
commit string
version string
)
func initEnv() {
if util.CheckFileExists("var") {
fmt.Print("Directory ./var already exists. Exiting!\n")
os.Exit(0)
}
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
log.Fatalf("Writing config.json failed: %s", err.Error())
}
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
log.Fatalf("Writing .env failed: %s", err.Error())
}
if err := os.Mkdir("var", 0o777); err != nil {
log.Fatalf("Mkdir var failed: %s", err.Error())
}
err := repository.MigrateDB("sqlite3", "./var/job.db")
if err != nil {
log.Fatalf("Initialize job.db failed: %s", err.Error())
}
}
func main() {
var flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
flag.Parse()
cliInit()
if flagVersion {
fmt.Print(logoString)
@@ -171,14 +61,6 @@ func main() {
// Apply config flags for pkg/log
log.Init(flagLogLevel, flagLogDateTime)
if flagInit {
initEnv()
fmt.Print("Succesfully setup environment!\n")
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
fmt.Print("Add your job-archive at ./var/job-archive.\n")
os.Exit(0)
}
// See https://github.com/google/gops (Runtime overhead is almost zero)
if flagGops {
if err := agent.Listen(agent.Options{}); err != nil {
@@ -227,18 +109,18 @@ func main() {
}
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
db := repository.GetConnection()
var authentication *auth.Authentication
if flagInit {
initEnv()
fmt.Print("Succesfully setup environment!\n")
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
fmt.Print("Add your job-archive at ./var/job-archive.\n")
os.Exit(0)
}
if !config.Keys.DisableAuthentication {
var err error
if authentication, err = auth.Init(); err != nil {
log.Fatalf("auth initialization failed: %v", err)
}
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
authentication.SessionMaxAge = d
}
auth.Init()
if flagNewUser != "" {
parts := strings.SplitN(flagNewUser, ":", 3)
@@ -260,12 +142,14 @@ func main() {
}
}
authHandle := auth.GetAuthInstance()
if flagSyncLDAP {
if authentication.LdapAuth == nil {
if authHandle.LdapAuth == nil {
log.Fatal("cannot sync: LDAP authentication is not configured")
}
if err := authentication.LdapAuth.Sync(); err != nil {
if err := authHandle.LdapAuth.Sync(); err != nil {
log.Fatalf("LDAP sync failed: %v", err)
}
log.Info("LDAP sync successfull")
@@ -282,7 +166,7 @@ func main() {
log.Warnf("user '%s' does not have the API role", user.Username)
}
jwt, err := authentication.JwtAuth.ProvideJWT(user)
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
if err != nil {
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
}
@@ -298,7 +182,7 @@ func main() {
log.Fatalf("failed to initialize archive: %s", err.Error())
}
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
if err := metricdata.Init(); err != nil {
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
}
@@ -318,228 +202,16 @@ func main() {
return
}
// Setup the http.Handler/Router used by the server
jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
if os.Getenv("DEBUG") != "1" {
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
// The problem with this is that then, no more stacktrace is printed to stderr.
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
switch e := err.(type) {
case string:
return fmt.Errorf("MAIN > Panic: %s", e)
case error:
return fmt.Errorf("MAIN > Panic caused by: %w", e)
}
return errors.New("MAIN > Internal server error (panic)")
})
}
api := &api.RestApi{
JobRepository: jobRepo,
Resolver: resolver,
MachineStateDir: config.Keys.MachineStateDir,
Authentication: authentication,
}
r := mux.NewRouter()
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
info := map[string]interface{}{}
info["hasOpenIDConnect"] = false
if config.Keys.OpenIDConfig != nil {
openIDConnect := auth.NewOIDC(authentication)
openIDConnect.RegisterEndpoints(r)
info["hasOpenIDConnect"] = true
}
r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
log.Debugf("##%v##", info)
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
}).Methods(http.MethodGet)
r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
})
r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
})
secured := r.PathPrefix("/").Subrouter()
if !config.Keys.DisableAuthentication {
r.Handle("/login", authentication.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
})).Methods(http.MethodPost)
r.Handle("/jwt-login", authentication.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
}))
r.Handle("/logout", authentication.Logout(
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusOK)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Bye - ClusterCockpit",
MsgType: "alert-info",
Message: "Logout successful",
Build: buildInfo,
Infos: info,
})
}))).Methods(http.MethodPost)
secured.Use(func(next http.Handler) http.Handler {
return authentication.Auth(
// On success;
next,
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Authentication failed - ClusterCockpit",
MsgType: "alert-danger",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
})
})
}
if flagDev {
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
}
secured.Handle("/query", graphQLEndpoint)
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
routerConfig.HandleSearchBar(rw, r, buildInfo)
})
// Mount all /monitoring/... and /api/... routes.
routerConfig.SetupRoutes(secured, buildInfo)
api.MountRoutes(secured)
if config.Keys.EmbedStaticFiles {
if i, err := os.Stat("./var/img"); err == nil {
if i.IsDir() {
log.Info("Use local directory for static images")
r.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
}
}
r.PathPrefix("/").Handler(web.ServeFiles())
} else {
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
}
r.Use(handlers.CompressHandler)
r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
r.Use(handlers.CORS(
handlers.AllowCredentials(),
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
handlers.AllowedOrigins([]string{"*"})))
handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
} else {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
}
})
archiver.Start(repository.GetJobRepository())
taskManager.Start()
serverInit()
var wg sync.WaitGroup
server := http.Server{
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
Handler: handler,
Addr: config.Keys.Addr,
}
// Start http or https server
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
log.Fatalf("starting http listener failed: %v", err)
}
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
go func() {
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
}()
}
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
if err != nil {
log.Fatalf("loading X509 keypair failed: %v", err)
}
listener = tls.NewListener(listener, &tls.Config{
Certificates: []tls.Certificate{cert},
CipherSuites: []uint16{
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
},
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
})
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
} else {
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
}
// Because this program will want to bind to a privileged port (like 80), the listener must
// be established first, then the user can be changed, and after that,
// the actual http server can be started.
if err = runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
log.Fatalf("error while preparing server start: %s", err.Error())
}
wg.Add(1)
go func() {
defer wg.Done()
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
log.Fatalf("starting server failed: %v", err)
}
serverStart()
}()
wg.Add(1)
@@ -550,113 +222,11 @@ func main() {
<-sigs
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
// First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background())
serverShutdown()
// Then, wait for any async archivings still pending...
api.JobRepository.WaitForArchiving()
taskManager.Shutdown()
}()
s := gocron.NewScheduler(time.Local)
if config.Keys.StopJobsExceedingWalltime > 0 {
log.Info("Register undead jobs service")
s.Every(1).Day().At("3:00").Do(func() {
err = jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
if err != nil {
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
}
runtime.GC()
})
}
var cfg struct {
Retention schema.Retention `json:"retention"`
Compression int `json:"compression"`
}
cfg.Retention.IncludeDB = true
if err = json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
}
switch cfg.Retention.Policy {
case "delete":
log.Info("Register retention delete service")
s.Every(1).Day().At("4:00").Do(func() {
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().CleanUp(jobs)
if cfg.Retention.IncludeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %s", err.Error())
}
}
})
case "move":
log.Info("Register retention move service")
s.Every(1).Day().At("4:00").Do(func() {
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().Move(jobs, cfg.Retention.Location)
if cfg.Retention.IncludeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %v", err)
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %v", err)
}
}
})
}
if cfg.Compression > 0 {
log.Info("Register compression service")
s.Every(1).Day().At("5:00").Do(func() {
var jobs []*schema.Job
ar := archive.GetHandle()
startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
lastTime := ar.CompressLast(startTime)
if startTime == lastTime {
log.Info("Compression Service - Complete archive run")
jobs, err = jobRepo.FindJobsBetween(0, startTime)
} else {
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
}
if err != nil {
log.Warnf("Error while looking for compression jobs: %v", err)
}
ar.Compress(jobs)
})
}
s.StartAsync()
if os.Getenv("GOGC") == "" {
debug.SetGCPercent(25)
}

321
cmd/cc-backend/server.go Normal file
View File

@@ -0,0 +1,321 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"strings"
"time"
"github.com/99designs/gqlgen/graphql/handler"
"github.com/99designs/gqlgen/graphql/playground"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/web"
"github.com/gorilla/handlers"
"github.com/gorilla/mux"
httpSwagger "github.com/swaggo/http-swagger"
)
var (
router *mux.Router
server *http.Server
apiHandle *api.RestApi
)
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusUnauthorized)
json.NewEncoder(rw).Encode(map[string]string{
"status": http.StatusText(http.StatusUnauthorized),
"error": err.Error(),
})
}
func serverInit() {
// Setup the http.Handler/Router used by the server
graph.Init()
resolver := graph.GetResolverInstance()
graphQLEndpoint := handler.NewDefaultServer(
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
if os.Getenv("DEBUG") != "1" {
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
// The problem with this is that then, no more stacktrace is printed to stderr.
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
switch e := err.(type) {
case string:
return fmt.Errorf("MAIN > Panic: %s", e)
case error:
return fmt.Errorf("MAIN > Panic caused by: %w", e)
}
return errors.New("MAIN > Internal server error (panic)")
})
}
authHandle := auth.GetAuthInstance()
apiHandle = api.New()
router = mux.NewRouter()
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
info := map[string]interface{}{}
info["hasOpenIDConnect"] = false
if config.Keys.OpenIDConfig != nil {
openIDConnect := auth.NewOIDC(authHandle)
openIDConnect.RegisterEndpoints(router)
info["hasOpenIDConnect"] = true
}
router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
log.Debugf("##%v##", info)
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
}).Methods(http.MethodGet)
router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
})
router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
})
secured := router.PathPrefix("/").Subrouter()
securedapi := router.PathPrefix("/api").Subrouter()
userapi := router.PathPrefix("/userapi").Subrouter()
configapi := router.PathPrefix("/config").Subrouter()
frontendapi := router.PathPrefix("/frontend").Subrouter()
if !config.Keys.DisableAuthentication {
router.Handle("/login", authHandle.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
})).Methods(http.MethodPost)
router.Handle("/jwt-login", authHandle.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
}))
router.Handle("/logout", authHandle.Logout(
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusOK)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Bye - ClusterCockpit",
MsgType: "alert-info",
Message: "Logout successful",
Build: buildInfo,
Infos: info,
})
}))).Methods(http.MethodPost)
secured.Use(func(next http.Handler) http.Handler {
return authHandle.Auth(
// On success;
next,
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Authentication failed - ClusterCockpit",
MsgType: "alert-danger",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
})
})
securedapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
userapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthUserApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
configapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthConfigApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
frontendapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthFrontendApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
}
if flagDev {
router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
}
secured.Handle("/query", graphQLEndpoint)
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
routerConfig.HandleSearchBar(rw, r, buildInfo)
})
// Mount all /monitoring/... and /api/... routes.
routerConfig.SetupRoutes(secured, buildInfo)
apiHandle.MountApiRoutes(securedapi)
apiHandle.MountUserApiRoutes(userapi)
apiHandle.MountConfigApiRoutes(configapi)
apiHandle.MountFrontendApiRoutes(frontendapi)
if config.Keys.EmbedStaticFiles {
if i, err := os.Stat("./var/img"); err == nil {
if i.IsDir() {
log.Info("Use local directory for static images")
router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
}
}
router.PathPrefix("/").Handler(web.ServeFiles())
} else {
router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
}
router.Use(handlers.CompressHandler)
router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
router.Use(handlers.CORS(
handlers.AllowCredentials(),
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
handlers.AllowedOrigins([]string{"*"})))
}
func serverStart() {
handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
} else {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
}
})
server = &http.Server{
ReadTimeout: 20 * time.Second,
WriteTimeout: 20 * time.Second,
Handler: handler,
Addr: config.Keys.Addr,
}
// Start http or https server
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
log.Fatalf("starting http listener failed: %v", err)
}
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
go func() {
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
}()
}
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(
config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
if err != nil {
log.Fatalf("loading X509 keypair failed: %v", err)
}
listener = tls.NewListener(listener, &tls.Config{
Certificates: []tls.Certificate{cert},
CipherSuites: []uint16{
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
},
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
})
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
} else {
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
}
//
// Because this program will want to bind to a privileged port (like 80), the listener must
// be established first, then the user can be changed, and after that,
// the actual http server can be started.
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
log.Fatalf("error while preparing server start: %s", err.Error())
}
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
log.Fatalf("starting server failed: %v", err)
}
}
func serverShutdown() {
// First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background())
// Then, wait for any async archivings still pending...
archiver.WaitForArchiving()
}

94
go.mod
View File

@@ -1,91 +1,89 @@
module github.com/ClusterCockpit/cc-backend
go 1.18
go 1.22
require (
github.com/99designs/gqlgen v0.17.45
github.com/99designs/gqlgen v0.17.49
github.com/ClusterCockpit/cc-units v0.4.0
github.com/Masterminds/squirrel v1.5.3
github.com/coreos/go-oidc/v3 v3.9.0
github.com/go-co-op/gocron v1.25.0
github.com/go-ldap/ldap/v3 v3.4.4
github.com/go-sql-driver/mysql v1.7.0
github.com/Masterminds/squirrel v1.5.4
github.com/coreos/go-oidc/v3 v3.11.0
github.com/go-co-op/gocron/v2 v2.9.0
github.com/go-ldap/ldap/v3 v3.4.8
github.com/go-sql-driver/mysql v1.8.1
github.com/golang-jwt/jwt/v5 v5.2.1
github.com/golang-migrate/migrate/v4 v4.15.2
github.com/google/gops v0.3.27
github.com/gorilla/handlers v1.5.1
github.com/gorilla/mux v1.8.0
github.com/gorilla/sessions v1.2.1
github.com/influxdata/influxdb-client-go/v2 v2.12.2
github.com/jmoiron/sqlx v1.3.5
github.com/mattn/go-sqlite3 v1.14.16
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/common v0.40.0
github.com/golang-migrate/migrate/v4 v4.17.1
github.com/google/gops v0.3.28
github.com/gorilla/handlers v1.5.2
github.com/gorilla/mux v1.8.1
github.com/gorilla/sessions v1.3.0
github.com/influxdata/influxdb-client-go/v2 v2.13.0
github.com/jmoiron/sqlx v1.4.0
github.com/mattn/go-sqlite3 v1.14.22
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/common v0.55.0
github.com/qustavo/sqlhooks/v2 v2.1.0
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
github.com/swaggo/http-swagger v1.3.3
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.3
github.com/vektah/gqlparser/v2 v2.5.11
golang.org/x/crypto v0.21.0
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
golang.org/x/oauth2 v0.13.0
github.com/vektah/gqlparser/v2 v2.5.16
golang.org/x/crypto v0.25.0
golang.org/x/exp v0.0.0-20240707233637-46b078467d37
golang.org/x/oauth2 v0.21.0
)
require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/agnivade/levenshtein v1.1.1 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/containerd v1.6.26 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
github.com/deepmap/oapi-codegen v1.12.4 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
github.com/go-jose/go-jose/v4 v4.0.3 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/gorilla/securecookie v1.1.2 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
github.com/jonboulle/clockwork v0.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/oapi-codegen/runtime v1.1.1 // indirect
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sosodev/duration v1.2.0 // indirect
github.com/swaggo/files v1.0.0 // indirect
github.com/urfave/cli/v2 v2.27.1 // indirect
github.com/sosodev/duration v1.3.1 // indirect
github.com/swaggo/files v1.0.1 // indirect
github.com/urfave/cli/v2 v2.27.2 // indirect
github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
go.uber.org/atomic v1.10.0 // indirect
golang.org/x/mod v0.16.0 // indirect
golang.org/x/net v0.22.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.19.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect
google.golang.org/protobuf v1.33.0 // indirect
go.uber.org/atomic v1.11.0 // indirect
golang.org/x/mod v0.19.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/tools v0.23.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect

1988
go.sum

File diff suppressed because it is too large Load Diff

View File

@@ -61,23 +61,50 @@ models:
fields:
partitions:
resolver: true
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
NullableFloat:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
MetricScope:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
MetricValue:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
JobStatistics:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
GlobalMetricListItem:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
}
ClusterSupport:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
Resource:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
JobState:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
TimeRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
IntRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
JobMetric:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
MetricStatistics:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
}
MetricConfig:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
SubClusterConfig:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
}
Accelerator:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
Topology:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
FilterRanges:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
SubCluster:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
StatsSeries:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }

View File

@@ -19,8 +19,11 @@ import (
"testing"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -144,23 +147,20 @@ func setup(t *testing.T) *api.RestApi {
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
repository.Connect("sqlite3", dbfilepath)
db := repository.GetConnection()
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
t.Fatal(err)
}
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
if err := metricdata.Init(); err != nil {
t.Fatal(err)
}
jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
archiver.Start(repository.GetJobRepository())
auth.Init()
graph.Init()
return &api.RestApi{
JobRepository: resolver.Repo,
Resolver: resolver,
}
return api.New()
}
func cleanup() {
@@ -175,7 +175,6 @@ func cleanup() {
func TestRestApi(t *testing.T) {
restapi := setup(t)
t.Cleanup(cleanup)
testData := schema.JobData{
"load_one": map[schema.MetricScope]*schema.JobMetric{
schema.MetricScopeNode: {
@@ -192,12 +191,14 @@ func TestRestApi(t *testing.T) {
},
}
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
return testData, nil
}
r := mux.NewRouter()
restapi.MountRoutes(r)
r.PathPrefix("/api").Subrouter()
r.StrictSlash(true)
restapi.MountApiRoutes(r)
const startJobBody string = `{
"jobId": 123,
@@ -213,7 +214,7 @@ func TestRestApi(t *testing.T) {
"exclusive": 1,
"monitoringStatus": 1,
"smt": 1,
"tags": [{ "type": "testTagType", "name": "testTagName" }],
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
"resources": [
{
"hostname": "host123",
@@ -225,11 +226,22 @@ func TestRestApi(t *testing.T) {
}`
var dbid int64
const contextUserKey repository.ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
Projects: make([]string, 0),
Roles: []string{"user"},
AuthType: 0,
AuthSource: 2,
}
if ok := t.Run("StartJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
@@ -240,12 +252,13 @@ func TestRestApi(t *testing.T) {
t.Fatal(err)
}
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
resolver := graph.GetResolverInstance()
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(res.DBID)))
if err != nil {
t.Fatal(err)
}
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
job.Tags, err = resolver.Job().Tags(ctx, job)
if err != nil {
t.Fatal(err)
}
@@ -269,7 +282,7 @@ func TestRestApi(t *testing.T) {
t.Fatalf("unexpected job properties: %#v", job)
}
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
t.Fatalf("unexpected tags: %#v", job.Tags)
}
@@ -289,17 +302,20 @@ func TestRestApi(t *testing.T) {
var stoppedJob *schema.Job
if ok := t.Run("StopJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String())
}
restapi.JobRepository.WaitForArchiving()
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
archiver.WaitForArchiving()
resolver := graph.GetResolverInstance()
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(dbid)))
if err != nil {
t.Fatal(err)
}
@@ -327,7 +343,7 @@ func TestRestApi(t *testing.T) {
}
t.Run("CheckArchive", func(t *testing.T) {
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
if err != nil {
t.Fatal(err)
}
@@ -341,10 +357,12 @@ func TestRestApi(t *testing.T) {
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusUnprocessableEntity {
t.Fatal(response.Status, recorder.Body.String())
@@ -371,10 +389,12 @@ func TestRestApi(t *testing.T) {
}`
ok := t.Run("StartJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
@@ -393,16 +413,18 @@ func TestRestApi(t *testing.T) {
}`
ok = t.Run("StopJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req)
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result()
if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String())
}
restapi.JobRepository.WaitForArchiving()
archiver.WaitForArchiving()
jobid, cluster := int64(12345), "testcluster"
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
if err != nil {

View File

@@ -19,12 +19,13 @@ import (
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -53,14 +54,20 @@ import (
type RestApi struct {
JobRepository *repository.JobRepository
Resolver *graph.Resolver
Authentication *auth.Authentication
MachineStateDir string
RepositoryMutex sync.Mutex
}
func (api *RestApi) MountRoutes(r *mux.Router) {
r = r.PathPrefix("/api").Subrouter()
func New() *RestApi {
return &RestApi{
JobRepository: repository.GetJobRepository(),
MachineStateDir: config.Keys.MachineStateDir,
Authentication: auth.GetAuthInstance(),
}
}
func (api *RestApi) MountApiRoutes(r *mux.Router) {
r.StrictSlash(true)
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
@@ -84,14 +91,34 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
}
}
func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
r.StrictSlash(true)
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
}
func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
r.StrictSlash(true)
if api.Authentication != nil {
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
r.HandleFunc("/roles/", api.getRoles).Methods(http.MethodGet)
r.HandleFunc("/users/", api.createUser).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/users/", api.getUsers).Methods(http.MethodGet)
r.HandleFunc("/users/", api.deleteUser).Methods(http.MethodDelete)
r.HandleFunc("/user/{id}", api.updateUser).Methods(http.MethodPost)
}
}
func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) {
r.StrictSlash(true)
if api.Authentication != nil {
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
r.HandleFunc("/configuration/", api.updateConfiguration).Methods(http.MethodPost)
}
}
@@ -150,8 +177,9 @@ type ErrorResponse struct {
// ApiTag model
type ApiTag struct {
// Tag Type
Type string `json:"type" example:"Debug"`
Name string `json:"name" example:"Testjob"` // Tag Name
Type string `json:"type" example:"Debug"`
Name string `json:"name" example:"Testjob"` // Tag Name
Scope string `json:"scope" example:"global"` // Tag Scope for Frontend Display
}
// ApiMeta model
@@ -311,13 +339,6 @@ func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/ [get]
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
withMetadata := false
filter := &model.JobFilter{}
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
@@ -400,7 +421,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
StartTime: job.StartTime.Unix(),
}
res.Tags, err = api.JobRepository.GetTags(&job.ID)
res.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
@@ -434,7 +455,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
}
}
// getJobById godoc
// getCompleteJobById godoc
// @summary Get job meta and optional all metric data
// @tags Job query
// @description Job to get is specified by database ID
@@ -452,14 +473,6 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/{id} [get]
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v",
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
// Fetch job from db
id, ok := mux.Vars(r)["id"]
var job *schema.Job
@@ -471,7 +484,7 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
return
}
job, err = api.JobRepository.FindById(id)
job, err = api.JobRepository.FindById(r.Context(), id) // Get Job from Repo by ID
} else {
handleError(fmt.Errorf("the parameter 'id' is required"), http.StatusBadRequest, rw)
return
@@ -481,7 +494,7 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
return
}
job.Tags, err = api.JobRepository.GetTags(&job.ID)
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
@@ -503,8 +516,15 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
var data schema.JobData
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
resolution := 0
for _, mc := range metricConfigs {
resolution = max(resolution, mc.Timestep)
}
if r.URL.Query().Get("all-metrics") == "true" {
data, err = metricdata.LoadData(job, nil, scopes, r.Context())
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
if err != nil {
log.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
return
@@ -546,14 +566,6 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
// @security ApiKeyAuth
// @router /jobs/{id} [post]
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v",
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
// Fetch job from db
id, ok := mux.Vars(r)["id"]
var job *schema.Job
@@ -565,7 +577,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
return
}
job, err = api.JobRepository.FindById(id)
job, err = api.JobRepository.FindById(r.Context(), id)
} else {
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
return
@@ -575,7 +587,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
return
}
job.Tags, err = api.JobRepository.GetTags(&job.ID)
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
@@ -601,7 +613,14 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
scopes = []schema.MetricScope{"node"}
}
data, err := metricdata.LoadData(job, metrics, scopes, r.Context())
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
resolution := 0
for _, mc := range metricConfigs {
resolution = max(resolution, mc.Timestep)
}
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
if err != nil {
log.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
return
@@ -651,19 +670,13 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/edit_meta/{id} [post]
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
return
}
job, err := api.JobRepository.FindById(iid)
job, err := api.JobRepository.FindById(r.Context(), id)
if err != nil {
http.Error(rw, err.Error(), http.StatusNotFound)
return
@@ -689,6 +702,7 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
// @summary Adds one or more tags to a job
// @tags Job add and modify
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
// @description Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
// @accept json
// @produce json
@@ -702,26 +716,19 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/tag_job/{id} [post]
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
return
}
job, err := api.JobRepository.FindById(iid)
job, err := api.JobRepository.FindById(r.Context(), id)
if err != nil {
http.Error(rw, err.Error(), http.StatusNotFound)
return
}
job.Tags, err = api.JobRepository.GetTags(&job.ID)
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
@@ -734,16 +741,17 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
}
for _, tag := range req {
tagId, err := api.JobRepository.AddTagOrCreate(job.ID, tag.Type, tag.Name)
tagId, err := api.JobRepository.AddTagOrCreate(r.Context(), job.ID, tag.Type, tag.Name, tag.Scope)
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
job.Tags = append(job.Tags, &schema.Tag{
ID: tagId,
Type: tag.Type,
Name: tag.Name,
ID: tagId,
Type: tag.Type,
Name: tag.Name,
Scope: tag.Scope,
})
}
@@ -769,13 +777,6 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/start_job/ [post]
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
req := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := decode(r.Body, &req); err != nil {
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
@@ -818,7 +819,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
unlockOnce.Do(api.RepositoryMutex.Unlock)
for _, tag := range req.Tags {
if _, err := api.JobRepository.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
if _, err := api.JobRepository.AddTagOrCreate(r.Context(), id, tag.Type, tag.Name, tag.Scope); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
return
@@ -852,13 +853,6 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/stop_job/{id} [post]
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
// Parse request body: Only StopTime and State
req := StopJobApiRequest{}
if err := decode(r.Body, &req); err != nil {
@@ -877,7 +871,7 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
return
}
job, err = api.JobRepository.FindById(id)
job, err = api.JobRepository.FindById(r.Context(), id)
} else {
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
return
@@ -907,13 +901,6 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/stop_job/ [post]
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
// Parse request body
req := StopJobApiRequest{}
if err := decode(r.Body, &req); err != nil {
@@ -931,7 +918,6 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// log.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
@@ -956,11 +942,6 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/delete_job/{id} [delete]
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
// Fetch job (that will be stopped) from db
id, ok := mux.Vars(r)["id"]
var err error
@@ -1004,12 +985,6 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
// @security ApiKeyAuth
// @router /jobs/delete_job/ [delete]
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
// Parse request body
req := DeleteJobApiRequest{}
if err := decode(r.Body, &req); err != nil {
@@ -1026,7 +1001,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
}
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
@@ -1061,11 +1035,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
// @security ApiKeyAuth
// @router /jobs/delete_job_before/{ts} [delete]
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
var cnt int
// Fetch job (that will be stopped) from db
id, ok := mux.Vars(r)["ts"]
@@ -1131,7 +1100,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
}
// Trigger async archiving
api.JobRepository.TriggerArchiving(job)
archiver.TriggerArchiving(job)
}
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
@@ -1159,7 +1128,8 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
} `json:"error"`
}
data, err := api.Resolver.Query().JobMetrics(r.Context(), id, metrics, scopes)
resolver := graph.GetResolverInstance()
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
if err != nil {
json.NewEncoder(rw).Encode(Respone{
Error: &struct {

View File

@@ -0,0 +1,94 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver
import (
"context"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
var (
archivePending sync.WaitGroup
archiveChannel chan *schema.Job
jobRepo *repository.JobRepository
)
func Start(r *repository.JobRepository) {
archiveChannel = make(chan *schema.Job, 128)
jobRepo = r
go archivingWorker()
}
// Archiving worker thread
func archivingWorker() {
for {
select {
case job, ok := <-archiveChannel:
if !ok {
break
}
start := time.Now()
// not using meta data, called to load JobMeta into Cache?
// will fail if job meta not in repository
if _, err := jobRepo.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
// TODO: Maybe use context with cancel/timeout here
jobMeta, err := ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
stmt := sq.Update("job").Where("job.id = ?", job.ID)
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
continue
}
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
continue
}
// Update the jobs database entry one last time:
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
if err := jobRepo.Execute(stmt); err != nil {
log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
continue
}
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
log.Printf("archiving job (dbid: %d) successful", job.ID)
archivePending.Done()
}
}
}
// Trigger async archiving
func TriggerArchiving(job *schema.Job) {
if archiveChannel == nil {
log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
}
archivePending.Add(1)
archiveChannel <- job
}
// Wait for background thread to finish pending archiving operations
func WaitForArchiving() {
// close channel and wait for worker to process remaining jobs
archivePending.Wait()
}

View File

@@ -0,0 +1,82 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver
import (
"context"
"math"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
scopes := []schema.MetricScope{schema.MetricScopeNode}
// FIXME: Add a config option for this
if job.NumNodes <= 8 {
// This will add the native scope if core scope is not available
scopes = append(scopes, schema.MetricScopeCore)
}
if job.NumAcc > 0 {
scopes = append(scopes, schema.MetricScopeAccelerator)
}
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// This should never happen ?
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
},
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if config.Keys.DisableArchive {
return jobMeta, nil
}
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
}

View File

@@ -12,6 +12,7 @@ import (
"errors"
"net/http"
"os"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
@@ -26,6 +27,11 @@ type Authenticator interface {
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
}
var (
initOnce sync.Once
authInstance *Authentication
)
type Authentication struct {
sessionStore *sessions.CookieStore
LdapAuth *LdapAuthenticator
@@ -62,82 +68,111 @@ func (auth *Authentication) AuthViaSession(
}, nil
}
func Init() (*Authentication, error) {
auth := &Authentication{}
func Init() {
initOnce.Do(func() {
authInstance = &Authentication{}
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Error("Error while initializing authentication -> decoding session key failed")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
}
if config.Keys.LdapConfig != nil {
ldapAuth := &LdapAuthenticator{}
if err := ldapAuth.Init(); err != nil {
log.Warn("Error while initializing authentication -> ldapAuth init failed")
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
} else {
auth.LdapAuth = ldapAuth
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
}
} else {
log.Info("Missing LDAP configuration: No LDAP support!")
}
if config.Keys.JwtConfig != nil {
auth.JwtAuth = &JWTAuthenticator{}
if err := auth.JwtAuth.Init(); err != nil {
log.Error("Error while initializing authentication -> jwtAuth init failed")
return nil, err
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Fatal("Error while initializing authentication -> decoding session key failed")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
}
jwtSessionAuth := &JWTSessionAuthenticator{}
if err := jwtSessionAuth.Init(); err != nil {
log.Info("jwtSessionAuth init failed: No JWT login support!")
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
authInstance.SessionMaxAge = d
}
if config.Keys.LdapConfig != nil {
ldapAuth := &LdapAuthenticator{}
if err := ldapAuth.Init(); err != nil {
log.Warn("Error while initializing authentication -> ldapAuth init failed")
} else {
authInstance.LdapAuth = ldapAuth
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
}
} else {
auth.authenticators = append(auth.authenticators, jwtSessionAuth)
log.Info("Missing LDAP configuration: No LDAP support!")
}
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
if err := jwtCookieSessionAuth.Init(); err != nil {
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
if config.Keys.JwtConfig != nil {
authInstance.JwtAuth = &JWTAuthenticator{}
if err := authInstance.JwtAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> jwtAuth init failed")
}
jwtSessionAuth := &JWTSessionAuthenticator{}
if err := jwtSessionAuth.Init(); err != nil {
log.Info("jwtSessionAuth init failed: No JWT login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
}
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
if err := jwtCookieSessionAuth.Init(); err != nil {
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
}
} else {
auth.authenticators = append(auth.authenticators, jwtCookieSessionAuth)
log.Info("Missing JWT configuration: No JWT token support!")
}
} else {
log.Info("Missing JWT configuration: No JWT token support!")
}
auth.LocalAuth = &LocalAuthenticator{}
if err := auth.LocalAuth.Init(); err != nil {
log.Error("Error while initializing authentication -> localAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
return auth, nil
authInstance.LocalAuth = &LocalAuthenticator{}
if err := authInstance.LocalAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> localAuth init failed")
}
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
})
}
func persistUser(user *schema.User) {
func GetAuthInstance() *Authentication {
if authInstance == nil {
log.Fatal("Authentication module not initialized!")
}
return authInstance
}
func handleTokenUser(tokenUser *schema.User) {
r := repository.GetUserRepository()
_, err := r.GetUser(user.Username)
dbUser, err := r.GetUser(tokenUser.Username)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%s': %v", user.Username, err)
} else if err == sql.ErrNoRows {
if err := r.AddUser(user); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(tokenUser); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
}
} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
}
}
func handleOIDCUser(OIDCUser *schema.User) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(OIDCUser.Username)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
} else if err == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(OIDCUser); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
}
} else if err == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
}
}
@@ -219,31 +254,141 @@ func (auth *Authentication) Auth(
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("authentication failed: %s", err.Error())
log.Infof("auth -> authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized)
return
}
if user == nil {
user, err = auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("authentication failed: %s", err.Error())
log.Infof("auth -> authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized)
return
}
}
if user != nil {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Debug("authentication failed")
log.Info("auth -> authentication failed")
onfailure(rw, r, errors.New("unauthorized (please login first)"))
})
}
func (auth *Authentication) AuthApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("auth api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
log.Info("auth api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
log.Info("auth api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthUserApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("auth user api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
log.Info("auth user api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
log.Info("auth user api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthConfigApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("auth config api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil && user.HasRole(schema.RoleAdmin) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Info("auth config api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthFrontendApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("auth frontend api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Info("auth frontend api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
session, err := auth.sessionStore.Get(r, "session")

View File

@@ -198,8 +198,8 @@ func (ja *JWTCookieSessionAuthenticator) Login(
AuthSource: schema.AuthViaToken,
}
if jc.SyncUserOnLogin {
persistUser(user)
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
handleTokenUser(user)
}
}

View File

@@ -138,8 +138,8 @@ func (ja *JWTSessionAuthenticator) Login(
AuthSource: schema.AuthViaToken,
}
if config.Keys.JwtConfig.SyncUserOnLogin {
persistUser(user)
if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
handleTokenUser(user)
}
}

View File

@@ -10,7 +10,6 @@ import (
"net/http"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
@@ -34,33 +33,6 @@ func (la *LdapAuthenticator) Init() error {
lc := config.Keys.LdapConfig
if lc.SyncInterval != "" {
interval, err := time.ParseDuration(lc.SyncInterval)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v",
lc.SyncInterval)
return err
}
if interval == 0 {
log.Info("Sync interval is zero")
return nil
}
go func() {
ticker := time.NewTicker(interval)
for t := range ticker.C {
log.Printf("sync started at %s", t.Format(time.RFC3339))
if err := la.Sync(); err != nil {
log.Errorf("sync failed: %s", err.Error())
}
log.Print("sync done")
}
}()
} else {
log.Info("LDAP configuration key sync_interval invalid")
}
if lc.UserAttr != "" {
la.UserAttr = lc.UserAttr
} else {

View File

@@ -168,8 +168,8 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
AuthSource: schema.AuthViaOIDC,
}
if config.Keys.OpenIDConfig.SyncUserOnLogin {
persistUser(user)
if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
handleOIDCUser(user)
}
oa.authentication.SaveSession(rw, r, user)

File diff suppressed because it is too large Load Diff

View File

@@ -16,11 +16,23 @@ type Count struct {
Count int `json:"count"`
}
type EnergyFootprintValue struct {
Hardware string `json:"hardware"`
Metric string `json:"metric"`
Value float64 `json:"value"`
}
type FloatRange struct {
From float64 `json:"from"`
To float64 `json:"to"`
}
type FootprintValue struct {
Name string `json:"name"`
Stat string `json:"stat"`
Value float64 `json:"value"`
}
type Footprints struct {
TimeWeights *TimeWeights `json:"timeWeights"`
Metrics []*MetricFootprints `json:"metrics"`
@@ -46,16 +58,14 @@ type JobFilter struct {
Cluster *StringInput `json:"cluster,omitempty"`
Partition *StringInput `json:"partition,omitempty"`
Duration *schema.IntRange `json:"duration,omitempty"`
Energy *FloatRange `json:"energy,omitempty"`
MinRunningFor *int `json:"minRunningFor,omitempty"`
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
StartTime *schema.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg,omitempty"`
MemBwAvg *FloatRange `json:"memBwAvg,omitempty"`
LoadAvg *FloatRange `json:"loadAvg,omitempty"`
MemUsedMax *FloatRange `json:"memUsedMax,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Exclusive *int `json:"exclusive,omitempty"`
Node *StringInput `json:"node,omitempty"`
}
@@ -120,9 +130,15 @@ type MetricHistoPoint struct {
type MetricHistoPoints struct {
Metric string `json:"metric"`
Unit string `json:"unit"`
Stat *string `json:"stat,omitempty"`
Data []*MetricHistoPoint `json:"data,omitempty"`
}
type MetricStatItem struct {
MetricName string `json:"metricName"`
Range *FloatRange `json:"range"`
}
type Mutation struct {
}
@@ -134,6 +150,7 @@ type NodeMetrics struct {
type OrderByInput struct {
Field string `json:"field"`
Type string `json:"type"`
Order SortDirectionEnum `json:"order"`
}
@@ -155,8 +172,9 @@ type StringInput struct {
}
type TimeRangeOutput struct {
From time.Time `json:"from"`
To time.Time `json:"to"`
Range *string `json:"range,omitempty"`
From time.Time `json:"from"`
To time.Time `json:"to"`
}
type TimeWeights struct {

View File

@@ -1,15 +1,39 @@
package graph
import (
"sync"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/jmoiron/sqlx"
)
// This file will not be regenerated automatically.
//
// It serves as dependency injection for your app, add any dependencies you require here.
var (
initOnce sync.Once
resolverInstance *Resolver
)
type Resolver struct {
DB *sqlx.DB
Repo *repository.JobRepository
}
func Init() {
initOnce.Do(func() {
db := repository.GetConnection()
resolverInstance = &Resolver{
DB: db.DB, Repo: repository.GetJobRepository(),
}
})
}
func GetResolverInstance() *Resolver {
if resolverInstance == nil {
log.Fatal("Authentication module not initialized!")
}
return resolverInstance
}

View File

@@ -2,19 +2,22 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.45
// Code generated by github.com/99designs/gqlgen version v0.17.49
import (
"context"
"errors"
"fmt"
"regexp"
"slices"
"strconv"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
@@ -28,7 +31,7 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(&obj.ID)
return r.Repo.GetTags(ctx, &obj.ID)
}
// ConcurrentJobs is the resolver for the concurrentJobs field.
@@ -44,8 +47,72 @@ func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*mod
return nil, nil
}
// Footprint is the resolver for the footprint field.
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
rawFootprint, err := r.Repo.FetchFootprint(obj)
if err != nil {
log.Warn("Error while fetching job footprint data")
return nil, err
}
res := []*model.FootprintValue{}
for name, value := range rawFootprint {
parts := strings.Split(name, "_")
statPart := parts[len(parts)-1]
nameParts := parts[:len(parts)-1]
res = append(res, &model.FootprintValue{
Name: strings.Join(nameParts, "_"),
Stat: statPart,
Value: value,
})
}
return res, err
}
// EnergyFootprint is the resolver for the energyFootprint field.
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
if err != nil {
log.Warn("Error while fetching job energy footprint data")
return nil, err
}
res := []*model.EnergyFootprintValue{}
for name, value := range rawEnergyFootprint {
// Suboptimal: Nearly hardcoded metric name expectations
matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
matchCore := regexp.MustCompile(`core|Core|CORE`)
hwType := ""
switch test := name; { // NOtice ';' for var declaration
case matchCpu.MatchString(test):
hwType = "CPU"
case matchAcc.MatchString(test):
hwType = "Accelerator"
case matchMem.MatchString(test):
hwType = "Memory"
case matchCore.MatchString(test):
hwType = "Core"
default:
hwType = "Other"
}
res = append(res, &model.EnergyFootprintValue{
Hardware: hwType,
Metric: name,
Value: value,
})
}
return res, err
}
// MetaData is the resolver for the metaData field.
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
return r.Repo.FetchMetadata(obj)
}
@@ -54,15 +121,20 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
}
// Name is the resolver for the name field.
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
panic(fmt.Errorf("not implemented: Name - name"))
}
// CreateTag is the resolver for the createTag field.
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name)
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name, scope)
if err != nil {
log.Warn("Error while creating tag")
return nil, err
}
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
}
// DeleteTag is the resolver for the deleteTag field.
@@ -72,6 +144,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
// AddTagsToJob is the resolver for the addTagsToJob field.
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
// Selectable Tags Pre-Filtered by Scope in Frontend: No backend check required
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while adding tag to job")
@@ -86,7 +159,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
return nil, err
}
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
if tags, err = r.Repo.AddTag(ctx, jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err
}
@@ -97,6 +170,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
// Removable Tags Pre-Filtered by Scope in Frontend: No backend check required
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
@@ -111,7 +185,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
return nil, err
}
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
if tags, err = r.Repo.RemoveTag(ctx, jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
}
@@ -137,7 +211,12 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
// Tags is the resolver for the tags field.
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
return r.Repo.GetTags(nil)
return r.Repo.GetTags(ctx, nil)
}
// GlobalMetrics is the resolver for the globalMetrics field.
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
return archive.GlobalMetricList, nil
}
// User is the resolver for the user field.
@@ -172,7 +251,7 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
return nil, err
}
job, err := r.Repo.FindById(numericId)
job, err := r.Repo.FindById(ctx, numericId)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
@@ -188,14 +267,24 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
}
// JobMetrics is the resolver for the jobMetrics field.
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
if resolution == nil { // Load from Config
if config.Keys.EnableResampling != nil {
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
resolution = &defaultRes
} else { // Set 0 (Loads configured metric timestep)
defaultRes := 0
resolution = &defaultRes
}
}
job, err := r.Query().Job(ctx, id)
if err != nil {
log.Warn("Error while querying job for metrics")
return nil, err
}
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
if err != nil {
log.Warn("Error while loading job data")
return nil, err
@@ -347,7 +436,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
}
}
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
log.Warn("Error while loading node data")
return nil, err
@@ -392,6 +481,9 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
// Job returns generated.JobResolver implementation.
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
// MetricValue returns generated.MetricValueResolver implementation.
func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricValueResolver{r} }
// Mutation returns generated.MutationResolver implementation.
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
@@ -403,6 +495,7 @@ func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subCluste
type clusterResolver struct{ *Resolver }
type jobResolver struct{ *Resolver }
type metricValueResolver struct{ *Resolver }
type mutationResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }

View File

@@ -11,7 +11,7 @@ import (
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -24,8 +24,8 @@ func (r *queryResolver) rooflineHeatmap(
ctx context.Context,
filter []*model.JobFilter,
rows int, cols int,
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
minX float64, minY float64, maxX float64, maxY float64,
) ([][]float64, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for roofline")
@@ -47,7 +47,14 @@ func (r *queryResolver) rooflineHeatmap(
continue
}
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
// resolution := 0
// for _, mc := range metricConfigs {
// resolution = max(resolution, mc.Timestep)
// }
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
if err != nil {
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
return nil, err
@@ -120,7 +127,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
continue
}
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint")
return nil, err
}

View File

@@ -10,7 +10,6 @@ import (
"fmt"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
@@ -42,8 +41,8 @@ func HandleImportFlag(flag string) error {
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err = dec.Decode(&jobMeta); err != nil {
job := schema.JobMeta{BaseJob: schema.JobDefaults}
if err = dec.Decode(&job); err != nil {
log.Warn("Error while decoding raw json metadata for import")
return err
}
@@ -67,33 +66,32 @@ func HandleImportFlag(flag string) error {
return err
}
// checkJobData(&jobData)
job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
// if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
// if err != nil {
// log.Warn("Error while finding job in jobRepository")
// return err
// }
//
// return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist")
// }
//
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return err
}
// TODO: Other metrics...
job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
job.MemUsedMax = loadJobStat(&jobMeta, "mem_used")
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
job.Footprint = make(map[string]float64)
for _, fp := range sc.Footprint {
statType := "avg"
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[fp] = repository.LoadJobStat(&job, name, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Warn("Error while marshaling job resources")
@@ -110,7 +108,7 @@ func HandleImportFlag(flag string) error {
return err
}
if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
log.Error("Error while importing job")
return err
}
@@ -122,8 +120,8 @@ func HandleImportFlag(flag string) error {
}
for _, tag := range job.Tags {
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
log.Error("Error while adding or creating tag")
if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
log.Error("Error while adding or creating tag on import")
return err
}
}

View File

@@ -16,6 +16,11 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
const (
addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
)
// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
func InitDB() error {
@@ -60,13 +65,30 @@ func InitDB() error {
StartTimeUnix: jobMeta.StartTime,
}
// TODO: Other metrics...
job.LoadAvg = loadJobStat(jobMeta, "cpu_load")
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemUsedMax = loadJobStat(jobMeta, "mem_used")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return err
}
job.Footprint = make(map[string]float64)
for _, fp := range sc.Footprint {
statType := "avg"
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[fp] = repository.LoadJobStat(jobMeta, name, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
@@ -88,7 +110,8 @@ func InitDB() error {
continue
}
id, err := r.TransactionAdd(t, job)
id, err := r.TransactionAddNamed(t,
repository.NamedJobInsert, job)
if err != nil {
log.Errorf("repository initDB(): %v", err)
errorOccured++
@@ -99,7 +122,9 @@ func InitDB() error {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
if !ok {
tagId, err = r.TransactionAddTag(t, tag)
tagId, err = r.TransactionAdd(t,
addTagQuery,
tag.Name, tag.Type)
if err != nil {
log.Errorf("Error adding tag: %v", err)
errorOccured++
@@ -108,7 +133,9 @@ func InitDB() error {
tags[tagstr] = tagId
}
r.TransactionSetTag(t, id, tagId)
r.TransactionAdd(t,
setTagQuery,
id, tagId)
}
if err == nil {
@@ -150,18 +177,6 @@ func SanityChecks(job *schema.BaseJob) error {
return nil
}
func loadJobStat(job *schema.JobMeta, metric string) float64 {
if stats, ok := job.Statistics[metric]; ok {
if metric == "mem_used" {
return stats.Max
} else {
return stats.Avg
}
}
return 0.0
}
func checkJobData(d *schema.JobData) error {
for _, scopes := range *d {
// var newUnit schema.Unit

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,256 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricDataDispatcher
import (
"context"
"fmt"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/resampler"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
func cacheKey(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
resolution int,
) string {
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
job.ID, job.State, metrics, scopes, resolution)
}
// Fetches the metric data for a job.
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
config.Keys.DisableArchive {
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
if metrics == nil {
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
if err != nil {
if len(jd) != 0 {
log.Warnf("partial error: %s", err.Error())
// return err, 0, 0 // Reactivating will block archiving on one partial error
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
size = jd.Size()
} else {
var jd_temp schema.JobData
jd_temp, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
//Deep copy the cached archive hashmap
jd = metricdata.DeepCopy(jd_temp)
//Resampling for archived data.
//Pass the resolution from frontend here.
for _, v := range jd {
for _, v_ := range v {
timestep := 0
for i := 0; i < len(v_.Series); i += 1 {
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
if err != nil {
return err, 0, 0
}
}
v_.Timestep = timestep
}
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
if metrics == nil {
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
if len(subset) > 0 {
perscope = subset
}
}
res[metric] = perscope
}
}
jd = res
}
size = jd.Size()
}
ttl = 5 * time.Hour
if job.State == schema.JobStateRunning {
ttl = 2 * time.Minute
}
// FIXME: Review: Is this really necessary or correct.
// Note: Lines 147-170 formerly known as prepareJobData(jobData, scopes)
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/median/max Graph can be
// used instead of a lot of single lines.
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
// Existing (archived) StatsSeries can be 'min/mean/max'!
const maxSeriesSize int = 15
for _, scopes := range jd {
for _, jm := range scopes {
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
continue
}
jm.AddStatisticsSeries()
}
}
nodeScopeRequested := false
for _, scope := range scopes {
if scope == schema.MetricScopeNode {
nodeScopeRequested = true
}
}
if nodeScopeRequested {
jd.AddNodeScope("flops_any")
jd.AddNodeScope("mem_bw")
}
return jd, ttl, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
return data.(schema.JobData), nil
}
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(
job *schema.Job,
metrics []string,
data [][]schema.Float,
ctx context.Context,
) error {
if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
}
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}
for i, m := range metrics {
nodes, ok := stats[m]
if !ok {
data[i] = append(data[i], schema.NaN)
continue
}
sum := 0.0
for _, node := range nodes {
sum += node.Avg
}
data[i] = append(data[i], schema.Float(sum))
}
return nil
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
repo, err := metricdata.GetMetricDataRepo(cluster)
if err != nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil
}

View File

@@ -55,6 +55,7 @@ type ApiQuery struct {
SubType *string `json:"subtype,omitempty"`
Metric string `json:"metric"`
Hostname string `json:"host"`
Resolution int `json:"resolution"`
TypeIds []string `json:"type-ids,omitempty"`
SubTypeIds []string `json:"subtype-ids,omitempty"`
Aggregate bool `json:"aggreg"`
@@ -66,13 +67,14 @@ type ApiQueryResponse struct {
}
type ApiMetricData struct {
Error *string `json:"error"`
Data []schema.Float `json:"data"`
From int64 `json:"from"`
To int64 `json:"to"`
Avg schema.Float `json:"avg"`
Min schema.Float `json:"min"`
Max schema.Float `json:"max"`
Error *string `json:"error"`
Data []schema.Float `json:"data"`
From int64 `json:"from"`
To int64 `json:"to"`
Resolution int `json:"resolution"`
Avg schema.Float `json:"avg"`
Min schema.Float `json:"min"`
Max schema.Float `json:"max"`
}
func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
@@ -129,7 +131,7 @@ func (ccms *CCMetricStore) doRequest(
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
if err != nil {
log.Warn("Error while building request body")
return nil, err
@@ -138,6 +140,13 @@ func (ccms *CCMetricStore) doRequest(
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
}
// versioning the cc-metric-store query API.
// v2 = data with resampling
// v1 = data without resampling
q := req.URL.Query()
q.Add("version", "v2")
req.URL.RawQuery = q.Encode()
res, err := ccms.client.Do(req)
if err != nil {
log.Error("Error while performing request")
@@ -162,8 +171,9 @@ func (ccms *CCMetricStore) LoadData(
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
if err != nil {
log.Warn("Error while building queries")
return nil, err
@@ -195,11 +205,17 @@ func (ccms *CCMetricStore) LoadData(
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
}
res := row[0].Resolution
if res == 0 {
res = mc.Timestep
}
jobMetric, ok := jobData[metric][scope]
if !ok {
jobMetric = &schema.JobMetric{
Unit: mc.Unit,
Timestep: mc.Timestep,
Timestep: res,
Series: make([]schema.Series, 0),
}
jobData[metric][scope] = jobMetric
@@ -251,7 +267,6 @@ func (ccms *CCMetricStore) LoadData(
/* Returns list for "partial errors" */
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
}
return jobData, nil
}
@@ -267,6 +282,7 @@ func (ccms *CCMetricStore) buildQueries(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
assignedScope := []schema.MetricScope{}
@@ -318,11 +334,12 @@ func (ccms *CCMetricStore) buildQueries(
}
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &acceleratorString,
TypeIds: host.Accelerators,
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &acceleratorString,
TypeIds: host.Accelerators,
Resolution: resolution,
})
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
continue
@@ -335,11 +352,12 @@ func (ccms *CCMetricStore) buildQueries(
}
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &acceleratorString,
TypeIds: host.Accelerators,
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &acceleratorString,
TypeIds: host.Accelerators,
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -348,11 +366,12 @@ func (ccms *CCMetricStore) buildQueries(
// HWThread -> HWThead
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -363,11 +382,12 @@ func (ccms *CCMetricStore) buildQueries(
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
for _, core := range cores {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Core[core]),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Core[core]),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
}
@@ -379,11 +399,12 @@ func (ccms *CCMetricStore) buildQueries(
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
for _, socket := range sockets {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Socket[socket]),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Socket[socket]),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
}
@@ -393,11 +414,12 @@ func (ccms *CCMetricStore) buildQueries(
// HWThread -> Node
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -407,11 +429,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &coreString,
TypeIds: intToStringSlice(cores),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &coreString,
TypeIds: intToStringSlice(cores),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -421,11 +444,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &coreString,
TypeIds: intToStringSlice(cores),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &coreString,
TypeIds: intToStringSlice(cores),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -435,11 +459,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -449,11 +474,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -463,11 +489,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &socketString,
TypeIds: intToStringSlice(sockets),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: false,
Type: &socketString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -477,11 +504,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &socketString,
TypeIds: intToStringSlice(sockets),
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &socketString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -490,8 +518,9 @@ func (ccms *CCMetricStore) buildQueries(
// Node -> Node
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Metric: remoteName,
Hostname: host.Hostname,
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@@ -510,7 +539,15 @@ func (ccms *CCMetricStore) LoadStats(
metrics []string,
ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) // #166 Add scope shere for analysis view accelerator normalization?
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
// resolution := 9000
// for _, mc := range metricConfigs {
// resolution = min(resolution, mc.Timestep)
// }
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
if err != nil {
log.Warn("Error while building query")
return nil, err
@@ -588,8 +625,9 @@ func (ccms *CCMetricStore) LoadNodeData(
for _, node := range nodes {
for _, metric := range metrics {
req.Queries = append(req.Queries, ApiQuery{
Hostname: node,
Metric: ccms.toRemoteName(metric),
Hostname: node,
Metric: ccms.toRemoteName(metric),
Resolution: 60, // Default for Node Queries
})
}
}
@@ -597,7 +635,7 @@ func (ccms *CCMetricStore) LoadNodeData(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
return nil, err
}

View File

@@ -60,7 +60,8 @@ func (idb *InfluxDBv2DataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
ctx context.Context,
resolution int) (schema.JobData, error) {
measurementsConds := make([]string, 0, len(metrics))
for _, m := range metrics {

View File

@@ -8,13 +8,10 @@ import (
"context"
"encoding/json"
"fmt"
"math"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@@ -24,7 +21,7 @@ type MetricDataRepository interface {
Init(rawConfig json.RawMessage) error
// Return the JobData for the given job, only with the requested metrics.
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
@@ -35,10 +32,7 @@ type MetricDataRepository interface {
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
var useArchive bool
func Init(disableArchive bool) error {
useArchive = !disableArchive
func Init() error {
for _, cluster := range config.Keys.Clusters {
if cluster.MetricDataRepository != nil {
var kind struct {
@@ -73,284 +67,13 @@ func Init(disableArchive bool) error {
return nil
}
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
// Fetches the metric data for a job.
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
!useArchive {
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
if metrics == nil {
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
if len(jd) != 0 {
log.Warnf("partial error: %s", err.Error())
// return err, 0, 0 // Reactivating will block archiving on one partial error
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
size = jd.Size()
} else {
jd, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
if metrics == nil {
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
if len(subset) > 0 {
perscope = subset
}
}
res[metric] = perscope
}
}
jd = res
}
size = jd.Size()
}
ttl = 5 * time.Hour
if job.State == schema.JobStateRunning {
ttl = 2 * time.Minute
}
prepareJobData(job, jd, scopes)
return jd, ttl, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
return data.(schema.JobData), nil
}
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(
job *schema.Job,
metrics []string,
data [][]schema.Float,
ctx context.Context,
) error {
if job.State != schema.JobStateRunning && useArchive {
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
}
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}
for i, m := range metrics {
nodes, ok := stats[m]
if !ok {
data[i] = append(data[i], schema.NaN)
continue
}
sum := 0.0
for _, node := range nodes {
sum += node.Avg
}
data[i] = append(data[i], schema.Float(sum))
}
return nil
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
var err error
repo, ok := metricDataRepos[cluster]
if !ok {
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil
}
func cacheKey(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
) string {
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
return fmt.Sprintf("%d(%s):[%v],[%v]",
job.ID, job.State, metrics, scopes)
}
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/mean/max Graph can be
// used instead of a lot of single lines.
func prepareJobData(
job *schema.Job,
jobData schema.JobData,
scopes []schema.MetricScope,
) {
const maxSeriesSize int = 15
for _, scopes := range jobData {
for _, jm := range scopes {
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
continue
}
jm.AddStatisticsSeries()
}
}
nodeScopeRequested := false
for _, scope := range scopes {
if scope == schema.MetricScopeNode {
nodeScopeRequested = true
}
}
if nodeScopeRequested {
jobData.AddNodeScope("flops_any")
jobData.AddNodeScope("mem_bw")
}
}
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
// TODO: Talk about this! What resolutions to store data at...
scopes := []schema.MetricScope{schema.MetricScopeNode}
if job.NumNodes <= 8 {
scopes = append(scopes, schema.MetricScopeCore)
}
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// TODO/FIXME: Calc average for non-node metrics as well!
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
},
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if !useArchive {
return jobMeta, nil
}
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
return repo, err
}

View File

@@ -166,10 +166,10 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
var rt http.RoundTripper = nil
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
prom_pw := promcfg.Secret(prom_pw)
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
} else {
if config.Username != "" {
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
}
}
// init client
@@ -204,8 +204,8 @@ func (pdb *PrometheusDataRepository) FormatQuery(
metric string,
scope schema.MetricScope,
nodes []string,
cluster string) (string, error) {
cluster string,
) (string, error) {
args := PromQLArgs{}
if len(nodes) > 0 {
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
@@ -233,12 +233,13 @@ func (pdb *PrometheusDataRepository) RowToSeries(
from time.Time,
step int64,
steps int64,
row *promm.SampleStream) schema.Series {
row *promm.SampleStream,
) schema.Series {
ts := from.Unix()
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
// init array of expected length with NaN
values := make([]schema.Float, steps+1)
for i, _ := range values {
for i := range values {
values[i] = schema.NaN
}
// copy recorded values from prom sample pair
@@ -263,8 +264,9 @@ func (pdb *PrometheusDataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
ctx context.Context,
resolution int,
) (schema.JobData, error) {
// TODO respect requested scope
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode)
@@ -306,7 +308,6 @@ func (pdb *PrometheusDataRepository) LoadData(
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("Prometheus query error")
@@ -335,7 +336,7 @@ func (pdb *PrometheusDataRepository) LoadData(
pdb.RowToSeries(from, step, steps, row))
}
// only add metric if at least one host returned data
if !ok && len(jobMetric.Series) > 0{
if !ok && len(jobMetric.Series) > 0 {
jobData[metric][scope] = jobMetric
}
// sort by hostname to get uniform coloring
@@ -351,12 +352,12 @@ func (pdb *PrometheusDataRepository) LoadData(
func (pdb *PrometheusDataRepository) LoadStats(
job *schema.Job,
metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
// map of metrics of nodes of stats
stats := map[string]map[string]schema.MetricStatistics{}
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
if err != nil {
log.Warn("Error while loading job for stats")
return nil, err
@@ -376,7 +377,8 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
t0 := time.Now()
// Map of hosts of metrics of value slices
data := make(map[string]map[string][]*schema.JobMetric)
@@ -411,7 +413,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus query error")

View File

@@ -12,7 +12,7 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
panic("TODO")
}
@@ -27,9 +27,10 @@ func (tmdr *TestMetricDataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
ctx context.Context,
resolution int) (schema.JobData, error) {
return TestLoadDataCallback(job, metrics, scopes, ctx)
return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
}
func (tmdr *TestMetricDataRepository) LoadStats(
@@ -48,3 +49,49 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
panic("TODO")
}
func DeepCopy(jd_temp schema.JobData) schema.JobData {
var jd schema.JobData
jd = make(schema.JobData, len(jd_temp))
for k, v := range jd_temp {
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
for k_, v_ := range v {
jd[k][k_] = new(schema.JobMetric)
jd[k][k_].Series = make([]schema.Series, len(v_.Series))
for i := 0; i < len(v_.Series); i += 1 {
jd[k][k_].Series[i].Data = make([]schema.Float, len(v_.Series[i].Data))
copy(jd[k][k_].Series[i].Data, v_.Series[i].Data)
jd[k][k_].Series[i].Hostname = v_.Series[i].Hostname
jd[k][k_].Series[i].Id = v_.Series[i].Id
jd[k][k_].Series[i].Statistics.Avg = v_.Series[i].Statistics.Avg
jd[k][k_].Series[i].Statistics.Min = v_.Series[i].Statistics.Min
jd[k][k_].Series[i].Statistics.Max = v_.Series[i].Statistics.Max
}
jd[k][k_].Timestep = v_.Timestep
jd[k][k_].Unit.Base = v_.Unit.Base
jd[k][k_].Unit.Prefix = v_.Unit.Prefix
if v_.StatisticsSeries != nil {
// Init Slices
jd[k][k_].StatisticsSeries = new(schema.StatsSeries)
jd[k][k_].StatisticsSeries.Max = make([]schema.Float, len(v_.StatisticsSeries.Max))
jd[k][k_].StatisticsSeries.Min = make([]schema.Float, len(v_.StatisticsSeries.Min))
jd[k][k_].StatisticsSeries.Median = make([]schema.Float, len(v_.StatisticsSeries.Median))
jd[k][k_].StatisticsSeries.Mean = make([]schema.Float, len(v_.StatisticsSeries.Mean))
// Copy Data
copy(jd[k][k_].StatisticsSeries.Max, v_.StatisticsSeries.Max)
copy(jd[k][k_].StatisticsSeries.Min, v_.StatisticsSeries.Min)
copy(jd[k][k_].StatisticsSeries.Median, v_.StatisticsSeries.Median)
copy(jd[k][k_].StatisticsSeries.Mean, v_.StatisticsSeries.Mean)
// Handle Percentiles
for k__, v__ := range v_.StatisticsSeries.Percentiles {
jd[k][k_].StatisticsSeries.Percentiles[k__] = make([]schema.Float, len(v__))
copy(jd[k][k_].StatisticsSeries.Percentiles[k__], v__)
}
} else {
jd[k][k_].StatisticsSeries = v_.StatisticsSeries
}
}
}
return jd
}

View File

@@ -5,17 +5,16 @@
package repository
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"math"
"strconv"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
@@ -30,12 +29,10 @@ var (
)
type JobRepository struct {
DB *sqlx.DB
stmtCache *sq.StmtCache
cache *lrucache.Cache
archiveChannel chan *schema.Job
driver string
archivePending sync.WaitGroup
DB *sqlx.DB
stmtCache *sq.StmtCache
cache *lrucache.Cache
driver string
}
func GetJobRepository() *JobRepository {
@@ -46,12 +43,9 @@ func GetJobRepository() *JobRepository {
DB: db.DB,
driver: db.Driver,
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
archiveChannel: make(chan *schema.Job, 128),
stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
}
// start archiving worker
go jobRepoInstance.archivingWorker()
})
return jobRepoInstance
}
@@ -59,23 +53,31 @@ func GetJobRepository() *JobRepository {
var jobColumns []string = []string{
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
"job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data",
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
}
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
job := &schema.Job{}
if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil {
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
log.Warnf("Error while scanning rows (Job): %v", err)
return nil, err
}
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
log.Warn("Error while unmarhsaling raw resources json")
log.Warn("Error while unmarshaling raw resources json")
return nil, err
}
job.RawResources = nil
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warnf("Error while unmarshaling raw footprint json: %v", err)
return nil, err
}
job.RawFootprint = nil
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err
@@ -86,7 +88,6 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
job.Duration = int32(time.Since(job.StartTime).Seconds())
}
job.RawResources = nil
return job, nil
}
@@ -205,7 +206,10 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
return err
}
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
if _, err = sq.Update("job").
Set("meta_data", job.RawMetaData).
Where("job.id = ?", job.ID).
RunWith(r.stmtCache).Exec(); err != nil {
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err
}
@@ -214,227 +218,60 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
return archive.UpdateMetadata(job, job.MetaData)
}
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
jobId *int64,
cluster *string,
startTime *int64,
) (*schema.Job, error) {
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
cachekey := fmt.Sprintf("footprint:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.Footprint = cached.(map[string]float64)
return job.Footprint, nil
}
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
// s, args, _ := q.ToSql()
// log.Printf("trying to find db job with query: %s | %v", s, args)
log.Debugf("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindAll(
jobId *int64,
cluster *string,
startTime *int64,
) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
log.Warn("Error while scanning for job footprint")
return nil, err
}
jobs := make([]*schema.Job, 0, 10)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Debugf("Timer FindAll %s", time.Since(start))
return jobs, nil
}
// FindById executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
func (r *JobRepository) FindConcurrentJobs(
ctx context.Context,
job *schema.Job,
) (*model.JobLinkResultList, error) {
if job == nil {
if len(job.RawFootprint) == 0 {
return nil, nil
}
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
if qerr != nil {
return nil, qerr
}
query = query.Where("cluster = ?", job.Cluster)
var startTime int64
var stopTime int64
startTime = job.StartTimeUnix
hostname := job.Resources[0].Hostname
if job.State == schema.JobStateRunning {
stopTime = time.Now().Unix()
} else {
stopTime = startTime + int64(job.Duration)
}
// Add 200s overlap for jobs start time at the end
startTimeTail := startTime + 10
stopTimeTail := stopTime - 200
startTimeFront := startTime + 200
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
"running", startTimeTail, stopTimeTail, startTime)
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warn("Error while unmarshaling raw footprint json")
return nil, err
}
items := make([]*model.JobLink, 0, 10)
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
r.cache.Put(cachekey, job.Footprint, len(job.Footprint), 24*time.Hour)
log.Debugf("Timer FetchFootprint %s", time.Since(start))
return job.Footprint, nil
}
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
start := time.Now()
cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.EnergyFootprint = cached.(map[string]float64)
return job.EnergyFootprint, nil
}
rows, err = queryRunning.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
log.Warn("Error while scanning for job energy_footprint")
return nil, err
}
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
if len(job.RawEnergyFootprint) == 0 {
return nil, nil
}
cnt := len(items)
return &model.JobLinkResultList{
ListQuery: &queryString,
Items: items,
Count: &cnt,
}, nil
}
// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transfered!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
log.Warn("Error while unmarshaling raw energy footprint json")
return nil, err
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
}
res, err := r.DB.NamedExec(`INSERT INTO job (
job_id, user, project, cluster, subcluster, `+"`partition`"+`, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data
);`, job)
if err != nil {
return -1, err
}
return res.LastInsertId()
}
// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) Stop(
jobId int64,
duration int32,
state schema.JobState,
monitoringStatus int32,
) (err error) {
stmt := sq.Update("job").
Set("job_state", state).
Set("duration", duration).
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
return job.EnergyFootprint, nil
}
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
@@ -466,103 +303,6 @@ func (r *JobRepository) DeleteJobById(id int64) error {
return err
}
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", job)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
}
// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) MarkArchived(
jobId int64,
monitoringStatus int32,
metricStats map[string]schema.JobStatistics,
) error {
stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
for metric, stats := range metricStats {
switch metric {
case "flops_any":
stmt = stmt.Set("flops_any_avg", stats.Avg)
case "mem_used":
stmt = stmt.Set("mem_used_max", stats.Max)
case "mem_bw":
stmt = stmt.Set("mem_bw_avg", stats.Avg)
case "load":
stmt = stmt.Set("load_avg", stats.Avg)
case "cpu_load":
stmt = stmt.Set("load_avg", stats.Avg)
case "net_bw":
stmt = stmt.Set("net_bw_avg", stats.Avg)
case "file_bw":
stmt = stmt.Set("file_bw_avg", stats.Avg)
default:
log.Debugf("MarkArchived() Metric '%v' unknown", metric)
}
}
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
log.Warn("Error while marking job as archived")
return err
}
return nil
}
// Archiving worker thread
func (r *JobRepository) archivingWorker() {
for {
select {
case job, ok := <-r.archiveChannel:
if !ok {
break
}
start := time.Now()
// not using meta data, called to load JobMeta into Cache?
// will fail if job meta not in repository
if _, err := r.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
// TODO: Maybe use context with cancel/timeout here
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// Update the jobs database entry one last time:
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
log.Errorf("archiving job (dbid: %d) failed at marking archived step: %s", job.ID, err.Error())
continue
}
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
log.Printf("archiving job (dbid: %d) successful", job.ID)
r.archivePending.Done()
}
}
}
// Trigger async archiving
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
r.archivePending.Add(1)
r.archiveChannel <- job
}
// Wait for background thread to finish pending archiving operations
func (r *JobRepository) WaitForArchiving() {
// close channel and wait for worker to process remaining jobs
r.archivePending.Wait()
}
func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
return searchterm, "", "", ""
@@ -745,6 +485,46 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
return nil
}
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where("job.job_state = 'running'").
Where("job.duration > 600")
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Infof("Return job count %d", len(jobs))
return jobs, nil
}
func (r *JobRepository) UpdateDuration() error {
stmnt := sq.Update("job").
Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
Where("job_state = 'running'")
_, err := stmnt.RunWith(r.stmtCache).Exec()
if err != nil {
return err
}
return nil
}
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
var query sq.SelectBuilder
@@ -783,27 +563,108 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
return jobs, nil
}
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", job)
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
res, err := r.DB.NamedExec(NamedJobInsert, job)
if err != nil {
log.Warn("Error while NamedJobInsert")
return 0, err
}
id, err := res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return 0, err
}
return id, nil
_, err = stmt.RunWith(r.stmtCache).Exec()
return
}
func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
return err
}
return nil
}
func (r *JobRepository) MarkArchived(
stmt sq.UpdateBuilder,
monitoringStatus int32,
) sq.UpdateBuilder {
return stmt.Set("monitoring_status", monitoringStatus)
}
func (r *JobRepository) UpdateEnergy(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
}
energyFootprint := make(map[string]float64)
var totalEnergy float64
var energy float64
for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
energyFootprint[fp] = energy
totalEnergy += energy
}
var rawFootprint []byte
if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
}
func (r *JobRepository) UpdateFootprint(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
}
footprint := make(map[string]float64)
for _, fp := range sc.Footprint {
var statType string
for _, gm := range archive.GlobalMetricList {
if gm.Name == fp {
statType = gm.Footprint
}
}
if statType != "avg" && statType != "min" && statType != "max" {
log.Warnf("unknown statType for footprint update: %s", statType)
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
}
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
footprint[name] = LoadJobStat(jobMeta, fp, statType)
}
var rawFootprint []byte
if rawFootprint, err = json.Marshal(footprint); err != nil {
log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}
return stmt.Set("footprint", string(rawFootprint)), nil
}

View File

@@ -0,0 +1,75 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"encoding/json"
"fmt"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, resources, meta_data
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :resources, :meta_data
);`
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
res, err := r.DB.NamedExec(NamedJobInsert, job)
if err != nil {
log.Warn("Error while NamedJobInsert")
return 0, err
}
id, err := res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return 0, err
}
return id, nil
}
// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transfered!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
}
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
}
return r.InsertJob(job)
}
// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) Stop(
jobId int64,
duration int32,
state schema.JobState,
monitoringStatus int32,
) (err error) {
stmt := sq.Update("job").
Set("job_state", state).
Set("duration", duration).
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
}

View File

@@ -0,0 +1,244 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"database/sql"
"fmt"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
jobId *int64,
cluster *string,
startTime *int64,
) (*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
log.Debugf("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindAll(
jobId *int64,
cluster *string,
startTime *int64,
) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
jobs := make([]*schema.Job, 0, 10)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Debugf("Timer FindAll %s", time.Since(start))
return jobs, nil
}
// FindById executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
q, qerr := SecurityCheck(ctx, q)
if qerr != nil {
return nil, qerr
}
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByIdDirect executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByJobId executes a SQL query to find a specific batch job.
// The job is queried using the slurm id and the clustername.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").
Where("job.job_id = ?", jobId).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
q, qerr := SecurityCheck(ctx, q)
if qerr != nil {
return nil, qerr
}
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// IsJobOwner executes a SQL query to find a specific batch job.
// The job is queried using the slurm id,a username and the cluster.
// It returns a bool.
// If job was found, user is owner: test err != sql.ErrNoRows
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
q := sq.Select("id").
From("job").
Where("job.job_id = ?", jobId).
Where("job.user = ?", user).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
return err != sql.ErrNoRows
}
func (r *JobRepository) FindConcurrentJobs(
ctx context.Context,
job *schema.Job,
) (*model.JobLinkResultList, error) {
if job == nil {
return nil, nil
}
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
if qerr != nil {
return nil, qerr
}
query = query.Where("cluster = ?", job.Cluster)
var startTime int64
var stopTime int64
startTime = job.StartTimeUnix
hostname := job.Resources[0].Hostname
if job.State == schema.JobStateRunning {
stopTime = time.Now().Unix()
} else {
stopTime = startTime + int64(job.Duration)
}
// Add 200s overlap for jobs start time at the end
startTimeTail := startTime + 10
stopTimeTail := stopTime - 200
startTimeFront := startTime + 200
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
"running", startTimeTail, stopTimeTail, startTime)
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
return nil, err
}
items := make([]*model.JobLink, 0, 10)
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
}
rows, err = queryRunning.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
return nil, err
}
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
}
cnt := len(items)
return &model.JobLinkResultList{
ListQuery: &queryString,
Items: items,
Count: &cnt,
}, nil
}

View File

@@ -22,8 +22,8 @@ func (r *JobRepository) QueryJobs(
ctx context.Context,
filters []*model.JobFilter,
page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) {
order *model.OrderByInput,
) ([]*schema.Job, error) {
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
if qerr != nil {
return nil, qerr
@@ -31,14 +31,28 @@ func (r *JobRepository) QueryJobs(
if order != nil {
field := toSnakeCase(order.Field)
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
if order.Type == "col" {
// "col": Fixed column name query
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
}
} else {
// "foot": Order by footprint JSON field values
// Verify and Search Only in Valid Jsons
query = query.Where("JSON_VALID(meta_data)")
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
}
}
}
@@ -73,9 +87,10 @@ func (r *JobRepository) QueryJobs(
func (r *JobRepository) CountJobs(
ctx context.Context,
filters []*model.JobFilter) (int, error) {
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
filters []*model.JobFilter,
) (int, error) {
// DISTICT count for tags filters, does not affect other queries
query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
if qerr != nil {
return 0, qerr
}
@@ -97,30 +112,33 @@ func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilde
if user == nil {
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user context is nil")
} else if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleApi}) { // Admin & Co. : All jobs
}
switch {
case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
return query, nil
} else if user.HasRole(schema.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
return query, nil
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
if len(user.Projects) != 0 {
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
} else {
log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
return query.Where("job.user = ?", user.Username), nil
}
} else if user.HasRole(schema.RoleUser) { // User : Only personal jobs
case user.HasRole(schema.RoleUser): // User : Only personal jobs
return query.Where("job.user = ?", user.Username), nil
} else {
// Shortterm compatibility: Return User-Query if no roles:
return query.Where("job.user = ?", user.Username), nil
// // On the longterm: Return Error instead of fallback:
// var qnil sq.SelectBuilder
// return qnil, fmt.Errorf("user '%s' with unknown roles [%#v]", user.Username, user.Roles)
default: // No known Role, return error
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user has no or unknown roles")
}
}
// Build a sq.SelectBuilder out of a schema.JobFilter.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
if filter.Tags != nil {
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
}
if filter.JobID != nil {
query = buildStringCondition("job.job_id", filter.JobID, query)
@@ -174,17 +192,13 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
if filter.Node != nil {
query = buildStringCondition("job.resources", filter.Node, query)
}
if filter.FlopsAnyAvg != nil {
query = buildFloatCondition("job.flops_any_avg", filter.FlopsAnyAvg, query)
if filter.Energy != nil {
query = buildFloatCondition("job.energy", filter.Energy, query)
}
if filter.MemBwAvg != nil {
query = buildFloatCondition("job.mem_bw_avg", filter.MemBwAvg, query)
}
if filter.LoadAvg != nil {
query = buildFloatCondition("job.load_avg", filter.LoadAvg, query)
}
if filter.MemUsedMax != nil {
query = buildFloatCondition("job.mem_used_max", filter.MemUsedMax, query)
if filter.MetricStats != nil {
for _, ms := range filter.MetricStats {
query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
}
}
return query
}
@@ -193,6 +207,10 @@ func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuild
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
if cond.From != nil && cond.To != nil {
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
@@ -200,13 +218,32 @@ func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBui
return query.Where("? <= "+field, cond.From.Unix())
} else if cond.To != nil {
return query.Where(field+" <= ?", cond.To.Unix())
} else if cond.Range != "" {
now := time.Now().Unix()
var then int64
switch cond.Range {
case "last6h":
then = now - (60 * 60 * 6)
case "last24h":
then = now - (60 * 60 * 24)
case "last7d":
then = now - (60 * 60 * 24 * 7)
case "last30d":
then = now - (60 * 60 * 24 * 30)
default:
log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
return query
}
return query.Where(field+" BETWEEN ? AND ?", then, now)
} else {
return query
}
}
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
// Verify and Search Only in Valid Jsons
query = query.Where("JSON_VALID(footprint)")
return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
}
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
@@ -227,9 +264,7 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
}
if cond.In != nil {
queryElements := make([]string, len(cond.In))
for i, val := range cond.In {
queryElements[i] = val
}
copy(queryElements, cond.In)
return query.Where(sq.Or{sq.Eq{field: queryElements}})
}
return query
@@ -257,8 +292,10 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.
return query
}
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
var (
matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
)
func toSnakeCase(str string) string {
for _, c := range str {

View File

@@ -5,9 +5,11 @@
package repository
import (
"context"
"fmt"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
_ "github.com/mattn/go-sqlite3"
)
@@ -30,7 +32,7 @@ func TestFind(t *testing.T) {
func TestFindById(t *testing.T) {
r := setup(t)
job, err := r.FindById(5)
job, err := r.FindById(getContext(t), 5)
if err != nil {
t.Fatal(err)
}
@@ -45,7 +47,19 @@ func TestFindById(t *testing.T) {
func TestGetTags(t *testing.T) {
r := setup(t)
tags, counts, err := r.CountTags(nil)
const contextUserKey ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
Projects: make([]string, 0),
Roles: []string{"user"},
AuthType: 0,
AuthSource: 2,
}
ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)
// Test Tag has Scope "global"
tags, counts, err := r.CountTags(ctx)
if err != nil {
t.Fatal(err)
}

View File

@@ -16,7 +16,7 @@ import (
"github.com/golang-migrate/migrate/v4/source/iofs"
)
const Version uint = 7
const Version uint = 8
//go:embed migrations/*
var migrationFiles embed.FS

View File

@@ -0,0 +1,21 @@
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
ALTER TABLE job DROP footprint;

View File

@@ -0,0 +1,137 @@
DROP INDEX job_stats;
DROP INDEX job_by_user;
DROP INDEX job_by_starttime;
DROP INDEX job_by_job_id;
DROP INDEX job_list;
DROP INDEX job_list_user;
DROP INDEX job_list_users;
DROP INDEX job_list_users_start;
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, partition, energy);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, partition, job_state, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, partition, job_state, energy);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (user, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (user, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (user, num_acc);
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (user, energy);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Optimize DB index usage
PRAGMA optimize;

View File

@@ -55,7 +55,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_, err := db.FindById(jobId)
_, err := db.FindById(getContext(b), jobId)
noErr(b, err)
}
})

View File

@@ -13,7 +13,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -41,8 +41,8 @@ var sortBy2column = map[model.SortByAggregate]string{
func (r *JobRepository) buildCountQuery(
filter []*model.JobFilter,
kind string,
col string) sq.SelectBuilder {
col string,
) sq.SelectBuilder {
var query sq.SelectBuilder
if col != "" {
@@ -69,16 +69,16 @@ func (r *JobRepository) buildCountQuery(
func (r *JobRepository) buildStatsQuery(
filter []*model.JobFilter,
col string) sq.SelectBuilder {
col string,
) sq.SelectBuilder {
var query sq.SelectBuilder
castType := r.getCastType()
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
if col != "" {
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs",
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
@@ -86,10 +86,9 @@ func (r *JobRepository) buildStatsQuery(
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
).From("job").GroupBy(col)
).From("job").Join("user ON user.username = job.user").GroupBy(col)
} else {
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
@@ -108,15 +107,15 @@ func (r *JobRepository) buildStatsQuery(
return query
}
func (r *JobRepository) getUserName(ctx context.Context, id string) string {
user := GetUserFromContext(ctx)
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
if name != "" {
return name
} else {
return "-"
}
}
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
// user := GetUserFromContext(ctx)
// name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
// if name != "" {
// return name
// } else {
// return "-"
// }
// }
func (r *JobRepository) getCastType() string {
var castType string
@@ -138,8 +137,8 @@ func (r *JobRepository) JobsStatsGrouped(
filter []*model.JobFilter,
page *model.PageRequest,
sortBy *model.SortByAggregate,
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
groupBy *model.Aggregate,
) ([]*model.JobsStatistics, error) {
start := time.Now()
col := groupBy2column[*groupBy]
query := r.buildStatsQuery(filter, col)
@@ -168,14 +167,20 @@ func (r *JobRepository) JobsStatsGrouped(
for rows.Next() {
var id sql.NullString
var name sql.NullString
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
var personName string
if name.Valid {
personName = name.String
}
if jobs.Valid {
totalJobs = int(jobs.Int64)
@@ -206,11 +211,11 @@ func (r *JobRepository) JobsStatsGrouped(
}
if col == "job.user" {
name := r.getUserName(ctx, id.String)
// name := r.getUserName(ctx, id.String)
stats = append(stats,
&model.JobsStatistics{
ID: id.String,
Name: name,
Name: personName,
TotalJobs: totalJobs,
TotalWalltime: totalWalltime,
TotalNodes: totalNodes,
@@ -218,7 +223,8 @@ func (r *JobRepository) JobsStatsGrouped(
TotalCores: totalCores,
TotalCoreHours: totalCoreHours,
TotalAccs: totalAccs,
TotalAccHours: totalAccHours})
TotalAccHours: totalAccHours,
})
} else {
stats = append(stats,
&model.JobsStatistics{
@@ -230,7 +236,8 @@ func (r *JobRepository) JobsStatsGrouped(
TotalCores: totalCores,
TotalCoreHours: totalCoreHours,
TotalAccs: totalAccs,
TotalAccHours: totalAccHours})
TotalAccHours: totalAccHours,
})
}
}
}
@@ -241,8 +248,8 @@ func (r *JobRepository) JobsStatsGrouped(
func (r *JobRepository) JobsStats(
ctx context.Context,
filter []*model.JobFilter) ([]*model.JobsStatistics, error) {
filter []*model.JobFilter,
) ([]*model.JobsStatistics, error) {
start := time.Now()
query := r.buildStatsQuery(filter, "")
query, err := SecurityCheck(ctx, query)
@@ -277,18 +284,36 @@ func (r *JobRepository) JobsStats(
TotalWalltime: int(walltime.Int64),
TotalNodeHours: totalNodeHours,
TotalCoreHours: totalCoreHours,
TotalAccHours: totalAccHours})
TotalAccHours: totalAccHours,
})
}
log.Debugf("Timer JobStats %s", time.Since(start))
return stats, nil
}
func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
if stats, ok := job.Statistics[metric]; ok {
switch statType {
case "avg":
return stats.Avg
case "max":
return stats.Max
case "min":
return stats.Min
default:
log.Errorf("Unknown stat type %s", statType)
}
}
return 0.0
}
func (r *JobRepository) JobCountGrouped(
ctx context.Context,
filter []*model.JobFilter,
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
groupBy *model.Aggregate,
) ([]*model.JobsStatistics, error) {
start := time.Now()
col := groupBy2column[*groupBy]
query := r.buildCountQuery(filter, "", col)
@@ -315,7 +340,8 @@ func (r *JobRepository) JobCountGrouped(
stats = append(stats,
&model.JobsStatistics{
ID: id.String,
TotalJobs: int(cnt.Int64)})
TotalJobs: int(cnt.Int64),
})
}
}
@@ -328,8 +354,8 @@ func (r *JobRepository) AddJobCountGrouped(
filter []*model.JobFilter,
groupBy *model.Aggregate,
stats []*model.JobsStatistics,
kind string) ([]*model.JobsStatistics, error) {
kind string,
) ([]*model.JobsStatistics, error) {
start := time.Now()
col := groupBy2column[*groupBy]
query := r.buildCountQuery(filter, kind, col)
@@ -376,8 +402,8 @@ func (r *JobRepository) AddJobCount(
ctx context.Context,
filter []*model.JobFilter,
stats []*model.JobsStatistics,
kind string) ([]*model.JobsStatistics, error) {
kind string,
) ([]*model.JobsStatistics, error) {
start := time.Now()
query := r.buildCountQuery(filter, kind, "")
query, err := SecurityCheck(ctx, query)
@@ -420,7 +446,8 @@ func (r *JobRepository) AddJobCount(
func (r *JobRepository) AddHistograms(
ctx context.Context,
filter []*model.JobFilter,
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
stat *model.JobsStatistics,
) (*model.JobsStatistics, error) {
start := time.Now()
castType := r.getCastType()
@@ -459,7 +486,8 @@ func (r *JobRepository) AddMetricHistograms(
ctx context.Context,
filter []*model.JobFilter,
metrics []string,
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
stat *model.JobsStatistics,
) (*model.JobsStatistics, error) {
start := time.Now()
// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
@@ -491,8 +519,8 @@ func (r *JobRepository) AddMetricHistograms(
func (r *JobRepository) jobsStatisticsHistogram(
ctx context.Context,
value string,
filters []*model.JobFilter) ([]*model.HistoPoint, error) {
filters []*model.JobFilter,
) ([]*model.HistoPoint, error) {
start := time.Now()
query, qerr := SecurityCheck(ctx,
sq.Select(value, "COUNT(job.id) AS count").From("job"))
@@ -528,36 +556,20 @@ func (r *JobRepository) jobsStatisticsHistogram(
func (r *JobRepository) jobsMetricStatisticsHistogram(
ctx context.Context,
metric string,
filters []*model.JobFilter) (*model.MetricHistoPoints, error) {
var dbMetric string
switch metric {
case "cpu_load":
dbMetric = "load_avg"
case "flops_any":
dbMetric = "flops_any_avg"
case "mem_bw":
dbMetric = "mem_bw_avg"
case "mem_used":
dbMetric = "mem_used_max"
case "net_bw":
dbMetric = "net_bw_avg"
case "file_bw":
dbMetric = "file_bw_avg"
default:
return nil, fmt.Errorf("%s not implemented", metric)
}
filters []*model.JobFilter,
) (*model.MetricHistoPoints, error) {
// Get specific Peak or largest Peak
var metricConfig *schema.MetricConfig
var peak float64 = 0.0
var unit string = ""
var footprintStat string = ""
for _, f := range filters {
if f.Cluster != nil {
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
peak = metricConfig.Peak
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
footprintStat = metricConfig.Footprint
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
}
}
@@ -572,23 +584,29 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
if unit == "" {
unit = m.Unit.Prefix + m.Unit.Base
}
if footprintStat == "" {
footprintStat = m.Footprint
}
}
}
}
}
// log.Debugf("Metric %s: DB %s, Peak %f, Unit %s", metric, dbMetric, peak, unit)
// log.Debugf("Metric %s, Peak %f, Unit %s, Aggregation %s", metric, peak, unit, aggreg)
// Make bins, see https://jereze.com/code/sql-histogram/
start := time.Now()
jm := fmt.Sprintf(`json_extract(footprint, "$.%s")`, (metric + "_" + footprintStat))
crossJoinQuery := sq.Select(
fmt.Sprintf(`max(%s) as max`, dbMetric),
fmt.Sprintf(`min(%s) as min`, dbMetric),
fmt.Sprintf(`max(%s) as max`, jm),
fmt.Sprintf(`min(%s) as min`, jm),
).From("job").Where(
fmt.Sprintf(`%s is not null`, dbMetric),
"JSON_VALID(footprint)",
).Where(
fmt.Sprintf(`%s <= %f`, dbMetric, peak),
fmt.Sprintf(`%s is not null`, jm),
).Where(
fmt.Sprintf(`%s <= %f`, jm, peak),
)
crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery)
@@ -607,16 +625,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
}
bins := 10
binQuery := fmt.Sprintf(`CAST( (case when job.%s = value.max then value.max*0.999999999 else job.%s end - value.min) / (value.max - value.min) * %d as INTEGER )`, dbMetric, dbMetric, bins)
binQuery := fmt.Sprintf(`CAST( (case when %s = value.max
then value.max*0.999999999 else %s end - value.min) / (value.max -
value.min) * %d as INTEGER )`, jm, jm, bins)
mainQuery := sq.Select(
fmt.Sprintf(`%s + 1 as bin`, binQuery),
fmt.Sprintf(`count(job.%s) as count`, dbMetric),
fmt.Sprintf(`count(%s) as count`, jm),
fmt.Sprintf(`CAST(((value.max / %d) * (%s )) as INTEGER ) as min`, bins, binQuery),
fmt.Sprintf(`CAST(((value.max / %d) * (%s + 1 )) as INTEGER ) as max`, bins, binQuery),
).From("job").CrossJoin(
fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), crossJoinQueryArgs...,
).Where(fmt.Sprintf(`job.%s is not null and job.%s <= %f`, dbMetric, dbMetric, peak))
).Where(fmt.Sprintf(`%s is not null and %s <= %f`, jm, jm, peak))
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
@@ -641,14 +661,14 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
for rows.Next() {
point := model.MetricHistoPoint{}
if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil {
log.Warnf("Error while scanning rows for %s", metric)
log.Warnf("Error while scanning rows for %s", jm)
return nil, err // Totally bricks cc-backend if returned and if all metrics requested?
}
points = append(points, &point)
}
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Data: points}
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Stat: &footprintStat, Data: points}
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
return &result, nil
@@ -657,8 +677,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
ctx context.Context,
metrics []string,
filters []*model.JobFilter) []*model.MetricHistoPoints {
filters []*model.JobFilter,
) []*model.MetricHistoPoints {
// Get Jobs
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
if err != nil {
@@ -681,7 +701,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
continue
}
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Errorf("Error while loading averages for histogram: %s", err)
return nil
}

View File

@@ -5,6 +5,8 @@
package repository
import (
"context"
"fmt"
"strings"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -14,7 +16,14 @@ import (
)
// Add the tag with id `tagId` to the job with the database id `jobId`.
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
func (r *JobRepository) AddTag(ctx context.Context, job int64, tag int64) ([]*schema.Tag, error) {
j, err := r.FindById(ctx, job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
@@ -23,49 +32,62 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
tags, err := r.GetTags(&job)
tags, err := r.GetTags(ctx, &job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, tags)
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
}
// Removes a tag from a job
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
func (r *JobRepository) RemoveTag(ctx context.Context, job, tag int64) ([]*schema.Tag, error) {
j, err := r.FindById(ctx, job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tag)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error adding tag with %s: %v", s, err)
log.Errorf("Error removing tag with %s: %v", s, err)
return nil, err
}
j, err := r.FindById(job)
if err != nil {
log.Warn("Error while finding job by id")
return nil, err
}
tags, err := r.GetTags(&job)
tags, err := r.GetTags(ctx, &job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, tags)
archiveTags, err := r.getArchiveTags(&job)
if err != nil {
log.Warn("Error while getting tags for job")
return nil, err
}
return tags, archive.UpdateTags(j, archiveTags)
}
// CreateTag creates a new tag with the specified type and name and returns its database id.
func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64, err error) {
q := sq.Insert("tag").Columns("tag_type", "tag_name").Values(tagType, tagName)
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
}
q := sq.Insert("tag").Columns("tag_type", "tag_name", "tag_scope").Values(tagType, tagName, tagScope)
res, err := q.RunWith(r.stmtCache).Exec()
if err != nil {
@@ -77,9 +99,10 @@ func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64,
return res.LastInsertId()
}
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
func (r *JobRepository) CountTags(ctx context.Context) (tags []schema.Tag, counts map[string]int, err error) {
// Fetch all Tags in DB for Display in Frontend Tag-View
tags = make([]schema.Tag, 0, 100)
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name FROM tag")
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
if err != nil {
return nil, nil, err
}
@@ -89,16 +112,38 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
if err = xrows.StructScan(&t); err != nil {
return nil, nil, err
}
tags = append(tags, t)
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
readable, err := r.checkScopeAuth(ctx, "read", t.Scope)
if err != nil {
return nil, nil, err
}
if readable {
tags = append(tags, t)
}
}
q := sq.Select("t.tag_name, count(jt.tag_id)").
user := GetUserFromContext(ctx)
// Query and Count Jobs with attached Tags
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
From("tag t").
LeftJoin("jobtag jt ON t.id = jt.tag_id").
GroupBy("t.tag_name")
// Handle Scope Filtering
scopeList := "\"global\""
if user != nil {
scopeList += ",\"" + user.Username + "\""
}
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
scopeList += ",\"admin\""
}
q = q.Where("t.tag_scope IN (" + scopeList + ")")
// Handle Job Ownership
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
// log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
// Build ("project1", "project2", ...) list of variable length directly in SQL string
@@ -115,29 +160,45 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
counts = make(map[string]int)
for rows.Next() {
var tagName string
var tagId int
var count int
if err = rows.Scan(&tagName, &count); err != nil {
if err = rows.Scan(&tagName, &tagId, &count); err != nil {
return nil, nil, err
}
counts[tagName] = count
// Use tagId as second Map-Key component to differentiate tags with identical names
counts[fmt.Sprint(tagName, tagId)] = count
}
err = rows.Err()
return
return tags, counts, err
}
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
// If such a tag does not yet exist, it is created.
func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName string) (tagId int64, err error) {
tagId, exists := r.TagId(tagType, tagName)
func (r *JobRepository) AddTagOrCreate(ctx context.Context, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
// Default to "Global" scope if none defined
if tagScope == "" {
tagScope = "global"
}
writable, err := r.checkScopeAuth(ctx, "write", tagScope)
if err != nil {
return 0, err
}
if !writable {
return 0, fmt.Errorf("cannot write tag scope with current authorization")
}
tagId, exists := r.TagId(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName)
tagId, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return 0, err
}
}
if _, err := r.AddTag(jobId, tagId); err != nil {
if _, err := r.AddTag(ctx, jobId, tagId); err != nil {
return 0, err
}
@@ -145,19 +206,19 @@ func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName stri
}
// TagId returns the database id of the tag with the specified type and name.
func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) {
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
exists = true
if err := sq.Select("id").From("tag").
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
exists = false
}
return
}
// GetTags returns a list of all tags if job is nil or of the tags that the job with that database ID has.
func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name").From("tag")
// GetTags returns a list of all scoped tags if job is nil or of the tags that the job with that database ID has.
func (r *JobRepository) GetTags(ctx context.Context, job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
if job != nil {
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
}
@@ -172,7 +233,41 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
tags := make([]*schema.Tag, 0)
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
readable, err := r.checkScopeAuth(ctx, "read", tag.Scope)
if err != nil {
return nil, err
}
if readable {
tags = append(tags, tag)
}
}
return tags, nil
}
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
if job != nil {
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error get tags with %s: %v", s, err)
return nil, err
}
tags := make([]*schema.Tag, 0)
for rows.Next() {
tag := &schema.Tag{}
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
@@ -181,3 +276,60 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
return tags, nil
}
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
// Import has no scope ctx, only import from metafile to DB (No recursive archive update required), only returns err
tagId, exists := r.TagId(tagType, tagName, tagScope)
if !exists {
tagId, err = r.CreateTag(tagType, tagName, tagScope)
if err != nil {
return err
}
}
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
s, _, _ := q.ToSql()
log.Errorf("Error adding tag on import with %s: %v", s, err)
return err
}
return nil
}
func (r *JobRepository) checkScopeAuth(ctx context.Context, operation string, scope string) (pass bool, err error) {
user := GetUserFromContext(ctx)
if user != nil {
switch {
case operation == "write" && scope == "admin":
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
return true, nil
}
return false, nil
case operation == "write" && scope == "global":
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
return true, nil
}
return false, nil
case operation == "write" && scope == user.Username:
return true, nil
case operation == "read" && scope == "admin":
return user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}), nil
case operation == "read" && scope == "global":
return true, nil
case operation == "read" && scope == user.Username:
return true, nil
default:
if operation == "read" || operation == "write" {
// No acceptable scope: deny tag
return false, nil
} else {
return false, fmt.Errorf("error while checking tag operation auth: unknown operation (%s)", operation)
}
}
} else {
return false, fmt.Errorf("error while checking tag operation auth: no user in context")
}
}

Binary file not shown.

View File

@@ -6,7 +6,6 @@ package repository
import (
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/jmoiron/sqlx"
)
@@ -18,20 +17,12 @@ type Transaction struct {
func (r *JobRepository) TransactionInit() (*Transaction, error) {
var err error
t := new(Transaction)
// Inserts are bundled into transactions because in sqlite,
// that speeds up inserts A LOT.
t.tx, err = r.DB.Beginx()
if err != nil {
log.Warn("Error while bundling transactions")
return nil, err
}
t.stmt, err = t.tx.PrepareNamed(NamedJobInsert)
if err != nil {
log.Warn("Error while preparing namedJobInsert")
return nil, err
}
return t, nil
}
@@ -50,7 +41,6 @@ func (r *JobRepository) TransactionCommit(t *Transaction) error {
return err
}
t.stmt = t.tx.NamedStmt(t.stmt)
return nil
}
@@ -59,14 +49,17 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
log.Warn("Error while committing SQL transactions")
return err
}
return nil
}
func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, error) {
res, err := t.stmt.Exec(job)
func (r *JobRepository) TransactionAddNamed(
t *Transaction,
query string,
args ...interface{},
) (int64, error) {
res, err := t.tx.NamedExec(query, args)
if err != nil {
log.Errorf("repository initDB(): %v", err)
log.Errorf("Named Exec failed: %v", err)
return 0, err
}
@@ -79,26 +72,19 @@ func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, e
return id, nil
}
func (r *JobRepository) TransactionAddTag(t *Transaction, tag *schema.Tag) (int64, error) {
res, err := t.tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
res, err := t.tx.Exec(query, args...)
if err != nil {
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
return 0, err
}
tagId, err := res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
log.Errorf("TransactionAdd(), Exec() Error: %v", err)
return 0, err
}
return tagId, nil
}
func (r *JobRepository) TransactionSetTag(t *Transaction, jobId int64, tagId int64) error {
if _, err := t.tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, jobId, tagId); err != nil {
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", jobId, tagId)
return err
id, err := res.LastInsertId()
if err != nil {
log.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
return 0, err
}
return nil
return id, nil
}

View File

@@ -72,7 +72,6 @@ func (r *UserRepository) GetUser(username string) (*schema.User, error) {
}
func (r *UserRepository) GetLdapUsernames() ([]string, error) {
var users []string
rows, err := r.DB.Query(`SELECT username FROM user WHERE user.ldap = 1`)
if err != nil {
@@ -131,8 +130,28 @@ func (r *UserRepository) AddUser(user *schema.User) error {
return nil
}
func (r *UserRepository) DelUser(username string) error {
func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
// user contains updated info, apply to dbuser
// TODO: Discuss updatable fields
if dbUser.Name != user.Name {
if _, err := sq.Update("user").Set("name", user.Name).Where("user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
log.Errorf("error while updating name of user '%s'", user.Username)
return err
}
}
// Toggled until greenlit
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
// projects, _ := json.Marshal(user.Projects)
// if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
// return err
// }
// }
return nil
}
func (r *UserRepository) DelUser(username string) error {
_, err := r.DB.Exec(`DELETE FROM user WHERE user.username = ?`, username)
if err != nil {
log.Errorf("Error while deleting user '%s' from DB", username)
@@ -143,7 +162,6 @@ func (r *UserRepository) DelUser(username string) error {
}
func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
if specialsOnly {
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
@@ -186,8 +204,8 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
func (r *UserRepository) AddRole(
ctx context.Context,
username string,
queryrole string) error {
queryrole string,
) error {
newRole := strings.ToLower(queryrole)
user, err := r.GetUser(username)
if err != nil {
@@ -198,15 +216,15 @@ func (r *UserRepository) AddRole(
exists, valid := user.HasValidRole(newRole)
if !valid {
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
return fmt.Errorf("supplied role is no valid option : %v", newRole)
}
if exists {
return fmt.Errorf("User %v already has role %v", username, newRole)
return fmt.Errorf("user %v already has role %v", username, newRole)
}
roles, _ := json.Marshal(append(user.Roles, newRole))
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
log.Errorf("Error while adding new role for user '%s'", user.Username)
log.Errorf("error while adding new role for user '%s'", user.Username)
return err
}
return nil
@@ -223,14 +241,14 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
exists, valid := user.HasValidRole(oldRole)
if !valid {
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
return fmt.Errorf("supplied role is no valid option : %v", oldRole)
}
if !exists {
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
return fmt.Errorf("role already deleted for user '%v': %v", username, oldRole)
}
if oldRole == schema.GetRoleString(schema.RoleManager) && len(user.Projects) != 0 {
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
return fmt.Errorf("cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
}
var newroles []string
@@ -240,7 +258,7 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
}
}
var mroles, _ = json.Marshal(newroles)
mroles, _ := json.Marshal(newroles)
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
log.Errorf("Error while removing role for user '%s'", user.Username)
return err
@@ -251,15 +269,15 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
func (r *UserRepository) AddProject(
ctx context.Context,
username string,
project string) error {
project string,
) error {
user, err := r.GetUser(username)
if err != nil {
return err
}
if !user.HasRole(schema.RoleManager) {
return fmt.Errorf("user '%s' is not a manager!", username)
return fmt.Errorf("user '%s' is not a manager", username)
}
if user.HasProject(project) {
@@ -281,11 +299,11 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
}
if !user.HasRole(schema.RoleManager) {
return fmt.Errorf("user '%#v' is not a manager!", username)
return fmt.Errorf("user '%#v' is not a manager", username)
}
if !user.HasProject(project) {
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match", username, project)
}
var exists bool
@@ -298,7 +316,7 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
}
}
if exists == true {
if exists {
var result interface{}
if len(newprojects) == 0 {
result = "[]"

View File

@@ -13,6 +13,7 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
@@ -81,6 +82,9 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
func setupJobRoute(i InfoType, r *http.Request) InfoType {
i["id"] = mux.Vars(r)["id"]
if config.Keys.EmissionConstant != 0 {
i["emission"] = config.Keys.EmissionConstant
}
return i
}
@@ -128,24 +132,41 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
jobRepo := repository.GetJobRepository()
user := repository.GetUserFromContext(r.Context())
tags, counts, err := jobRepo.CountTags(user)
tags, counts, err := jobRepo.CountTags(r.Context())
tagMap := make(map[string][]map[string]interface{})
if err != nil {
log.Warnf("GetTags failed: %s", err.Error())
i["tagmap"] = tagMap
return i
}
for _, tag := range tags {
tagItem := map[string]interface{}{
"id": tag.ID,
"name": tag.Name,
"count": counts[tag.Name],
// Reduces displayed tags for unauth'd users
userAuthlevel := repository.GetUserFromContext(r.Context()).GetAuthLevel()
// Uses tag.ID as second Map-Key component to differentiate tags with identical names
if userAuthlevel >= 4 { // Support+ : Show tags for all scopes, regardless of count
for _, tag := range tags {
tagItem := map[string]interface{}{
"id": tag.ID,
"name": tag.Name,
"scope": tag.Scope,
"count": counts[fmt.Sprint(tag.Name, tag.ID)],
}
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
}
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
}
} else if userAuthlevel < 4 && userAuthlevel >= 2 { // User+ : Show global and admin scope only if at least 1 tag used, private scope regardless of count
for _, tag := range tags {
tagCount := counts[fmt.Sprint(tag.Name, tag.ID)]
if ((tag.Scope == "global" || tag.Scope == "admin") && tagCount >= 1) || (tag.Scope != "global" && tag.Scope != "admin") {
tagItem := map[string]interface{}{
"id": tag.ID,
"name": tag.Name,
"scope": tag.Scope,
"count": tagCount,
}
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
}
}
} // auth < 2 return nothing for this route
i["tagmap"] = tagMap
return i
}
@@ -238,7 +259,7 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
}
if query.Get("startTime") != "" {
parts := strings.Split(query.Get("startTime"), "-")
if len(parts) == 2 {
if len(parts) == 2 { // Time in seconds, from - to
a, e1 := strconv.ParseInt(parts[0], 10, 64)
b, e2 := strconv.ParseInt(parts[1], 10, 64)
if e1 == nil && e2 == nil {
@@ -247,6 +268,10 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
"to": time.Unix(b, 0).Format(time.RFC3339),
}
}
} else { // named range
filterPresets["startTime"] = map[string]string{
"range": query.Get("startTime"),
}
}
}
@@ -277,12 +302,13 @@ func SetupRoutes(router *mux.Router, buildInfo web.Build) {
availableRoles, _ := schema.GetValidRolesMap(user)
page := web.Page{
Title: title,
User: *user,
Roles: availableRoles,
Build: buildInfo,
Config: conf,
Infos: infos,
Title: title,
User: *user,
Roles: availableRoles,
Build: buildInfo,
Config: conf,
Resampling: config.Keys.EnableResampling,
Infos: infos,
}
if route.Filter {

View File

@@ -0,0 +1,41 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package taskManager
import (
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/go-co-op/gocron/v2"
)
func RegisterCompressionService(compressOlderThan int) {
log.Info("Register compression service")
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(05, 0, 0))),
gocron.NewTask(
func() {
var jobs []*schema.Job
var err error
ar := archive.GetHandle()
startTime := time.Now().Unix() - int64(compressOlderThan*24*3600)
lastTime := ar.CompressLast(startTime)
if startTime == lastTime {
log.Info("Compression Service - Complete archive run")
jobs, err = jobRepo.FindJobsBetween(0, startTime)
} else {
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
}
if err != nil {
log.Warnf("Error while looking for compression jobs: %v", err)
}
ar.Compress(jobs)
}))
}

View File

@@ -0,0 +1,36 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package taskManager
import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/go-co-op/gocron/v2"
)
func RegisterLdapSyncService(ds string) {
interval, err := parseDuration(ds)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v",
ds)
return
}
auth := auth.GetAuthInstance()
log.Info("Register LDAP sync service")
s.NewJob(gocron.DurationJob(interval),
gocron.NewTask(
func() {
t := time.Now()
log.Printf("ldap sync started at %s", t.Format(time.RFC3339))
if err := auth.LdapAuth.Sync(); err != nil {
log.Errorf("ldap sync failed: %s", err.Error())
}
log.Print("ldap sync done")
}))
}

View File

@@ -0,0 +1,67 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package taskManager
import (
"time"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/go-co-op/gocron/v2"
)
func RegisterRetentionDeleteService(age int, includeDB bool) {
log.Info("Register retention delete service")
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
gocron.NewTask(
func() {
startTime := time.Now().Unix() - int64(age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().CleanUp(jobs)
if includeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %s", err.Error())
}
}
}))
}
func RegisterRetentionMoveService(age int, includeDB bool, location string) {
log.Info("Register retention move service")
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
gocron.NewTask(
func() {
startTime := time.Now().Unix() - int64(age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().Move(jobs, location)
if includeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %v", err)
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %v", err)
}
}
}))
}

View File

@@ -0,0 +1,27 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package taskManager
import (
"runtime"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/go-co-op/gocron/v2"
)
func RegisterStopJobsExceedTime() {
log.Info("Register undead jobs service")
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(03, 0, 0))),
gocron.NewTask(
func() {
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
if err != nil {
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
}
runtime.GC()
}))
}

View File

@@ -0,0 +1,90 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package taskManager
import (
"encoding/json"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/go-co-op/gocron/v2"
)
var (
s gocron.Scheduler
jobRepo *repository.JobRepository
)
func parseDuration(s string) (time.Duration, error) {
interval, err := time.ParseDuration(s)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v",
s)
return 0, err
}
if interval == 0 {
log.Info("TaskManager: Sync interval is zero")
}
return interval, nil
}
func Start() {
var err error
jobRepo = repository.GetJobRepository()
s, err = gocron.NewScheduler()
if err != nil {
log.Fatalf("Error while creating gocron scheduler: %s", err.Error())
}
if config.Keys.StopJobsExceedingWalltime > 0 {
RegisterStopJobsExceedTime()
}
var cfg struct {
Retention schema.Retention `json:"retention"`
Compression int `json:"compression"`
}
cfg.Retention.IncludeDB = true
if err := json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
}
switch cfg.Retention.Policy {
case "delete":
RegisterRetentionDeleteService(
cfg.Retention.Age,
cfg.Retention.IncludeDB)
case "move":
RegisterRetentionMoveService(
cfg.Retention.Age,
cfg.Retention.IncludeDB,
cfg.Retention.Location)
}
if cfg.Compression > 0 {
RegisterCompressionService(cfg.Compression)
}
lc := config.Keys.LdapConfig
if lc != nil && lc.SyncInterval != "" {
RegisterLdapSyncService(lc.SyncInterval)
}
RegisterFootprintWorker()
RegisterUpdateDurationWorker()
s.Start()
}
func Shutdown() {
s.Shutdown()
}

View File

@@ -0,0 +1,33 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package taskManager
import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/go-co-op/gocron/v2"
)
func RegisterUpdateDurationWorker() {
var frequency string
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.DurationWorker != "" {
frequency = config.Keys.CronFrequency.DurationWorker
} else {
frequency = "5m"
}
d, _ := time.ParseDuration(frequency)
log.Infof("Register Duration Update service with %s interval", frequency)
s.NewJob(gocron.DurationJob(d),
gocron.NewTask(
func() {
start := time.Now()
log.Printf("Update duration started at %s", start.Format(time.RFC3339))
jobRepo.UpdateDuration()
log.Printf("Update duration is done and took %s", time.Since(start))
}))
}

View File

@@ -0,0 +1,143 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package taskManager
import (
"context"
"math"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
"github.com/go-co-op/gocron/v2"
)
func RegisterFootprintWorker() {
var frequency string
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.FootprintWorker != "" {
frequency = config.Keys.CronFrequency.FootprintWorker
} else {
frequency = "10m"
}
d, _ := time.ParseDuration(frequency)
log.Infof("Register Footprint Update service with %s interval", frequency)
s.NewJob(gocron.DurationJob(d),
gocron.NewTask(
func() {
s := time.Now()
c := 0
ce := 0
cl := 0
log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
t, err := jobRepo.TransactionInit()
if err != nil {
log.Errorf("Failed TransactionInit %v", err)
}
for _, cluster := range archive.Clusters {
jobs, err := jobRepo.FindRunningJobs(cluster.Name)
if err != nil {
continue
}
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
scopes := []schema.MetricScope{schema.MetricScopeNode}
scopes = append(scopes, schema.MetricScopeCore)
scopes = append(scopes, schema.MetricScopeAccelerator)
for _, job := range jobs {
log.Debugf("Try job %d", job.JobID)
cl++
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background(), 0) // 0 Resolution-Value retrieves highest res
if err != nil {
log.Errorf("Error wile loading job data for footprint update: %v", err)
ce++
continue
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// This should never happen ?
ce++
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
// Add values rounded to 2 digits
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
},
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
Min: (math.Round(min*100) / 100),
Max: (math.Round(max*100) / 100),
}
}
// Init UpdateBuilder
stmt := sq.Update("job")
// Add SET queries
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
if err != nil {
log.Errorf("Update job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
ce++
continue
}
stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta)
if err != nil {
log.Errorf("Update job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
ce++
continue
}
// Add WHERE Filter
stmt = stmt.Where("job.id = ?", job.ID)
query, args, err := stmt.ToSql()
if err != nil {
log.Errorf("Failed in ToSQL conversion: %v", err)
ce++
continue
}
// Args: JSON, JSON, ENERGY, JOBID
jobRepo.TransactionAdd(t, query, args...)
// if err := jobRepo.Execute(stmt); err != nil {
// log.Errorf("Update job footprint (dbid: %d) failed at db execute: %s", job.ID, err.Error())
// continue
// }
c++
log.Debugf("Finish Job %d", job.JobID)
}
jobRepo.TransactionCommit(t)
log.Debugf("Finish Cluster %s", cluster.Name)
}
jobRepo.TransactionEnd(t)
log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
}))
}

View File

@@ -4,7 +4,13 @@
// license that can be found in the LICENSE file.
package util
import "golang.org/x/exp/constraints"
import (
"golang.org/x/exp/constraints"
"fmt"
"math"
"sort"
)
func Min[T constraints.Ordered](a, b T) T {
if a < b {
@@ -19,3 +25,36 @@ func Max[T constraints.Ordered](a, b T) T {
}
return b
}
func sortedCopy(input []float64) []float64 {
sorted := make([]float64, len(input))
copy(sorted, input)
sort.Float64s(sorted)
return sorted
}
func Mean(input []float64) (float64, error) {
if len(input) == 0 {
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
}
sum := 0.0
for _, n := range input {
sum += n
}
return sum / float64(len(input)), nil
}
func Median(input []float64) (median float64, err error) {
c := sortedCopy(input)
// Even numbers: add the two middle numbers, divide by two (use mean function)
// Odd numbers: Use the middle number
l := len(c)
if l == 0 {
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
} else if l%2 == 0 {
median, _ = Mean(c[l/2-1 : l/2+1])
} else {
median = c[l/2]
}
return median, nil
}

View File

@@ -7,6 +7,7 @@ package archive
import (
"encoding/json"
"fmt"
"sync"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
@@ -53,40 +54,48 @@ type JobContainer struct {
}
var (
initOnce sync.Once
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
ar ArchiveBackend
useArchive bool
)
func Init(rawConfig json.RawMessage, disableArchive bool) error {
useArchive = !disableArchive
var err error
var cfg struct {
Kind string `json:"kind"`
}
initOnce.Do(func() {
useArchive = !disableArchive
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
return err
}
var cfg struct {
Kind string `json:"kind"`
}
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
default:
return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
}
if err = json.Unmarshal(rawConfig, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
return
}
version, err := ar.Init(rawConfig)
if err != nil {
log.Error("Error while initializing archiveBackend")
return err
}
log.Infof("Load archive version %d", version)
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
default:
err = fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
}
return initClusterConfig()
var version uint64
version, err = ar.Init(rawConfig)
if err != nil {
log.Error("Error while initializing archiveBackend")
return
}
log.Infof("Load archive version %d", version)
err = initClusterConfig()
})
return err
}
func GetHandle() ArchiveBackend {
@@ -162,8 +171,9 @@ func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
jobMeta.Tags = make([]*schema.Tag, 0)
for _, tag := range tags {
jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
Name: tag.Name,
Type: tag.Type,
Name: tag.Name,
Type: tag.Type,
Scope: tag.Scope,
})
}

View File

@@ -12,13 +12,16 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var Clusters []*schema.Cluster
var nodeLists map[string]map[string]NodeList
var (
Clusters []*schema.Cluster
GlobalMetricList []*schema.GlobalMetricListItem
nodeLists map[string]map[string]NodeList
)
func initClusterConfig() error {
Clusters = []*schema.Cluster{}
nodeLists = map[string]map[string]NodeList{}
metricLookup := make(map[string]schema.GlobalMetricListItem)
for _, c := range ar.GetClusters() {
@@ -49,6 +52,59 @@ func initClusterConfig() error {
if !mc.Scope.Valid() {
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
}
ml, ok := metricLookup[mc.Name]
if !ok {
metricLookup[mc.Name] = schema.GlobalMetricListItem{
Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
}
ml = metricLookup[mc.Name]
}
availability := schema.ClusterSupport{Cluster: cluster.Name}
scLookup := make(map[string]*schema.SubClusterConfig)
for _, scc := range mc.SubClusters {
scLookup[scc.Name] = scc
}
for _, sc := range cluster.SubClusters {
newMetric := mc
newMetric.SubClusters = nil
if cfg, ok := scLookup[sc.Name]; ok {
if !cfg.Remove {
availability.SubClusters = append(availability.SubClusters, sc.Name)
newMetric.Peak = cfg.Peak
newMetric.Normal = cfg.Normal
newMetric.Caution = cfg.Caution
newMetric.Alert = cfg.Alert
newMetric.Footprint = cfg.Footprint
newMetric.Energy = cfg.Energy
newMetric.LowerIsBetter = cfg.LowerIsBetter
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
if newMetric.Footprint != "" {
sc.Footprint = append(sc.Footprint, newMetric.Name)
ml.Footprint = newMetric.Footprint
}
if newMetric.Energy != "" {
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
}
}
} else {
availability.SubClusters = append(availability.SubClusters, sc.Name)
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
if newMetric.Footprint != "" {
sc.Footprint = append(sc.Footprint, newMetric.Name)
}
if newMetric.Energy != "" {
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
}
}
}
ml.Availability = append(metricLookup[mc.Name].Availability, availability)
metricLookup[mc.Name] = ml
}
Clusters = append(Clusters, cluster)
@@ -67,11 +123,14 @@ func initClusterConfig() error {
}
}
for _, ml := range metricLookup {
GlobalMetricList = append(GlobalMetricList, &ml)
}
return nil
}
func GetCluster(cluster string) *schema.Cluster {
for _, c := range Clusters {
if c.Name == cluster {
return c
@@ -90,11 +149,10 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
}
}
}
return nil, fmt.Errorf("Subcluster '%v' not found for cluster '%v', or cluster '%v' not configured!", subcluster, cluster, cluster)
return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
}
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
for _, c := range Clusters {
if c.Name == cluster {
for _, m := range c.MetricConfig {
@@ -110,7 +168,6 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
// AssignSubCluster sets the `job.subcluster` property of the job based
// on its cluster and resources.
func AssignSubCluster(job *schema.BaseJob) error {
cluster := GetCluster(job.Cluster)
if cluster == nil {
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
@@ -146,7 +203,6 @@ func AssignSubCluster(job *schema.BaseJob) error {
}
func GetSubClusterByNode(cluster, hostname string) (string, error) {
for sc, nl := range nodeLists[cluster] {
if nl != nil && nl.Contains(hostname) {
return sc, nil
@@ -164,3 +220,13 @@ func GetSubClusterByNode(cluster, hostname string) (string, error) {
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
}
func MetricIndex(mc []schema.MetricConfig, name string) (int, error) {
for i, m := range mc {
if m.Name == name {
return i, nil
}
}
return 0, fmt.Errorf("unknown metric name %s", name)
}

View File

@@ -0,0 +1,39 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive_test
import (
"encoding/json"
"testing"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
)
func TestClusterConfig(t *testing.T) {
if err := archive.Init(json.RawMessage("{\"kind\": \"file\",\"path\": \"testdata/archive\"}"), false); err != nil {
t.Fatal(err)
}
sc, err := archive.GetSubCluster("fritz", "spr1tb")
if err != nil {
t.Fatal(err)
}
// spew.Dump(sc.MetricConfig)
if len(sc.Footprint) != 3 {
t.Fail()
}
if len(sc.MetricConfig) != 15 {
t.Fail()
}
for _, metric := range sc.MetricConfig {
if metric.LowerIsBetter && metric.Name != "mem_used" {
t.Fail()
}
}
// spew.Dump(archive.GlobalMetricList)
// t.Fail()
}

View File

@@ -30,6 +30,7 @@ func TestInitNoJson(t *testing.T) {
t.Fatal(err)
}
}
func TestInitNotExists(t *testing.T) {
var fsa FsArchive
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/job-archive\"}"))
@@ -50,7 +51,7 @@ func TestInit(t *testing.T) {
if version != 1 {
t.Fail()
}
if len(fsa.clusters) != 1 || fsa.clusters[0] != "emmy" {
if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" {
t.Fail()
}
}
@@ -133,7 +134,6 @@ func TestLoadJobData(t *testing.T) {
}
func BenchmarkLoadJobData(b *testing.B) {
tmpdir := b.TempDir()
jobarchive := filepath.Join(tmpdir, "job-archive")
util.CopyDir("./testdata/archive/", jobarchive)
@@ -157,7 +157,6 @@ func BenchmarkLoadJobData(b *testing.B) {
}
func BenchmarkLoadJobDataCompressed(b *testing.B) {
tmpdir := b.TempDir()
jobarchive := filepath.Join(tmpdir, "job-archive")
util.CopyDir("./testdata/archive/", jobarchive)

View File

@@ -9,8 +9,8 @@ import (
"io"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

123
pkg/resampler/resampler.go Normal file
View File

@@ -0,0 +1,123 @@
package resampler
import (
"errors"
"fmt"
"math"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func SimpleResampler(data []schema.Float, old_frequency int64, new_frequency int64) ([]schema.Float, error) {
if old_frequency == 0 || new_frequency == 0 {
return nil, errors.New("either old or new frequency is set to 0")
}
if new_frequency%old_frequency != 0 {
return nil, errors.New("new sampling frequency should be multiple of the old frequency")
}
var step int = int(new_frequency / old_frequency)
var new_data_length = len(data) / step
if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
return data, nil
}
new_data := make([]schema.Float, new_data_length)
for i := 0; i < new_data_length; i++ {
new_data[i] = data[i*step]
}
return new_data, nil
}
// Inspired by one of the algorithms from https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
// Adapted from https://github.com/haoel/downsampling/blob/master/core/lttb.go
func LargestTriangleThreeBucket(data []schema.Float, old_frequency int, new_frequency int) ([]schema.Float, int, error) {
if old_frequency == 0 || new_frequency == 0 {
return data, old_frequency, nil
}
if new_frequency%old_frequency != 0 {
return nil, 0, errors.New(fmt.Sprintf("new sampling frequency : %d should be multiple of the old frequency : %d", new_frequency, old_frequency))
}
var step int = int(new_frequency / old_frequency)
var new_data_length = len(data) / step
if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
return data, old_frequency, nil
}
new_data := make([]schema.Float, 0, new_data_length)
// Bucket size. Leave room for start and end data points
bucketSize := float64(len(data)-2) / float64(new_data_length-2)
new_data = append(new_data, data[0]) // Always add the first point
// We have 3 pointers represent for
// > bucketLow - the current bucket's beginning location
// > bucketMiddle - the current bucket's ending location,
// also the beginning location of next bucket
// > bucketHight - the next bucket's ending location.
bucketLow := 1
bucketMiddle := int(math.Floor(bucketSize)) + 1
var prevMaxAreaPoint int
for i := 0; i < new_data_length-2; i++ {
bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1
if bucketHigh >= len(data)-1 {
bucketHigh = len(data) - 2
}
// Calculate point average for next bucket (containing c)
avgPointX, avgPointY := calculateAverageDataPoint(data[bucketMiddle:bucketHigh+1], int64(bucketMiddle))
// Get the range for current bucket
currBucketStart := bucketLow
currBucketEnd := bucketMiddle
// Point a
pointX := prevMaxAreaPoint
pointY := data[prevMaxAreaPoint]
maxArea := -1.0
var maxAreaPoint int
flag_ := 0
for ; currBucketStart < currBucketEnd; currBucketStart++ {
area := calculateTriangleArea(schema.Float(pointX), pointY, avgPointX, avgPointY, schema.Float(currBucketStart), data[currBucketStart])
if area > maxArea {
maxArea = area
maxAreaPoint = currBucketStart
}
if math.IsNaN(float64(avgPointY)) {
flag_ = 1
}
}
if flag_ == 1 {
new_data = append(new_data, schema.NaN) // Pick this point from the bucket
} else {
new_data = append(new_data, data[maxAreaPoint]) // Pick this point from the bucket
}
prevMaxAreaPoint = maxAreaPoint // This MaxArea point is the next's prevMAxAreaPoint
//move to the next window
bucketLow = bucketMiddle
bucketMiddle = bucketHigh
}
new_data = append(new_data, data[len(data)-1]) // Always add last
return new_data, new_frequency, nil
}

35
pkg/resampler/util.go Normal file
View File

@@ -0,0 +1,35 @@
package resampler
import (
"math"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func calculateTriangleArea(paX, paY, pbX, pbY, pcX, pcY schema.Float) float64 {
area := ((paX-pcX)*(pbY-paY) - (paX-pbX)*(pcY-paY)) * 0.5
return math.Abs(float64(area))
}
func calculateAverageDataPoint(points []schema.Float, xStart int64) (avgX schema.Float, avgY schema.Float) {
flag := 0
for _, point := range points {
avgX += schema.Float(xStart)
avgY += point
xStart++
if math.IsNaN(float64(point)) {
flag = 1
}
}
l := schema.Float(len(points))
avgX /= l
avgY /= l
if flag == 1 {
return avgX, schema.NaN
} else {
return avgX, avgY
}
}

View File

@@ -30,38 +30,47 @@ type MetricValue struct {
}
type SubCluster struct {
Name string `json:"name"`
Nodes string `json:"nodes"`
ProcessorType string `json:"processorType"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`
ThreadsPerCore int `json:"threadsPerCore"`
FlopRateScalar MetricValue `json:"flopRateScalar"`
FlopRateSimd MetricValue `json:"flopRateSimd"`
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
Topology Topology `json:"topology"`
Name string `json:"name"`
Nodes string `json:"nodes"`
ProcessorType string `json:"processorType"`
Topology Topology `json:"topology"`
FlopRateScalar MetricValue `json:"flopRateScalar"`
FlopRateSimd MetricValue `json:"flopRateSimd"`
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
MetricConfig []MetricConfig `json:"metricConfig,omitempty"`
Footprint []string `json:"footprint,omitempty"`
EnergyFootprint []string `json:"energyFootprint,omitempty"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`
ThreadsPerCore int `json:"threadsPerCore"`
}
type SubClusterConfig struct {
Name string `json:"name"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
Remove bool `json:"remove"`
Name string `json:"name"`
Footprint string `json:"footprint,omitempty"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
Remove bool `json:"remove"`
LowerIsBetter bool `json:"lowerIsBetter"`
Energy string `json:"energy"`
}
type MetricConfig struct {
Name string `json:"name"`
Unit Unit `json:"unit"`
Scope MetricScope `json:"scope"`
Aggregation string `json:"aggregation"`
Timestep int `json:"timestep"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
Unit Unit `json:"unit"`
Name string `json:"name"`
Scope MetricScope `json:"scope"`
Aggregation string `json:"aggregation"`
Footprint string `json:"footprint,omitempty"`
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
Timestep int `json:"timestep"`
LowerIsBetter bool `json:"lowerIsBetter"`
Energy string `json:"energy"`
}
type Cluster struct {
@@ -70,14 +79,27 @@ type Cluster struct {
SubClusters []*SubCluster `json:"subClusters"`
}
type ClusterSupport struct {
Cluster string `json:"cluster"`
SubClusters []string `json:"subclusters"`
}
type GlobalMetricListItem struct {
Name string `json:"name"`
Unit Unit `json:"unit"`
Scope MetricScope `json:"scope"`
Footprint string `json:"footprint,omitempty"`
Availability []ClusterSupport `json:"availability"`
}
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
// hwthread is in that socket, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetSocketsFromHWThreads(
hwthreads []int) (sockets []int, exclusive bool) {
hwthreads []int,
) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
for _, hwthread := range hwthreads {
for socket, hwthreadsInSocket := range topo.Socket {
@@ -106,8 +128,8 @@ func (topo *Topology) GetSocketsFromHWThreads(
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetCoresFromHWThreads(
hwthreads []int) (cores []int, exclusive bool) {
hwthreads []int,
) (cores []int, exclusive bool) {
coresMap := map[int]int{}
for _, hwthread := range hwthreads {
for core, hwthreadsInCore := range topo.Core {
@@ -136,8 +158,8 @@ func (topo *Topology) GetCoresFromHWThreads(
// memory domains in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetMemoryDomainsFromHWThreads(
hwthreads []int) (memDoms []int, exclusive bool) {
hwthreads []int,
) (memDoms []int, exclusive bool) {
memDomsMap := map[int]int{}
for _, hwthread := range hwthreads {
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {

View File

@@ -24,8 +24,9 @@ type LdapConfig struct {
}
type OpenIDConfig struct {
Provider string `json:"provider"`
SyncUserOnLogin bool `json:"syncUserOnLogin"`
Provider string `json:"provider"`
SyncUserOnLogin bool `json:"syncUserOnLogin"`
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
}
type JWTAuthConfig struct {
@@ -45,6 +46,9 @@ type JWTAuthConfig struct {
// Should an non-existent user be added to the DB based on the information in the token
SyncUserOnLogin bool `json:"syncUserOnLogin"`
// Should an existent user be updated in the DB based on the information in the token
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
}
type IntRange struct {
@@ -53,8 +57,9 @@ type IntRange struct {
}
type TimeRange struct {
From *time.Time `json:"from"`
To *time.Time `json:"to"`
Range string `json:"range,omitempty"` // Optional, e.g. 'last6h'
From *time.Time `json:"from"`
To *time.Time `json:"to"`
}
type FilterRanges struct {
@@ -76,6 +81,20 @@ type Retention struct {
IncludeDB bool `json:"includeDB"`
}
type ResampleConfig struct {
// Trigger next zoom level at less than this many visible datapoints
Trigger int `json:"trigger"`
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
Resolutions []int `json:"resolutions"`
}
type CronFrequency struct {
// Duration Update Worker [Defaults to '5m']
DurationWorker string `json:"duration-worker"`
// Metric- and Energy Footprint Update Worker [Defaults to '10m']
FootprintWorker string `json:"footprint-worker"`
}
// Format of the configuration (file). See below for the defaults.
type ProgramConfig struct {
// Address where the http (or https) server will listen on (for example: 'localhost:80').
@@ -133,6 +152,9 @@ type ProgramConfig struct {
// be provided! Most options here can be overwritten by the user.
UiDefaults map[string]interface{} `json:"ui-defaults"`
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
EnableResampling *ResampleConfig `json:"enable-resampling"`
// Where to store MachineState files
MachineStateDir string `json:"machine-state-dir"`
@@ -144,4 +166,11 @@ type ProgramConfig struct {
// Array of Clusters
Clusters []*ClusterConfig `json:"clusters"`
// Energy Mix CO2 Emission Constant [g/kWh]
// If entered, displays estimated CO2 emission for job based on jobs totalEnergy
EmissionConstant int `json:"emission-constant"`
// Frequency of cron job workers
CronFrequency *CronFrequency `json:"cron-frequency"`
}

View File

@@ -16,30 +16,33 @@ import (
// Common subset of Job and JobMeta. Use one of those, not this type directly.
type BaseJob struct {
// The unique identifier of a job
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
Partition string `json:"partition,omitempty" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` // The unique identifier of an array job
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
// NumCores int32 `json:"numCores" db:"num_cores" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"` // SMT threads used by job
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
Tags []*Tag `json:"tags,omitempty"` // List of tags
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
Resources []*Resource `json:"resources"` // Resources used by job
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
MetaData map[string]string `json:"metaData"` // Additional information about the job
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
Cluster string `json:"cluster" db:"cluster" example:"fritz"`
SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
Partition string `json:"partition,omitempty" db:"partition" example:"main"`
Project string `json:"project" db:"project" example:"abcd200"`
User string `json:"user" db:"user" example:"abcd100h"`
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
Tags []*Tag `json:"tags,omitempty"`
RawEnergyFootprint []byte `json:"-" db:"energy_footprint"`
RawFootprint []byte `json:"-" db:"footprint"`
RawMetaData []byte `json:"-" db:"meta_data"`
RawResources []byte `json:"-" db:"resources"`
Resources []*Resource `json:"resources"`
EnergyFootprint map[string]float64 `json:"energyFootprint"`
Footprint map[string]float64 `json:"footprint"`
MetaData map[string]string `json:"metaData"`
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
Energy float64 `json:"energy" db:"energy"`
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"`
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
}
// Job struct type
@@ -49,19 +52,10 @@ type BaseJob struct {
// Job model
// @Description Information of a HPC job.
type Job struct {
// The unique identifier of a job in the database
ID int64 `json:"id" db:"id"`
StartTime time.Time `json:"startTime"`
BaseJob
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"` // FileDataVolTotal as Float64
ID int64 `json:"id" db:"id"`
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"`
}
// JobMeta struct type
@@ -88,11 +82,10 @@ type JobLinkResultList struct {
// JobMeta model
// @Description Meta data information of a HPC job.
type JobMeta struct {
// The unique identifier of a job in the database
ID *int64 `json:"id,omitempty"`
ID *int64 `json:"id,omitempty"`
Statistics map[string]JobStatistics `json:"statistics"`
BaseJob
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0)
Statistics map[string]JobStatistics `json:"statistics"` // Metric statistics of job
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
}
const (
@@ -124,18 +117,19 @@ type JobStatistics struct {
// Tag model
// @Description Defines a tag using name and type.
type Tag struct {
ID int64 `json:"id" db:"id"` // The unique DB identifier of a tag
Type string `json:"type" db:"tag_type" example:"Debug"` // Tag Type
Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name
Type string `json:"type" db:"tag_type" example:"Debug"`
Name string `json:"name" db:"tag_name" example:"Testjob"`
Scope string `json:"scope" db:"tag_scope" example:"global"`
ID int64 `json:"id" db:"id"`
}
// Resource model
// @Description A resource used by a job
type Resource struct {
Hostname string `json:"hostname"` // Name of the host (= node)
HWThreads []int `json:"hwthreads,omitempty"` // List of OS processor ids
Accelerators []string `json:"accelerators,omitempty"` // List of of accelerator device ids
Configuration string `json:"configuration,omitempty"` // The configuration options of the node
Hostname string `json:"hostname"`
Configuration string `json:"configuration,omitempty"`
HWThreads []int `json:"hwthreads,omitempty"`
Accelerators []string `json:"accelerators,omitempty"`
}
type JobState string

View File

@@ -10,6 +10,8 @@ import (
"math"
"sort"
"unsafe"
"github.com/ClusterCockpit/cc-backend/internal/util"
)
type JobData map[string]map[MetricScope]*JobMetric
@@ -36,6 +38,7 @@ type MetricStatistics struct {
type StatsSeries struct {
Mean []Float `json:"mean"`
Median []Float `json:"median"`
Min []Float `json:"min"`
Max []Float `json:"max"`
Percentiles map[int][]Float `json:"percentiles,omitempty"`
@@ -121,6 +124,7 @@ func (jd *JobData) Size() int {
if metric.StatisticsSeries != nil {
n += len(metric.StatisticsSeries.Max)
n += len(metric.StatisticsSeries.Mean)
n += len(metric.StatisticsSeries.Median)
n += len(metric.StatisticsSeries.Min)
}
@@ -149,53 +153,74 @@ func (jm *JobMetric) AddStatisticsSeries() {
}
}
min, mean, max := make([]Float, n), make([]Float, n), make([]Float, n)
// mean := make([]Float, n)
min, median, max := make([]Float, n), make([]Float, n), make([]Float, n)
i := 0
for ; i < m; i++ {
smin, ssum, smax := math.MaxFloat32, 0.0, -math.MaxFloat32
seriesCount := len(jm.Series)
// ssum := 0.0
smin, smed, smax := math.MaxFloat32, make([]float64, seriesCount), -math.MaxFloat32
notnan := 0
for j := 0; j < len(jm.Series); j++ {
for j := 0; j < seriesCount; j++ {
x := float64(jm.Series[j].Data[i])
if math.IsNaN(x) {
continue
}
notnan += 1
ssum += x
// ssum += x
smed[j] = x
smin = math.Min(smin, x)
smax = math.Max(smax, x)
}
if notnan < 3 {
min[i] = NaN
mean[i] = NaN
// mean[i] = NaN
median[i] = NaN
max[i] = NaN
} else {
min[i] = Float(smin)
mean[i] = Float(ssum / float64(notnan))
// mean[i] = Float(ssum / float64(notnan))
max[i] = Float(smax)
medianRaw, err := util.Median(smed)
if err != nil {
median[i] = NaN
} else {
median[i] = Float(medianRaw)
}
}
}
for ; i < n; i++ {
min[i] = NaN
mean[i] = NaN
// mean[i] = NaN
median[i] = NaN
max[i] = NaN
}
if smooth {
for i := 2; i < len(mean)-2; i++ {
for i := 2; i < len(median)-2; i++ {
if min[i].IsNaN() {
continue
}
min[i] = (min[i-2] + min[i-1] + min[i] + min[i+1] + min[i+2]) / 5
max[i] = (max[i-2] + max[i-1] + max[i] + max[i+1] + max[i+2]) / 5
mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
// mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
// Reduce Median further
smoothRaw := []float64{float64(median[i-2]), float64(median[i-1]), float64(median[i]), float64(median[i+1]), float64(median[i+2])}
smoothMedian, err := util.Median(smoothRaw)
if err != nil {
median[i] = NaN
} else {
median[i] = Float(smoothMedian)
}
}
}
jm.StatisticsSeries = &StatsSeries{Mean: mean, Min: min, Max: max}
jm.StatisticsSeries = &StatsSeries{Median: median, Min: min, Max: max} // Mean: mean
}
func (jd *JobData) AddNodeScope(metric string) bool {

View File

@@ -1,284 +1,327 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://cluster.schema.json",
"title": "HPC cluster description",
"description": "Meta data information of a HPC cluster",
"type": "object",
"properties": {
"name": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"metricConfig": {
"description": "Metric specifications",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "Metric name",
"type": "string"
},
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"scope": {
"description": "Native measurement resolution",
"type": "string"
},
"timestep": {
"description": "Frequency of timeseries points",
"type": "integer"
},
"aggregation": {
"description": "How the metric is aggregated",
"type": "string",
"enum": [
"sum",
"avg"
]
},
"peak": {
"description": "Metric peak threshold (Upper metric limit)",
"type": "number"
},
"normal": {
"description": "Metric normal threshold",
"type": "number"
},
"caution": {
"description": "Metric caution threshold (Suspicious but does not require immediate action)",
"type": "number"
},
"alert": {
"description": "Metric alert threshold (Requires immediate action)",
"type": "number"
},
"subClusters": {
"description": "Array of cluster hardware partition metric thresholds",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "Hardware partition name",
"type": "string"
},
"peak": {
"type": "number"
},
"normal": {
"type": "number"
},
"caution": {
"type": "number"
},
"alert": {
"type": "number"
},
"remove": {
"type": "boolean"
}
},
"required": [
"name"
]
}
}
},
"required": [
"name",
"unit",
"scope",
"timestep",
"aggregation",
"peak",
"normal",
"caution",
"alert"
]
},
"minItems": 1
},
"subClusters": {
"description": "Array of cluster hardware partitions",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "Hardware partition name",
"type": "string"
},
"processorType": {
"description": "Processor type",
"type": "string"
},
"socketsPerNode": {
"description": "Number of sockets per node",
"type": "integer"
},
"coresPerSocket": {
"description": "Number of cores per socket",
"type": "integer"
},
"threadsPerCore": {
"description": "Number of SMT threads per core",
"type": "integer"
},
"flopRateScalar": {
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"value": {
"type": "number"
}
}
},
"flopRateSimd": {
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"value": {
"type": "number"
}
}
},
"memoryBandwidth": {
"description": "Theoretical node peak memory bandwidth in GB/s",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"value": {
"type": "number"
}
}
},
"nodes": {
"description": "Node list expression",
"type": "string"
},
"topology": {
"description": "Node topology",
"type": "object",
"properties": {
"node": {
"description": "HwTread lists of node",
"type": "array",
"items": {
"type": "integer"
}
},
"socket": {
"description": "HwTread lists of sockets",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"memoryDomain": {
"description": "HwTread lists of memory domains",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"die": {
"description": "HwTread lists of dies",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"core": {
"description": "HwTread lists of cores",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"accelerators": {
"type": "array",
"description": "List of of accelerator devices",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique device id"
},
"type": {
"type": "string",
"description": "The accelerator type",
"enum": [
"Nvidia GPU",
"AMD GPU",
"Intel GPU"
]
},
"model": {
"type": "string",
"description": "The accelerator model"
}
},
"required": [
"id",
"type",
"model"
]
}
}
},
"required": [
"node",
"socket",
"memoryDomain"
]
}
},
"required": [
"name",
"nodes",
"topology",
"processorType",
"socketsPerNode",
"coresPerSocket",
"threadsPerCore",
"flopRateScalar",
"flopRateSimd",
"memoryBandwidth"
]
},
"minItems": 1
}
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://cluster.schema.json",
"title": "HPC cluster description",
"description": "Meta data information of a HPC cluster",
"type": "object",
"properties": {
"name": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"required": [
"name",
"metricConfig",
"subClusters"
]
"metricConfig": {
"description": "Metric specifications",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "Metric name",
"type": "string"
},
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"scope": {
"description": "Native measurement resolution",
"type": "string"
},
"timestep": {
"description": "Frequency of timeseries points",
"type": "integer"
},
"aggregation": {
"description": "How the metric is aggregated",
"type": "string",
"enum": [
"sum",
"avg"
]
},
"footprint": {
"description": "Is it a footprint metric and what type",
"type": "string",
"enum": [
"avg",
"max",
"min"
]
},
"energy": {
"description": "Is it used to calculate job energy",
"type": "string",
"enum": [
"power",
"energy"
]
},
"lowerIsBetter": {
"description": "Is lower better.",
"type": "boolean"
},
"peak": {
"description": "Metric peak threshold (Upper metric limit)",
"type": "number"
},
"normal": {
"description": "Metric normal threshold",
"type": "number"
},
"caution": {
"description": "Metric caution threshold (Suspicious but does not require immediate action)",
"type": "number"
},
"alert": {
"description": "Metric alert threshold (Requires immediate action)",
"type": "number"
},
"subClusters": {
"description": "Array of cluster hardware partition metric thresholds",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "Hardware partition name",
"type": "string"
},
"footprint": {
"description": "Is it a footprint metric and what type. Overwrite global setting",
"type": "string",
"enum": [
"avg",
"max",
"min"
]
},
"energy": {
"description": "Is it used to calculate job energy. Overwrite global",
"type": "string",
"enum": [
"power",
"energy"
]
},
"lowerIsBetter": {
"description": "Is lower better. Overwrite global",
"type": "boolean"
},
"peak": {
"type": "number"
},
"normal": {
"type": "number"
},
"caution": {
"type": "number"
},
"alert": {
"type": "number"
},
"remove": {
"description": "Remove this metric for this subcluster",
"type": "boolean"
}
},
"required": [
"name"
]
}
}
},
"required": [
"name",
"unit",
"scope",
"timestep",
"aggregation",
"peak",
"normal",
"caution",
"alert"
]
},
"minItems": 1
},
"subClusters": {
"description": "Array of cluster hardware partitions",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "Hardware partition name",
"type": "string"
},
"processorType": {
"description": "Processor type",
"type": "string"
},
"socketsPerNode": {
"description": "Number of sockets per node",
"type": "integer"
},
"coresPerSocket": {
"description": "Number of cores per socket",
"type": "integer"
},
"threadsPerCore": {
"description": "Number of SMT threads per core",
"type": "integer"
},
"flopRateScalar": {
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"value": {
"type": "number"
}
}
},
"flopRateSimd": {
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"value": {
"type": "number"
}
}
},
"memoryBandwidth": {
"description": "Theoretical node peak memory bandwidth in GB/s",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"value": {
"type": "number"
}
}
},
"nodes": {
"description": "Node list expression",
"type": "string"
},
"topology": {
"description": "Node topology",
"type": "object",
"properties": {
"node": {
"description": "HwTread lists of node",
"type": "array",
"items": {
"type": "integer"
}
},
"socket": {
"description": "HwTread lists of sockets",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"memoryDomain": {
"description": "HwTread lists of memory domains",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"die": {
"description": "HwTread lists of dies",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"core": {
"description": "HwTread lists of cores",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"accelerators": {
"type": "array",
"description": "List of of accelerator devices",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique device id"
},
"type": {
"type": "string",
"description": "The accelerator type",
"enum": [
"Nvidia GPU",
"AMD GPU",
"Intel GPU"
]
},
"model": {
"type": "string",
"description": "The accelerator model"
}
},
"required": [
"id",
"type",
"model"
]
}
}
},
"required": [
"node",
"socket",
"memoryDomain"
]
}
},
"required": [
"name",
"nodes",
"topology",
"processorType",
"socketsPerNode",
"coresPerSocket",
"threadsPerCore",
"flopRateScalar",
"flopRateSimd",
"memoryBandwidth"
]
},
"minItems": 1
}
},
"required": [
"name",
"metricConfig",
"subClusters"
]
}

View File

@@ -424,6 +424,27 @@
"plot_general_colorscheme",
"plot_list_selectedMetrics"
]
},
"enable-resampling": {
"description": "Enable dynamic zoom in frontend metric plots.",
"type": "object",
"properties": {
"trigger": {
"description": "Trigger next zoom level at less than this many visible datapoints.",
"type": "integer"
},
"resolutions": {
"description": "Array of resampling target resolutions, in seconds.",
"type": "array",
"items": {
"type": "integer"
}
}
},
"required": [
"trigger",
"resolutions"
]
}
},
"required": [

File diff suppressed because it is too large Load Diff

View File

@@ -7,25 +7,25 @@
"dev": "rollup -c -w"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-commonjs": "^25.0.8",
"@rollup/plugin-node-resolve": "^15.3.0",
"@rollup/plugin-terser": "^0.4.4",
"@timohausmann/quadtree-js": "^1.2.6",
"rollup": "^4.12.1",
"rollup": "^4.24.0",
"rollup-plugin-css-only": "^4.5.2",
"rollup-plugin-svelte": "^7.1.6",
"svelte": "^4.2.12"
"rollup-plugin-svelte": "^7.2.2",
"svelte": "^4.2.19"
},
"dependencies": {
"@rollup/plugin-replace": "^5.0.5",
"@sveltestrap/sveltestrap": "^6.2.6",
"@urql/svelte": "^4.1.0",
"chart.js": "^4.4.2",
"@rollup/plugin-replace": "^5.0.7",
"@sveltestrap/sveltestrap": "^6.2.7",
"@urql/svelte": "^4.2.1",
"chart.js": "^4.4.5",
"date-fns": "^2.30.0",
"graphql": "^16.8.1",
"mathjs": "^12.4.0",
"graphql": "^16.9.0",
"mathjs": "^12.4.3",
"svelte-chartjs": "^3.1.5",
"uplot": "^1.6.30",
"uplot": "^1.6.31",
"wonka": "^6.3.4"
}
}

View File

@@ -1,5 +1,11 @@
<!--
@component Main analysis view component
Properties:
- `filterPresets Object`: Optional predefined filter values
-->
<script>
import { init, convert2uplot } from "./utils.js";
import { getContext, onMount } from "svelte";
import {
queryStore,
@@ -15,14 +21,18 @@
Table,
Icon,
} from "@sveltestrap/sveltestrap";
import Filters from "./filters/Filters.svelte";
import PlotSelection from "./PlotSelection.svelte";
import Histogram from "./plots/Histogram.svelte";
import Pie, { colors } from "./plots/Pie.svelte";
import { binsFromFootprint } from "./utils.js";
import ScatterPlot from "./plots/Scatter.svelte";
import PlotTable from "./PlotTable.svelte";
import RooflineHeatmap from "./plots/RooflineHeatmap.svelte";
import {
init,
convert2uplot,
binsFromFootprint,
} from "./generic/utils.js";
import PlotSelection from "./analysis/PlotSelection.svelte";
import Filters from "./generic/Filters.svelte";
import PlotGrid from "./generic/PlotGrid.svelte";
import Histogram from "./generic/plots/Histogram.svelte";
import Pie, { colors } from "./generic/plots/Pie.svelte";
import ScatterPlot from "./generic/plots/Scatter.svelte";
import RooflineHeatmap from "./generic/plots/RooflineHeatmap.svelte";
const { query: initq } = init();
@@ -45,11 +55,13 @@
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
let jobFilters = [];
let rooflineMaxY;
let colWidth1, colWidth2, colWidth3, colWidth4;
let colWidth1, colWidth2;
let numBins = 50;
let maxY = -1;
const initialized = getContext("initialized");
const globalMetrics = getContext("globalMetrics");
const ccconfig = getContext("cc-config");
const metricConfig = getContext("metrics");
let metricsInHistograms = ccconfig.analysis_view_histogramMetrics,
metricsInScatterplots = ccconfig.analysis_view_scatterPlotMetrics;
@@ -268,10 +280,23 @@
}
}
let availableMetrics = [];
let metricUnits = {};
let metricScopes = {};
function loadMetrics(isInitialized) {
if (!isInitialized) return
availableMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster.name))]
for (let sm of availableMetrics) {
metricUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
metricScopes[sm.name] = sm?.scope
}
}
$: loadMetrics($initialized)
$: updateEntityConfiguration(groupSelection.key);
$: updateCategoryConfiguration(sortSelection.key);
onMount(() => filterComponent.update());
onMount(() => filterComponent.updateFilters());
</script>
<Row>
@@ -280,12 +305,12 @@
<Spinner />
</Col>
{/if}
<Col xs="auto">
<Col xs="auto" class="mb-2 mb-lg-0">
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if cluster}
<PlotSelection
availableMetrics={cluster.metricConfig.map((mc) => mc.name)}
availableMetrics={availableMetrics.map((av) => av.name)}
bind:metricsInHistograms
bind:metricsInScatterplots
/>
@@ -297,7 +322,7 @@
{filterPresets}
disableClusterSelection={true}
startTimeQuickSelect={true}
on:update={({ detail }) => {
on:update-filters={({ detail }) => {
jobFilters = detail.filters;
}}
/>
@@ -430,7 +455,7 @@
width={colWidth2}
height={300}
tiles={$rooflineQuery.data.rooflineHeatmap}
cluster={cluster.subClusters.length == 1
subCluster={cluster.subClusters.length == 1
? cluster.subClusters[0]
: null}
maxY={rooflineMaxY}
@@ -440,36 +465,30 @@
{/if}
</Col>
<Col>
<div bind:clientWidth={colWidth3}>
{#key $statsQuery.data.stats[0].histDuration}
<Histogram
width={colWidth3}
height={300}
data={convert2uplot($statsQuery.data.stats[0].histDuration)}
title="Duration Distribution"
xlabel="Current Runtimes"
xunit="Hours"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</div>
{#key $statsQuery.data.stats[0].histDuration}
<Histogram
height={300}
data={convert2uplot($statsQuery.data.stats[0].histDuration)}
title="Duration Distribution"
xlabel="Current Runtimes"
xunit="Hours"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
<Col>
<div bind:clientWidth={colWidth4}>
{#key $statsQuery.data.stats[0].histNumCores}
<Histogram
width={colWidth4}
height={300}
data={convert2uplot($statsQuery.data.stats[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Cores"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</div>
{#key $statsQuery.data.stats[0].histNumCores}
<Histogram
height={300}
data={convert2uplot($statsQuery.data.stats[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Cores"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
</Row>
{/if}
@@ -498,15 +517,14 @@
</Row>
<Row>
<Col>
<PlotTable
<PlotGrid
let:item
let:width
renderFor="analysis"
items={metricsInHistograms.map((metric) => ({
metric,
...binsFromFootprint(
$footprintsQuery.data.footprints.timeWeights,
metricConfig(cluster.name, metric)?.scope,
metricScopes[metric],
$footprintsQuery.data.footprints.metrics.find(
(f) => f.metric == metric,
).data,
@@ -517,30 +535,14 @@
>
<Histogram
data={convert2uplot(item.bins)}
{width}
height={250}
usesBins={true}
title="Average Distribution of '{item.metric}'"
xlabel={`${item.metric} bin maximum ${
(metricConfig(cluster.name, item.metric)?.unit?.prefix
? "[" + metricConfig(cluster.name, item.metric)?.unit?.prefix
: "") +
(metricConfig(cluster.name, item.metric)?.unit?.base
? metricConfig(cluster.name, item.metric)?.unit?.base + "]"
: "")
}`}
xunit={`${
(metricConfig(cluster.name, item.metric)?.unit?.prefix
? metricConfig(cluster.name, item.metric)?.unit?.prefix
: "") +
(metricConfig(cluster.name, item.metric)?.unit?.base
? metricConfig(cluster.name, item.metric)?.unit?.base
: "")
}`}
xlabel={`${item.metric} bin maximum [${metricUnits[item.metric]}]`}
xunit={`${metricUnits[item.metric]}`}
ylabel="Normalized Hours"
yunit="Hours"
/>
</PlotTable>
</PlotGrid>
</Col>
</Row>
<br />
@@ -558,7 +560,7 @@
</Row>
<Row>
<Col>
<PlotTable
<PlotGrid
let:item
let:width
renderFor="analysis"
@@ -578,27 +580,13 @@
{width}
height={250}
color={"rgba(0, 102, 204, 0.33)"}
xLabel={`${item.m1} [${
(metricConfig(cluster.name, item.m1)?.unit?.prefix
? metricConfig(cluster.name, item.m1)?.unit?.prefix
: "") +
(metricConfig(cluster.name, item.m1)?.unit?.base
? metricConfig(cluster.name, item.m1)?.unit?.base
: "")
}]`}
yLabel={`${item.m2} [${
(metricConfig(cluster.name, item.m2)?.unit?.prefix
? metricConfig(cluster.name, item.m2)?.unit?.prefix
: "") +
(metricConfig(cluster.name, item.m2)?.unit?.base
? metricConfig(cluster.name, item.m2)?.unit?.base
: "")
}]`}
xLabel={`${item.m1} [${metricUnits[item.m1]}]`}
yLabel={`${item.m2} [${metricUnits[item.m2]}]`}
X={item.f1}
Y={item.f2}
S={$footprintsQuery.data.footprints.timeWeights.nodeHours}
/>
</PlotTable>
</PlotGrid>
</Col>
</Row>
{/if}

View File

@@ -1,16 +1,20 @@
<script>
import { getContext } from "svelte";
import { init } from "./utils.js";
import { Card, CardHeader, CardTitle } from "@sveltestrap/sveltestrap";
<!--
@component Main Config Option Component, Wrapper for admin and user sub-components
import PlotSettings from "./config/PlotSettings.svelte";
Properties:
- `ìsAdmin Bool!`: Is currently logged in user admin authority
- `isApi Bool!`: Is currently logged in user api authority
- `username String!`: Empty string if auth. is disabled, otherwise the username as string
-->
<script>
import { Card, CardHeader, CardTitle } from "@sveltestrap/sveltestrap";
import UserSettings from "./config/UserSettings.svelte";
import AdminSettings from "./config/AdminSettings.svelte";
const { query: initq } = init();
const ccconfig = getContext("cc-config");
export let isAdmin;
export let isApi;
export let username;
</script>
{#if isAdmin == true}
@@ -24,7 +28,7 @@
<Card>
<CardHeader>
<CardTitle class="mb-1">Plotting Options</CardTitle>
<CardTitle class="mb-1">User Options</CardTitle>
</CardHeader>
<PlotSettings config={ccconfig} />
<UserSettings {username} {isApi}/>
</Card>

View File

@@ -1,3 +1,13 @@
<!--
@component Main navbar component; handles view display based on user roles
Properties:
- `username String`: Empty string if auth. is disabled, otherwise the username as string
- `authlevel Number`: The current users authentication level
- `clusters [String]`: List of cluster names
- `roles [Number]`: Enum containing available roles
-->
<script>
import {
Icon,
@@ -10,13 +20,13 @@
DropdownToggle,
DropdownMenu,
} from "@sveltestrap/sveltestrap";
import NavbarLinks from "./NavbarLinks.svelte";
import NavbarTools from "./NavbarTools.svelte";
import NavbarLinks from "./header/NavbarLinks.svelte";
import NavbarTools from "./header/NavbarTools.svelte";
export let username; // empty string if auth. is disabled, otherwise the username as string
export let authlevel; // Integer
export let clusters; // array of names
export let roles; // Role Enum-Like
export let username;
export let authlevel;
export let clusters;
export let roles;
let isOpen = false;
let screenSize;
@@ -30,14 +40,19 @@
usersTitle.set(3, "Managed Users");
usersTitle.set(4, "Users");
usersTitle.set(5, "Users");
const projectsTitle = new Map();
projectsTitle.set(3, "Managed Projects");
projectsTitle.set(4, "Projects");
projectsTitle.set(5, "Projects");
const views = [
{
title: "My Jobs",
requiredRole: roles.user,
href: `/monitoring/user/${username}`,
icon: "bar-chart-line-fill",
icon: "bar-chart-line",
perCluster: false,
listOptions: false,
menu: "none",
},
{
@@ -46,23 +61,8 @@
href: `/monitoring/jobs/`,
icon: "card-list",
perCluster: false,
menu: "none",
},
{
title: usersTitle.get(authlevel),
requiredRole: roles.manager,
href: "/monitoring/users/",
icon: "people-fill",
perCluster: false,
menu: "Groups",
},
{
title: "Projects",
requiredRole: roles.support,
href: "/monitoring/projects/",
icon: "folder",
perCluster: false,
menu: "Groups",
listOptions: false,
menu: "Jobs",
},
{
title: "Tags",
@@ -70,31 +70,53 @@
href: "/monitoring/tags/",
icon: "tags",
perCluster: false,
listOptions: false,
menu: "Jobs",
},
{
title: usersTitle.get(authlevel),
requiredRole: roles.manager,
href: "/monitoring/users/",
icon: "people",
perCluster: true,
listOptions: true,
menu: "Groups",
},
{
title: projectsTitle.get(authlevel),
requiredRole: roles.manager,
href: "/monitoring/projects/",
icon: "journals",
perCluster: true,
listOptions: true,
menu: "Groups",
},
{
title: "Nodes",
requiredRole: roles.admin,
href: "/monitoring/systems/",
icon: "hdd-rack",
perCluster: true,
listOptions: false,
menu: "Info",
},
{
title: "Status",
requiredRole: roles.admin,
href: "/monitoring/status/",
icon: "clipboard-data",
perCluster: true,
listOptions: false,
menu: "Info",
},
{
title: "Analysis",
requiredRole: roles.support,
href: "/monitoring/analysis/",
icon: "graph-up",
perCluster: true,
menu: "Stats",
},
{
title: "Nodes",
requiredRole: roles.admin,
href: "/monitoring/systems/",
icon: "cpu",
perCluster: true,
menu: "Groups",
},
{
title: "Status",
requiredRole: roles.admin,
href: "/monitoring/status/",
icon: "cpu",
perCluster: true,
menu: "Stats",
listOptions: false,
menu: "Info",
},
];
</script>
@@ -122,24 +144,27 @@
<NavbarLinks
{clusters}
links={views.filter(
(item) => item.requiredRole <= authlevel && item.menu != "Stats",
(item) => item.requiredRole <= authlevel && item.menu != "Info",
)}
/>
<Dropdown nav>
<DropdownToggle nav caret>
<Icon name="graph-up" />
Stats
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<NavbarLinks
{clusters}
links={views.filter(
(item) =>
item.requiredRole <= authlevel && item.menu == "Stats",
)}
/>
</DropdownMenu>
</Dropdown>
{#if authlevel >= 4} <!-- Support+ Info Menu-->
<Dropdown nav>
<DropdownToggle nav caret>
<Icon name="graph-up" />
Info
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<NavbarLinks
{clusters}
direction="right"
links={views.filter(
(item) =>
item.requiredRole <= authlevel && item.menu == "Info",
)}
/>
</DropdownMenu>
</Dropdown>
{/if}
{:else}
<NavbarLinks
{clusters}
@@ -147,21 +172,54 @@
(item) => item.requiredRole <= authlevel && item.menu == "none",
)}
/>
{#each Array("Groups", "Stats") as menu}
{#if authlevel >= 2} <!-- User+ Job Menu -->
<Dropdown nav>
<DropdownToggle nav caret>
{menu}
Jobs
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<NavbarLinks
{clusters}
direction="right"
links={views.filter(
(item) => item.requiredRole <= authlevel && item.menu == menu,
(item) => item.requiredRole <= authlevel && item.menu == 'Jobs',
)}
/>
</DropdownMenu>
</Dropdown>
{/each}
{/if}
{#if authlevel >= 3} <!-- Manager+ Group Lists Menu-->
<Dropdown nav>
<DropdownToggle nav caret>
Groups
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<NavbarLinks
{clusters}
direction="right"
links={views.filter(
(item) => item.requiredRole <= authlevel && item.menu == 'Groups',
)}
/>
</DropdownMenu>
</Dropdown>
{/if}
{#if authlevel >= 4} <!-- Support+ Info Menu-->
<Dropdown nav>
<DropdownToggle nav caret>
Info
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<NavbarLinks
{clusters}
direction="right"
links={views.filter(
(item) => item.requiredRole <= authlevel && item.menu == 'Info',
)}
/>
</DropdownMenu>
</Dropdown>
{/if}
{/if}
</Nav>
<NavbarTools {username} {authlevel} {roles} {screenSize} />

View File

@@ -1,11 +1,19 @@
<!--
@component Main single job display component; displays plots for every metric as well as various information
Properties:
- `dbid Number`: The jobs DB ID
- `username String`: Empty string if auth. is disabled, otherwise the username as string
- `authlevel Number`: The current users authentication level
- `roles [Number]`: Enum containing available roles
-->
<script>
import {
init,
groupByScope,
fetchMetricsStore,
checkMetricDisabled,
transformDataForRoofline,
} from "./utils.js";
import {
queryStore,
gql,
getContextClient
} from "@urql/svelte";
import {
Row,
Col,
@@ -17,137 +25,147 @@
CardHeader,
CardTitle,
Button,
Icon,
} from "@sveltestrap/sveltestrap";
import PlotTable from "./PlotTable.svelte";
import Metric from "./Metric.svelte";
import Polar from "./plots/Polar.svelte";
import Roofline from "./plots/Roofline.svelte";
import JobInfo from "./joblist/JobInfo.svelte";
import TagManagement from "./TagManagement.svelte";
import MetricSelection from "./MetricSelection.svelte";
import StatsTable from "./StatsTable.svelte";
import JobFootprint from "./JobFootprint.svelte";
import { getContext } from "svelte";
import {
init,
groupByScope,
checkMetricDisabled,
transformDataForRoofline,
} from "./generic/utils.js";
import Metric from "./job/Metric.svelte";
import StatsTable from "./job/StatsTable.svelte";
import JobSummary from "./job/JobSummary.svelte";
import EnergySummary from "./job/EnergySummary.svelte";
import ConcurrentJobs from "./generic/helper/ConcurrentJobs.svelte";
import PlotGrid from "./generic/PlotGrid.svelte";
import Roofline from "./generic/plots/Roofline.svelte";
import JobInfo from "./generic/joblist/JobInfo.svelte";
import MetricSelection from "./generic/select/MetricSelection.svelte";
export let dbid;
export let username;
export let authlevel;
export let roles;
const accMetrics = [
"acc_utilization",
"acc_mem_used",
"acc_power",
"nv_mem_util",
"nv_sm_clock",
"nv_temp",
];
let accNodeOnly;
// Setup General
const ccconfig = getContext("cc-config")
let isMetricsSelectionOpen = false,
selectedMetrics = [],
selectedScopes = [];
let plots = {},
roofWidth,
statsTable
let missingMetrics = [],
missingHosts = [],
somethingMissing = false;
// Setup GQL
// First: Add Job Query to init function -> Only requires DBID as argument, received via URL-ID
// Second: Trigger jobMetrics query with now received jobInfos (scopes: from job metadata, selectedMetrics: from config or all, job: from url-id)
const { query: initq } = init(`
job(id: "${dbid}") {
id, jobId, user, project, cluster, startTime,
duration, numNodes, numHWThreads, numAcc,
duration, numNodes, numHWThreads, numAcc, energy,
SMT, exclusive, partition, subCluster, arrayJobId,
monitoringStatus, state, walltime,
tags { id, type, name },
tags { id, type, scope, name },
resources { hostname, hwthreads, accelerators },
metaData,
userData { name, email },
concurrentJobs { items { id, jobId }, count, listQuery },
flopsAnyAvg, memBwAvg, loadAvg
footprint { name, stat, value },
energyFootprint { hardware, metric, value }
}
`);
const ccconfig = getContext("cc-config"),
clusters = getContext("clusters"),
metrics = getContext("metrics");
const client = getContextClient();
const query = gql`
query ($dbid: ID!, $selectedMetrics: [String!]!, $selectedScopes: [MetricScope!]!) {
jobMetrics(id: $dbid, metrics: $selectedMetrics, scopes: $selectedScopes) {
name
scope
metric {
unit {
prefix
base
}
timestep
statisticsSeries {
min
mean
median
max
}
series {
hostname
id
data
statistics {
min
avg
max
}
}
}
}
}
`;
let isMetricsSelectionOpen = false,
selectedMetrics = [],
showFootprint = true,
isFetched = new Set();
const [jobMetrics, startFetching] = fetchMetricsStore();
$: jobMetrics = queryStore({
client: client,
query: query,
variables: { dbid, selectedMetrics, selectedScopes },
});
// Handle Job Query on Init -> is not executed anymore
getContext("on-init")(() => {
let job = $initq.data.job;
if (!job) return;
selectedMetrics =
ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
clusters
.find((c) => c.name == job.cluster)
.metricConfig.map((mc) => mc.name);
showFootprint =
ccconfig[`job_view_showFootprint`]
let toFetch = new Set([
const pendingMetrics = [
"flops_any",
"mem_bw",
...selectedMetrics,
...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
$initq.data.globalMetrics.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster)) {
names.push(gm.name);
}
return names;
}, [])
),
...(ccconfig[`job_view_polarPlotMetrics:${job.cluster}`] ||
ccconfig[`job_view_polarPlotMetrics`]),
ccconfig[`job_view_polarPlotMetrics`]
),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics`]),
]);
ccconfig[`job_view_nodestats_selectedMetrics`]
),
];
// Select default Scopes to load: Check before if accelerator metrics are not on accelerator scope by default
accNodeOnly = [...toFetch].some(function (m) {
if (accMetrics.includes(m)) {
const mc = metrics(job.cluster, m);
return mc.scope !== "accelerator";
} else {
return false;
}
// Select default Scopes to load: Check before if any metric has accelerator scope by default
const accScopeDefault = [...pendingMetrics].some(function (m) {
const cluster = $initq.data.clusters.find((c) => c.name == job.cluster);
const subCluster = cluster.subClusters.find((sc) => sc.name == job.subCluster);
return subCluster.metricConfig.find((smc) => smc.name == m)?.scope === "accelerator";
});
if (job.numAcc === 0 || accNodeOnly === true) {
// No Accels or Accels on Node Scope
startFetching(
job,
[...toFetch],
job.numNodes > 2 ? ["node"] : ["node", "socket", "core"],
);
} else {
// Accels and not on node scope
startFetching(
job,
[...toFetch],
job.numNodes > 2
? ["node", "accelerator"]
: ["node", "accelerator", "socket", "core"],
);
const pendingScopes = ["node"]
if (accScopeDefault) pendingScopes.push("accelerator")
if (job.numNodes === 1) {
pendingScopes.push("socket")
pendingScopes.push("core")
}
isFetched = toFetch;
selectedMetrics = [...new Set(pendingMetrics)];
selectedScopes = [...new Set(pendingScopes)];
});
const lazyFetchMoreMetrics = () => {
let notYetFetched = new Set();
for (let m of selectedMetrics) {
if (!isFetched.has(m)) {
notYetFetched.add(m);
isFetched.add(m);
}
}
if (notYetFetched.size > 0)
startFetching(
$initq.data.job,
[...notYetFetched],
$initq.data.job.numNodes > 2 ? ["node"] : ["node", "core"],
);
};
// Fetch more data once required:
$: if ($initq.data && $jobMetrics.data && selectedMetrics)
lazyFetchMoreMetrics();
let plots = {},
jobTags,
statsTable,
jobFootprint;
// Interactive Document Title
$: document.title = $initq.fetching
? "Loading..."
: $initq.error
@@ -155,15 +173,15 @@
: `Job ${$initq.data.job.jobId} - ClusterCockpit`;
// Find out what metrics or hosts are missing:
let missingMetrics = [],
missingHosts = [],
somethingMissing = false;
$: if ($initq.data && $jobMetrics.data) {
$: if ($initq?.data && $jobMetrics?.data?.jobMetrics) {
let job = $initq.data.job,
metrics = $jobMetrics.data.jobMetrics,
metricNames = clusters
.find((c) => c.name == job.cluster)
.metricConfig.map((mc) => mc.name);
metricNames = $initq.data.globalMetrics.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster)) {
names.push(gm.name);
}
return names;
}, []);
// Metric not found in JobMetrics && Metric not explicitly disabled in config or deselected: Was expected, but is Missing
missingMetrics = metricNames.filter(
@@ -192,6 +210,7 @@
somethingMissing = missingMetrics.length > 0 || missingHosts.length > 0;
}
// Helper
const orderAndMap = (grouped, selectedMetrics) =>
selectedMetrics.map((metric) => ({
metric: metric,
@@ -204,124 +223,134 @@
}));
</script>
<Row>
<Col>
<Row class="mb-3">
<!-- Column 1: Job Info, Job Tags, Concurrent Jobs, Admin Message if found-->
<Col xs={12} md={6} xl={3} class="mb-3 mb-xxl-0">
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq.data}
<JobInfo job={$initq.data.job} {jobTags} />
<Card class="overflow-auto" style="height: 400px;">
<TabContent> <!-- on:tab={(e) => (status = e.detail)} -->
{#if $initq.data?.job?.metaData?.message}
<TabPane tabId="admin-msg" tab="Admin Note" active>
<CardBody>
<Card body class="mb-2" color="warning">
<h5>Job {$initq.data?.job?.jobId} ({$initq.data?.job?.cluster})</h5>
The following note was added by administrators:
</Card>
<Card body>
{@html $initq.data.job.metaData.message}
</Card>
</CardBody>
</TabPane>
{/if}
<TabPane tabId="meta-info" tab="Job Info" active={$initq.data?.job?.metaData?.message?false:true}>
<CardBody class="pb-2">
<JobInfo job={$initq.data.job} {username} {authlevel} {roles} showTagedit/>
</CardBody>
</TabPane>
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
<TabPane tabId="shared-jobs">
<span slot="tab">
{$initq.data.job.concurrentJobs.items.length} Concurrent Jobs
</span>
<CardBody>
<ConcurrentJobs cJobs={$initq.data.job.concurrentJobs} showLinks={(authlevel > roles.manager)}/>
</CardBody>
</TabPane>
{/if}
</TabContent>
</Card>
{:else}
<Spinner secondary />
{/if}
</Col>
{#if $jobMetrics.data && showFootprint}
{#key $jobMetrics.data}
<Col>
<JobFootprint
bind:this={jobFootprint}
job={$initq.data.job}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
</Col>
{/key}
{/if}
{#if $jobMetrics.data && $initq.data}
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
{#if authlevel > roles.manager}
<Col>
<h5>
Concurrent Jobs <Icon
name="info-circle"
style="cursor:help;"
title="Shared jobs running on the same node with overlapping runtimes"
/>
</h5>
<ul>
<li>
<a
href="/monitoring/jobs/?{$initq.data.job.concurrentJobs
.listQuery}"
target="_blank">See All</a
>
</li>
{#each $initq.data.job.concurrentJobs.items as pjob, index}
<li>
<a href="/monitoring/job/{pjob.id}" target="_blank"
>{pjob.jobId}</a
>
</li>
{/each}
</ul>
</Col>
{:else}
<Col>
<h5>
{$initq.data.job.concurrentJobs.items.length} Concurrent Jobs
</h5>
<p>
Number of shared jobs on the same node with overlapping runtimes.
</p>
</Col>
{/if}
{/if}
<Col>
<Polar
metrics={ccconfig[
`job_view_polarPlotMetrics:${$initq.data.job.cluster}`
] || ccconfig[`job_view_polarPlotMetrics`]}
cluster={$initq.data.job.cluster}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
</Col>
<Col>
<Roofline
renderTime={true}
cluster={clusters
.find((c) => c.name == $initq.data.job.cluster)
.subClusters.find((sc) => sc.name == $initq.data.job.subCluster)}
data={transformDataForRoofline(
$jobMetrics.data?.jobMetrics?.find(
(m) => m.name == "flops_any" && m.scope == "node",
)?.metric,
$jobMetrics.data?.jobMetrics?.find(
(m) => m.name == "mem_bw" && m.scope == "node",
)?.metric,
)}
/>
</Col>
{:else}
<Col />
<Col />
{/if}
</Row>
<Row class="mb-3">
<Col xs="auto">
{#if $initq.data}
<TagManagement job={$initq.data.job} bind:jobTags />
{/if}
</Col>
<Col xs="auto">
{#if $initq.data}
<Button outline on:click={() => (isMetricsSelectionOpen = true)}>
<Icon name="graph-up" /> Metrics
</Button>
{/if}
</Col>
</Row>
<Row>
<Col>
{#if $jobMetrics.error}
{#if $initq.data.job.monitoringStatus == 0 || $initq.data.job.monitoringStatus == 2}
<Card body color="warning">Not monitored or archiving failed</Card>
<br />
{/if}
<Card body color="danger">{$jobMetrics.error.message}</Card>
{:else if $jobMetrics.fetching}
<!-- Column 2: Job Footprint, Polar Representation, Heuristic Summary -->
<Col xs={12} md={6} xl={4} xxl={3} class="mb-3 mb-xxl-0">
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq?.data && $jobMetrics?.data}
<JobSummary job={$initq.data.job} jobMetrics={$jobMetrics.data.jobMetrics}/>
{:else}
<Spinner secondary />
{:else if $jobMetrics.data && $initq.data}
<PlotTable
{/if}
</Col>
<!-- Column 3: Job Roofline; If footprint Enabled: full width, else half width -->
<Col xs={12} md={12} xl={5} xxl={6}>
{#if $initq.error || $jobMetrics.error}
<Card body color="danger">
<p>Initq Error: {$initq.error?.message}</p>
<p>jobMetrics Error: {$jobMetrics.error?.message}</p>
</Card>
{:else if $initq?.data && $jobMetrics?.data}
<Card style="height: 400px;">
<div bind:clientWidth={roofWidth}>
<Roofline
allowSizeChange={true}
width={roofWidth}
renderTime={true}
subCluster={$initq.data.clusters
.find((c) => c.name == $initq.data.job.cluster)
.subClusters.find((sc) => sc.name == $initq.data.job.subCluster)}
data={transformDataForRoofline(
$jobMetrics.data?.jobMetrics?.find(
(m) => m.name == "flops_any" && m.scope == "node",
)?.metric,
$jobMetrics.data?.jobMetrics?.find(
(m) => m.name == "mem_bw" && m.scope == "node",
)?.metric,
)}
/>
</div>
</Card>
{:else}
<Spinner secondary />
{/if}
</Col>
</Row>
{#if $initq?.data && $initq.data.job.energyFootprint.length != 0}
<Row class="mb-3">
<Col>
<EnergySummary jobId={$initq.data.job.jobId} jobEnergy={$initq.data.job.energy} jobEnergyFootprint={$initq.data.job.energyFootprint}/>
</Col>
</Row>
{/if}
<Card class="mb-3">
<CardBody>
<Row class="mb-2">
{#if $initq.data}
<Col xs="auto">
<Button outline on:click={() => (isMetricsSelectionOpen = true)} color="primary">
Select Metrics
</Button>
</Col>
{/if}
</Row>
<hr class="mb-2"/>
{#if $jobMetrics.error}
<Row class="mt-2">
<Col>
{#if $initq.data.job.monitoringStatus == 0 || $initq.data.job.monitoringStatus == 2}
<Card body color="warning">Not monitored or archiving failed</Card>
<br />
{/if}
<Card body color="danger">{$jobMetrics.error.message}</Card>
</Col>
</Row>
{:else if $jobMetrics.fetching}
<Row class="mt-2">
<Col>
<Spinner secondary />
</Col>
</Row>
{:else if $initq?.data && $jobMetrics?.data?.jobMetrics}
<PlotGrid
let:item
let:width
renderFor="job"
items={orderAndMap(
groupByScope($jobMetrics.data.jobMetrics),
@@ -335,90 +364,95 @@
on:more-loaded={({ detail }) => statsTable.moreLoaded(detail)}
job={$initq.data.job}
metricName={item.metric}
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope}
rawData={item.data.map((x) => x.metric)}
scopes={item.data.map((x) => x.scope)}
{width}
isShared={$initq.data.job.exclusive != 1}
/>
{:else}
<Card body color="warning"
<Card body color="warning" class="mt-2"
>No dataset returned for <code>{item.metric}</code></Card
>
{/if}
</PlotTable>
</PlotGrid>
{/if}
</Col>
</Row>
<Row class="mt-2">
</CardBody>
</Card>
<Row class="mb-3">
<Col>
{#if $initq.data}
<TabContent>
{#if somethingMissing}
<TabPane tabId="resources" tab="Resources" active={somethingMissing}>
<div style="margin: 10px;">
<Card color="warning">
<CardHeader>
<CardTitle>Missing Metrics/Resources</CardTitle>
</CardHeader>
<CardBody>
{#if missingMetrics.length > 0}
<p>
No data at all is available for the metrics: {missingMetrics.join(
", ",
)}
</p>
{/if}
{#if missingHosts.length > 0}
<p>Some metrics are missing for the following hosts:</p>
<ul>
{#each missingHosts as missing}
<li>
{missing.hostname}: {missing.metrics.join(", ")}
</li>
{/each}
</ul>
{/if}
</CardBody>
</Card>
<Card>
<TabContent>
{#if somethingMissing}
<TabPane tabId="resources" tab="Resources" active={somethingMissing}>
<div style="margin: 10px;">
<Card color="warning">
<CardHeader>
<CardTitle>Missing Metrics/Resources</CardTitle>
</CardHeader>
<CardBody>
{#if missingMetrics.length > 0}
<p>
No data at all is available for the metrics: {missingMetrics.join(
", ",
)}
</p>
{/if}
{#if missingHosts.length > 0}
<p>Some metrics are missing for the following hosts:</p>
<ul>
{#each missingHosts as missing}
<li>
{missing.hostname}: {missing.metrics.join(", ")}
</li>
{/each}
</ul>
{/if}
</CardBody>
</Card>
</div>
</TabPane>
{/if}
<TabPane
tabId="stats"
tab="Statistics Table"
class="overflow-x-auto"
active={!somethingMissing}
>
{#if $jobMetrics?.data?.jobMetrics}
{#key $jobMetrics.data.jobMetrics}
<StatsTable
bind:this={statsTable}
job={$initq.data.job}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
{/key}
{/if}
</TabPane>
<TabPane tabId="job-script" tab="Job Script">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.jobScript}
<pre><code>{$initq.data.job.metaData?.jobScript}</code></pre>
{:else}
<Card body color="warning">No job script available</Card>
{/if}
</div>
</TabPane>
{/if}
<TabPane
tabId="stats"
tab="Statistics Table"
active={!somethingMissing}
>
{#if $jobMetrics.data}
{#key $jobMetrics.data}
<StatsTable
bind:this={statsTable}
job={$initq.data.job}
jobMetrics={$jobMetrics.data.jobMetrics}
/>
{/key}
{/if}
</TabPane>
<TabPane tabId="job-script" tab="Job Script">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.jobScript}
<pre><code>{$initq.data.job.metaData?.jobScript}</code></pre>
{:else}
<Card body color="warning">No job script available</Card>
{/if}
</div>
</TabPane>
<TabPane tabId="slurm-info" tab="Slurm Info">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.slurmInfo}
<pre><code>{$initq.data.job.metaData?.slurmInfo}</code></pre>
{:else}
<Card body color="warning"
>No additional slurm information available</Card
>
{/if}
</div>
</TabPane>
</TabContent>
<TabPane tabId="slurm-info" tab="Slurm Info">
<div class="pre-wrapper">
{#if $initq.data.job.metaData?.slurmInfo}
<pre><code>{$initq.data.job.metaData?.slurmInfo}</code></pre>
{:else}
<Card body color="warning"
>No additional slurm information available</Card
>
{/if}
</div>
</TabPane>
</TabContent>
</Card>
{/if}
</Col>
</Row>

View File

@@ -1,268 +0,0 @@
<script context="module">
export function findJobThresholds(job, metricConfig, subClusterConfig) {
if (!job || !metricConfig || !subClusterConfig) {
console.warn("Argument missing for findJobThresholds!");
return null;
}
const subclusterThresholds = metricConfig.subClusters.find(
(sc) => sc.name == subClusterConfig.name,
);
const defaultThresholds = {
peak: subclusterThresholds
? subclusterThresholds.peak
: metricConfig.peak,
normal: subclusterThresholds
? subclusterThresholds.normal
: metricConfig.normal,
caution: subclusterThresholds
? subclusterThresholds.caution
: metricConfig.caution,
alert: subclusterThresholds
? subclusterThresholds.alert
: metricConfig.alert,
};
// Job_Exclusivity does not matter, only aggregation
if (metricConfig.aggregation === "avg") {
return defaultThresholds;
} else if (metricConfig.aggregation === "sum") {
const jobFraction =
job.numHWThreads / subClusterConfig.topology.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
} else {
console.warn(
"Missing or unkown aggregation mode (sum/avg) for metric:",
metricConfig,
);
return defaultThresholds;
}
}
</script>
<script>
import { getContext } from "svelte";
import {
Card,
CardHeader,
CardTitle,
CardBody,
Progress,
Icon,
Tooltip,
} from "@sveltestrap/sveltestrap";
import { mean, round } from "mathjs";
export let job;
export let jobMetrics;
export let view = "job";
export let width = "auto";
const clusters = getContext("clusters");
const subclusterConfig = clusters
.find((c) => c.name == job.cluster)
.subClusters.find((sc) => sc.name == job.subCluster);
const footprintMetrics =
job.numAcc !== 0
? job.exclusive !== 1 // GPU
? ["acc_utilization", "acc_mem_used", "nv_sm_clock", "nv_mem_util"] // Shared
: ["acc_utilization", "acc_mem_used", "nv_sm_clock", "nv_mem_util"] // Exclusive
: (job.exclusive !== 1) // CPU Only
? ["flops_any", "mem_used"] // Shared
: ["cpu_load", "flops_any", "mem_used", "mem_bw"]; // Exclusive
const footprintData = footprintMetrics.map((fm) => {
// Unit
const fmc = getContext("metrics")(job.cluster, fm);
let unit = "";
if (fmc?.unit?.base) unit = fmc.unit.prefix + fmc.unit.base;
// Threshold / -Differences
const fmt = findJobThresholds(job, fmc, subclusterConfig);
if (fm === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0);
// Value: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
// Exclusivity does not matter
let mv = 0.0;
if (fmc.aggregation === "avg") {
if (fm === "cpu_load" && job.loadAvg !== 0) {
mv = round(job.loadAvg, 2);
} else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
mv = round(job.flopsAnyAvg, 2);
} else if (fm === "mem_bw" && job.memBwAvg !== 0) {
mv = round(job.memBwAvg, 2);
} else {
// Calculate Avg from jobMetrics
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
if (jm?.metric?.statisticsSeries) {
const noNan = jm.metric.statisticsSeries.mean.filter(function (val) {
return val != null;
});
mv = round(mean(noNan), 2);
} else if (jm?.metric?.series?.length > 1) {
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
mv = round(mean(avgs), 2);
} else if (jm?.metric?.series) {
mv = round(jm.metric.series[0].statistics.avg, 2);
}
}
} else if (fmc.aggregation === "sum") {
// Calculate Sum from jobMetrics: Sum all node averages
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
if (jm?.metric?.series?.length > 1) { // More than 1 node
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
mv = round(avgs.reduce((a, b) => a + b, 0));
} else if (jm?.metric?.series) {
mv = round(jm.metric.series[0].statistics.avg, 2);
}
} else {
console.warn(
"Missing or unkown aggregation mode (sum/avg) for metric:",
metricConfig,
);
}
// Define basic data
const fmBase = {
name: fm,
unit: unit,
avg: mv,
max: fmt.peak,
};
if (evalFootprint(fm, mv, fmt, "alert")) {
return {
...fmBase,
color: "danger",
message: `Metric average way ${fm === "mem_used" ? "above" : "below"} expected normal thresholds.`,
impact: 3,
};
} else if (evalFootprint(fm, mv, fmt, "caution")) {
return {
...fmBase,
color: "warning",
message: `Metric average ${fm === "mem_used" ? "above" : "below"} expected normal thresholds.`,
impact: 2,
};
} else if (evalFootprint(fm, mv, fmt, "normal")) {
return {
...fmBase,
color: "success",
message: "Metric average within expected thresholds.",
impact: 1,
};
} else if (evalFootprint(fm, mv, fmt, "peak")) {
return {
...fmBase,
color: "info",
message:
"Metric average above expected normal thresholds: Check for artifacts recommended.",
impact: 0,
};
} else {
return {
...fmBase,
color: "secondary",
message:
"Metric average above expected peak threshold: Check for artifacts!",
impact: -1,
};
}
});
function evalFootprint(metric, mean, thresholds, level) {
// mem_used has inverse logic regarding threshold levels, notify levels triggered if mean > threshold
switch (level) {
case "peak":
if (metric === "mem_used")
return false; // mem_used over peak -> return false to trigger impact -1
else return mean <= thresholds.peak && mean > thresholds.normal;
case "alert":
if (metric === "mem_used")
return mean <= thresholds.peak && mean >= thresholds.alert;
else return mean <= thresholds.alert && mean >= 0;
case "caution":
if (metric === "mem_used")
return mean < thresholds.alert && mean >= thresholds.caution;
else return mean <= thresholds.caution && mean > thresholds.alert;
case "normal":
if (metric === "mem_used")
return mean < thresholds.caution && mean >= 0;
else return mean <= thresholds.normal && mean > thresholds.caution;
default:
return false;
}
}
</script>
<Card class="h-auto mt-1" style="width: {width}px;">
{#if view === "job"}
<CardHeader>
<CardTitle class="mb-0 d-flex justify-content-center">
Core Metrics Footprint
</CardTitle>
</CardHeader>
{/if}
<CardBody>
{#each footprintData as fpd, index}
<div class="mb-1 d-flex justify-content-between">
<div>&nbsp;<b>{fpd.name}</b></div>
<!-- For symmetry, see below ...-->
<div
class="cursor-help d-inline-flex"
id={`footprint-${job.jobId}-${index}`}
>
<div class="mx-1">
<!-- Alerts Only -->
{#if fpd.impact === 3 || fpd.impact === -1}
<Icon name="exclamation-triangle-fill" class="text-danger" />
{:else if fpd.impact === 2}
<Icon name="exclamation-triangle" class="text-warning" />
{/if}
<!-- Emoji for all states-->
{#if fpd.impact === 3}
<Icon name="emoji-frown" class="text-danger" />
{:else if fpd.impact === 2}
<Icon name="emoji-neutral" class="text-warning" />
{:else if fpd.impact === 1}
<Icon name="emoji-smile" class="text-success" />
{:else if fpd.impact === 0}
<Icon name="emoji-laughing" class="text-info" />
{:else if fpd.impact === -1}
<Icon name="emoji-dizzy" class="text-danger" />
{/if}
</div>
<div>
<!-- Print Values -->
{fpd.avg} / {fpd.max}
{fpd.unit} &nbsp; <!-- To increase margin to tooltip: No other way manageable ... -->
</div>
</div>
<Tooltip
target={`footprint-${job.jobId}-${index}`}
placement="right"
offset={[0, 20]}>{fpd.message}</Tooltip
>
</div>
<div class="mb-2">
<Progress value={fpd.avg} max={fpd.max} color={fpd.color} />
</div>
{/each}
{#if job?.metaData?.message}
<hr class="mt-1 mb-2" />
{@html job.metaData.message}
{/if}
</CardBody>
</Card>
<style>
.cursor-help {
cursor: help;
}
</style>

View File

@@ -1,20 +1,30 @@
<script>
<!--
@component Main job list component
Properties:
- `filterPresets Object?`: Optional predefined filter values [Default: {}]
- `authlevel Number`: The current users authentication level
- `roles [Number]`: Enum containing available roles
-->
<script>
import { onMount, getContext } from "svelte";
import { init } from "./utils.js";
import {
Row,
Col,
Button,
ButtonGroup,
Icon,
Card,
Spinner,
} from "@sveltestrap/sveltestrap";
import Filters from "./filters/Filters.svelte";
import JobList from "./joblist/JobList.svelte";
import Refresher from "./joblist/Refresher.svelte";
import Sorting from "./joblist/SortSelection.svelte";
import MetricSelection from "./MetricSelection.svelte";
import TextFilter from "./filters/TextFilter.svelte";
import { init } from "./generic/utils.js";
import Filters from "./generic/Filters.svelte";
import JobList from "./generic/JobList.svelte";
import TextFilter from "./generic/helper/TextFilter.svelte";
import Refresher from "./generic/helper/Refresher.svelte";
import Sorting from "./generic/select/SortSelection.svelte";
import MetricSelection from "./generic/select/MetricSelection.svelte";
const { query: initq } = init();
@@ -27,7 +37,7 @@
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
let jobList,
matchedJobs = null;
let sorting = { field: "startTime", order: "DESC" },
let sorting = { field: "startTime", type: "col", order: "DESC" },
isSortingOpen = false,
isMetricsSelectionOpen = false;
let metrics = filterPresets.cluster
@@ -43,69 +53,77 @@
// The filterPresets are handled by the Filters component,
// so we need to wait for it to be ready before we can start a query.
// This is also why JobList component starts out with a paused query.
onMount(() => filterComponent.update());
onMount(() => filterComponent.updateFilters());
</script>
<Row>
{#if $initq.fetching}
<Col xs="auto">
<!-- ROW1: Status-->
{#if $initq.fetching}
<Row class="mb-3">
<Col>
<Spinner />
</Col>
{:else if $initq.error}
<Col xs="auto">
</Row>
{:else if $initq.error}
<Row class="mb-3">
<Col>
<Card body color="danger">{$initq.error.message}</Card>
</Col>
{/if}
</Row>
<Row>
<Col xs="auto">
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
<Icon name="sort-up" /> Sorting
</Button>
<Button
outline
color="primary"
on:click={() => (isMetricsSelectionOpen = true)}
>
<Icon name="graph-up" /> Metrics
</Button>
<Button disabled outline
>{matchedJobs == null ? "Loading..." : `${matchedJobs} jobs`}</Button
>
</Row>
{/if}
<!-- ROW2: Tools-->
<Row cols={{ xs: 1, md: 2, lg: 4}} class="mb-3">
<Col lg="2" class="mb-2 mb-lg-0">
<ButtonGroup class="w-100">
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
<Icon name="sort-up" /> Sorting
</Button>
<Button
outline
color="primary"
on:click={() => (isMetricsSelectionOpen = true)}
>
<Icon name="graph-up" /> Metrics
</Button>
</ButtonGroup>
</Col>
<Col xs="auto">
<Col lg="4" xl="{(presetProject !== '') ? 5 : 6}" class="mb-1 mb-lg-0">
<Filters
{filterPresets}
{matchedJobs}
bind:this={filterComponent}
on:update={({ detail }) => {
on:update-filters={({ detail }) => {
selectedCluster = detail.filters[0]?.cluster
? detail.filters[0].cluster.eq
: null;
jobList.update(detail.filters);
jobList.queryJobs(detail.filters);
}}
/>
</Col>
<Col xs="3" style="margin-left: auto;">
<Col lg="3" xl="{(presetProject !== '') ? 3 : 2}" class="mb-2 mb-lg-0">
<TextFilter
{presetProject}
bind:authlevel
bind:roles
on:update={({ detail }) => filterComponent.update(detail)}
on:set-filter={({ detail }) => filterComponent.updateFilters(detail)}
/>
</Col>
<Col xs="2">
<Refresher on:reload={() => jobList.refresh()} />
<Col lg="3" xl="2" class="mb-1 mb-lg-0">
<Refresher on:refresh={() => {
jobList.refreshJobs()
jobList.refreshAllMetrics()
}} />
</Col>
</Row>
<br />
<!-- ROW3: Job List-->
<Row>
<Col>
<JobList
bind:this={jobList}
bind:metrics
bind:sorting
bind:matchedJobs
bind:this={jobList}
bind:showFootprint
/>
</Col>
@@ -119,5 +137,5 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
view="list"
footprintSelect={true}
/>

View File

@@ -1,9 +1,13 @@
<!--
@component List of users or projects
@component Main component for listing users or projects
Properties:
- `type String?`: The type of list ['USER' || 'PROJECT']
- `filterPresets Object?`: Optional predefined filter values [Default: {}]
-->
<script>
import { onMount } from "svelte";
import { init } from "./utils.js";
import {
Row,
Col,
@@ -15,9 +19,17 @@
InputGroup,
Input,
} from "@sveltestrap/sveltestrap";
import Filters from "./filters/Filters.svelte";
import { queryStore, gql, getContextClient } from "@urql/svelte";
import { scramble, scrambleNames } from "./joblist/JobInfo.svelte";
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
scramble,
scrambleNames,
} from "./generic/utils.js";
import Filters from "./generic/Filters.svelte";
const {} = init();
@@ -28,15 +40,9 @@
if (filterPresets?.startTime == null) {
if (filterPresets == null) filterPresets = {};
const lastMonth = new Date(
Date.now() - 30 * 24 * 60 * 60 * 1000,
).toISOString();
const now = new Date(Date.now()).toISOString();
filterPresets.startTime = {
from: lastMonth,
to: now,
range: "last30d",
text: "Last 30 Days",
url: "last30d",
};
}
@@ -89,11 +95,11 @@
return stats.filter((u) => u.id.includes(nameFilter)).sort(cmp);
}
onMount(() => filterComponent.update());
onMount(() => filterComponent.updateFilters());
</script>
<Row>
<Col xs="auto">
<Row cols={{ xs: 1, md: 2}}>
<Col xs="12" md="5" lg="4" xl="3" class="mb-2 mb-md-0">
<InputGroup>
<Button disabled outline>
Search {type.toLowerCase()}s
@@ -107,13 +113,13 @@
/>
</InputGroup>
</Col>
<Col xs="auto">
<Col xs="12" md="7" lg="8" xl="9">
<Filters
bind:this={filterComponent}
{filterPresets}
startTimeQuickSelect={true}
menuText="Only {type.toLowerCase()}s with jobs that match the filters will show up"
on:update={({ detail }) => {
on:update-filters={({ detail }) => {
jobFilters = detail.filters;
}}
/>
@@ -123,10 +129,11 @@
<thead>
<tr>
<th scope="col">
{{
USER: "Username",
PROJECT: "Project Name",
}[type]}
{#if type === 'USER'}
Username
{:else if type === 'PROJECT'}
Project Name
{/if}
<Button
color={sorting.field == "id" ? "primary" : "light"}
size="sm"

View File

@@ -1,147 +0,0 @@
<script>
import { getContext, createEventDispatcher } from "svelte";
import Timeseries from "./plots/MetricPlot.svelte";
import {
InputGroup,
InputGroupText,
Spinner,
Card,
} from "@sveltestrap/sveltestrap";
import { fetchMetrics, minScope } from "./utils";
export let job;
export let metricName;
export let scopes;
export let width;
export let rawData;
export let isShared = false;
const dispatch = createEventDispatcher();
const cluster = getContext("clusters").find(
(cluster) => cluster.name == job.cluster,
);
const subCluster = cluster.subClusters.find(
(subCluster) => subCluster.name == job.subCluster,
);
const metricConfig = cluster.metricConfig.find(
(metricConfig) => metricConfig.name == metricName,
);
let selectedHost = null,
plot,
fetching = false,
error = null;
let selectedScope = minScope(scopes);
let statsPattern = /(.*)-stats$/
let statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
let selectedScopeIndex
$: availableScopes = scopes;
$: patternMatches = statsPattern.exec(selectedScope)
$: if (!patternMatches) {
selectedScopeIndex = scopes.findIndex((s) => s == selectedScope);
} else {
selectedScopeIndex = scopes.findIndex((s) => s == patternMatches[1]);
}
$: data = rawData[selectedScopeIndex];
$: series = data?.series.filter(
(series) => selectedHost == null || series.hostname == selectedHost,
);
let from = null,
to = null;
export function setTimeRange(f, t) {
(from = f), (to = t);
}
$: if (plot != null) plot.setTimeRange(from, to);
export async function loadMore() {
fetching = true;
let response = await fetchMetrics(job, [metricName], ["core"]);
fetching = false;
if (response.error) {
error = response.error;
return;
}
for (let jm of response.data.jobMetrics) {
if (jm.scope != "node") {
scopes = [...scopes, jm.scope];
rawData.push(jm.metric);
statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
selectedScope = jm.scope;
selectedScopeIndex = scopes.findIndex((s) => s == jm.scope);
dispatch("more-loaded", jm);
}
}
}
$: if (selectedScope == "load-more") loadMore();
</script>
<InputGroup>
<InputGroupText style="min-width: 150px;">
{metricName} ({(metricConfig?.unit?.prefix
? metricConfig.unit.prefix
: "") + (metricConfig?.unit?.base ? metricConfig.unit.base : "")})
</InputGroupText>
<select class="form-select" bind:value={selectedScope}>
{#each availableScopes as scope, index}
<option value={scope}>{scope}</option>
{#if statsSeries[index]}
<option value={scope + '-stats'}>stats series ({scope})</option>
{/if}
{/each}
{#if availableScopes.length == 1 && metricConfig?.scope != "node"}
<option value={"load-more"}>Load more...</option>
{/if}
</select>
{#if job.resources.length > 1}
<select class="form-select" bind:value={selectedHost} disabled={patternMatches}>
<option value={null}>All Hosts</option>
{#each job.resources as { hostname }}
<option value={hostname}>{hostname}</option>
{/each}
</select>
{/if}
</InputGroup>
{#key series}
{#if fetching == true}
<Spinner />
{:else if error != null}
<Card body color="danger">{error.message}</Card>
{:else if series != null && !patternMatches}
<Timeseries
bind:this={plot}
{width}
height={300}
{cluster}
{subCluster}
timestep={data.timestep}
scope={selectedScope}
metric={metricName}
{series}
{isShared}
resources={job.resources}
/>
{:else if statsSeries[selectedScopeIndex] != null && patternMatches}
<Timeseries
bind:this={plot}
{width}
height={300}
{cluster}
{subCluster}
timestep={data.timestep}
scope={selectedScope}
metric={metricName}
{series}
{isShared}
resources={job.resources}
statisticsSeries={statsSeries[selectedScopeIndex]}
useStatsSeries={!!statsSeries[selectedScopeIndex]}
/>
{/if}
{/key}

View File

@@ -1,38 +0,0 @@
<script>
import {
Icon,
NavLink,
Dropdown,
DropdownToggle,
DropdownMenu,
DropdownItem,
} from "@sveltestrap/sveltestrap";
export let clusters; // array of names
export let links; // array of nav links
</script>
{#each links as item}
{#if !item.perCluster}
<NavLink href={item.href} active={window.location.pathname == item.href}
><Icon name={item.icon} /> {item.title}</NavLink
>
{:else}
<Dropdown nav inNavbar>
<DropdownToggle nav caret>
<Icon name={item.icon} />
{item.title}
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
{#each clusters as cluster}
<DropdownItem
href={item.href + cluster.name}
active={window.location.pathname == item.href + cluster.name}
>
{cluster.name}
</DropdownItem>
{/each}
</DropdownMenu>
</Dropdown>
{/if}
{/each}

View File

@@ -1,20 +1,38 @@
<!--
@component System-View subcomponent; renders all current metrics for specified node
Properties:
- `cluster String`: Currently selected cluster
- `hostname String`: Currently selected host (== node)
- `from Date?`: Custom Time Range selection 'from' [Default: null]
- `to Date?`: Custom Time Range selection 'to' [Default: null]
-->
<script>
import { init, checkMetricDisabled } from "./utils.js";
import { getContext } from "svelte";
import {
Row,
Col,
Input,
InputGroup,
InputGroupText,
Icon,
Spinner,
Card,
} from "@sveltestrap/sveltestrap";
import { queryStore, gql, getContextClient } from "@urql/svelte";
import TimeSelection from "./filters/TimeSelection.svelte";
import Refresher from "./joblist/Refresher.svelte";
import PlotTable from "./PlotTable.svelte";
import MetricPlot from "./plots/MetricPlot.svelte";
import { getContext } from "svelte";
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
checkMetricDisabled,
} from "./generic/utils.js";
import PlotGrid from "./generic/PlotGrid.svelte";
import MetricPlot from "./generic/plots/MetricPlot.svelte";
import TimeSelection from "./generic/select/TimeSelection.svelte";
import Refresher from "./generic/helper/Refresher.svelte";
export let cluster;
export let hostname;
@@ -26,9 +44,11 @@
if (from == null || to == null) {
to = new Date(Date.now());
from = new Date(to.getTime());
from.setMinutes(from.getMinutes() - 30);
from.setHours(from.getHours() - 12);
}
const initialized = getContext("initialized")
const globalMetrics = getContext("globalMetrics")
const ccconfig = getContext("cc-config");
const clusters = getContext("clusters");
const client = getContextClient();
@@ -71,18 +91,13 @@
},
});
let itemsPerPage = ccconfig.plot_list_jobsPerPage;
let page = 1;
let paging = { itemsPerPage, page };
let sorting = { field: "startTime", order: "DESC" };
$: filter = [
const paging = { itemsPerPage: 50, page: 1 };
const sorting = { field: "startTime", type: "col", order: "DESC" };
const filter = [
{ cluster: { eq: cluster } },
{ node: { contains: hostname } },
{ state: ["running"] },
// {startTime: {
// from: from.toISOString(),
// to: to.toISOString()
// }}
];
const nodeJobsQuery = gql`
@@ -92,10 +107,6 @@
$paging: PageRequest!
) {
jobs(filter: $filter, order: $sorting, page: $paging) {
# items {
# id
# jobId
# }
count
}
}
@@ -107,66 +118,65 @@
variables: { paging, sorting, filter },
});
let metricUnits = {};
$: if ($nodeMetricsData.data) {
let thisCluster = clusters.find((c) => c.name == cluster);
if (thisCluster) {
for (let metric of thisCluster.metricConfig) {
if (metric.unit.prefix || metric.unit.base) {
metricUnits[metric.name] =
"(" +
(metric.unit.prefix ? metric.unit.prefix : "") +
(metric.unit.base ? metric.unit.base : "") +
")";
} else {
// If no unit defined: Omit Unit Display
metricUnits[metric.name] = "";
}
}
let systemUnits = {};
function loadUnits(isInitialized) {
if (!isInitialized) return
const systemMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))]
for (let sm of systemMetrics) {
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
}
}
const dateToUnixEpoch = (rfc3339) => Math.floor(Date.parse(rfc3339) / 1000);
$: loadUnits($initialized)
</script>
<Row>
<Row cols={{ xs: 2, lg: 4 }}>
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq.fetching}
<Spinner />
{:else}
<!-- Node Col -->
<Col>
<InputGroup>
<InputGroupText><Icon name="hdd" /></InputGroupText>
<InputGroupText>{hostname} ({cluster})</InputGroupText>
<InputGroupText>Selected Node</InputGroupText>
<Input style="background-color: white;"type="text" value="{hostname} ({cluster})" disabled/>
</InputGroup>
</Col>
<!-- Time Col -->
<Col>
<TimeSelection bind:from bind:to />
</Col>
<!-- Concurrent Col -->
<Col class="mt-2 mt-lg-0">
{#if $nodeJobsData.fetching}
<Spinner />
{:else if $nodeJobsData.data}
Currently running jobs on this node: {$nodeJobsData.data.jobs.count}
[
<a
href="/monitoring/jobs/?cluster={cluster}&state=running&node={hostname}"
target="_blank">View in Job List</a
> ]
<InputGroup>
<InputGroupText><Icon name="activity" /></InputGroupText>
<InputGroupText>Activity</InputGroupText>
<Input style="background-color: white;"type="text" value="{$nodeJobsData.data.jobs.count} Jobs" disabled/>
<a title="Show jobs running on this node" href="/monitoring/jobs/?cluster={cluster}&state=running&node={hostname}" target="_blank" class="btn btn-outline-secondary" role="button" aria-disabled="true">
<Icon name="view-list" /> Show List
</a>
</InputGroup>
{:else}
<Input type="text" disabled>
No currently running jobs.
</Input>
{/if}
</Col>
<Col>
<!-- Refresh Col-->
<Col class="mt-2 mt-lg-0">
<Refresher
on:reload={() => {
on:refresh={() => {
const diff = Date.now() - to;
from = new Date(from.getTime() + diff);
to = new Date(to.getTime() + diff);
}}
/>
</Col>
<Col>
<TimeSelection bind:from bind:to />
</Col>
{/if}
</Row>
<br />
@@ -177,9 +187,8 @@
{:else if $nodeMetricsData.fetching || $initq.fetching}
<Spinner />
{:else}
<PlotTable
<PlotGrid
let:item
let:width
renderFor="node"
itemsPerRow={ccconfig.plot_view_plotsPerRow}
items={$nodeMetricsData.data.nodeMetrics[0].metrics
@@ -195,18 +204,15 @@
>
<h4 style="text-align: center; padding-top:15px;">
{item.name}
{metricUnits[item.name]}
{systemUnits[item.name] ? "(" + systemUnits[item.name] + ")" : ""}
</h4>
{#if item.disabled === false && item.metric}
<MetricPlot
{width}
height={300}
metric={item.name}
timestep={item.metric.timestep}
cluster={clusters.find((c) => c.name == cluster)}
subCluster={$nodeMetricsData.data.nodeMetrics[0].subCluster}
series={item.metric.series}
resources={[{ hostname: hostname }]}
forNode={true}
/>
{:else if item.disabled === true && item.metric}
@@ -224,7 +230,7 @@
>No dataset returned for <code>{item.name}</code></Card
>
{/if}
</PlotTable>
</PlotGrid>
{/if}
</Col>
</Row>

View File

@@ -1,9 +1,12 @@
<script>
<!--
@component Main cluster status view component; renders current system-usage information
Properties:
- `cluster String`: The cluster to show status information for
-->
<script>
import { getContext } from "svelte";
import Refresher from "./joblist/Refresher.svelte";
import Roofline from "./plots/Roofline.svelte";
import Pie, { colors } from "./plots/Pie.svelte";
import Histogram from "./plots/Histogram.svelte";
import {
Row,
Col,
@@ -17,29 +20,32 @@
Icon,
Button,
} from "@sveltestrap/sveltestrap";
import {
init,
convert2uplot,
transformPerNodeDataForRoofline,
} from "./utils.js";
import { scaleNumbers } from "./units.js";
import {
queryStore,
gql,
getContextClient,
mutationStore,
} from "@urql/svelte";
import PlotTable from "./PlotTable.svelte";
import HistogramSelection from "./HistogramSelection.svelte";
import {
init,
convert2uplot,
transformPerNodeDataForRoofline,
} from "./generic/utils.js";
import { scaleNumbers } from "./generic/units.js";
import PlotGrid from "./generic/PlotGrid.svelte";
import Roofline from "./generic/plots/Roofline.svelte";
import Pie, { colors } from "./generic/plots/Pie.svelte";
import Histogram from "./generic/plots/Histogram.svelte";
import Refresher from "./generic/helper/Refresher.svelte";
import HistogramSelection from "./generic/select/HistogramSelection.svelte";
const { query: initq } = init();
const ccconfig = getContext("cc-config");
export let cluster;
let plotWidths = [],
colWidth1,
colWidth2;
let plotWidths = [];
let colWidth;
let from = new Date(Date.now() - 5 * 60 * 1000),
to = new Date(Date.now());
const topOptions = [
@@ -146,7 +152,7 @@
`,
variables: {
cluster: cluster,
metrics: ["flops_any", "mem_bw"],
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
from: from.toISOString(),
to: to.toISOString(),
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
@@ -331,7 +337,7 @@
<Col class="mt-2 mt-md-0">
<Refresher
initially={120}
on:reload={() => {
on:refresh={() => {
from = new Date(Date.now() - 5 * 60 * 1000);
to = new Date(Date.now());
}}
@@ -442,7 +448,7 @@
allowSizeChange={true}
width={plotWidths[i] - 10}
height={300}
cluster={subCluster}
subCluster={subCluster}
data={transformPerNodeDataForRoofline(
$mainQuery.data.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name,
@@ -461,7 +467,7 @@
<Row cols={{ lg: 4, md: 2, sm: 1 }}>
<Col class="p-2">
<div bind:clientWidth={colWidth1}>
<div bind:clientWidth={colWidth}>
<h4 class="text-center">
Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)}
</h4>
@@ -472,7 +478,7 @@
<Card body color="danger">{$topUserQuery.error.message}</Card>
{:else}
<Pie
size={colWidth1}
size={colWidth}
sliceLabel={topUserSelection.label}
quantities={$topUserQuery.data.topUser.map(
(tu) => tu[topUserSelection.key],
@@ -532,7 +538,7 @@
<Card body color="danger">{$topProjectQuery.error.message}</Card>
{:else}
<Pie
size={colWidth1}
size={colWidth}
sliceLabel={topProjectSelection.label}
quantities={$topProjectQuery.data.topProjects.map(
(tp) => tp[topProjectSelection.key],
@@ -584,25 +590,21 @@
<hr class="my-2" />
<Row cols={{ lg: 2, md: 1 }}>
<Col class="p-2">
<div bind:clientWidth={colWidth2}>
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histDuration)}
width={colWidth2 - 25}
title="Duration Distribution"
xlabel="Current Runtimes"
xunit="Hours"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</div>
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histDuration)}
title="Duration Distribution"
xlabel="Current Runtimes"
xunit="Hours"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
<Col class="p-2">
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumNodes)}
width={colWidth2 - 25}
title="Number of Nodes Distribution"
xlabel="Allocated Nodes"
xunit="Nodes"
@@ -614,25 +616,21 @@
</Row>
<Row cols={{ lg: 2, md: 1 }}>
<Col class="p-2">
<div bind:clientWidth={colWidth2}>
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumCores)}
width={colWidth2 - 25}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Cores"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</div>
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumCores)}
title="Number of Cores Distribution"
xlabel="Allocated Cores"
xunit="Cores"
ylabel="Number of Jobs"
yunit="Jobs"
/>
{/key}
</Col>
<Col class="p-2">
{#key $mainQuery.data.stats}
<Histogram
data={convert2uplot($mainQuery.data.stats[0].histNumAccs)}
width={colWidth2 - 25}
title="Number of Accelerators Distribution"
xlabel="Allocated Accs"
xunit="Accs"
@@ -644,31 +642,24 @@
</Row>
<hr class="my-2" />
{#if metricsInHistograms}
<Row cols={1}>
<Col>
{#key $mainQuery.data.stats[0].histMetrics}
<PlotTable
let:item
let:width
renderFor="user"
items={$mainQuery.data.stats[0].histMetrics}
itemsPerRow={2}
>
<Histogram
data={convert2uplot(item.data)}
usesBins={true}
{width}
height={250}
title="Distribution of '{item.metric}' averages"
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
xunit={item.unit}
ylabel="Number of Jobs"
yunit="Jobs"
/>
</PlotTable>
{/key}
</Col>
</Row>
{#key $mainQuery.data.stats[0].histMetrics}
<PlotGrid
let:item
renderFor="user"
items={$mainQuery.data.stats[0].histMetrics}
itemsPerRow={2}
>
<Histogram
data={convert2uplot(item.data)}
usesBins={true}
title="Distribution of '{item.metric}' averages"
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
xunit={item.unit}
ylabel="Number of Jobs"
yunit="Jobs"
/>
</PlotGrid>
{/key}
{/if}
{/if}

View File

@@ -1,6 +1,14 @@
<!--
@component Main cluster metric status view component; renders current state of metrics / nodes
Properties:
- `cluster String`: The cluster to show status information for
- `from Date?`: Custom Time Range selection 'from' [Default: null]
- `to Date?`: Custom Time Range selection 'to' [Default: null]
-->
<script>
import { init, checkMetricDisabled } from "./utils.js";
import Refresher from "./joblist/Refresher.svelte";
import { getContext } from "svelte";
import {
Row,
Col,
@@ -11,11 +19,19 @@
Spinner,
Card,
} from "@sveltestrap/sveltestrap";
import { queryStore, gql, getContextClient } from "@urql/svelte";
import TimeSelection from "./filters/TimeSelection.svelte";
import PlotTable from "./PlotTable.svelte";
import MetricPlot from "./plots/MetricPlot.svelte";
import { getContext } from "svelte";
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
checkMetricDisabled,
} from "./generic/utils.js";
import PlotGrid from "./generic/PlotGrid.svelte";
import MetricPlot from "./generic/plots/MetricPlot.svelte";
import TimeSelection from "./generic/select/TimeSelection.svelte";
import Refresher from "./generic/helper/Refresher.svelte";
export let cluster;
export let from = null;
@@ -26,14 +42,14 @@
if (from == null || to == null) {
to = new Date(Date.now());
from = new Date(to.getTime());
from.setMinutes(from.getMinutes() - 30);
from.setHours(from.getHours() - 12);
}
const clusters = getContext("clusters");
const initialized = getContext("initialized");
const ccconfig = getContext("cc-config");
const metricConfig = getContext("metrics");
const clusters = getContext("clusters");
const globalMetrics = getContext("globalMetrics");
let plotHeight = 300;
let hostnameFilter = "";
let selectedMetric = ccconfig.system_view_selectedMetric;
@@ -80,57 +96,27 @@
},
});
let metricUnits = {};
$: if ($nodesQuery.data) {
let thisCluster = clusters.find((c) => c.name == cluster);
if (thisCluster) {
for (let metric of thisCluster.metricConfig) {
if (metric.unit.prefix || metric.unit.base) {
metricUnits[metric.name] =
"(" +
(metric.unit.prefix ? metric.unit.prefix : "") +
(metric.unit.base ? metric.unit.base : "") +
")";
} else {
// If no unit defined: Omit Unit Display
metricUnits[metric.name] = "";
}
}
let systemMetrics = [];
let systemUnits = {};
function loadMetrics(isInitialized) {
if (!isInitialized) return
systemMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))]
for (let sm of systemMetrics) {
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
}
}
$: loadMetrics($initialized)
</script>
<Row>
<Row cols={{ xs: 2, lg: 4 }}>
{#if $initq.error}
<Card body color="danger">{$initq.error.message}</Card>
{:else if $initq.fetching}
<Spinner />
{:else}
<Col>
<Refresher
on:reload={() => {
const diff = Date.now() - to;
from = new Date(from.getTime() + diff);
to = new Date(to.getTime() + diff);
}}
/>
</Col>
<Col>
<TimeSelection bind:from bind:to />
</Col>
<Col>
<InputGroup>
<InputGroupText><Icon name="graph-up" /></InputGroupText>
<InputGroupText>Metric</InputGroupText>
<select class="form-select" bind:value={selectedMetric}>
{#each clusters.find((c) => c.name == cluster).metricConfig as metric}
<option value={metric.name}
>{metric.name} {metricUnits[metric.name]}</option
>
{/each}
</select>
</InputGroup>
</Col>
<!-- Node Col-->
<Col>
<InputGroup>
<InputGroupText><Icon name="hdd" /></InputGroupText>
@@ -142,77 +128,105 @@
/>
</InputGroup>
</Col>
<!-- Range Col-->
<Col>
<TimeSelection bind:from bind:to />
</Col>
<!-- Metric Col-->
<Col class="mt-2 mt-lg-0">
<InputGroup>
<InputGroupText><Icon name="graph-up" /></InputGroupText>
<InputGroupText>Metric</InputGroupText>
<select class="form-select" bind:value={selectedMetric}>
{#each systemMetrics as metric}
<option value={metric.name}
>{metric.name} {systemUnits[metric.name] ? "("+systemUnits[metric.name]+")" : ""}</option
>
{/each}
</select>
</InputGroup>
</Col>
<!-- Refresh Col-->
<Col class="mt-2 mt-lg-0">
<Refresher
on:refresh={() => {
const diff = Date.now() - to;
from = new Date(from.getTime() + diff);
to = new Date(to.getTime() + diff);
}}
/>
</Col>
{/if}
</Row>
<br />
<Row>
<Col>
{#if $nodesQuery.error}
{#if $nodesQuery.error}
<Row>
<Col>
<Card body color="danger">{$nodesQuery.error.message}</Card>
{:else if $nodesQuery.fetching || $initq.fetching}
</Col>
</Row>
{:else if $nodesQuery.fetching || $initq.fetching}
<Row>
<Col>
<Spinner />
{:else}
<PlotTable
let:item
let:width
renderFor="systems"
itemsPerRow={ccconfig.plot_view_plotsPerRow}
items={$nodesQuery.data.nodeMetrics
.filter(
(h) =>
h.host.includes(hostnameFilter) &&
h.metrics.some(
(m) => m.name == selectedMetric && m.scope == "node",
),
)
.map((h) => ({
host: h.host,
subCluster: h.subCluster,
data: h.metrics.find(
(m) => m.name == selectedMetric && m.scope == "node",
),
disabled: checkMetricDisabled(
selectedMetric,
cluster,
h.subCluster,
),
}))
.sort((a, b) => a.host.localeCompare(b.host))}
</Col>
</Row>
{:else}
<PlotGrid
let:item
renderFor="systems"
itemsPerRow={ccconfig.plot_view_plotsPerRow}
items={$nodesQuery.data.nodeMetrics
.filter(
(h) =>
h.host.includes(hostnameFilter) &&
h.metrics.some(
(m) => m.name == selectedMetric && m.scope == "node",
),
)
.map((h) => ({
host: h.host,
subCluster: h.subCluster,
data: h.metrics.find(
(m) => m.name == selectedMetric && m.scope == "node",
),
disabled: checkMetricDisabled(
selectedMetric,
cluster,
h.subCluster,
),
}))
.sort((a, b) => a.host.localeCompare(b.host))}
>
<h4 style="width: 100%; text-align: center;">
<a
style="display: block;padding-top: 15px;"
href="/monitoring/node/{cluster}/{item.host}"
>{item.host} ({item.subCluster})</a
>
</h4>
{#if item.disabled === false && item.data}
<MetricPlot
timestep={item.data.metric.timestep}
series={item.data.metric.series}
metric={item.data.name}
cluster={clusters.find((c) => c.name == cluster)}
subCluster={item.subCluster}
forNode={true}
/>
{:else if item.disabled === true && item.data}
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="info"
>Metric disabled for subcluster <code
>{selectedMetric}:{item.subCluster}</code
></Card
>
{:else}
<Card
style="margin-left: 2rem;margin-right: 2rem;"
body
color="warning"
>No dataset returned for <code>{selectedMetric}</code></Card
>
<h4 style="width: 100%; text-align: center;">
<a
style="display: block;padding-top: 15px;"
href="/monitoring/node/{cluster}/{item.host}"
>{item.host} ({item.subCluster})</a
>
</h4>
{#if item.disabled === false && item.data}
<MetricPlot
{width}
height={plotHeight}
timestep={item.data.metric.timestep}
series={item.data.metric.series}
metric={item.data.name}
cluster={clusters.find((c) => c.name == cluster)}
subCluster={item.subCluster}
resources={[{ hostname: item.host }]}
forNode={true}
/>
{:else if item.disabled === true && item.data}
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="info"
>Metric disabled for subcluster <code
>{selectedMetric}:{item.subCluster}</code
></Card
>
{:else}
<Card
style="margin-left: 2rem;margin-right: 2rem;"
body
color="warning"
>No dataset returned for <code>{selectedMetric}</code></Card
>
{/if}
</PlotTable>
{/if}
</Col>
</Row>
</PlotGrid>
{/if}

View File

@@ -1,234 +0,0 @@
<script>
import { getContext } from "svelte";
import { gql, getContextClient, mutationStore } from "@urql/svelte";
import {
Icon,
Button,
ListGroupItem,
Spinner,
Modal,
Input,
ModalBody,
ModalHeader,
ModalFooter,
Alert,
} from "@sveltestrap/sveltestrap";
import { fuzzySearchTags } from "./utils.js";
import Tag from "./Tag.svelte";
export let job;
export let jobTags = job.tags;
let allTags = getContext("tags"),
initialized = getContext("initialized");
let newTagType = "",
newTagName = "";
let filterTerm = "";
let pendingChange = false;
let isOpen = false;
const client = getContextClient();
const createTagMutation = ({ type, name }) => {
return mutationStore({
client: client,
query: gql`
mutation ($type: String!, $name: String!) {
createTag(type: $type, name: $name) {
id
type
name
}
}
`,
variables: { type, name },
});
};
const addTagsToJobMutation = ({ job, tagIds }) => {
return mutationStore({
client: client,
query: gql`
mutation ($job: ID!, $tagIds: [ID!]!) {
addTagsToJob(job: $job, tagIds: $tagIds) {
id
type
name
}
}
`,
variables: { job, tagIds },
});
};
const removeTagsFromJobMutation = ({ job, tagIds }) => {
return mutationStore({
client: client,
query: gql`
mutation ($job: ID!, $tagIds: [ID!]!) {
removeTagsFromJob(job: $job, tagIds: $tagIds) {
id
type
name
}
}
`,
variables: { job, tagIds },
});
};
let allTagsFiltered; // $initialized is in there because when it becomes true, allTags is initailzed.
$: allTagsFiltered = ($initialized, fuzzySearchTags(filterTerm, allTags));
$: {
newTagType = "";
newTagName = "";
let parts = filterTerm.split(":").map((s) => s.trim());
if (parts.length == 2 && parts.every((s) => s.length > 0)) {
newTagType = parts[0];
newTagName = parts[1];
}
}
function isNewTag(type, name) {
for (let tag of allTagsFiltered)
if (tag.type == type && tag.name == name) return false;
return true;
}
function createTag(type, name) {
pendingChange = true;
createTagMutation({ type: type, name: name }).subscribe((res) => {
if (res.fetching === false && !res.error) {
pendingChange = false;
allTags = [...allTags, res.data.createTag];
newTagType = "";
newTagName = "";
addTagToJob(res.data.createTag);
} else if (res.fetching === false && res.error) {
throw res.error;
// console.log('Error on subscription: ' + res.error)
}
});
}
function addTagToJob(tag) {
pendingChange = tag.id;
addTagsToJobMutation({ job: job.id, tagIds: [tag.id] }).subscribe((res) => {
if (res.fetching === false && !res.error) {
jobTags = job.tags = res.data.addTagsToJob;
pendingChange = false;
} else if (res.fetching === false && res.error) {
throw res.error;
// console.log('Error on subscription: ' + res.error)
}
});
}
function removeTagFromJob(tag) {
pendingChange = tag.id;
removeTagsFromJobMutation({ job: job.id, tagIds: [tag.id] }).subscribe(
(res) => {
if (res.fetching === false && !res.error) {
jobTags = job.tags = res.data.removeTagsFromJob;
pendingChange = false;
} else if (res.fetching === false && res.error) {
throw res.error;
// console.log('Error on subscription: ' + res.error)
}
},
);
}
</script>
<Modal {isOpen} toggle={() => (isOpen = !isOpen)}>
<ModalHeader>
Manage Tags
{#if pendingChange !== false}
<Spinner size="sm" secondary />
{:else}
<Icon name="tags" />
{/if}
</ModalHeader>
<ModalBody>
<Input
style="width: 100%;"
type="text"
placeholder="Search Tags"
bind:value={filterTerm}
/>
<br />
<Alert color="info">
Search using "<code>type: name</code>". If no tag matches your search, a
button for creating a new one will appear.
</Alert>
<ul class="list-group">
{#each allTagsFiltered as tag}
<ListGroupItem>
<Tag {tag} />
<span style="float: right;">
{#if pendingChange === tag.id}
<Spinner size="sm" secondary />
{:else if job.tags.find((t) => t.id == tag.id)}
<Button
size="sm"
outline
color="danger"
on:click={() => removeTagFromJob(tag)}
>
<Icon name="x" />
</Button>
{:else}
<Button
size="sm"
outline
color="success"
on:click={() => addTagToJob(tag)}
>
<Icon name="plus" />
</Button>
{/if}
</span>
</ListGroupItem>
{:else}
<ListGroupItem disabled>
<i>No tags matching</i>
</ListGroupItem>
{/each}
</ul>
<br />
{#if newTagType && newTagName && isNewTag(newTagType, newTagName)}
<Button
outline
color="success"
on:click={(e) => (
e.preventDefault(), createTag(newTagType, newTagName)
)}
>
Create & Add Tag:
<Tag tag={{ type: newTagType, name: newTagName }} clickable={false} />
</Button>
{:else if allTagsFiltered.length == 0}
<Alert>Search Term is not a valid Tag (<code>type: name</code>)</Alert>
{/if}
</ModalBody>
<ModalFooter>
<Button color="primary" on:click={() => (isOpen = false)}>Close</Button>
</ModalFooter>
</Modal>
<Button outline on:click={() => (isOpen = true)}>
Manage Tags <Icon name="tags" />
</Button>
<style>
ul.list-group {
max-height: 450px;
margin-bottom: 10px;
overflow: scroll;
}
</style>

View File

@@ -1,26 +1,43 @@
<!--
@component Main user jobs list display component; displays job list and additional information for a given user
Properties:
- `user Object`: The GraphQL user object
- `filterPresets Object`: Optional predefined filter values
-->
<script>
import { onMount, getContext } from "svelte";
import { init, convert2uplot } from "./utils.js";
import {
Table,
Row,
Col,
Button,
ButtonGroup,
Icon,
Card,
Spinner,
} from "@sveltestrap/sveltestrap";
import { queryStore, gql, getContextClient } from "@urql/svelte";
import Filters from "./filters/Filters.svelte";
import TextFilter from "./filters/TextFilter.svelte"
import JobList from "./joblist/JobList.svelte";
import Sorting from "./joblist/SortSelection.svelte";
import Refresher from "./joblist/Refresher.svelte";
import Histogram from "./plots/Histogram.svelte";
import MetricSelection from "./MetricSelection.svelte";
import HistogramSelection from "./HistogramSelection.svelte";
import PlotTable from "./PlotTable.svelte";
import { scramble, scrambleNames } from "./joblist/JobInfo.svelte";
import {
queryStore,
gql,
getContextClient,
} from "@urql/svelte";
import {
init,
convert2uplot,
scramble,
scrambleNames,
} from "./generic/utils.js";
import JobList from "./generic/JobList.svelte";
import Filters from "./generic/Filters.svelte";
import PlotGrid from "./generic/PlotGrid.svelte";
import Histogram from "./generic/plots/Histogram.svelte";
import MetricSelection from "./generic/select/MetricSelection.svelte";
import HistogramSelection from "./generic/select/HistogramSelection.svelte";
import Sorting from "./generic/select/SortSelection.svelte";
import TextFilter from "./generic/helper/TextFilter.svelte"
import Refresher from "./generic/helper/Refresher.svelte";
const { query: initq } = init();
@@ -32,14 +49,12 @@
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
let jobList;
let jobFilters = [];
let sorting = { field: "startTime", order: "DESC" },
let matchedJobs = 0;
let sorting = { field: "startTime", type: "col", order: "DESC" },
isSortingOpen = false;
let metrics = ccconfig.plot_list_selectedMetrics,
isMetricsSelectionOpen = false;
let w1,
w2,
histogramHeight = 250,
isHistogramSelectionOpen = false;
let isHistogramSelectionOpen = false;
let selectedCluster = filterPresets?.cluster ? filterPresets.cluster : null;
let showFootprint = filterPresets.cluster
? !!ccconfig[`plot_list_showFootprint:${filterPresets.cluster}`]
@@ -70,6 +85,7 @@
histMetrics {
metric
unit
stat
data {
min
max
@@ -83,66 +99,70 @@
variables: { jobFilters, metricsInHistograms },
});
onMount(() => filterComponent.update());
onMount(() => filterComponent.updateFilters());
</script>
<Row>
{#if $initq.fetching}
<!-- ROW1: Status-->
{#if $initq.fetching}
<Row>
<Col>
<Spinner />
</Col>
{:else if $initq.error}
<Col xs="auto">
</Row>
{:else if $initq.error}
<Row>
<Col>
<Card body color="danger">{$initq.error.message}</Card>
</Col>
{/if}
</Row>
{/if}
<Col xs="auto">
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
<Icon name="sort-up" /> Sorting
</Button>
<Button
outline
color="primary"
on:click={() => (isMetricsSelectionOpen = true)}
>
<Icon name="graph-up" /> Metrics
</Button>
<Button
outline
color="secondary"
on:click={() => (isHistogramSelectionOpen = true)}
>
<Icon name="bar-chart-line" /> Select Histograms
</Button>
<!-- ROW2: Tools-->
<Row cols={{ xs: 1, md: 2, lg: 4}} class="mb-3">
<Col lg="2" class="mb-2 mb-lg-0">
<ButtonGroup class="w-100">
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
<Icon name="sort-up" /> Sorting
</Button>
<Button
outline
color="primary"
on:click={() => (isMetricsSelectionOpen = true)}
>
<Icon name="graph-up" /> Metrics
</Button>
</ButtonGroup>
</Col>
<Col xs="auto">
<Col lg="4" xl="6" class="mb-1 mb-lg-0">
<Filters
{filterPresets}
{matchedJobs}
startTimeQuickSelect={true}
bind:this={filterComponent}
on:update={({ detail }) => {
on:update-filters={({ detail }) => {
jobFilters = [...detail.filters, { user: { eq: user.username } }];
selectedCluster = jobFilters[0]?.cluster
? jobFilters[0].cluster.eq
: null;
jobList.update(jobFilters);
jobList.queryJobs(jobFilters);
}}
/>
</Col>
<Col xs="auto" style="margin-left: auto;">
<Col lg="3" xl="2" class="mb-2 mb-lg-0">
<TextFilter
on:update={({ detail }) => filterComponent.update(detail)}
on:set-filter={({ detail }) => filterComponent.updateFilters(detail)}
/>
</Col>
<Col xs="auto">
<Refresher on:reload={() => jobList.refresh()} />
<Col lg="3" xl="2" class="mb-1 mb-lg-0">
<Refresher on:refresh={() => {
jobList.refreshJobs()
jobList.refreshAllMetrics()
}} />
</Col>
</Row>
<br />
<Row>
<!-- ROW3: Base Information-->
<Row cols={{ xs: 1, md: 3}} class="mb-2">
{#if $stats.error}
<Col>
<Card body color="danger">{$stats.error.message}</Card>
@@ -152,7 +172,7 @@
<Spinner secondary />
</Col>
{:else}
<Col xs="4">
<Col>
<Table>
<tbody>
<tr>
@@ -190,12 +210,10 @@
</tbody>
</Table>
</Col>
<div class="col-4 text-center" bind:clientWidth={w1}>
<Col class="px-1">
{#key $stats.data.jobsStatistics[0].histDuration}
<Histogram
data={convert2uplot($stats.data.jobsStatistics[0].histDuration)}
width={w1 - 25}
height={histogramHeight}
title="Duration Distribution"
xlabel="Current Runtimes"
xunit="Hours"
@@ -203,13 +221,11 @@
yunit="Jobs"
/>
{/key}
</div>
<div class="col-4 text-center" bind:clientWidth={w2}>
</Col>
<Col class="px-1">
{#key $stats.data.jobsStatistics[0].histNumNodes}
<Histogram
data={convert2uplot($stats.data.jobsStatistics[0].histNumNodes)}
width={w2 - 25}
height={histogramHeight}
title="Number of Nodes Distribution"
xlabel="Allocated Nodes"
xunit="Nodes"
@@ -217,50 +233,74 @@
yunit="Jobs"
/>
{/key}
</div>
</Col>
{/if}
</Row>
{#if metricsInHistograms}
<Row>
{#if $stats.error}
<!-- ROW4+5: Selectable Histograms -->
<Row cols={{ xs: 1, md: 5}}>
<Col>
<Button
outline
color="secondary"
on:click={() => (isHistogramSelectionOpen = true)}
>
<Icon name="bar-chart-line" /> Select Histograms
</Button>
</Col>
</Row>
{#if metricsInHistograms?.length > 0}
{#if $stats.error}
<Row>
<Col>
<Card body color="danger">{$stats.error.message}</Card>
</Col>
{:else if !$stats.data}
</Row>
{:else if !$stats.data}
<Row>
<Col>
<Spinner secondary />
</Col>
{:else}
<Col>
{#key $stats.data.jobsStatistics[0].histMetrics}
<PlotTable
let:item
let:width
renderFor="user"
items={$stats.data.jobsStatistics[0].histMetrics}
itemsPerRow={3}
>
<Histogram
data={convert2uplot(item.data)}
usesBins={true}
{width}
height={250}
title="Distribution of '{item.metric}' averages"
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
xunit={item.unit}
ylabel="Number of Jobs"
yunit="Jobs"
/>
</PlotTable>
{/key}
</Col>
{/if}
</Row>
{:else}
<hr class="my-2"/>
{#key $stats.data.jobsStatistics[0].histMetrics}
<PlotGrid
let:item
renderFor="user"
items={$stats.data.jobsStatistics[0].histMetrics}
itemsPerRow={3}
>
<Histogram
data={convert2uplot(item.data)}
usesBins={true}
title="Distribution of '{item.metric} ({item.stat})' footprints"
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
xunit={item.unit}
ylabel="Number of Jobs"
yunit="Jobs"
/>
</PlotGrid>
{/key}
{/if}
{:else}
<Row class="mt-2">
<Col>
<Card body>No footprint histograms selected.</Card>
</Col>
</Row>
{/if}
<br />
<Row>
<!-- ROW6: JOB LIST-->
<Row class="mt-3">
<Col>
<JobList bind:metrics bind:sorting bind:this={jobList} bind:showFootprint />
<JobList
bind:this={jobList}
bind:matchedJobs
bind:metrics
bind:sorting
bind:showFootprint
/>
</Col>
</Row>
@@ -272,7 +312,7 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
view="list"
footprintSelect={true}
/>
<HistogramSelection

View File

@@ -1,65 +0,0 @@
<script>
import { Icon, InputGroup, InputGroupText } from "@sveltestrap/sveltestrap";
export let timeseriesPlots;
let windowSize = 100; // Goes from 0 to 100
let windowPosition = 50; // Goes from 0 to 100
function updatePlots() {
let ws = windowSize / (100 * 2),
wp = windowPosition / 100;
let from = wp - ws,
to = wp + ws;
Object.values(timeseriesPlots).forEach((plot) =>
plot.setTimeRange(from, to),
);
}
// Rendering a big job can take a long time, so we
// throttle the rerenders to every 100ms here.
let timeoutId = null;
function requestUpdatePlots() {
if (timeoutId != null) window.cancelAnimationFrame(timeoutId);
timeoutId = window.requestAnimationFrame(() => {
updatePlots();
timeoutId = null;
}, 100);
}
$: requestUpdatePlots(windowSize, windowPosition);
</script>
<div>
<InputGroup>
<InputGroupText>
<Icon name="zoom-in" />
</InputGroupText>
<InputGroupText>
Window Size:
<input
style="margin: 0em 0em 0em 1em"
type="range"
bind:value={windowSize}
min="1"
max="100"
step="1"
/>
<span style="width: 5em;">
({windowSize}%)
</span>
</InputGroupText>
<InputGroupText>
Window Position:
<input
style="margin: 0em 0em 0em 1em"
type="range"
bind:value={windowPosition}
min="0"
max="100"
step="1"
/>
</InputGroupText>
</InputGroup>
</div>

View File

@@ -6,7 +6,8 @@ filterPresets.cluster = cluster
new Analysis({
target: document.getElementById('svelte-app'),
props: {
filterPresets: filterPresets
filterPresets: filterPresets,
cluster: cluster
},
context: new Map([
['cc-config', clusterCockpitConfig]

View File

@@ -1,3 +1,12 @@
<!--
@component Analysis-View subcomponent; allows selection for normalized histograms and scatterplots
Properties:
- `availableMetrics [String]`: Available metrics in selected cluster
- `metricsInHistograms [String]`: The currently selected metrics to display as histogram
- `metricsInScatterplots [[String, String]]`: The currently selected metrics to display as scatterplot
-->
<script>
import {
Modal,
@@ -41,7 +50,6 @@
}).subscribe((res) => {
if (res.fetching === false && res.error) {
throw res.error;
// console.log('Error on subscription: ' + res.error)
}
});
}

View File

@@ -4,9 +4,12 @@ import Config from './Config.root.svelte'
new Config({
target: document.getElementById('svelte-app'),
props: {
isAdmin: isAdmin
isAdmin: isAdmin,
isApi: isApi,
username: username
},
context: new Map([
['cc-config', clusterCockpitConfig]
['cc-config', clusterCockpitConfig],
['resampling', resampleConfig]
])
})

View File

@@ -1,3 +1,7 @@
<!--
@component Admin settings wrapper
-->
<script>
import { Row, Col } from "@sveltestrap/sveltestrap";
import { onMount } from "svelte";
@@ -11,7 +15,7 @@
let roles = [];
function getUserList() {
fetch("/api/users/?via-ldap=false&not-just-user=true")
fetch("/config/users/?via-ldap=false&not-just-user=true")
.then((res) => res.json())
.then((usersRaw) => {
users = usersRaw;
@@ -19,7 +23,7 @@
}
function getValidRoles() {
fetch("/api/roles/")
fetch("/config/roles/")
.then((res) => res.json())
.then((rolesRaw) => {
roles = rolesRaw;
@@ -47,7 +51,5 @@
<Col>
<EditProject on:reload={getUserList} />
</Col>
<Col>
<Options />
</Col>
<Options />
</Row>

View File

@@ -1,552 +0,0 @@
<script>
import {
Button,
Table,
Row,
Col,
Card,
CardTitle,
} from "@sveltestrap/sveltestrap";
import { fade } from "svelte/transition";
export let config;
let message = { msg: "", target: "", color: "#d63384" };
let displayMessage = false;
const colorschemes = {
Default: [
"#00bfff",
"#0000ff",
"#ff00ff",
"#ff0000",
"#ff8000",
"#ffff00",
"#80ff00",
],
Autumn: [
"rgb(255,0,0)",
"rgb(255,11,0)",
"rgb(255,20,0)",
"rgb(255,30,0)",
"rgb(255,41,0)",
"rgb(255,50,0)",
"rgb(255,60,0)",
"rgb(255,71,0)",
"rgb(255,80,0)",
"rgb(255,90,0)",
"rgb(255,101,0)",
"rgb(255,111,0)",
"rgb(255,120,0)",
"rgb(255,131,0)",
"rgb(255,141,0)",
"rgb(255,150,0)",
"rgb(255,161,0)",
"rgb(255,171,0)",
"rgb(255,180,0)",
"rgb(255,190,0)",
"rgb(255,201,0)",
"rgb(255,210,0)",
"rgb(255,220,0)",
"rgb(255,231,0)",
"rgb(255,240,0)",
"rgb(255,250,0)",
],
Beach: [
"rgb(0,252,0)",
"rgb(0,233,0)",
"rgb(0,212,0)",
"rgb(0,189,0)",
"rgb(0,169,0)",
"rgb(0,148,0)",
"rgb(0,129,4)",
"rgb(0,145,46)",
"rgb(0,162,90)",
"rgb(0,180,132)",
"rgb(29,143,136)",
"rgb(73,88,136)",
"rgb(115,32,136)",
"rgb(81,9,64)",
"rgb(124,51,23)",
"rgb(162,90,0)",
"rgb(194,132,0)",
"rgb(220,171,0)",
"rgb(231,213,0)",
"rgb(0,0,13)",
"rgb(0,0,55)",
"rgb(0,0,92)",
"rgb(0,0,127)",
"rgb(0,0,159)",
"rgb(0,0,196)",
"rgb(0,0,233)",
],
BlueRed: [
"rgb(0,0,131)",
"rgb(0,0,168)",
"rgb(0,0,208)",
"rgb(0,0,247)",
"rgb(0,27,255)",
"rgb(0,67,255)",
"rgb(0,108,255)",
"rgb(0,148,255)",
"rgb(0,187,255)",
"rgb(0,227,255)",
"rgb(8,255,247)",
"rgb(48,255,208)",
"rgb(87,255,168)",
"rgb(127,255,127)",
"rgb(168,255,87)",
"rgb(208,255,48)",
"rgb(247,255,8)",
"rgb(255,224,0)",
"rgb(255,183,0)",
"rgb(255,143,0)",
"rgb(255,104,0)",
"rgb(255,64,0)",
"rgb(255,23,0)",
"rgb(238,0,0)",
"rgb(194,0,0)",
"rgb(150,0,0)",
],
Rainbow: [
"rgb(125,0,255)",
"rgb(85,0,255)",
"rgb(39,0,255)",
"rgb(0,6,255)",
"rgb(0,51,255)",
"rgb(0,97,255)",
"rgb(0,141,255)",
"rgb(0,187,255)",
"rgb(0,231,255)",
"rgb(0,255,233)",
"rgb(0,255,189)",
"rgb(0,255,143)",
"rgb(0,255,99)",
"rgb(0,255,53)",
"rgb(0,255,9)",
"rgb(37,255,0)",
"rgb(83,255,0)",
"rgb(127,255,0)",
"rgb(173,255,0)",
"rgb(217,255,0)",
"rgb(255,248,0)",
"rgb(255,203,0)",
"rgb(255,159,0)",
"rgb(255,113,0)",
"rgb(255,69,0)",
"rgb(255,23,0)",
],
Binary: [
"rgb(215,215,215)",
"rgb(206,206,206)",
"rgb(196,196,196)",
"rgb(185,185,185)",
"rgb(176,176,176)",
"rgb(166,166,166)",
"rgb(155,155,155)",
"rgb(145,145,145)",
"rgb(136,136,136)",
"rgb(125,125,125)",
"rgb(115,115,115)",
"rgb(106,106,106)",
"rgb(95,95,95)",
"rgb(85,85,85)",
"rgb(76,76,76)",
"rgb(66,66,66)",
"rgb(55,55,55)",
"rgb(46,46,46)",
"rgb(36,36,36)",
"rgb(25,25,25)",
"rgb(16,16,16)",
"rgb(6,6,6)",
],
GistEarth: [
"rgb(0,0,0)",
"rgb(2,7,117)",
"rgb(9,30,118)",
"rgb(16,53,120)",
"rgb(23,73,122)",
"rgb(31,93,124)",
"rgb(39,110,125)",
"rgb(47,126,127)",
"rgb(51,133,119)",
"rgb(57,138,106)",
"rgb(62,145,94)",
"rgb(66,150,82)",
"rgb(74,157,71)",
"rgb(97,162,77)",
"rgb(121,168,83)",
"rgb(136,173,85)",
"rgb(153,176,88)",
"rgb(170,180,92)",
"rgb(185,182,94)",
"rgb(189,173,99)",
"rgb(192,164,101)",
"rgb(203,169,124)",
"rgb(215,178,149)",
"rgb(226,192,176)",
"rgb(238,212,204)",
"rgb(248,236,236)",
],
BlueWaves: [
"rgb(83,0,215)",
"rgb(43,6,108)",
"rgb(9,16,16)",
"rgb(8,32,25)",
"rgb(0,50,8)",
"rgb(27,64,66)",
"rgb(69,67,178)",
"rgb(115,62,210)",
"rgb(155,50,104)",
"rgb(178,43,41)",
"rgb(180,51,34)",
"rgb(161,78,87)",
"rgb(124,117,187)",
"rgb(78,155,203)",
"rgb(34,178,85)",
"rgb(4,176,2)",
"rgb(9,152,27)",
"rgb(4,118,2)",
"rgb(34,92,85)",
"rgb(78,92,203)",
"rgb(124,127,187)",
"rgb(161,187,87)",
"rgb(180,248,34)",
"rgb(178,220,41)",
"rgb(155,217,104)",
"rgb(115,254,210)",
],
BlueGreenRedYellow: [
"rgb(0,0,0)",
"rgb(0,0,20)",
"rgb(0,0,41)",
"rgb(0,0,62)",
"rgb(0,25,83)",
"rgb(0,57,101)",
"rgb(0,87,101)",
"rgb(0,118,101)",
"rgb(0,150,101)",
"rgb(0,150,69)",
"rgb(0,148,37)",
"rgb(0,141,6)",
"rgb(60,120,0)",
"rgb(131,87,0)",
"rgb(180,25,0)",
"rgb(203,13,0)",
"rgb(208,36,0)",
"rgb(213,60,0)",
"rgb(219,83,0)",
"rgb(224,106,0)",
"rgb(229,129,0)",
"rgb(233,152,0)",
"rgb(238,176,0)",
"rgb(243,199,0)",
"rgb(248,222,0)",
"rgb(254,245,0)",
],
};
async function handleSettingSubmit(selector, target) {
let form = document.querySelector(selector);
let formData = new FormData(form);
try {
const res = await fetch(form.action, { method: "POST", body: formData });
if (res.ok) {
let text = await res.text();
popMessage(text, target, "#048109");
} else {
let text = await res.text();
// console.log(res.statusText)
throw new Error("Response Code " + res.status + "-> " + text);
}
} catch (err) {
popMessage(err, target, "#d63384");
}
return false;
}
function popMessage(response, restarget, rescolor) {
message = { msg: response, target: restarget, color: rescolor };
displayMessage = true;
setTimeout(function () {
displayMessage = false;
}, 3500);
}
</script>
<Row cols={4} class="p-2 g-2">
<!-- LINE WIDTH -->
<Col
><Card class="h-100">
<!-- Important: Function with arguments needs to be event-triggered like on:submit={() => functionName('Some','Args')} OR no arguments and like this: on:submit={functionName} -->
<form
id="line-width-form"
method="post"
action="/api/configuration/"
class="card-body"
on:submit|preventDefault={() =>
handleSettingSubmit("#line-width-form", "lw")}
>
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
<CardTitle
style="margin-bottom: 1em; display: flex; align-items: center;"
>
<div>Line Width</div>
<!-- Expand If-Clause for clarity once -->
{#if displayMessage && message.target == "lw"}
<div style="margin-left: auto; font-size: 0.9em;">
<code style="color: {message.color};" out:fade>
Update: {message.msg}
</code>
</div>
{/if}
</CardTitle>
<input type="hidden" name="key" value="plot_general_lineWidth" />
<div class="mb-3">
<label for="value" class="form-label">Line Width</label>
<input
type="number"
class="form-control"
id="lwvalue"
name="value"
aria-describedby="lineWidthHelp"
value={config.plot_general_lineWidth}
min="1"
/>
<div id="lineWidthHelp" class="form-text">
Width of the lines in the timeseries plots.
</div>
</div>
<Button color="primary" type="submit">Submit</Button>
</form>
</Card></Col
>
<!-- PLOTS PER ROW -->
<Col
><Card class="h-100">
<form
id="plots-per-row-form"
method="post"
action="/api/configuration/"
class="card-body"
on:submit|preventDefault={() =>
handleSettingSubmit("#plots-per-row-form", "ppr")}
>
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
<CardTitle
style="margin-bottom: 1em; display: flex; align-items: center;"
>
<div>Plots per Row</div>
{#if displayMessage && message.target == "ppr"}<div
style="margin-left: auto; font-size: 0.9em;"
>
<code style="color: {message.color};" out:fade
>Update: {message.msg}</code
>
</div>{/if}
</CardTitle>
<input type="hidden" name="key" value="plot_view_plotsPerRow" />
<div class="mb-3">
<label for="value" class="form-label">Plots per Row</label>
<input
type="number"
class="form-control"
id="pprvalue"
name="value"
aria-describedby="plotsperrowHelp"
value={config.plot_view_plotsPerRow}
min="1"
/>
<div id="plotsperrowHelp" class="form-text">
How many plots to show next to each other on pages such as
/monitoring/job/, /monitoring/system/...
</div>
</div>
<Button color="primary" type="submit">Submit</Button>
</form>
</Card></Col
>
<!-- BACKGROUND -->
<Col
><Card class="h-100">
<form
id="backgrounds-form"
method="post"
action="/api/configuration/"
class="card-body"
on:submit|preventDefault={() =>
handleSettingSubmit("#backgrounds-form", "bg")}
>
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
<CardTitle
style="margin-bottom: 1em; display: flex; align-items: center;"
>
<div>Colored Backgrounds</div>
{#if displayMessage && message.target == "bg"}<div
style="margin-left: auto; font-size: 0.9em;"
>
<code style="color: {message.color};" out:fade
>Update: {message.msg}</code
>
</div>{/if}
</CardTitle>
<input type="hidden" name="key" value="plot_general_colorBackground" />
<div class="mb-3">
<div>
{#if config.plot_general_colorBackground}
<input type="radio" id="true" name="value" value="true" checked />
{:else}
<input type="radio" id="true" name="value" value="true" />
{/if}
<label for="true">Yes</label>
</div>
<div>
{#if config.plot_general_colorBackground}
<input type="radio" id="false" name="value" value="false" />
{:else}
<input
type="radio"
id="false"
name="value"
value="false"
checked
/>
{/if}
<label for="false">No</label>
</div>
</div>
<Button color="primary" type="submit">Submit</Button>
</form>
</Card></Col
>
<!-- PAGING -->
<Col
><Card class="h-100">
<form
id="paging-form"
method="post"
action="/api/configuration/"
class="card-body"
on:submit|preventDefault={() =>
handleSettingSubmit("#paging-form", "pag")}
>
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
<CardTitle
style="margin-bottom: 1em; display: flex; align-items: center;"
>
<div>Paging Type</div>
{#if displayMessage && message.target == "pag"}<div
style="margin-left: auto; font-size: 0.9em;"
>
<code style="color: {message.color};" out:fade
>Update: {message.msg}</code
>
</div>{/if}
</CardTitle>
<input type="hidden" name="key" value="job_list_usePaging" />
<div class="mb-3">
<div>
{#if config.job_list_usePaging}
<input type="radio" id="true" name="value" value="true" checked />
{:else}
<input type="radio" id="true" name="value" value="true" />
{/if}
<label for="true">Paging with selectable count of jobs.</label>
</div>
<div>
{#if config.job_list_usePaging}
<input type="radio" id="false" name="value" value="false" />
{:else}
<input
type="radio"
id="false"
name="value"
value="false"
checked
/>
{/if}
<label for="false">Continuous scroll iteratively adding 10 jobs.</label>
</div>
</div>
<Button color="primary" type="submit">Submit</Button>
</form>
</Card></Col
>
</Row>
<Row cols={1} class="p-2 g-2">
<!-- COLORSCHEME -->
<Col
><Card>
<form
id="colorscheme-form"
method="post"
action="/api/configuration/"
class="card-body"
>
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
<CardTitle
style="margin-bottom: 1em; display: flex; align-items: center;"
>
<div>Color Scheme for Timeseries Plots</div>
{#if displayMessage && message.target == "cs"}<div
style="margin-left: auto; font-size: 0.9em;"
>
<code style="color: {message.color};" out:fade
>Update: {message.msg}</code
>
</div>{/if}
</CardTitle>
<input type="hidden" name="key" value="plot_general_colorscheme" />
<Table hover>
<tbody>
{#each Object.entries(colorschemes) as [name, rgbrow]}
<tr>
<th scope="col">{name}</th>
<td>
{#if rgbrow.join(",") == config.plot_general_colorscheme}
<input
type="radio"
name="value"
value={JSON.stringify(rgbrow)}
checked
on:click={() =>
handleSettingSubmit("#colorscheme-form", "cs")}
/>
{:else}
<input
type="radio"
name="value"
value={JSON.stringify(rgbrow)}
on:click={() =>
handleSettingSubmit("#colorscheme-form", "cs")}
/>
{/if}
</td>
<td>
{#each rgbrow as rgb}
<span class="color-dot" style="background-color: {rgb};"
></span>
{/each}
</td>
</tr>
{/each}
</tbody>
</Table>
</form>
</Card></Col
>
</Row>
<style>
.color-dot {
height: 10px;
width: 10px;
border-radius: 50%;
display: inline-block;
}
</style>

View File

@@ -0,0 +1,54 @@
<!--
@component User settings wrapper
Properties:
- `username String!`: Empty string if auth. is disabled, otherwise the username as string
- `isApi Bool!`: Is currently logged in user api authority
-->
<script>
import { getContext } from "svelte";
import UserOptions from "./user/UserOptions.svelte";
import PlotRenderOptions from "./user/PlotRenderOptions.svelte";
import PlotColorScheme from "./user/PlotColorScheme.svelte";
export let username
export let isApi
const ccconfig = getContext("cc-config");
let message = { msg: "", target: "", color: "#d63384" };
let displayMessage = false;
async function handleSettingSubmit(event) {
const selector = event.detail.selector
const target = event.detail.target
let form = document.querySelector(selector);
let formData = new FormData(form);
try {
const res = await fetch(form.action, { method: "POST", body: formData });
if (res.ok) {
let text = await res.text();
popMessage(text, target, "#048109");
} else {
let text = await res.text();
throw new Error("Response Code " + res.status + "-> " + text);
}
} catch (err) {
popMessage(err, target, "#d63384");
}
return false;
}
function popMessage(response, restarget, rescolor) {
message = { msg: response, target: restarget, color: rescolor };
displayMessage = true;
setTimeout(function () {
displayMessage = false;
}, 3500);
}
</script>
<UserOptions config={ccconfig} {username} {isApi} bind:message bind:displayMessage on:update-config={(e) => handleSettingSubmit(e)}/>
<PlotRenderOptions config={ccconfig} bind:message bind:displayMessage on:update-config={(e) => handleSettingSubmit(e)}/>
<PlotColorScheme config={ccconfig} bind:message bind:displayMessage on:update-config={(e) => handleSettingSubmit(e)}/>

View File

@@ -1,4 +1,14 @@
<script>
<!--
@component User creation form card
Properties:
- `roles [String]!`: List of roles used in app as strings
Events:
- `reload`: Trigger upstream reload of user list after user creation
-->
<script>
import { Button, Card, CardTitle } from "@sveltestrap/sveltestrap";
import { createEventDispatcher } from "svelte";
import { fade } from "svelte/transition";
@@ -8,7 +18,7 @@
let message = { msg: "", color: "#d63384" };
let displayMessage = false;
export let roles = [];
export let roles;
async function handleUserSubmit() {
let form = document.querySelector("#create-user-form");
@@ -23,7 +33,6 @@
form.reset();
} else {
let text = await res.text();
// console.log(res.statusText)
throw new Error("Response Code " + res.status + "-> " + text);
}
} catch (err) {
@@ -48,7 +57,7 @@
<form
id="create-user-form"
method="post"
action="/api/users/"
action="/config/users/"
class="card-body"
on:submit|preventDefault={handleUserSubmit}
>

View File

@@ -1,3 +1,10 @@
<!--
@component User managed project edit form card
Events:
- `reload`: Trigger upstream reload of user list after project update
-->
<script>
import { Card, CardTitle, CardBody } from "@sveltestrap/sveltestrap";
import { createEventDispatcher } from "svelte";
@@ -22,7 +29,7 @@
formData.append("add-project", project);
try {
const res = await fetch(`/api/user/${username}`, {
const res = await fetch(`/config/user/${username}`, {
method: "POST",
body: formData,
});
@@ -32,7 +39,6 @@
reloadUserList();
} else {
let text = await res.text();
// console.log(res.statusText)
throw new Error("Response Code " + res.status + "-> " + text);
}
} catch (err) {
@@ -54,7 +60,7 @@
formData.append("remove-project", project);
try {
const res = await fetch(`/api/user/${username}`, {
const res = await fetch(`/config/user/${username}`, {
method: "POST",
body: formData,
});
@@ -64,7 +70,6 @@
reloadUserList();
} else {
let text = await res.text();
// console.log(res.statusText)
throw new Error("Response Code " + res.status + "-> " + text);
}
} catch (err) {

View File

@@ -1,3 +1,13 @@
<!--
@component User role edit form card
Properties:
- `roles [String]!`: List of roles used in app as strings
Events:
- `reload`: Trigger upstream reload of user list after role edit
-->
<script>
import { Card, CardTitle, CardBody } from "@sveltestrap/sveltestrap";
import { createEventDispatcher } from "svelte";
@@ -8,7 +18,7 @@
let message = { msg: "", color: "#d63384" };
let displayMessage = false;
export let roles = [];
export let roles;
async function handleAddRole() {
const username = document.querySelector("#role-username").value;
@@ -24,7 +34,7 @@
formData.append("add-role", role);
try {
const res = await fetch(`/api/user/${username}`, {
const res = await fetch(`/config/user/${username}`, {
method: "POST",
body: formData,
});
@@ -34,7 +44,6 @@
reloadUserList();
} else {
let text = await res.text();
// console.log(res.statusText)
throw new Error("Response Code " + res.status + "-> " + text);
}
} catch (err) {
@@ -56,7 +65,7 @@
formData.append("remove-role", role);
try {
const res = await fetch(`/api/user/${username}`, {
const res = await fetch(`/config/user/${username}`, {
method: "POST",
body: formData,
});
@@ -66,7 +75,6 @@
reloadUserList();
} else {
let text = await res.text();
// console.log(res.statusText)
throw new Error("Response Code " + res.status + "-> " + text);
}
} catch (err) {

View File

@@ -1,9 +1,15 @@
<!--
@component Admin option select card
-->
<script>
import { onMount } from "svelte";
import { Card, CardBody, CardTitle } from "@sveltestrap/sveltestrap";
import { getContext, onMount } from "svelte";
import { Col, Card, CardBody, CardTitle } from "@sveltestrap/sveltestrap";
let scrambled;
const resampleConfig = getContext("resampling");
onMount(() => {
scrambled = window.localStorage.getItem("cc-scramble-names") != null;
});
@@ -19,16 +25,30 @@
}
</script>
<Card class="h-100">
<CardBody>
<CardTitle class="mb-3">Scramble Names / Presentation Mode</CardTitle>
<input
type="checkbox"
id="scramble-names-checkbox"
style="margin-right: 1em;"
on:click={handleScramble}
bind:checked={scrambled}
/>
Active?
</CardBody>
</Card>
<Col>
<Card class="h-100">
<CardBody>
<CardTitle class="mb-3">Scramble Names / Presentation Mode</CardTitle>
<input
type="checkbox"
id="scramble-names-checkbox"
style="margin-right: 1em;"
on:click={handleScramble}
bind:checked={scrambled}
/>
Active?
</CardBody>
</Card>
</Col>
{#if resampleConfig}
<Col>
<Card class="h-100">
<CardBody>
<CardTitle class="mb-3">Metric Plot Resampling</CardTitle>
<p>Triggered at {resampleConfig.trigger} datapoints.</p>
<p>Configured resolutions: {resampleConfig.resolutions}</p>
</CardBody>
</Card>
</Col>
{/if}

View File

@@ -1,3 +1,13 @@
<!--
@component User management table
Properties:
- `users [Object]?`: List of users
Events:
- `reload`: Trigger upstream reload of user list
-->
<script>
import {
Button,
@@ -20,7 +30,7 @@
if (confirm("Are you sure?")) {
let formData = new FormData();
formData.append("username", username);
fetch("/api/users/", { method: "DELETE", body: formData }).then((res) => {
fetch("/config/users/", { method: "DELETE", body: formData }).then((res) => {
if (res.status == 200) {
reloadUserList();
} else {

View File

@@ -1,18 +1,25 @@
<!--
@component User data row for table
Properties:
- `user Object!`: User Object
- {username: String, name: String, roles: [String], projects: String, email: String}
-->
<script>
import { Button } from "@sveltestrap/sveltestrap";
import { fetchJwt } from "../../generic/utils.js"
export let user;
let jwt = "";
let jwt = "";
function getUserJwt(username) {
fetch(`/api/jwt/?username=${username}`)
.then((res) => res.text())
.then((text) => {
jwt = text;
navigator.clipboard
.writeText(text)
.catch((reason) => console.error(reason));
});
const p = fetchJwt(username);
p.then((content) => {
jwt = content
}).catch((error) => {
console.error(`Could not get JWT: ${error}`);
});
}
</script>

Some files were not shown because too many files have changed in this diff Show More