mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-04-20 14:27:29 +02:00
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.20.x
|
||||
go-version: 1.22.x
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
- name: Build, Vet & Test
|
||||
|
||||
22
Makefile
22
Makefile
@@ -2,7 +2,7 @@ TARGET = ./cc-backend
|
||||
VAR = ./var
|
||||
CFG = config.json .env
|
||||
FRONTEND = ./web/frontend
|
||||
VERSION = 1.3.1
|
||||
VERSION = 1.4.0
|
||||
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
||||
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
|
||||
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
|
||||
@@ -22,11 +22,21 @@ SVELTE_COMPONENTS = status \
|
||||
header
|
||||
|
||||
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
|
||||
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
|
||||
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/analysis/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/header/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/job/*.svelte)
|
||||
|
||||
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ type Job {
|
||||
numNodes: Int!
|
||||
numHWThreads: Int!
|
||||
numAcc: Int!
|
||||
energy: Float!
|
||||
SMT: Int!
|
||||
exclusive: Int!
|
||||
partition: String!
|
||||
@@ -27,12 +28,8 @@ type Job {
|
||||
tags: [Tag!]!
|
||||
resources: [Resource!]!
|
||||
concurrentJobs: JobLinkResultList
|
||||
|
||||
memUsedMax: Float
|
||||
flopsAnyAvg: Float
|
||||
memBwAvg: Float
|
||||
loadAvg: Float
|
||||
|
||||
footprint: [FootprintValue]
|
||||
energyFootprint: [EnergyFootprintValue]
|
||||
metaData: Any
|
||||
userData: User
|
||||
}
|
||||
@@ -45,7 +42,6 @@ type JobLink {
|
||||
type Cluster {
|
||||
name: String!
|
||||
partitions: [String!]! # Slurm partitions
|
||||
metricConfig: [MetricConfig!]!
|
||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||
}
|
||||
|
||||
@@ -61,9 +57,24 @@ type SubCluster {
|
||||
flopRateSimd: MetricValue!
|
||||
memoryBandwidth: MetricValue!
|
||||
topology: Topology!
|
||||
metricConfig: [MetricConfig!]!
|
||||
footprint: [String!]!
|
||||
}
|
||||
|
||||
type FootprintValue {
|
||||
name: String!
|
||||
stat: String!
|
||||
value: Float!
|
||||
}
|
||||
|
||||
type EnergyFootprintValue {
|
||||
hardware: String!
|
||||
metric: String!
|
||||
value: Float!
|
||||
}
|
||||
|
||||
type MetricValue {
|
||||
name: String
|
||||
unit: Unit!
|
||||
value: Float!
|
||||
}
|
||||
@@ -102,6 +113,7 @@ type MetricConfig {
|
||||
normal: Float
|
||||
caution: Float!
|
||||
alert: Float!
|
||||
lowerIsBetter: Boolean
|
||||
subClusters: [SubClusterConfig!]!
|
||||
}
|
||||
|
||||
@@ -109,6 +121,7 @@ type Tag {
|
||||
id: ID!
|
||||
type: String!
|
||||
name: String!
|
||||
scope: String!
|
||||
}
|
||||
|
||||
type Resource {
|
||||
@@ -150,9 +163,10 @@ type MetricStatistics {
|
||||
}
|
||||
|
||||
type StatsSeries {
|
||||
mean: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
mean: [NullableFloat!]!
|
||||
median: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type MetricFootprints {
|
||||
@@ -180,6 +194,19 @@ type NodeMetrics {
|
||||
metrics: [JobMetricWithName!]!
|
||||
}
|
||||
|
||||
type ClusterSupport {
|
||||
cluster: String!
|
||||
subClusters: [String!]!
|
||||
}
|
||||
|
||||
type GlobalMetricListItem {
|
||||
name: String!
|
||||
unit: Unit!
|
||||
scope: MetricScope!
|
||||
footprint: String
|
||||
availability: [ClusterSupport!]!
|
||||
}
|
||||
|
||||
type Count {
|
||||
name: String!
|
||||
count: Int!
|
||||
@@ -191,15 +218,21 @@ type User {
|
||||
email: String!
|
||||
}
|
||||
|
||||
input MetricStatItem {
|
||||
metricName: String!
|
||||
range: FloatRange!
|
||||
}
|
||||
|
||||
type Query {
|
||||
clusters: [Cluster!]! # List of all clusters
|
||||
tags: [Tag!]! # List of all tags
|
||||
globalMetrics: [GlobalMetricListItem!]!
|
||||
|
||||
user(username: String!): User
|
||||
allocatedNodes(cluster: String!): [Count!]!
|
||||
|
||||
job(id: ID!): Job
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
|
||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||
|
||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||
@@ -211,7 +244,7 @@ type Query {
|
||||
}
|
||||
|
||||
type Mutation {
|
||||
createTag(type: String!, name: String!): Tag!
|
||||
createTag(type: String!, name: String!, scope: String!): Tag!
|
||||
deleteTag(id: ID!): ID!
|
||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
@@ -220,7 +253,7 @@ type Mutation {
|
||||
}
|
||||
|
||||
type IntRangeOutput { from: Int!, to: Int! }
|
||||
type TimeRangeOutput { from: Time!, to: Time! }
|
||||
type TimeRangeOutput { range: String, from: Time!, to: Time! }
|
||||
|
||||
input JobFilter {
|
||||
tags: [ID!]
|
||||
@@ -232,6 +265,7 @@ input JobFilter {
|
||||
cluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
energy: FloatRange
|
||||
|
||||
minRunningFor: Int
|
||||
|
||||
@@ -241,17 +275,14 @@ input JobFilter {
|
||||
|
||||
startTime: TimeRange
|
||||
state: [JobState!]
|
||||
flopsAnyAvg: FloatRange
|
||||
memBwAvg: FloatRange
|
||||
loadAvg: FloatRange
|
||||
memUsedMax: FloatRange
|
||||
|
||||
metricStats: [MetricStatItem!]
|
||||
exclusive: Int
|
||||
node: StringInput
|
||||
}
|
||||
|
||||
input OrderByInput {
|
||||
field: String!
|
||||
type: String!,
|
||||
order: SortDirectionEnum! = ASC
|
||||
}
|
||||
|
||||
@@ -269,9 +300,13 @@ input StringInput {
|
||||
in: [String!]
|
||||
}
|
||||
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
input FloatRange { from: Float!, to: Float! }
|
||||
input TimeRange { from: Time, to: Time }
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
input TimeRange { range: String, from: Time, to: Time }
|
||||
|
||||
input FloatRange {
|
||||
from: Float!
|
||||
to: Float!
|
||||
}
|
||||
|
||||
type JobResultList {
|
||||
items: [Job!]!
|
||||
@@ -295,6 +330,7 @@ type HistoPoint {
|
||||
type MetricHistoPoints {
|
||||
metric: String!
|
||||
unit: String!
|
||||
stat: String
|
||||
data: [MetricHistoPoint!]
|
||||
}
|
||||
|
||||
|
||||
33
cmd/cc-backend/cli.go
Normal file
33
cmd/cc-backend/cli.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import "flag"
|
||||
|
||||
var (
|
||||
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
|
||||
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
)
|
||||
|
||||
func cliInit() {
|
||||
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
|
||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
||||
flag.Parse()
|
||||
}
|
||||
85
cmd/cc-backend/init.go
Normal file
85
cmd/cc-backend/init.go
Normal file
@@ -0,0 +1,85 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
const envString = `
|
||||
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||
# You can generate your own keypair using the gen-keypair tool
|
||||
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||
|
||||
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||
`
|
||||
|
||||
const configString = `
|
||||
{
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "name",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2023-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
func initEnv() {
|
||||
if util.CheckFileExists("var") {
|
||||
fmt.Print("Directory ./var already exists. Exiting!\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
|
||||
log.Fatalf("Writing config.json failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
|
||||
log.Fatalf("Writing .env failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.Mkdir("var", 0o777); err != nil {
|
||||
log.Fatalf("Mkdir var failed: %s", err.Error())
|
||||
}
|
||||
|
||||
err := repository.MigrateDB("sqlite3", "./var/job.db")
|
||||
if err != nil {
|
||||
log.Fatalf("Initialize job.db failed: %s", err.Error())
|
||||
}
|
||||
}
|
||||
@@ -5,158 +5,48 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql/handler"
|
||||
"github.com/99designs/gqlgen/graphql/playground"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/taskManager"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
"github.com/go-co-op/gocron"
|
||||
"github.com/google/gops/agent"
|
||||
"github.com/gorilla/handlers"
|
||||
"github.com/gorilla/mux"
|
||||
httpSwagger "github.com/swaggo/http-swagger"
|
||||
|
||||
_ "github.com/go-sql-driver/mysql"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
const logoString = `
|
||||
____ _ _ ____ _ _ _
|
||||
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
|
||||
_____ _ _ ____ _ _ _
|
||||
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
|
||||
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
|
||||
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
|
||||
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
||||
\_____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
||||
|_|
|
||||
`
|
||||
|
||||
const envString = `
|
||||
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||
# You can generate your own keypair using the gen-keypair tool
|
||||
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||
|
||||
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||
`
|
||||
|
||||
const configString = `
|
||||
{
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "name",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2023-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
var (
|
||||
date string
|
||||
commit string
|
||||
version string
|
||||
)
|
||||
|
||||
func initEnv() {
|
||||
if util.CheckFileExists("var") {
|
||||
fmt.Print("Directory ./var already exists. Exiting!\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
|
||||
log.Fatalf("Writing config.json failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
|
||||
log.Fatalf("Writing .env failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.Mkdir("var", 0o777); err != nil {
|
||||
log.Fatalf("Mkdir var failed: %s", err.Error())
|
||||
}
|
||||
|
||||
err := repository.MigrateDB("sqlite3", "./var/job.db")
|
||||
if err != nil {
|
||||
log.Fatalf("Initialize job.db failed: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
var flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
|
||||
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
|
||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
||||
flag.Parse()
|
||||
cliInit()
|
||||
|
||||
if flagVersion {
|
||||
fmt.Print(logoString)
|
||||
@@ -171,14 +61,6 @@ func main() {
|
||||
// Apply config flags for pkg/log
|
||||
log.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
if flagInit {
|
||||
initEnv()
|
||||
fmt.Print("Succesfully setup environment!\n")
|
||||
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
|
||||
fmt.Print("Add your job-archive at ./var/job-archive.\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// See https://github.com/google/gops (Runtime overhead is almost zero)
|
||||
if flagGops {
|
||||
if err := agent.Listen(agent.Options{}); err != nil {
|
||||
@@ -227,18 +109,18 @@ func main() {
|
||||
}
|
||||
|
||||
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
||||
db := repository.GetConnection()
|
||||
|
||||
var authentication *auth.Authentication
|
||||
if flagInit {
|
||||
initEnv()
|
||||
fmt.Print("Succesfully setup environment!\n")
|
||||
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
|
||||
fmt.Print("Add your job-archive at ./var/job-archive.\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
var err error
|
||||
if authentication, err = auth.Init(); err != nil {
|
||||
log.Fatalf("auth initialization failed: %v", err)
|
||||
}
|
||||
|
||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
|
||||
authentication.SessionMaxAge = d
|
||||
}
|
||||
auth.Init()
|
||||
|
||||
if flagNewUser != "" {
|
||||
parts := strings.SplitN(flagNewUser, ":", 3)
|
||||
@@ -260,12 +142,14 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
if flagSyncLDAP {
|
||||
if authentication.LdapAuth == nil {
|
||||
if authHandle.LdapAuth == nil {
|
||||
log.Fatal("cannot sync: LDAP authentication is not configured")
|
||||
}
|
||||
|
||||
if err := authentication.LdapAuth.Sync(); err != nil {
|
||||
if err := authHandle.LdapAuth.Sync(); err != nil {
|
||||
log.Fatalf("LDAP sync failed: %v", err)
|
||||
}
|
||||
log.Info("LDAP sync successfull")
|
||||
@@ -282,7 +166,7 @@ func main() {
|
||||
log.Warnf("user '%s' does not have the API role", user.Username)
|
||||
}
|
||||
|
||||
jwt, err := authentication.JwtAuth.ProvideJWT(user)
|
||||
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
|
||||
}
|
||||
@@ -298,7 +182,7 @@ func main() {
|
||||
log.Fatalf("failed to initialize archive: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
||||
if err := metricdata.Init(); err != nil {
|
||||
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
|
||||
}
|
||||
|
||||
@@ -318,228 +202,16 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
// Setup the http.Handler/Router used by the server
|
||||
jobRepo := repository.GetJobRepository()
|
||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
||||
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
if os.Getenv("DEBUG") != "1" {
|
||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||
switch e := err.(type) {
|
||||
case string:
|
||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||
case error:
|
||||
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
||||
}
|
||||
|
||||
return errors.New("MAIN > Internal server error (panic)")
|
||||
})
|
||||
}
|
||||
|
||||
api := &api.RestApi{
|
||||
JobRepository: jobRepo,
|
||||
Resolver: resolver,
|
||||
MachineStateDir: config.Keys.MachineStateDir,
|
||||
Authentication: authentication,
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
|
||||
|
||||
info := map[string]interface{}{}
|
||||
info["hasOpenIDConnect"] = false
|
||||
|
||||
if config.Keys.OpenIDConfig != nil {
|
||||
openIDConnect := auth.NewOIDC(authentication)
|
||||
openIDConnect.RegisterEndpoints(r)
|
||||
info["hasOpenIDConnect"] = true
|
||||
}
|
||||
|
||||
r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
log.Debugf("##%v##", info)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
||||
}).Methods(http.MethodGet)
|
||||
r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||
})
|
||||
r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||
})
|
||||
|
||||
secured := r.PathPrefix("/").Subrouter()
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
r.Handle("/login", authentication.Login(
|
||||
// On success:
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
})).Methods(http.MethodPost)
|
||||
|
||||
r.Handle("/jwt-login", authentication.Login(
|
||||
// On success:
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}))
|
||||
|
||||
r.Handle("/logout", authentication.Logout(
|
||||
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Bye - ClusterCockpit",
|
||||
MsgType: "alert-info",
|
||||
Message: "Logout successful",
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}))).Methods(http.MethodPost)
|
||||
|
||||
secured.Use(func(next http.Handler) http.Handler {
|
||||
return authentication.Auth(
|
||||
// On success;
|
||||
next,
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Authentication failed - ClusterCockpit",
|
||||
MsgType: "alert-danger",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
if flagDev {
|
||||
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||
}
|
||||
secured.Handle("/query", graphQLEndpoint)
|
||||
|
||||
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
||||
})
|
||||
|
||||
// Mount all /monitoring/... and /api/... routes.
|
||||
routerConfig.SetupRoutes(secured, buildInfo)
|
||||
api.MountRoutes(secured)
|
||||
|
||||
if config.Keys.EmbedStaticFiles {
|
||||
if i, err := os.Stat("./var/img"); err == nil {
|
||||
if i.IsDir() {
|
||||
log.Info("Use local directory for static images")
|
||||
r.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||
}
|
||||
}
|
||||
r.PathPrefix("/").Handler(web.ServeFiles())
|
||||
} else {
|
||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||
}
|
||||
|
||||
r.Use(handlers.CompressHandler)
|
||||
r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||
r.Use(handlers.CORS(
|
||||
handlers.AllowCredentials(),
|
||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"})))
|
||||
handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
} else {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
}
|
||||
})
|
||||
archiver.Start(repository.GetJobRepository())
|
||||
taskManager.Start()
|
||||
serverInit()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
server := http.Server{
|
||||
ReadTimeout: 10 * time.Second,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
Handler: handler,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
log.Fatalf("starting http listener failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
||||
go func() {
|
||||
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
|
||||
}()
|
||||
}
|
||||
|
||||
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
||||
if err != nil {
|
||||
log.Fatalf("loading X509 keypair failed: %v", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
CipherSuites: []uint16{
|
||||
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
},
|
||||
MinVersion: tls.VersionTLS12,
|
||||
PreferServerCipherSuites: true,
|
||||
})
|
||||
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
|
||||
} else {
|
||||
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
|
||||
}
|
||||
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actual http server can be started.
|
||||
if err = runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
log.Fatalf("error while preparing server start: %s", err.Error())
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatalf("starting server failed: %v", err)
|
||||
}
|
||||
serverStart()
|
||||
}()
|
||||
|
||||
wg.Add(1)
|
||||
@@ -550,113 +222,11 @@ func main() {
|
||||
<-sigs
|
||||
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
|
||||
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
server.Shutdown(context.Background())
|
||||
serverShutdown()
|
||||
|
||||
// Then, wait for any async archivings still pending...
|
||||
api.JobRepository.WaitForArchiving()
|
||||
taskManager.Shutdown()
|
||||
}()
|
||||
|
||||
s := gocron.NewScheduler(time.Local)
|
||||
|
||||
if config.Keys.StopJobsExceedingWalltime > 0 {
|
||||
log.Info("Register undead jobs service")
|
||||
|
||||
s.Every(1).Day().At("3:00").Do(func() {
|
||||
err = jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
||||
}
|
||||
runtime.GC()
|
||||
})
|
||||
}
|
||||
|
||||
var cfg struct {
|
||||
Retention schema.Retention `json:"retention"`
|
||||
Compression int `json:"compression"`
|
||||
}
|
||||
|
||||
cfg.Retention.IncludeDB = true
|
||||
|
||||
if err = json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
}
|
||||
|
||||
switch cfg.Retention.Policy {
|
||||
case "delete":
|
||||
log.Info("Register retention delete service")
|
||||
|
||||
s.Every(1).Day().At("4:00").Do(func() {
|
||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().CleanUp(jobs)
|
||||
|
||||
if cfg.Retention.IncludeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %s", err.Error())
|
||||
}
|
||||
}
|
||||
})
|
||||
case "move":
|
||||
log.Info("Register retention move service")
|
||||
|
||||
s.Every(1).Day().At("4:00").Do(func() {
|
||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().Move(jobs, cfg.Retention.Location)
|
||||
|
||||
if cfg.Retention.IncludeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %v", err)
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
if cfg.Compression > 0 {
|
||||
log.Info("Register compression service")
|
||||
|
||||
s.Every(1).Day().At("5:00").Do(func() {
|
||||
var jobs []*schema.Job
|
||||
|
||||
ar := archive.GetHandle()
|
||||
startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
|
||||
lastTime := ar.CompressLast(startTime)
|
||||
if startTime == lastTime {
|
||||
log.Info("Compression Service - Complete archive run")
|
||||
jobs, err = jobRepo.FindJobsBetween(0, startTime)
|
||||
|
||||
} else {
|
||||
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for compression jobs: %v", err)
|
||||
}
|
||||
ar.Compress(jobs)
|
||||
})
|
||||
}
|
||||
|
||||
s.StartAsync()
|
||||
|
||||
if os.Getenv("GOGC") == "" {
|
||||
debug.SetGCPercent(25)
|
||||
}
|
||||
|
||||
321
cmd/cc-backend/server.go
Normal file
321
cmd/cc-backend/server.go
Normal file
@@ -0,0 +1,321 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql/handler"
|
||||
"github.com/99designs/gqlgen/graphql/playground"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
"github.com/gorilla/handlers"
|
||||
"github.com/gorilla/mux"
|
||||
httpSwagger "github.com/swaggo/http-swagger"
|
||||
)
|
||||
|
||||
var (
|
||||
router *mux.Router
|
||||
server *http.Server
|
||||
apiHandle *api.RestApi
|
||||
)
|
||||
|
||||
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
json.NewEncoder(rw).Encode(map[string]string{
|
||||
"status": http.StatusText(http.StatusUnauthorized),
|
||||
"error": err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
func serverInit() {
|
||||
// Setup the http.Handler/Router used by the server
|
||||
graph.Init()
|
||||
resolver := graph.GetResolverInstance()
|
||||
graphQLEndpoint := handler.NewDefaultServer(
|
||||
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
|
||||
if os.Getenv("DEBUG") != "1" {
|
||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||
switch e := err.(type) {
|
||||
case string:
|
||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||
case error:
|
||||
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
||||
}
|
||||
|
||||
return errors.New("MAIN > Internal server error (panic)")
|
||||
})
|
||||
}
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
apiHandle = api.New()
|
||||
|
||||
router = mux.NewRouter()
|
||||
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
|
||||
|
||||
info := map[string]interface{}{}
|
||||
info["hasOpenIDConnect"] = false
|
||||
|
||||
if config.Keys.OpenIDConfig != nil {
|
||||
openIDConnect := auth.NewOIDC(authHandle)
|
||||
openIDConnect.RegisterEndpoints(router)
|
||||
info["hasOpenIDConnect"] = true
|
||||
}
|
||||
|
||||
router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
log.Debugf("##%v##", info)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
||||
}).Methods(http.MethodGet)
|
||||
router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||
})
|
||||
router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||
})
|
||||
|
||||
secured := router.PathPrefix("/").Subrouter()
|
||||
securedapi := router.PathPrefix("/api").Subrouter()
|
||||
userapi := router.PathPrefix("/userapi").Subrouter()
|
||||
configapi := router.PathPrefix("/config").Subrouter()
|
||||
frontendapi := router.PathPrefix("/frontend").Subrouter()
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
router.Handle("/login", authHandle.Login(
|
||||
// On success:
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
})).Methods(http.MethodPost)
|
||||
|
||||
router.Handle("/jwt-login", authHandle.Login(
|
||||
// On success:
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}))
|
||||
|
||||
router.Handle("/logout", authHandle.Logout(
|
||||
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Bye - ClusterCockpit",
|
||||
MsgType: "alert-info",
|
||||
Message: "Logout successful",
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}))).Methods(http.MethodPost)
|
||||
|
||||
secured.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.Auth(
|
||||
// On success;
|
||||
next,
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Authentication failed - ClusterCockpit",
|
||||
MsgType: "alert-danger",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
securedapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
|
||||
userapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthUserApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
|
||||
configapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthConfigApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
|
||||
frontendapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthFrontendApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
}
|
||||
|
||||
if flagDev {
|
||||
router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||
}
|
||||
secured.Handle("/query", graphQLEndpoint)
|
||||
|
||||
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
||||
})
|
||||
|
||||
// Mount all /monitoring/... and /api/... routes.
|
||||
routerConfig.SetupRoutes(secured, buildInfo)
|
||||
apiHandle.MountApiRoutes(securedapi)
|
||||
apiHandle.MountUserApiRoutes(userapi)
|
||||
apiHandle.MountConfigApiRoutes(configapi)
|
||||
apiHandle.MountFrontendApiRoutes(frontendapi)
|
||||
|
||||
if config.Keys.EmbedStaticFiles {
|
||||
if i, err := os.Stat("./var/img"); err == nil {
|
||||
if i.IsDir() {
|
||||
log.Info("Use local directory for static images")
|
||||
router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||
}
|
||||
}
|
||||
router.PathPrefix("/").Handler(web.ServeFiles())
|
||||
} else {
|
||||
router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||
}
|
||||
|
||||
router.Use(handlers.CompressHandler)
|
||||
router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||
router.Use(handlers.CORS(
|
||||
handlers.AllowCredentials(),
|
||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"})))
|
||||
}
|
||||
|
||||
func serverStart() {
|
||||
handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
} else {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
}
|
||||
})
|
||||
|
||||
server = &http.Server{
|
||||
ReadTimeout: 20 * time.Second,
|
||||
WriteTimeout: 20 * time.Second,
|
||||
Handler: handler,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
log.Fatalf("starting http listener failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
||||
go func() {
|
||||
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
|
||||
}()
|
||||
}
|
||||
|
||||
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(
|
||||
config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
||||
if err != nil {
|
||||
log.Fatalf("loading X509 keypair failed: %v", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
CipherSuites: []uint16{
|
||||
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
},
|
||||
MinVersion: tls.VersionTLS12,
|
||||
PreferServerCipherSuites: true,
|
||||
})
|
||||
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
|
||||
} else {
|
||||
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
|
||||
}
|
||||
//
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actual http server can be started.
|
||||
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
log.Fatalf("error while preparing server start: %s", err.Error())
|
||||
}
|
||||
|
||||
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatalf("starting server failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func serverShutdown() {
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
server.Shutdown(context.Background())
|
||||
|
||||
// Then, wait for any async archivings still pending...
|
||||
archiver.WaitForArchiving()
|
||||
}
|
||||
94
go.mod
94
go.mod
@@ -1,91 +1,89 @@
|
||||
module github.com/ClusterCockpit/cc-backend
|
||||
|
||||
go 1.18
|
||||
go 1.22
|
||||
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.17.45
|
||||
github.com/99designs/gqlgen v0.17.49
|
||||
github.com/ClusterCockpit/cc-units v0.4.0
|
||||
github.com/Masterminds/squirrel v1.5.3
|
||||
github.com/coreos/go-oidc/v3 v3.9.0
|
||||
github.com/go-co-op/gocron v1.25.0
|
||||
github.com/go-ldap/ldap/v3 v3.4.4
|
||||
github.com/go-sql-driver/mysql v1.7.0
|
||||
github.com/Masterminds/squirrel v1.5.4
|
||||
github.com/coreos/go-oidc/v3 v3.11.0
|
||||
github.com/go-co-op/gocron/v2 v2.9.0
|
||||
github.com/go-ldap/ldap/v3 v3.4.8
|
||||
github.com/go-sql-driver/mysql v1.8.1
|
||||
github.com/golang-jwt/jwt/v5 v5.2.1
|
||||
github.com/golang-migrate/migrate/v4 v4.15.2
|
||||
github.com/google/gops v0.3.27
|
||||
github.com/gorilla/handlers v1.5.1
|
||||
github.com/gorilla/mux v1.8.0
|
||||
github.com/gorilla/sessions v1.2.1
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.12.2
|
||||
github.com/jmoiron/sqlx v1.3.5
|
||||
github.com/mattn/go-sqlite3 v1.14.16
|
||||
github.com/prometheus/client_golang v1.14.0
|
||||
github.com/prometheus/common v0.40.0
|
||||
github.com/golang-migrate/migrate/v4 v4.17.1
|
||||
github.com/google/gops v0.3.28
|
||||
github.com/gorilla/handlers v1.5.2
|
||||
github.com/gorilla/mux v1.8.1
|
||||
github.com/gorilla/sessions v1.3.0
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.13.0
|
||||
github.com/jmoiron/sqlx v1.4.0
|
||||
github.com/mattn/go-sqlite3 v1.14.22
|
||||
github.com/prometheus/client_golang v1.19.1
|
||||
github.com/prometheus/common v0.55.0
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
|
||||
github.com/swaggo/http-swagger v1.3.3
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
|
||||
github.com/swaggo/http-swagger v1.3.4
|
||||
github.com/swaggo/swag v1.16.3
|
||||
github.com/vektah/gqlparser/v2 v2.5.11
|
||||
golang.org/x/crypto v0.21.0
|
||||
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
|
||||
golang.org/x/oauth2 v0.13.0
|
||||
github.com/vektah/gqlparser/v2 v2.5.16
|
||||
golang.org/x/crypto v0.25.0
|
||||
golang.org/x/exp v0.0.0-20240707233637-46b078467d37
|
||||
golang.org/x/oauth2 v0.21.0
|
||||
)
|
||||
|
||||
require (
|
||||
filippo.io/edwards25519 v1.1.0 // indirect
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
github.com/agnivade/levenshtein v1.1.1 // indirect
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/containerd/containerd v1.6.26 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
||||
github.com/deepmap/oapi-codegen v1.12.4 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.3 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
|
||||
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.0.3 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.21.0 // indirect
|
||||
github.com/go-openapi/jsonreference v0.21.0 // indirect
|
||||
github.com/go-openapi/spec v0.21.0 // indirect
|
||||
github.com/go-openapi/swag v0.23.0 // indirect
|
||||
github.com/golang/protobuf v1.5.3 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
||||
github.com/jonboulle/clockwork v0.4.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/jpillora/backoff v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
||||
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/prometheus/client_model v0.3.0 // indirect
|
||||
github.com/prometheus/procfs v0.9.0 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/sosodev/duration v1.2.0 // indirect
|
||||
github.com/swaggo/files v1.0.0 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.1 // indirect
|
||||
github.com/sosodev/duration v1.3.1 // indirect
|
||||
github.com/swaggo/files v1.0.1 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.2 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
|
||||
go.uber.org/atomic v1.10.0 // indirect
|
||||
golang.org/x/mod v0.16.0 // indirect
|
||||
golang.org/x/net v0.22.0 // indirect
|
||||
golang.org/x/sys v0.18.0 // indirect
|
||||
golang.org/x/text v0.14.0 // indirect
|
||||
golang.org/x/tools v0.19.0 // indirect
|
||||
google.golang.org/appengine v1.6.8 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect
|
||||
google.golang.org/protobuf v1.33.0 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
golang.org/x/mod v0.19.0 // indirect
|
||||
golang.org/x/net v0.27.0 // indirect
|
||||
golang.org/x/sync v0.7.0 // indirect
|
||||
golang.org/x/sys v0.22.0 // indirect
|
||||
golang.org/x/text v0.16.0 // indirect
|
||||
golang.org/x/tools v0.23.0 // indirect
|
||||
google.golang.org/protobuf v1.34.2 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
sigs.k8s.io/yaml v1.4.0 // indirect
|
||||
|
||||
61
gqlgen.yml
61
gqlgen.yml
@@ -61,23 +61,50 @@ models:
|
||||
fields:
|
||||
partitions:
|
||||
resolver: true
|
||||
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
||||
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
||||
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
||||
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
||||
NullableFloat:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
||||
MetricScope:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
||||
MetricValue:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
||||
JobStatistics:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
||||
GlobalMetricListItem:
|
||||
{
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
|
||||
}
|
||||
ClusterSupport:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
|
||||
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
|
||||
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
||||
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
||||
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
||||
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
||||
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
||||
Resource:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
||||
JobState:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
||||
TimeRange:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
||||
IntRange:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
||||
JobMetric:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
||||
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
|
||||
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
|
||||
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
||||
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
|
||||
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
||||
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
||||
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
||||
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
||||
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
||||
MetricStatistics:
|
||||
{
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
|
||||
}
|
||||
MetricConfig:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
||||
SubClusterConfig:
|
||||
{
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
|
||||
}
|
||||
Accelerator:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
||||
Topology:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
||||
FilterRanges:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
||||
SubCluster:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
||||
StatsSeries:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
||||
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
|
||||
|
||||
@@ -19,8 +19,11 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -144,23 +147,20 @@ func setup(t *testing.T) *api.RestApi {
|
||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||
|
||||
repository.Connect("sqlite3", dbfilepath)
|
||||
db := repository.GetConnection()
|
||||
|
||||
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
||||
if err := metricdata.Init(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
jobRepo := repository.GetJobRepository()
|
||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
||||
archiver.Start(repository.GetJobRepository())
|
||||
auth.Init()
|
||||
graph.Init()
|
||||
|
||||
return &api.RestApi{
|
||||
JobRepository: resolver.Repo,
|
||||
Resolver: resolver,
|
||||
}
|
||||
return api.New()
|
||||
}
|
||||
|
||||
func cleanup() {
|
||||
@@ -175,7 +175,6 @@ func cleanup() {
|
||||
func TestRestApi(t *testing.T) {
|
||||
restapi := setup(t)
|
||||
t.Cleanup(cleanup)
|
||||
|
||||
testData := schema.JobData{
|
||||
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
||||
schema.MetricScopeNode: {
|
||||
@@ -192,12 +191,14 @@ func TestRestApi(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||
return testData, nil
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
restapi.MountRoutes(r)
|
||||
r.PathPrefix("/api").Subrouter()
|
||||
r.StrictSlash(true)
|
||||
restapi.MountApiRoutes(r)
|
||||
|
||||
const startJobBody string = `{
|
||||
"jobId": 123,
|
||||
@@ -213,7 +214,7 @@ func TestRestApi(t *testing.T) {
|
||||
"exclusive": 1,
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"tags": [{ "type": "testTagType", "name": "testTagName" }],
|
||||
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
@@ -225,11 +226,22 @@ func TestRestApi(t *testing.T) {
|
||||
}`
|
||||
|
||||
var dbid int64
|
||||
const contextUserKey repository.ContextKey = "user"
|
||||
contextUserValue := &schema.User{
|
||||
Username: "testuser",
|
||||
Projects: make([]string, 0),
|
||||
Roles: []string{"user"},
|
||||
AuthType: 0,
|
||||
AuthSource: 2,
|
||||
}
|
||||
|
||||
if ok := t.Run("StartJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusCreated {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
@@ -240,12 +252,13 @@ func TestRestApi(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
|
||||
resolver := graph.GetResolverInstance()
|
||||
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(res.DBID)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
|
||||
job.Tags, err = resolver.Job().Tags(ctx, job)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -269,7 +282,7 @@ func TestRestApi(t *testing.T) {
|
||||
t.Fatalf("unexpected job properties: %#v", job)
|
||||
}
|
||||
|
||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
|
||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
|
||||
t.Fatalf("unexpected tags: %#v", job.Tags)
|
||||
}
|
||||
|
||||
@@ -289,17 +302,20 @@ func TestRestApi(t *testing.T) {
|
||||
|
||||
var stoppedJob *schema.Job
|
||||
if ok := t.Run("StopJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
restapi.JobRepository.WaitForArchiving()
|
||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
|
||||
archiver.WaitForArchiving()
|
||||
resolver := graph.GetResolverInstance()
|
||||
job, err := resolver.Query().Job(ctx, strconv.Itoa(int(dbid)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -327,7 +343,7 @@ func TestRestApi(t *testing.T) {
|
||||
}
|
||||
|
||||
t.Run("CheckArchive", func(t *testing.T) {
|
||||
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
|
||||
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -341,10 +357,12 @@ func TestRestApi(t *testing.T) {
|
||||
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
|
||||
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusUnprocessableEntity {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
@@ -371,10 +389,12 @@ func TestRestApi(t *testing.T) {
|
||||
}`
|
||||
|
||||
ok := t.Run("StartJobFailed", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusCreated {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
@@ -393,16 +413,18 @@ func TestRestApi(t *testing.T) {
|
||||
}`
|
||||
|
||||
ok = t.Run("StopJobFailed", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
restapi.JobRepository.WaitForArchiving()
|
||||
archiver.WaitForArchiving()
|
||||
jobid, cluster := int64(12345), "testcluster"
|
||||
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
||||
if err != nil {
|
||||
|
||||
@@ -19,12 +19,13 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -53,14 +54,20 @@ import (
|
||||
|
||||
type RestApi struct {
|
||||
JobRepository *repository.JobRepository
|
||||
Resolver *graph.Resolver
|
||||
Authentication *auth.Authentication
|
||||
MachineStateDir string
|
||||
RepositoryMutex sync.Mutex
|
||||
}
|
||||
|
||||
func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||
r = r.PathPrefix("/api").Subrouter()
|
||||
func New() *RestApi {
|
||||
return &RestApi{
|
||||
JobRepository: repository.GetJobRepository(),
|
||||
MachineStateDir: config.Keys.MachineStateDir,
|
||||
Authentication: auth.GetAuthInstance(),
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) MountApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
||||
@@ -84,14 +91,34 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
|
||||
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
|
||||
}
|
||||
|
||||
func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
if api.Authentication != nil {
|
||||
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
|
||||
r.HandleFunc("/roles/", api.getRoles).Methods(http.MethodGet)
|
||||
r.HandleFunc("/users/", api.createUser).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/users/", api.getUsers).Methods(http.MethodGet)
|
||||
r.HandleFunc("/users/", api.deleteUser).Methods(http.MethodDelete)
|
||||
r.HandleFunc("/user/{id}", api.updateUser).Methods(http.MethodPost)
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
if api.Authentication != nil {
|
||||
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
|
||||
r.HandleFunc("/configuration/", api.updateConfiguration).Methods(http.MethodPost)
|
||||
}
|
||||
}
|
||||
@@ -150,8 +177,9 @@ type ErrorResponse struct {
|
||||
// ApiTag model
|
||||
type ApiTag struct {
|
||||
// Tag Type
|
||||
Type string `json:"type" example:"Debug"`
|
||||
Name string `json:"name" example:"Testjob"` // Tag Name
|
||||
Type string `json:"type" example:"Debug"`
|
||||
Name string `json:"name" example:"Testjob"` // Tag Name
|
||||
Scope string `json:"scope" example:"global"` // Tag Scope for Frontend Display
|
||||
}
|
||||
|
||||
// ApiMeta model
|
||||
@@ -311,13 +339,6 @@ func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/ [get]
|
||||
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
withMetadata := false
|
||||
filter := &model.JobFilter{}
|
||||
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
|
||||
@@ -400,7 +421,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
StartTime: job.StartTime.Unix(),
|
||||
}
|
||||
|
||||
res.Tags, err = api.JobRepository.GetTags(&job.ID)
|
||||
res.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -434,7 +455,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
// getJobById godoc
|
||||
// getCompleteJobById godoc
|
||||
// @summary Get job meta and optional all metric data
|
||||
// @tags Job query
|
||||
// @description Job to get is specified by database ID
|
||||
@@ -452,14 +473,6 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/{id} [get]
|
||||
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v",
|
||||
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch job from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var job *schema.Job
|
||||
@@ -471,7 +484,7 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.FindById(id)
|
||||
job, err = api.JobRepository.FindById(r.Context(), id) // Get Job from Repo by ID
|
||||
} else {
|
||||
handleError(fmt.Errorf("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
@@ -481,7 +494,7 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(&job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -503,8 +516,15 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
||||
|
||||
var data schema.JobData
|
||||
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
resolution := 0
|
||||
|
||||
for _, mc := range metricConfigs {
|
||||
resolution = max(resolution, mc.Timestep)
|
||||
}
|
||||
|
||||
if r.URL.Query().Get("all-metrics") == "true" {
|
||||
data, err = metricdata.LoadData(job, nil, scopes, r.Context())
|
||||
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
|
||||
if err != nil {
|
||||
log.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
@@ -546,14 +566,6 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/{id} [post]
|
||||
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v",
|
||||
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch job from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var job *schema.Job
|
||||
@@ -565,7 +577,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.FindById(id)
|
||||
job, err = api.JobRepository.FindById(r.Context(), id)
|
||||
} else {
|
||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
@@ -575,7 +587,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(&job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -601,7 +613,14 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
scopes = []schema.MetricScope{"node"}
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadData(job, metrics, scopes, r.Context())
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
resolution := 0
|
||||
|
||||
for _, mc := range metricConfigs {
|
||||
resolution = max(resolution, mc.Timestep)
|
||||
}
|
||||
|
||||
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
|
||||
if err != nil {
|
||||
log.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
@@ -651,19 +670,13 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/edit_meta/{id} [post]
|
||||
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
job, err := api.JobRepository.FindById(iid)
|
||||
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
return
|
||||
@@ -689,6 +702,7 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||
// @summary Adds one or more tags to a job
|
||||
// @tags Job add and modify
|
||||
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
||||
// @description Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
|
||||
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
|
||||
// @accept json
|
||||
// @produce json
|
||||
@@ -702,26 +716,19 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/tag_job/{id} [post]
|
||||
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
job, err := api.JobRepository.FindById(iid)
|
||||
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(&job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(r.Context(), &job.ID)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
@@ -734,16 +741,17 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
for _, tag := range req {
|
||||
tagId, err := api.JobRepository.AddTagOrCreate(job.ID, tag.Type, tag.Name)
|
||||
tagId, err := api.JobRepository.AddTagOrCreate(r.Context(), job.ID, tag.Type, tag.Name, tag.Scope)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags = append(job.Tags, &schema.Tag{
|
||||
ID: tagId,
|
||||
Type: tag.Type,
|
||||
Name: tag.Name,
|
||||
ID: tagId,
|
||||
Type: tag.Type,
|
||||
Name: tag.Name,
|
||||
Scope: tag.Scope,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -769,13 +777,6 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/start_job/ [post]
|
||||
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
req := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||
@@ -818,7 +819,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||
|
||||
for _, tag := range req.Tags {
|
||||
if _, err := api.JobRepository.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
||||
if _, err := api.JobRepository.AddTagOrCreate(r.Context(), id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -852,13 +853,6 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/stop_job/{id} [post]
|
||||
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse request body: Only StopTime and State
|
||||
req := StopJobApiRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
@@ -877,7 +871,7 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.FindById(id)
|
||||
job, err = api.JobRepository.FindById(r.Context(), id)
|
||||
} else {
|
||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
@@ -907,13 +901,6 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/stop_job/ [post]
|
||||
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse request body
|
||||
req := StopJobApiRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
@@ -931,7 +918,6 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
// log.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
|
||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
@@ -956,11 +942,6 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/delete_job/{id} [delete]
|
||||
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch job (that will be stopped) from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var err error
|
||||
@@ -1004,12 +985,6 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/delete_job/ [delete]
|
||||
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse request body
|
||||
req := DeleteJobApiRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
@@ -1026,7 +1001,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
@@ -1061,11 +1035,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/delete_job_before/{ts} [delete]
|
||||
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
var cnt int
|
||||
// Fetch job (that will be stopped) from db
|
||||
id, ok := mux.Vars(r)["ts"]
|
||||
@@ -1131,7 +1100,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
api.JobRepository.TriggerArchiving(job)
|
||||
archiver.TriggerArchiving(job)
|
||||
}
|
||||
|
||||
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
@@ -1159,7 +1128,8 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
} `json:"error"`
|
||||
}
|
||||
|
||||
data, err := api.Resolver.Query().JobMetrics(r.Context(), id, metrics, scopes)
|
||||
resolver := graph.GetResolverInstance()
|
||||
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||
if err != nil {
|
||||
json.NewEncoder(rw).Encode(Respone{
|
||||
Error: &struct {
|
||||
|
||||
94
internal/archiver/archiveWorker.go
Normal file
94
internal/archiver/archiveWorker.go
Normal file
@@ -0,0 +1,94 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
var (
|
||||
archivePending sync.WaitGroup
|
||||
archiveChannel chan *schema.Job
|
||||
jobRepo *repository.JobRepository
|
||||
)
|
||||
|
||||
func Start(r *repository.JobRepository) {
|
||||
archiveChannel = make(chan *schema.Job, 128)
|
||||
jobRepo = r
|
||||
|
||||
go archivingWorker()
|
||||
}
|
||||
|
||||
// Archiving worker thread
|
||||
func archivingWorker() {
|
||||
for {
|
||||
select {
|
||||
case job, ok := <-archiveChannel:
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
start := time.Now()
|
||||
// not using meta data, called to load JobMeta into Cache?
|
||||
// will fail if job meta not in repository
|
||||
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||
// TODO: Maybe use context with cancel/timeout here
|
||||
jobMeta, err := ArchiveJob(job, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
stmt := sq.Update("job").Where("job.id = ?", job.ID)
|
||||
|
||||
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
// Update the jobs database entry one last time:
|
||||
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
|
||||
if err := jobRepo.Execute(stmt); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||
archivePending.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
func TriggerArchiving(job *schema.Job) {
|
||||
if archiveChannel == nil {
|
||||
log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
|
||||
}
|
||||
|
||||
archivePending.Add(1)
|
||||
archiveChannel <- job
|
||||
}
|
||||
|
||||
// Wait for background thread to finish pending archiving operations
|
||||
func WaitForArchiving() {
|
||||
// close channel and wait for worker to process remaining jobs
|
||||
archivePending.Wait()
|
||||
}
|
||||
82
internal/archiver/archiver.go
Normal file
82
internal/archiver/archiver.go
Normal file
@@ -0,0 +1,82 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
// FIXME: Add a config option for this
|
||||
if job.NumNodes <= 8 {
|
||||
// This will add the native scope if core scope is not available
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
|
||||
if job.NumAcc > 0 {
|
||||
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||
}
|
||||
|
||||
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
|
||||
if err != nil {
|
||||
log.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// This should never happen ?
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
},
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
// If the file based archive is disabled,
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if config.Keys.DisableArchive {
|
||||
return jobMeta, nil
|
||||
}
|
||||
|
||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
||||
}
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"errors"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
@@ -26,6 +27,11 @@ type Authenticator interface {
|
||||
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
||||
}
|
||||
|
||||
var (
|
||||
initOnce sync.Once
|
||||
authInstance *Authentication
|
||||
)
|
||||
|
||||
type Authentication struct {
|
||||
sessionStore *sessions.CookieStore
|
||||
LdapAuth *LdapAuthenticator
|
||||
@@ -62,82 +68,111 @@ func (auth *Authentication) AuthViaSession(
|
||||
}, nil
|
||||
}
|
||||
|
||||
func Init() (*Authentication, error) {
|
||||
auth := &Authentication{}
|
||||
func Init() {
|
||||
initOnce.Do(func() {
|
||||
authInstance = &Authentication{}
|
||||
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing authentication -> decoding session key failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
}
|
||||
|
||||
if config.Keys.LdapConfig != nil {
|
||||
ldapAuth := &LdapAuthenticator{}
|
||||
if err := ldapAuth.Init(); err != nil {
|
||||
log.Warn("Error while initializing authentication -> ldapAuth init failed")
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||
}
|
||||
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
auth.LdapAuth = ldapAuth
|
||||
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
|
||||
}
|
||||
} else {
|
||||
log.Info("Missing LDAP configuration: No LDAP support!")
|
||||
}
|
||||
|
||||
if config.Keys.JwtConfig != nil {
|
||||
auth.JwtAuth = &JWTAuthenticator{}
|
||||
if err := auth.JwtAuth.Init(); err != nil {
|
||||
log.Error("Error while initializing authentication -> jwtAuth init failed")
|
||||
return nil, err
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
log.Fatal("Error while initializing authentication -> decoding session key failed")
|
||||
}
|
||||
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||
}
|
||||
|
||||
jwtSessionAuth := &JWTSessionAuthenticator{}
|
||||
if err := jwtSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtSessionAuth init failed: No JWT login support!")
|
||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
|
||||
authInstance.SessionMaxAge = d
|
||||
}
|
||||
|
||||
if config.Keys.LdapConfig != nil {
|
||||
ldapAuth := &LdapAuthenticator{}
|
||||
if err := ldapAuth.Init(); err != nil {
|
||||
log.Warn("Error while initializing authentication -> ldapAuth init failed")
|
||||
} else {
|
||||
authInstance.LdapAuth = ldapAuth
|
||||
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
|
||||
}
|
||||
} else {
|
||||
auth.authenticators = append(auth.authenticators, jwtSessionAuth)
|
||||
log.Info("Missing LDAP configuration: No LDAP support!")
|
||||
}
|
||||
|
||||
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
||||
if err := jwtCookieSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
||||
if config.Keys.JwtConfig != nil {
|
||||
authInstance.JwtAuth = &JWTAuthenticator{}
|
||||
if err := authInstance.JwtAuth.Init(); err != nil {
|
||||
log.Fatal("Error while initializing authentication -> jwtAuth init failed")
|
||||
}
|
||||
|
||||
jwtSessionAuth := &JWTSessionAuthenticator{}
|
||||
if err := jwtSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtSessionAuth init failed: No JWT login support!")
|
||||
} else {
|
||||
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
|
||||
}
|
||||
|
||||
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
||||
if err := jwtCookieSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
||||
} else {
|
||||
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
|
||||
}
|
||||
} else {
|
||||
auth.authenticators = append(auth.authenticators, jwtCookieSessionAuth)
|
||||
log.Info("Missing JWT configuration: No JWT token support!")
|
||||
}
|
||||
} else {
|
||||
log.Info("Missing JWT configuration: No JWT token support!")
|
||||
}
|
||||
|
||||
auth.LocalAuth = &LocalAuthenticator{}
|
||||
if err := auth.LocalAuth.Init(); err != nil {
|
||||
log.Error("Error while initializing authentication -> localAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
|
||||
|
||||
return auth, nil
|
||||
authInstance.LocalAuth = &LocalAuthenticator{}
|
||||
if err := authInstance.LocalAuth.Init(); err != nil {
|
||||
log.Fatal("Error while initializing authentication -> localAuth init failed")
|
||||
}
|
||||
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
|
||||
})
|
||||
}
|
||||
|
||||
func persistUser(user *schema.User) {
|
||||
func GetAuthInstance() *Authentication {
|
||||
if authInstance == nil {
|
||||
log.Fatal("Authentication module not initialized!")
|
||||
}
|
||||
|
||||
return authInstance
|
||||
}
|
||||
|
||||
func handleTokenUser(tokenUser *schema.User) {
|
||||
r := repository.GetUserRepository()
|
||||
_, err := r.GetUser(user.Username)
|
||||
dbUser, err := r.GetUser(tokenUser.Username)
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
log.Errorf("Error while loading user '%s': %v", user.Username, err)
|
||||
} else if err == sql.ErrNoRows {
|
||||
if err := r.AddUser(user); err != nil {
|
||||
log.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
|
||||
log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
|
||||
} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
|
||||
if err := r.AddUser(tokenUser); err != nil {
|
||||
log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
|
||||
}
|
||||
} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
|
||||
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
|
||||
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleOIDCUser(OIDCUser *schema.User) {
|
||||
r := repository.GetUserRepository()
|
||||
dbUser, err := r.GetUser(OIDCUser.Username)
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
|
||||
} else if err == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
|
||||
if err := r.AddUser(OIDCUser); err != nil {
|
||||
log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
|
||||
}
|
||||
} else if err == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
|
||||
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
|
||||
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -219,31 +254,141 @@ func (auth *Authentication) Auth(
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("authentication failed: %s", err.Error())
|
||||
log.Infof("auth -> authentication failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
if user == nil {
|
||||
user, err = auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("authentication failed: %s", err.Error())
|
||||
log.Infof("auth -> authentication failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if user != nil {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("authentication failed")
|
||||
log.Info("auth -> authentication failed")
|
||||
onfailure(rw, r, errors.New("unauthorized (please login first)"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
switch {
|
||||
case len(user.Roles) == 1:
|
||||
if user.HasRole(schema.RoleApi) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
case len(user.Roles) >= 2:
|
||||
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
default:
|
||||
log.Info("auth api -> authentication failed: missing role")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
}
|
||||
}
|
||||
log.Info("auth api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthUserApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth user api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
switch {
|
||||
case len(user.Roles) == 1:
|
||||
if user.HasRole(schema.RoleApi) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
case len(user.Roles) >= 2:
|
||||
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
default:
|
||||
log.Info("auth user api -> authentication failed: missing role")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
}
|
||||
}
|
||||
log.Info("auth user api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthConfigApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth config api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil && user.HasRole(schema.RoleAdmin) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
log.Info("auth config api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthFrontendApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth frontend api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
log.Info("auth frontend api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
session, err := auth.sessionStore.Get(r, "session")
|
||||
|
||||
@@ -198,8 +198,8 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
||||
AuthSource: schema.AuthViaToken,
|
||||
}
|
||||
|
||||
if jc.SyncUserOnLogin {
|
||||
persistUser(user)
|
||||
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -138,8 +138,8 @@ func (ja *JWTSessionAuthenticator) Login(
|
||||
AuthSource: schema.AuthViaToken,
|
||||
}
|
||||
|
||||
if config.Keys.JwtConfig.SyncUserOnLogin {
|
||||
persistUser(user)
|
||||
if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
@@ -34,33 +33,6 @@ func (la *LdapAuthenticator) Init() error {
|
||||
|
||||
lc := config.Keys.LdapConfig
|
||||
|
||||
if lc.SyncInterval != "" {
|
||||
interval, err := time.ParseDuration(lc.SyncInterval)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v",
|
||||
lc.SyncInterval)
|
||||
return err
|
||||
}
|
||||
|
||||
if interval == 0 {
|
||||
log.Info("Sync interval is zero")
|
||||
return nil
|
||||
}
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(interval)
|
||||
for t := range ticker.C {
|
||||
log.Printf("sync started at %s", t.Format(time.RFC3339))
|
||||
if err := la.Sync(); err != nil {
|
||||
log.Errorf("sync failed: %s", err.Error())
|
||||
}
|
||||
log.Print("sync done")
|
||||
}
|
||||
}()
|
||||
} else {
|
||||
log.Info("LDAP configuration key sync_interval invalid")
|
||||
}
|
||||
|
||||
if lc.UserAttr != "" {
|
||||
la.UserAttr = lc.UserAttr
|
||||
} else {
|
||||
|
||||
@@ -168,8 +168,8 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
|
||||
AuthSource: schema.AuthViaOIDC,
|
||||
}
|
||||
|
||||
if config.Keys.OpenIDConfig.SyncUserOnLogin {
|
||||
persistUser(user)
|
||||
if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
|
||||
handleOIDCUser(user)
|
||||
}
|
||||
|
||||
oa.authentication.SaveSession(rw, r, user)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -16,11 +16,23 @@ type Count struct {
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
type EnergyFootprintValue struct {
|
||||
Hardware string `json:"hardware"`
|
||||
Metric string `json:"metric"`
|
||||
Value float64 `json:"value"`
|
||||
}
|
||||
|
||||
type FloatRange struct {
|
||||
From float64 `json:"from"`
|
||||
To float64 `json:"to"`
|
||||
}
|
||||
|
||||
type FootprintValue struct {
|
||||
Name string `json:"name"`
|
||||
Stat string `json:"stat"`
|
||||
Value float64 `json:"value"`
|
||||
}
|
||||
|
||||
type Footprints struct {
|
||||
TimeWeights *TimeWeights `json:"timeWeights"`
|
||||
Metrics []*MetricFootprints `json:"metrics"`
|
||||
@@ -46,16 +58,14 @@ type JobFilter struct {
|
||||
Cluster *StringInput `json:"cluster,omitempty"`
|
||||
Partition *StringInput `json:"partition,omitempty"`
|
||||
Duration *schema.IntRange `json:"duration,omitempty"`
|
||||
Energy *FloatRange `json:"energy,omitempty"`
|
||||
MinRunningFor *int `json:"minRunningFor,omitempty"`
|
||||
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
|
||||
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
|
||||
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
|
||||
StartTime *schema.TimeRange `json:"startTime,omitempty"`
|
||||
State []schema.JobState `json:"state,omitempty"`
|
||||
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg,omitempty"`
|
||||
MemBwAvg *FloatRange `json:"memBwAvg,omitempty"`
|
||||
LoadAvg *FloatRange `json:"loadAvg,omitempty"`
|
||||
MemUsedMax *FloatRange `json:"memUsedMax,omitempty"`
|
||||
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
|
||||
Exclusive *int `json:"exclusive,omitempty"`
|
||||
Node *StringInput `json:"node,omitempty"`
|
||||
}
|
||||
@@ -120,9 +130,15 @@ type MetricHistoPoint struct {
|
||||
type MetricHistoPoints struct {
|
||||
Metric string `json:"metric"`
|
||||
Unit string `json:"unit"`
|
||||
Stat *string `json:"stat,omitempty"`
|
||||
Data []*MetricHistoPoint `json:"data,omitempty"`
|
||||
}
|
||||
|
||||
type MetricStatItem struct {
|
||||
MetricName string `json:"metricName"`
|
||||
Range *FloatRange `json:"range"`
|
||||
}
|
||||
|
||||
type Mutation struct {
|
||||
}
|
||||
|
||||
@@ -134,6 +150,7 @@ type NodeMetrics struct {
|
||||
|
||||
type OrderByInput struct {
|
||||
Field string `json:"field"`
|
||||
Type string `json:"type"`
|
||||
Order SortDirectionEnum `json:"order"`
|
||||
}
|
||||
|
||||
@@ -155,8 +172,9 @@ type StringInput struct {
|
||||
}
|
||||
|
||||
type TimeRangeOutput struct {
|
||||
From time.Time `json:"from"`
|
||||
To time.Time `json:"to"`
|
||||
Range *string `json:"range,omitempty"`
|
||||
From time.Time `json:"from"`
|
||||
To time.Time `json:"to"`
|
||||
}
|
||||
|
||||
type TimeWeights struct {
|
||||
|
||||
@@ -1,15 +1,39 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
// This file will not be regenerated automatically.
|
||||
//
|
||||
// It serves as dependency injection for your app, add any dependencies you require here.
|
||||
var (
|
||||
initOnce sync.Once
|
||||
resolverInstance *Resolver
|
||||
)
|
||||
|
||||
type Resolver struct {
|
||||
DB *sqlx.DB
|
||||
Repo *repository.JobRepository
|
||||
}
|
||||
|
||||
func Init() {
|
||||
initOnce.Do(func() {
|
||||
db := repository.GetConnection()
|
||||
resolverInstance = &Resolver{
|
||||
DB: db.DB, Repo: repository.GetJobRepository(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func GetResolverInstance() *Resolver {
|
||||
if resolverInstance == nil {
|
||||
log.Fatal("Authentication module not initialized!")
|
||||
}
|
||||
|
||||
return resolverInstance
|
||||
}
|
||||
|
||||
@@ -2,19 +2,22 @@ package graph
|
||||
|
||||
// This file will be automatically regenerated based on the schema, any resolver implementations
|
||||
// will be copied through when generating and any unknown code will be moved to the end.
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.45
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.49
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
@@ -28,7 +31,7 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
||||
|
||||
// Tags is the resolver for the tags field.
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(&obj.ID)
|
||||
return r.Repo.GetTags(ctx, &obj.ID)
|
||||
}
|
||||
|
||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||
@@ -44,8 +47,72 @@ func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*mod
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Footprint is the resolver for the footprint field.
|
||||
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
|
||||
rawFootprint, err := r.Repo.FetchFootprint(obj)
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching job footprint data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []*model.FootprintValue{}
|
||||
for name, value := range rawFootprint {
|
||||
|
||||
parts := strings.Split(name, "_")
|
||||
statPart := parts[len(parts)-1]
|
||||
nameParts := parts[:len(parts)-1]
|
||||
|
||||
res = append(res, &model.FootprintValue{
|
||||
Name: strings.Join(nameParts, "_"),
|
||||
Stat: statPart,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
|
||||
return res, err
|
||||
}
|
||||
|
||||
// EnergyFootprint is the resolver for the energyFootprint field.
|
||||
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
|
||||
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching job energy footprint data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []*model.EnergyFootprintValue{}
|
||||
for name, value := range rawEnergyFootprint {
|
||||
// Suboptimal: Nearly hardcoded metric name expectations
|
||||
matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
|
||||
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
|
||||
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
|
||||
matchCore := regexp.MustCompile(`core|Core|CORE`)
|
||||
|
||||
hwType := ""
|
||||
switch test := name; { // NOtice ';' for var declaration
|
||||
case matchCpu.MatchString(test):
|
||||
hwType = "CPU"
|
||||
case matchAcc.MatchString(test):
|
||||
hwType = "Accelerator"
|
||||
case matchMem.MatchString(test):
|
||||
hwType = "Memory"
|
||||
case matchCore.MatchString(test):
|
||||
hwType = "Core"
|
||||
default:
|
||||
hwType = "Other"
|
||||
}
|
||||
|
||||
res = append(res, &model.EnergyFootprintValue{
|
||||
Hardware: hwType,
|
||||
Metric: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
return res, err
|
||||
}
|
||||
|
||||
// MetaData is the resolver for the metaData field.
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
|
||||
return r.Repo.FetchMetadata(obj)
|
||||
}
|
||||
|
||||
@@ -54,15 +121,20 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
|
||||
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
|
||||
}
|
||||
|
||||
// Name is the resolver for the name field.
|
||||
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
|
||||
panic(fmt.Errorf("not implemented: Name - name"))
|
||||
}
|
||||
|
||||
// CreateTag is the resolver for the createTag field.
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
||||
id, err := r.Repo.CreateTag(typeArg, name)
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
|
||||
id, err := r.Repo.CreateTag(typeArg, name, scope)
|
||||
if err != nil {
|
||||
log.Warn("Error while creating tag")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
|
||||
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
||||
}
|
||||
|
||||
// DeleteTag is the resolver for the deleteTag field.
|
||||
@@ -72,6 +144,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
|
||||
|
||||
// AddTagsToJob is the resolver for the addTagsToJob field.
|
||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
// Selectable Tags Pre-Filtered by Scope in Frontend: No backend check required
|
||||
jid, err := strconv.ParseInt(job, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while adding tag to job")
|
||||
@@ -86,7 +159,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
|
||||
if tags, err = r.Repo.AddTag(ctx, jid, tid); err != nil {
|
||||
log.Warn("Error while adding tag")
|
||||
return nil, err
|
||||
}
|
||||
@@ -97,6 +170,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
|
||||
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
|
||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
// Removable Tags Pre-Filtered by Scope in Frontend: No backend check required
|
||||
jid, err := strconv.ParseInt(job, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing job id")
|
||||
@@ -111,7 +185,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
|
||||
if tags, err = r.Repo.RemoveTag(ctx, jid, tid); err != nil {
|
||||
log.Warn("Error while removing tag")
|
||||
return nil, err
|
||||
}
|
||||
@@ -137,7 +211,12 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
|
||||
|
||||
// Tags is the resolver for the tags field.
|
||||
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(nil)
|
||||
return r.Repo.GetTags(ctx, nil)
|
||||
}
|
||||
|
||||
// GlobalMetrics is the resolver for the globalMetrics field.
|
||||
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
|
||||
return archive.GlobalMetricList, nil
|
||||
}
|
||||
|
||||
// User is the resolver for the user field.
|
||||
@@ -172,7 +251,7 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
job, err := r.Repo.FindById(numericId)
|
||||
job, err := r.Repo.FindById(ctx, numericId)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
@@ -188,14 +267,24 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
}
|
||||
|
||||
// JobMetrics is the resolver for the jobMetrics field.
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
|
||||
if resolution == nil { // Load from Config
|
||||
if config.Keys.EnableResampling != nil {
|
||||
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||
resolution = &defaultRes
|
||||
} else { // Set 0 (Loads configured metric timestep)
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
job, err := r.Query().Job(ctx, id)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying job for metrics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
|
||||
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job data")
|
||||
return nil, err
|
||||
@@ -347,7 +436,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
}
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading node data")
|
||||
return nil, err
|
||||
@@ -392,6 +481,9 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
|
||||
// Job returns generated.JobResolver implementation.
|
||||
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
||||
|
||||
// MetricValue returns generated.MetricValueResolver implementation.
|
||||
func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricValueResolver{r} }
|
||||
|
||||
// Mutation returns generated.MutationResolver implementation.
|
||||
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
||||
|
||||
@@ -403,6 +495,7 @@ func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subCluste
|
||||
|
||||
type clusterResolver struct{ *Resolver }
|
||||
type jobResolver struct{ *Resolver }
|
||||
type metricValueResolver struct{ *Resolver }
|
||||
type mutationResolver struct{ *Resolver }
|
||||
type queryResolver struct{ *Resolver }
|
||||
type subClusterResolver struct{ *Resolver }
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
|
||||
"github.com/99designs/gqlgen/graphql"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -24,8 +24,8 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
rows int, cols int,
|
||||
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
||||
|
||||
minX float64, minY float64, maxX float64, maxY float64,
|
||||
) ([][]float64, error) {
|
||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||
if err != nil {
|
||||
log.Error("Error while querying jobs for roofline")
|
||||
@@ -47,7 +47,14 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
continue
|
||||
}
|
||||
|
||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
// resolution := 0
|
||||
|
||||
// for _, mc := range metricConfigs {
|
||||
// resolution = max(resolution, mc.Timestep)
|
||||
// }
|
||||
|
||||
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
|
||||
return nil, err
|
||||
@@ -120,7 +127,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
|
||||
continue
|
||||
}
|
||||
|
||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
log.Error("Error while loading averages for footprint")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
@@ -42,8 +41,8 @@ func HandleImportFlag(flag string) error {
|
||||
}
|
||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err = dec.Decode(&jobMeta); err != nil {
|
||||
job := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err = dec.Decode(&job); err != nil {
|
||||
log.Warn("Error while decoding raw json metadata for import")
|
||||
return err
|
||||
}
|
||||
@@ -67,33 +66,32 @@ func HandleImportFlag(flag string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// checkJobData(&jobData)
|
||||
job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
|
||||
// if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
|
||||
// if err != nil {
|
||||
// log.Warn("Error while finding job in jobRepository")
|
||||
// return err
|
||||
// }
|
||||
//
|
||||
// return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist")
|
||||
// }
|
||||
//
|
||||
job := schema.Job{
|
||||
BaseJob: jobMeta.BaseJob,
|
||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
||||
StartTimeUnix: jobMeta.StartTime,
|
||||
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: Other metrics...
|
||||
job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
|
||||
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
|
||||
job.MemUsedMax = loadJobStat(&jobMeta, "mem_used")
|
||||
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
|
||||
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
|
||||
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
|
||||
job.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
job.Footprint[fp] = repository.LoadJobStat(&job, name, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job resources")
|
||||
@@ -110,7 +108,7 @@ func HandleImportFlag(flag string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
|
||||
if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
|
||||
log.Error("Error while importing job")
|
||||
return err
|
||||
}
|
||||
@@ -122,8 +120,8 @@ func HandleImportFlag(flag string) error {
|
||||
}
|
||||
|
||||
for _, tag := range job.Tags {
|
||||
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
||||
log.Error("Error while adding or creating tag")
|
||||
if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||
log.Error("Error while adding or creating tag on import")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,11 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
const (
|
||||
addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
|
||||
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
|
||||
)
|
||||
|
||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||
// repopulate them using the jobs found in `archive`.
|
||||
func InitDB() error {
|
||||
@@ -60,13 +65,30 @@ func InitDB() error {
|
||||
StartTimeUnix: jobMeta.StartTime,
|
||||
}
|
||||
|
||||
// TODO: Other metrics...
|
||||
job.LoadAvg = loadJobStat(jobMeta, "cpu_load")
|
||||
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
|
||||
job.MemUsedMax = loadJobStat(jobMeta, "mem_used")
|
||||
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
|
||||
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
|
||||
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
job.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
job.Footprint[fp] = repository.LoadJobStat(jobMeta, name, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
@@ -88,7 +110,8 @@ func InitDB() error {
|
||||
continue
|
||||
}
|
||||
|
||||
id, err := r.TransactionAdd(t, job)
|
||||
id, err := r.TransactionAddNamed(t,
|
||||
repository.NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
@@ -99,7 +122,9 @@ func InitDB() error {
|
||||
tagstr := tag.Name + ":" + tag.Type
|
||||
tagId, ok := tags[tagstr]
|
||||
if !ok {
|
||||
tagId, err = r.TransactionAddTag(t, tag)
|
||||
tagId, err = r.TransactionAdd(t,
|
||||
addTagQuery,
|
||||
tag.Name, tag.Type)
|
||||
if err != nil {
|
||||
log.Errorf("Error adding tag: %v", err)
|
||||
errorOccured++
|
||||
@@ -108,7 +133,9 @@ func InitDB() error {
|
||||
tags[tagstr] = tagId
|
||||
}
|
||||
|
||||
r.TransactionSetTag(t, id, tagId)
|
||||
r.TransactionAdd(t,
|
||||
setTagQuery,
|
||||
id, tagId)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
@@ -150,18 +177,6 @@ func SanityChecks(job *schema.BaseJob) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
if metric == "mem_used" {
|
||||
return stats.Max
|
||||
} else {
|
||||
return stats.Avg
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0
|
||||
}
|
||||
|
||||
func checkJobData(d *schema.JobData) error {
|
||||
for _, scopes := range *d {
|
||||
// var newUnit schema.Unit
|
||||
|
||||
1486
internal/importer/testdata/cluster-fritz.json
vendored
1486
internal/importer/testdata/cluster-fritz.json
vendored
File diff suppressed because it is too large
Load Diff
256
internal/metricDataDispatcher/dataLoader.go
Normal file
256
internal/metricDataDispatcher/dataLoader.go
Normal file
@@ -0,0 +1,256 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricDataDispatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/resampler"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||
|
||||
func cacheKey(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
) string {
|
||||
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
|
||||
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
|
||||
job.ID, job.State, metrics, scopes, resolution)
|
||||
}
|
||||
|
||||
// Fetches the metric data for a job.
|
||||
func LoadData(job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
) (schema.JobData, error) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
|
||||
if job.State == schema.JobStateRunning ||
|
||||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
||||
config.Keys.DisableArchive {
|
||||
|
||||
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
|
||||
if err != nil {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
}
|
||||
|
||||
if scopes == nil {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
cluster := archive.GetCluster(job.Cluster)
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
|
||||
if err != nil {
|
||||
if len(jd) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
// return err, 0, 0 // Reactivating will block archiving on one partial error
|
||||
} else {
|
||||
log.Error("Error while loading job data from metric repository")
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
size = jd.Size()
|
||||
} else {
|
||||
var jd_temp schema.JobData
|
||||
jd_temp, err = archive.GetHandle().LoadJobData(job)
|
||||
if err != nil {
|
||||
log.Error("Error while loading job data from archive")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
//Deep copy the cached archive hashmap
|
||||
jd = metricdata.DeepCopy(jd_temp)
|
||||
|
||||
//Resampling for archived data.
|
||||
//Pass the resolution from frontend here.
|
||||
for _, v := range jd {
|
||||
for _, v_ := range v {
|
||||
timestep := 0
|
||||
for i := 0; i < len(v_.Series); i += 1 {
|
||||
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
|
||||
if err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
v_.Timestep = timestep
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid sending unrequested data to the client:
|
||||
if metrics != nil || scopes != nil {
|
||||
if metrics == nil {
|
||||
metrics = make([]string, 0, len(jd))
|
||||
for k := range jd {
|
||||
metrics = append(metrics, k)
|
||||
}
|
||||
}
|
||||
|
||||
res := schema.JobData{}
|
||||
for _, metric := range metrics {
|
||||
if perscope, ok := jd[metric]; ok {
|
||||
if len(perscope) > 1 {
|
||||
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
||||
for _, scope := range scopes {
|
||||
if jm, ok := perscope[scope]; ok {
|
||||
subset[scope] = jm
|
||||
}
|
||||
}
|
||||
|
||||
if len(subset) > 0 {
|
||||
perscope = subset
|
||||
}
|
||||
}
|
||||
|
||||
res[metric] = perscope
|
||||
}
|
||||
}
|
||||
jd = res
|
||||
}
|
||||
size = jd.Size()
|
||||
}
|
||||
|
||||
ttl = 5 * time.Hour
|
||||
if job.State == schema.JobStateRunning {
|
||||
ttl = 2 * time.Minute
|
||||
}
|
||||
|
||||
// FIXME: Review: Is this really necessary or correct.
|
||||
// Note: Lines 147-170 formerly known as prepareJobData(jobData, scopes)
|
||||
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
|
||||
// to be available at the scope 'node'. If a job has a lot of nodes,
|
||||
// statisticsSeries should be available so that a min/median/max Graph can be
|
||||
// used instead of a lot of single lines.
|
||||
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
|
||||
// Existing (archived) StatsSeries can be 'min/mean/max'!
|
||||
const maxSeriesSize int = 15
|
||||
for _, scopes := range jd {
|
||||
for _, jm := range scopes {
|
||||
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
|
||||
continue
|
||||
}
|
||||
|
||||
jm.AddStatisticsSeries()
|
||||
}
|
||||
}
|
||||
|
||||
nodeScopeRequested := false
|
||||
for _, scope := range scopes {
|
||||
if scope == schema.MetricScopeNode {
|
||||
nodeScopeRequested = true
|
||||
}
|
||||
}
|
||||
|
||||
if nodeScopeRequested {
|
||||
jd.AddNodeScope("flops_any")
|
||||
jd.AddNodeScope("mem_bw")
|
||||
}
|
||||
|
||||
return jd, ttl, size
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
log.Error("Error in returned dataset")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(schema.JobData), nil
|
||||
}
|
||||
|
||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||
func LoadAverages(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
data [][]schema.Float,
|
||||
ctx context.Context,
|
||||
) error {
|
||||
if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
|
||||
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
|
||||
}
|
||||
|
||||
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
|
||||
if err != nil {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
|
||||
return err
|
||||
}
|
||||
|
||||
for i, m := range metrics {
|
||||
nodes, ok := stats[m]
|
||||
if !ok {
|
||||
data[i] = append(data[i], schema.NaN)
|
||||
continue
|
||||
}
|
||||
|
||||
sum := 0.0
|
||||
for _, node := range nodes {
|
||||
sum += node.Avg
|
||||
}
|
||||
data[i] = append(data[i], schema.Float(sum))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for the node/system view. Returns a map of nodes to a map of metrics.
|
||||
func LoadNodeData(
|
||||
cluster string,
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
repo, err := metricdata.GetMetricDataRepo(cluster)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
@@ -55,6 +55,7 @@ type ApiQuery struct {
|
||||
SubType *string `json:"subtype,omitempty"`
|
||||
Metric string `json:"metric"`
|
||||
Hostname string `json:"host"`
|
||||
Resolution int `json:"resolution"`
|
||||
TypeIds []string `json:"type-ids,omitempty"`
|
||||
SubTypeIds []string `json:"subtype-ids,omitempty"`
|
||||
Aggregate bool `json:"aggreg"`
|
||||
@@ -66,13 +67,14 @@ type ApiQueryResponse struct {
|
||||
}
|
||||
|
||||
type ApiMetricData struct {
|
||||
Error *string `json:"error"`
|
||||
Data []schema.Float `json:"data"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
Avg schema.Float `json:"avg"`
|
||||
Min schema.Float `json:"min"`
|
||||
Max schema.Float `json:"max"`
|
||||
Error *string `json:"error"`
|
||||
Data []schema.Float `json:"data"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
Resolution int `json:"resolution"`
|
||||
Avg schema.Float `json:"avg"`
|
||||
Min schema.Float `json:"min"`
|
||||
Max schema.Float `json:"max"`
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
|
||||
@@ -129,7 +131,7 @@ func (ccms *CCMetricStore) doRequest(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
|
||||
if err != nil {
|
||||
log.Warn("Error while building request body")
|
||||
return nil, err
|
||||
@@ -138,6 +140,13 @@ func (ccms *CCMetricStore) doRequest(
|
||||
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||
}
|
||||
|
||||
// versioning the cc-metric-store query API.
|
||||
// v2 = data with resampling
|
||||
// v1 = data without resampling
|
||||
q := req.URL.Query()
|
||||
q.Add("version", "v2")
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
res, err := ccms.client.Do(req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
@@ -162,8 +171,9 @@ func (ccms *CCMetricStore) LoadData(
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
) (schema.JobData, error) {
|
||||
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
|
||||
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
|
||||
if err != nil {
|
||||
log.Warn("Error while building queries")
|
||||
return nil, err
|
||||
@@ -195,11 +205,17 @@ func (ccms *CCMetricStore) LoadData(
|
||||
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
|
||||
}
|
||||
|
||||
res := row[0].Resolution
|
||||
if res == 0 {
|
||||
res = mc.Timestep
|
||||
}
|
||||
|
||||
jobMetric, ok := jobData[metric][scope]
|
||||
|
||||
if !ok {
|
||||
jobMetric = &schema.JobMetric{
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Timestep,
|
||||
Timestep: res,
|
||||
Series: make([]schema.Series, 0),
|
||||
}
|
||||
jobData[metric][scope] = jobMetric
|
||||
@@ -251,7 +267,6 @@ func (ccms *CCMetricStore) LoadData(
|
||||
/* Returns list for "partial errors" */
|
||||
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||
}
|
||||
|
||||
return jobData, nil
|
||||
}
|
||||
|
||||
@@ -267,6 +282,7 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
|
||||
assignedScope := []schema.MetricScope{}
|
||||
@@ -318,11 +334,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
}
|
||||
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
|
||||
continue
|
||||
@@ -335,11 +352,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
}
|
||||
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -348,11 +366,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
// HWThread -> HWThead
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -363,11 +382,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
||||
for _, core := range cores {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Core[core]),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Core[core]),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
}
|
||||
@@ -379,11 +399,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
||||
for _, socket := range sockets {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
}
|
||||
@@ -393,11 +414,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
// HWThread -> Node
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -407,11 +429,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
|
||||
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -421,11 +444,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
||||
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -435,11 +459,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
|
||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -449,11 +474,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
|
||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -463,11 +489,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -477,11 +504,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -490,8 +518,9 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
// Node -> Node
|
||||
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -510,7 +539,15 @@ func (ccms *CCMetricStore) LoadStats(
|
||||
metrics []string,
|
||||
ctx context.Context,
|
||||
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) // #166 Add scope shere for analysis view accelerator normalization?
|
||||
|
||||
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
// resolution := 9000
|
||||
|
||||
// for _, mc := range metricConfigs {
|
||||
// resolution = min(resolution, mc.Timestep)
|
||||
// }
|
||||
|
||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
|
||||
if err != nil {
|
||||
log.Warn("Error while building query")
|
||||
return nil, err
|
||||
@@ -588,8 +625,9 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
for _, node := range nodes {
|
||||
for _, metric := range metrics {
|
||||
req.Queries = append(req.Queries, ApiQuery{
|
||||
Hostname: node,
|
||||
Metric: ccms.toRemoteName(metric),
|
||||
Hostname: node,
|
||||
Metric: ccms.toRemoteName(metric),
|
||||
Resolution: 60, // Default for Node Queries
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -597,7 +635,7 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
|
||||
resBody, err := ccms.doRequest(ctx, &req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
||||
@@ -60,7 +60,8 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
ctx context.Context,
|
||||
resolution int) (schema.JobData, error) {
|
||||
|
||||
measurementsConds := make([]string, 0, len(metrics))
|
||||
for _, m := range metrics {
|
||||
|
||||
@@ -8,13 +8,10 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
@@ -24,7 +21,7 @@ type MetricDataRepository interface {
|
||||
Init(rawConfig json.RawMessage) error
|
||||
|
||||
// Return the JobData for the given job, only with the requested metrics.
|
||||
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
|
||||
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
|
||||
|
||||
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
|
||||
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
||||
@@ -35,10 +32,7 @@ type MetricDataRepository interface {
|
||||
|
||||
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
||||
|
||||
var useArchive bool
|
||||
|
||||
func Init(disableArchive bool) error {
|
||||
useArchive = !disableArchive
|
||||
func Init() error {
|
||||
for _, cluster := range config.Keys.Clusters {
|
||||
if cluster.MetricDataRepository != nil {
|
||||
var kind struct {
|
||||
@@ -73,284 +67,13 @@ func Init(disableArchive bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||
|
||||
// Fetches the metric data for a job.
|
||||
func LoadData(job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context,
|
||||
) (schema.JobData, error) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
|
||||
if job.State == schema.JobStateRunning ||
|
||||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
||||
!useArchive {
|
||||
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
|
||||
if !ok {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
}
|
||||
|
||||
if scopes == nil {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
cluster := archive.GetCluster(job.Cluster)
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
jd, err = repo.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
if len(jd) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
// return err, 0, 0 // Reactivating will block archiving on one partial error
|
||||
} else {
|
||||
log.Error("Error while loading job data from metric repository")
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
size = jd.Size()
|
||||
} else {
|
||||
jd, err = archive.GetHandle().LoadJobData(job)
|
||||
if err != nil {
|
||||
log.Error("Error while loading job data from archive")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
// Avoid sending unrequested data to the client:
|
||||
if metrics != nil || scopes != nil {
|
||||
if metrics == nil {
|
||||
metrics = make([]string, 0, len(jd))
|
||||
for k := range jd {
|
||||
metrics = append(metrics, k)
|
||||
}
|
||||
}
|
||||
|
||||
res := schema.JobData{}
|
||||
for _, metric := range metrics {
|
||||
if perscope, ok := jd[metric]; ok {
|
||||
if len(perscope) > 1 {
|
||||
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
||||
for _, scope := range scopes {
|
||||
if jm, ok := perscope[scope]; ok {
|
||||
subset[scope] = jm
|
||||
}
|
||||
}
|
||||
|
||||
if len(subset) > 0 {
|
||||
perscope = subset
|
||||
}
|
||||
}
|
||||
|
||||
res[metric] = perscope
|
||||
}
|
||||
}
|
||||
jd = res
|
||||
}
|
||||
size = jd.Size()
|
||||
}
|
||||
|
||||
ttl = 5 * time.Hour
|
||||
if job.State == schema.JobStateRunning {
|
||||
ttl = 2 * time.Minute
|
||||
}
|
||||
|
||||
prepareJobData(job, jd, scopes)
|
||||
|
||||
return jd, ttl, size
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
log.Error("Error in returned dataset")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(schema.JobData), nil
|
||||
}
|
||||
|
||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||
func LoadAverages(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
data [][]schema.Float,
|
||||
ctx context.Context,
|
||||
) error {
|
||||
if job.State != schema.JobStateRunning && useArchive {
|
||||
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
|
||||
}
|
||||
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
|
||||
return err
|
||||
}
|
||||
|
||||
for i, m := range metrics {
|
||||
nodes, ok := stats[m]
|
||||
if !ok {
|
||||
data[i] = append(data[i], schema.NaN)
|
||||
continue
|
||||
}
|
||||
|
||||
sum := 0.0
|
||||
for _, node := range nodes {
|
||||
sum += node.Avg
|
||||
}
|
||||
data[i] = append(data[i], schema.Float(sum))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for the node/system view. Returns a map of nodes to a map of metrics.
|
||||
func LoadNodeData(
|
||||
cluster string,
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
|
||||
var err error
|
||||
repo, ok := metricDataRepos[cluster]
|
||||
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func cacheKey(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
) string {
|
||||
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
|
||||
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]",
|
||||
job.ID, job.State, metrics, scopes)
|
||||
}
|
||||
|
||||
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
|
||||
// to be available at the scope 'node'. If a job has a lot of nodes,
|
||||
// statisticsSeries should be available so that a min/mean/max Graph can be
|
||||
// used instead of a lot of single lines.
|
||||
func prepareJobData(
|
||||
job *schema.Job,
|
||||
jobData schema.JobData,
|
||||
scopes []schema.MetricScope,
|
||||
) {
|
||||
const maxSeriesSize int = 15
|
||||
for _, scopes := range jobData {
|
||||
for _, jm := range scopes {
|
||||
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
|
||||
continue
|
||||
}
|
||||
|
||||
jm.AddStatisticsSeries()
|
||||
}
|
||||
}
|
||||
|
||||
nodeScopeRequested := false
|
||||
for _, scope := range scopes {
|
||||
if scope == schema.MetricScopeNode {
|
||||
nodeScopeRequested = true
|
||||
}
|
||||
}
|
||||
|
||||
if nodeScopeRequested {
|
||||
jobData.AddNodeScope("flops_any")
|
||||
jobData.AddNodeScope("mem_bw")
|
||||
}
|
||||
}
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
// TODO: Talk about this! What resolutions to store data at...
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
if job.NumNodes <= 8 {
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
|
||||
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
||||
if err != nil {
|
||||
log.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// TODO/FIXME: Calc average for non-node metrics as well!
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
},
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
// If the file based archive is disabled,
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if !useArchive {
|
||||
return jobMeta, nil
|
||||
}
|
||||
|
||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
||||
return repo, err
|
||||
}
|
||||
|
||||
@@ -166,10 +166,10 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
|
||||
var rt http.RoundTripper = nil
|
||||
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
|
||||
prom_pw := promcfg.Secret(prom_pw)
|
||||
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
|
||||
rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
|
||||
} else {
|
||||
if config.Username != "" {
|
||||
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
|
||||
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
|
||||
}
|
||||
}
|
||||
// init client
|
||||
@@ -204,8 +204,8 @@ func (pdb *PrometheusDataRepository) FormatQuery(
|
||||
metric string,
|
||||
scope schema.MetricScope,
|
||||
nodes []string,
|
||||
cluster string) (string, error) {
|
||||
|
||||
cluster string,
|
||||
) (string, error) {
|
||||
args := PromQLArgs{}
|
||||
if len(nodes) > 0 {
|
||||
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
|
||||
@@ -233,12 +233,13 @@ func (pdb *PrometheusDataRepository) RowToSeries(
|
||||
from time.Time,
|
||||
step int64,
|
||||
steps int64,
|
||||
row *promm.SampleStream) schema.Series {
|
||||
row *promm.SampleStream,
|
||||
) schema.Series {
|
||||
ts := from.Unix()
|
||||
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
|
||||
// init array of expected length with NaN
|
||||
values := make([]schema.Float, steps+1)
|
||||
for i, _ := range values {
|
||||
for i := range values {
|
||||
values[i] = schema.NaN
|
||||
}
|
||||
// copy recorded values from prom sample pair
|
||||
@@ -263,8 +264,9 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
) (schema.JobData, error) {
|
||||
// TODO respect requested scope
|
||||
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
@@ -306,7 +308,6 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
|
||||
return nil, errors.New("Prometheus query error")
|
||||
@@ -335,7 +336,7 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
pdb.RowToSeries(from, step, steps, row))
|
||||
}
|
||||
// only add metric if at least one host returned data
|
||||
if !ok && len(jobMetric.Series) > 0{
|
||||
if !ok && len(jobMetric.Series) > 0 {
|
||||
jobData[metric][scope] = jobMetric
|
||||
}
|
||||
// sort by hostname to get uniform coloring
|
||||
@@ -351,12 +352,12 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
func (pdb *PrometheusDataRepository) LoadStats(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
|
||||
ctx context.Context,
|
||||
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
// map of metrics of nodes of stats
|
||||
stats := map[string]map[string]schema.MetricStatistics{}
|
||||
|
||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job for stats")
|
||||
return nil, err
|
||||
@@ -376,7 +377,8 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
ctx context.Context,
|
||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
t0 := time.Now()
|
||||
// Map of hosts of metrics of value slices
|
||||
data := make(map[string]map[string][]*schema.JobMetric)
|
||||
@@ -411,7 +413,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
|
||||
return nil, errors.New("Prometheus query error")
|
||||
|
||||
@@ -12,7 +12,7 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
@@ -27,9 +27,10 @@ func (tmdr *TestMetricDataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
ctx context.Context,
|
||||
resolution int) (schema.JobData, error) {
|
||||
|
||||
return TestLoadDataCallback(job, metrics, scopes, ctx)
|
||||
return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
|
||||
}
|
||||
|
||||
func (tmdr *TestMetricDataRepository) LoadStats(
|
||||
@@ -48,3 +49,49 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
|
||||
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
func DeepCopy(jd_temp schema.JobData) schema.JobData {
|
||||
var jd schema.JobData
|
||||
|
||||
jd = make(schema.JobData, len(jd_temp))
|
||||
for k, v := range jd_temp {
|
||||
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
|
||||
for k_, v_ := range v {
|
||||
jd[k][k_] = new(schema.JobMetric)
|
||||
jd[k][k_].Series = make([]schema.Series, len(v_.Series))
|
||||
for i := 0; i < len(v_.Series); i += 1 {
|
||||
jd[k][k_].Series[i].Data = make([]schema.Float, len(v_.Series[i].Data))
|
||||
copy(jd[k][k_].Series[i].Data, v_.Series[i].Data)
|
||||
jd[k][k_].Series[i].Hostname = v_.Series[i].Hostname
|
||||
jd[k][k_].Series[i].Id = v_.Series[i].Id
|
||||
jd[k][k_].Series[i].Statistics.Avg = v_.Series[i].Statistics.Avg
|
||||
jd[k][k_].Series[i].Statistics.Min = v_.Series[i].Statistics.Min
|
||||
jd[k][k_].Series[i].Statistics.Max = v_.Series[i].Statistics.Max
|
||||
}
|
||||
jd[k][k_].Timestep = v_.Timestep
|
||||
jd[k][k_].Unit.Base = v_.Unit.Base
|
||||
jd[k][k_].Unit.Prefix = v_.Unit.Prefix
|
||||
if v_.StatisticsSeries != nil {
|
||||
// Init Slices
|
||||
jd[k][k_].StatisticsSeries = new(schema.StatsSeries)
|
||||
jd[k][k_].StatisticsSeries.Max = make([]schema.Float, len(v_.StatisticsSeries.Max))
|
||||
jd[k][k_].StatisticsSeries.Min = make([]schema.Float, len(v_.StatisticsSeries.Min))
|
||||
jd[k][k_].StatisticsSeries.Median = make([]schema.Float, len(v_.StatisticsSeries.Median))
|
||||
jd[k][k_].StatisticsSeries.Mean = make([]schema.Float, len(v_.StatisticsSeries.Mean))
|
||||
// Copy Data
|
||||
copy(jd[k][k_].StatisticsSeries.Max, v_.StatisticsSeries.Max)
|
||||
copy(jd[k][k_].StatisticsSeries.Min, v_.StatisticsSeries.Min)
|
||||
copy(jd[k][k_].StatisticsSeries.Median, v_.StatisticsSeries.Median)
|
||||
copy(jd[k][k_].StatisticsSeries.Mean, v_.StatisticsSeries.Mean)
|
||||
// Handle Percentiles
|
||||
for k__, v__ := range v_.StatisticsSeries.Percentiles {
|
||||
jd[k][k_].StatisticsSeries.Percentiles[k__] = make([]schema.Float, len(v__))
|
||||
copy(jd[k][k_].StatisticsSeries.Percentiles[k__], v__)
|
||||
}
|
||||
} else {
|
||||
jd[k][k_].StatisticsSeries = v_.StatisticsSeries
|
||||
}
|
||||
}
|
||||
}
|
||||
return jd
|
||||
}
|
||||
|
||||
@@ -5,17 +5,16 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
@@ -30,12 +29,10 @@ var (
|
||||
)
|
||||
|
||||
type JobRepository struct {
|
||||
DB *sqlx.DB
|
||||
stmtCache *sq.StmtCache
|
||||
cache *lrucache.Cache
|
||||
archiveChannel chan *schema.Job
|
||||
driver string
|
||||
archivePending sync.WaitGroup
|
||||
DB *sqlx.DB
|
||||
stmtCache *sq.StmtCache
|
||||
cache *lrucache.Cache
|
||||
driver string
|
||||
}
|
||||
|
||||
func GetJobRepository() *JobRepository {
|
||||
@@ -46,12 +43,9 @@ func GetJobRepository() *JobRepository {
|
||||
DB: db.DB,
|
||||
driver: db.Driver,
|
||||
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
archiveChannel: make(chan *schema.Job, 128),
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
}
|
||||
// start archiving worker
|
||||
go jobRepoInstance.archivingWorker()
|
||||
})
|
||||
return jobRepoInstance
|
||||
}
|
||||
@@ -59,23 +53,31 @@ func GetJobRepository() *JobRepository {
|
||||
var jobColumns []string = []string{
|
||||
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
|
||||
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
|
||||
"job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data",
|
||||
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
|
||||
}
|
||||
|
||||
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
||||
job := &schema.Job{}
|
||||
|
||||
if err := row.Scan(
|
||||
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
|
||||
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
||||
&job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil {
|
||||
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
|
||||
log.Warnf("Error while scanning rows (Job): %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
||||
log.Warn("Error while unmarhsaling raw resources json")
|
||||
log.Warn("Error while unmarshaling raw resources json")
|
||||
return nil, err
|
||||
}
|
||||
job.RawResources = nil
|
||||
|
||||
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
|
||||
log.Warnf("Error while unmarshaling raw footprint json: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
job.RawFootprint = nil
|
||||
|
||||
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
|
||||
// return nil, err
|
||||
@@ -86,7 +88,6 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
||||
}
|
||||
|
||||
job.RawResources = nil
|
||||
return job, nil
|
||||
}
|
||||
|
||||
@@ -205,7 +206,10 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
|
||||
if _, err = sq.Update("job").
|
||||
Set("meta_data", job.RawMetaData).
|
||||
Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
@@ -214,227 +218,60 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
||||
return archive.UpdateMetadata(job, job.MetaData)
|
||||
}
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) Find(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) (*schema.Job, error) {
|
||||
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
cachekey := fmt.Sprintf("footprint:%d", job.ID)
|
||||
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||
job.Footprint = cached.(map[string]float64)
|
||||
return job.Footprint, nil
|
||||
}
|
||||
|
||||
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
|
||||
|
||||
// s, args, _ := q.ToSql()
|
||||
// log.Printf("trying to find db job with query: %s | %v", s, args)
|
||||
|
||||
log.Debugf("Timer Find %s", time.Since(start))
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindAll(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) ([]*schema.Job, error) {
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
|
||||
log.Warn("Error while scanning for job footprint")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobs := make([]*schema.Job, 0, 10)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
log.Debugf("Timer FindAll %s", time.Since(start))
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// FindById executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindConcurrentJobs(
|
||||
ctx context.Context,
|
||||
job *schema.Job,
|
||||
) (*model.JobLinkResultList, error) {
|
||||
if job == nil {
|
||||
if len(job.RawFootprint) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
query = query.Where("cluster = ?", job.Cluster)
|
||||
var startTime int64
|
||||
var stopTime int64
|
||||
|
||||
startTime = job.StartTimeUnix
|
||||
hostname := job.Resources[0].Hostname
|
||||
|
||||
if job.State == schema.JobStateRunning {
|
||||
stopTime = time.Now().Unix()
|
||||
} else {
|
||||
stopTime = startTime + int64(job.Duration)
|
||||
}
|
||||
|
||||
// Add 200s overlap for jobs start time at the end
|
||||
startTimeTail := startTime + 10
|
||||
stopTimeTail := stopTime - 200
|
||||
startTimeFront := startTime + 200
|
||||
|
||||
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTime)
|
||||
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
|
||||
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
|
||||
log.Warn("Error while unmarshaling raw footprint json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
items := make([]*model.JobLink, 0, 10)
|
||||
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
|
||||
r.cache.Put(cachekey, job.Footprint, len(job.Footprint), 24*time.Hour)
|
||||
log.Debugf("Timer FetchFootprint %s", time.Since(start))
|
||||
return job.Footprint, nil
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
|
||||
start := time.Now()
|
||||
cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
|
||||
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||
job.EnergyFootprint = cached.(map[string]float64)
|
||||
return job.EnergyFootprint, nil
|
||||
}
|
||||
|
||||
rows, err = queryRunning.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
|
||||
log.Warn("Error while scanning for job energy_footprint")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
if len(job.RawEnergyFootprint) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
cnt := len(items)
|
||||
|
||||
return &model.JobLinkResultList{
|
||||
ListQuery: &queryString,
|
||||
Items: items,
|
||||
Count: &cnt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Start inserts a new job in the table, returning the unique job ID.
|
||||
// Statistics are not transfered!
|
||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
||||
if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
|
||||
log.Warn("Error while unmarshaling raw energy footprint json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
||||
}
|
||||
|
||||
res, err := r.DB.NamedExec(`INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, `+"`partition`"+`, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data
|
||||
);`, job)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) Stop(
|
||||
jobId int64,
|
||||
duration int32,
|
||||
state schema.JobState,
|
||||
monitoringStatus int32,
|
||||
) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
|
||||
log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
|
||||
return job.EnergyFootprint, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
@@ -466,103 +303,6 @@ func (r *JobRepository) DeleteJobById(id int64) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", job)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) MarkArchived(
|
||||
jobId int64,
|
||||
monitoringStatus int32,
|
||||
metricStats map[string]schema.JobStatistics,
|
||||
) error {
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
for metric, stats := range metricStats {
|
||||
switch metric {
|
||||
case "flops_any":
|
||||
stmt = stmt.Set("flops_any_avg", stats.Avg)
|
||||
case "mem_used":
|
||||
stmt = stmt.Set("mem_used_max", stats.Max)
|
||||
case "mem_bw":
|
||||
stmt = stmt.Set("mem_bw_avg", stats.Avg)
|
||||
case "load":
|
||||
stmt = stmt.Set("load_avg", stats.Avg)
|
||||
case "cpu_load":
|
||||
stmt = stmt.Set("load_avg", stats.Avg)
|
||||
case "net_bw":
|
||||
stmt = stmt.Set("net_bw_avg", stats.Avg)
|
||||
case "file_bw":
|
||||
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
||||
default:
|
||||
log.Debugf("MarkArchived() Metric '%v' unknown", metric)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Warn("Error while marking job as archived")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Archiving worker thread
|
||||
func (r *JobRepository) archivingWorker() {
|
||||
for {
|
||||
select {
|
||||
case job, ok := <-r.archiveChannel:
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
start := time.Now()
|
||||
// not using meta data, called to load JobMeta into Cache?
|
||||
// will fail if job meta not in repository
|
||||
if _, err := r.FetchMetadata(job); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||
// TODO: Maybe use context with cancel/timeout here
|
||||
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// Update the jobs database entry one last time:
|
||||
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at marking archived step: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||
r.archivePending.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
|
||||
r.archivePending.Add(1)
|
||||
r.archiveChannel <- job
|
||||
}
|
||||
|
||||
// Wait for background thread to finish pending archiving operations
|
||||
func (r *JobRepository) WaitForArchiving() {
|
||||
// close channel and wait for worker to process remaining jobs
|
||||
r.archivePending.Wait()
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
|
||||
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
|
||||
return searchterm, "", "", ""
|
||||
@@ -745,6 +485,46 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
|
||||
query := sq.Select(jobColumns...).From("job").
|
||||
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
|
||||
Where("job.job_state = 'running'").
|
||||
Where("job.duration > 600")
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobs := make([]*schema.Job, 0, 50)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
|
||||
log.Infof("Return job count %d", len(jobs))
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateDuration() error {
|
||||
stmnt := sq.Update("job").
|
||||
Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
|
||||
Where("job_state = 'running'")
|
||||
|
||||
_, err := stmnt.RunWith(r.stmtCache).Exec()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
|
||||
var query sq.SelectBuilder
|
||||
|
||||
@@ -783,27 +563,108 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
|
||||
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
|
||||
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
|
||||
);`
|
||||
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", job)
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while NamedJobInsert")
|
||||
return 0, err
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return id, nil
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
}
|
||||
|
||||
func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
|
||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) MarkArchived(
|
||||
stmt sq.UpdateBuilder,
|
||||
monitoringStatus int32,
|
||||
) sq.UpdateBuilder {
|
||||
return stmt.Set("monitoring_status", monitoringStatus)
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateEnergy(
|
||||
stmt sq.UpdateBuilder,
|
||||
jobMeta *schema.JobMeta,
|
||||
) (sq.UpdateBuilder, error) {
|
||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return stmt, err
|
||||
}
|
||||
energyFootprint := make(map[string]float64)
|
||||
var totalEnergy float64
|
||||
var energy float64
|
||||
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
|
||||
energy = math.Round(((LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
|
||||
}
|
||||
} else {
|
||||
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||
}
|
||||
|
||||
energyFootprint[fp] = energy
|
||||
totalEnergy += energy
|
||||
}
|
||||
|
||||
var rawFootprint []byte
|
||||
if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
|
||||
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return stmt, err
|
||||
}
|
||||
|
||||
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateFootprint(
|
||||
stmt sq.UpdateBuilder,
|
||||
jobMeta *schema.JobMeta,
|
||||
) (sq.UpdateBuilder, error) {
|
||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return stmt, err
|
||||
}
|
||||
footprint := make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
var statType string
|
||||
for _, gm := range archive.GlobalMetricList {
|
||||
if gm.Name == fp {
|
||||
statType = gm.Footprint
|
||||
}
|
||||
}
|
||||
|
||||
if statType != "avg" && statType != "min" && statType != "max" {
|
||||
log.Warnf("unknown statType for footprint update: %s", statType)
|
||||
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
|
||||
}
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
footprint[name] = LoadJobStat(jobMeta, fp, statType)
|
||||
}
|
||||
|
||||
var rawFootprint []byte
|
||||
if rawFootprint, err = json.Marshal(footprint); err != nil {
|
||||
log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return stmt, err
|
||||
}
|
||||
|
||||
return stmt.Set("footprint", string(rawFootprint)), nil
|
||||
}
|
||||
|
||||
75
internal/repository/jobCreate.go
Normal file
75
internal/repository/jobCreate.go
Normal file
@@ -0,0 +1,75 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, resources, meta_data
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :resources, :meta_data
|
||||
);`
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
|
||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while NamedJobInsert")
|
||||
return 0, err
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// Start inserts a new job in the table, returning the unique job ID.
|
||||
// Statistics are not transfered!
|
||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
||||
}
|
||||
|
||||
return r.InsertJob(job)
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) Stop(
|
||||
jobId int64,
|
||||
duration int32,
|
||||
state schema.JobState,
|
||||
monitoringStatus int32,
|
||||
) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
}
|
||||
244
internal/repository/jobFind.go
Normal file
244
internal/repository/jobFind.go
Normal file
@@ -0,0 +1,244 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) Find(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) (*schema.Job, error) {
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
|
||||
|
||||
log.Debugf("Timer Find %s", time.Since(start))
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindAll(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) ([]*schema.Job, error) {
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobs := make([]*schema.Job, 0, 10)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
log.Debugf("Timer FindAll %s", time.Since(start))
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// FindById executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
|
||||
q, qerr := SecurityCheck(ctx, q)
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByIdDirect executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByJobId executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the slurm id and the clustername.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").
|
||||
Where("job.job_id = ?", jobId).
|
||||
Where("job.cluster = ?", cluster).
|
||||
Where("job.start_time = ?", startTime)
|
||||
|
||||
q, qerr := SecurityCheck(ctx, q)
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// IsJobOwner executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the slurm id,a username and the cluster.
|
||||
// It returns a bool.
|
||||
// If job was found, user is owner: test err != sql.ErrNoRows
|
||||
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
|
||||
q := sq.Select("id").
|
||||
From("job").
|
||||
Where("job.job_id = ?", jobId).
|
||||
Where("job.user = ?", user).
|
||||
Where("job.cluster = ?", cluster).
|
||||
Where("job.start_time = ?", startTime)
|
||||
|
||||
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
return err != sql.ErrNoRows
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindConcurrentJobs(
|
||||
ctx context.Context,
|
||||
job *schema.Job,
|
||||
) (*model.JobLinkResultList, error) {
|
||||
if job == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
query = query.Where("cluster = ?", job.Cluster)
|
||||
var startTime int64
|
||||
var stopTime int64
|
||||
|
||||
startTime = job.StartTimeUnix
|
||||
hostname := job.Resources[0].Hostname
|
||||
|
||||
if job.State == schema.JobStateRunning {
|
||||
stopTime = time.Now().Unix()
|
||||
} else {
|
||||
stopTime = startTime + int64(job.Duration)
|
||||
}
|
||||
|
||||
// Add 200s overlap for jobs start time at the end
|
||||
startTimeTail := startTime + 10
|
||||
stopTimeTail := stopTime - 200
|
||||
startTimeFront := startTime + 200
|
||||
|
||||
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTime)
|
||||
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
|
||||
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
items := make([]*model.JobLink, 0, 10)
|
||||
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
rows, err = queryRunning.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
cnt := len(items)
|
||||
|
||||
return &model.JobLinkResultList{
|
||||
ListQuery: &queryString,
|
||||
Items: items,
|
||||
Count: &cnt,
|
||||
}, nil
|
||||
}
|
||||
@@ -22,8 +22,8 @@ func (r *JobRepository) QueryJobs(
|
||||
ctx context.Context,
|
||||
filters []*model.JobFilter,
|
||||
page *model.PageRequest,
|
||||
order *model.OrderByInput) ([]*schema.Job, error) {
|
||||
|
||||
order *model.OrderByInput,
|
||||
) ([]*schema.Job, error) {
|
||||
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
@@ -31,14 +31,28 @@ func (r *JobRepository) QueryJobs(
|
||||
|
||||
if order != nil {
|
||||
field := toSnakeCase(order.Field)
|
||||
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
|
||||
if order.Type == "col" {
|
||||
// "col": Fixed column name query
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
|
||||
}
|
||||
} else {
|
||||
// "foot": Order by footprint JSON field values
|
||||
// Verify and Search Only in Valid Jsons
|
||||
query = query.Where("JSON_VALID(meta_data)")
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,9 +87,10 @@ func (r *JobRepository) QueryJobs(
|
||||
|
||||
func (r *JobRepository) CountJobs(
|
||||
ctx context.Context,
|
||||
filters []*model.JobFilter) (int, error) {
|
||||
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
|
||||
filters []*model.JobFilter,
|
||||
) (int, error) {
|
||||
// DISTICT count for tags filters, does not affect other queries
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
|
||||
if qerr != nil {
|
||||
return 0, qerr
|
||||
}
|
||||
@@ -97,30 +112,33 @@ func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilde
|
||||
if user == nil {
|
||||
var qnil sq.SelectBuilder
|
||||
return qnil, fmt.Errorf("user context is nil")
|
||||
} else if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleApi}) { // Admin & Co. : All jobs
|
||||
}
|
||||
|
||||
switch {
|
||||
case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
|
||||
return query, nil
|
||||
} else if user.HasRole(schema.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs
|
||||
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
|
||||
return query, nil
|
||||
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
|
||||
if len(user.Projects) != 0 {
|
||||
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
|
||||
} else {
|
||||
log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
|
||||
return query.Where("job.user = ?", user.Username), nil
|
||||
}
|
||||
} else if user.HasRole(schema.RoleUser) { // User : Only personal jobs
|
||||
case user.HasRole(schema.RoleUser): // User : Only personal jobs
|
||||
return query.Where("job.user = ?", user.Username), nil
|
||||
} else {
|
||||
// Shortterm compatibility: Return User-Query if no roles:
|
||||
return query.Where("job.user = ?", user.Username), nil
|
||||
// // On the longterm: Return Error instead of fallback:
|
||||
// var qnil sq.SelectBuilder
|
||||
// return qnil, fmt.Errorf("user '%s' with unknown roles [%#v]", user.Username, user.Roles)
|
||||
default: // No known Role, return error
|
||||
var qnil sq.SelectBuilder
|
||||
return qnil, fmt.Errorf("user has no or unknown roles")
|
||||
}
|
||||
}
|
||||
|
||||
// Build a sq.SelectBuilder out of a schema.JobFilter.
|
||||
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if filter.Tags != nil {
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
|
||||
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
|
||||
}
|
||||
if filter.JobID != nil {
|
||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||
@@ -174,17 +192,13 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
||||
if filter.Node != nil {
|
||||
query = buildStringCondition("job.resources", filter.Node, query)
|
||||
}
|
||||
if filter.FlopsAnyAvg != nil {
|
||||
query = buildFloatCondition("job.flops_any_avg", filter.FlopsAnyAvg, query)
|
||||
if filter.Energy != nil {
|
||||
query = buildFloatCondition("job.energy", filter.Energy, query)
|
||||
}
|
||||
if filter.MemBwAvg != nil {
|
||||
query = buildFloatCondition("job.mem_bw_avg", filter.MemBwAvg, query)
|
||||
}
|
||||
if filter.LoadAvg != nil {
|
||||
query = buildFloatCondition("job.load_avg", filter.LoadAvg, query)
|
||||
}
|
||||
if filter.MemUsedMax != nil {
|
||||
query = buildFloatCondition("job.mem_used_max", filter.MemUsedMax, query)
|
||||
if filter.MetricStats != nil {
|
||||
for _, ms := range filter.MetricStats {
|
||||
query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
|
||||
}
|
||||
}
|
||||
return query
|
||||
}
|
||||
@@ -193,6 +207,10 @@ func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuild
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
}
|
||||
|
||||
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
}
|
||||
|
||||
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if cond.From != nil && cond.To != nil {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
|
||||
@@ -200,13 +218,32 @@ func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBui
|
||||
return query.Where("? <= "+field, cond.From.Unix())
|
||||
} else if cond.To != nil {
|
||||
return query.Where(field+" <= ?", cond.To.Unix())
|
||||
} else if cond.Range != "" {
|
||||
now := time.Now().Unix()
|
||||
var then int64
|
||||
switch cond.Range {
|
||||
case "last6h":
|
||||
then = now - (60 * 60 * 6)
|
||||
case "last24h":
|
||||
then = now - (60 * 60 * 24)
|
||||
case "last7d":
|
||||
then = now - (60 * 60 * 24 * 7)
|
||||
case "last30d":
|
||||
then = now - (60 * 60 * 24 * 30)
|
||||
default:
|
||||
log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
|
||||
return query
|
||||
}
|
||||
return query.Where(field+" BETWEEN ? AND ?", then, now)
|
||||
} else {
|
||||
return query
|
||||
}
|
||||
}
|
||||
|
||||
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
// Verify and Search Only in Valid Jsons
|
||||
query = query.Where("JSON_VALID(footprint)")
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
|
||||
}
|
||||
|
||||
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
@@ -227,9 +264,7 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
|
||||
}
|
||||
if cond.In != nil {
|
||||
queryElements := make([]string, len(cond.In))
|
||||
for i, val := range cond.In {
|
||||
queryElements[i] = val
|
||||
}
|
||||
copy(queryElements, cond.In)
|
||||
return query.Where(sq.Or{sq.Eq{field: queryElements}})
|
||||
}
|
||||
return query
|
||||
@@ -257,8 +292,10 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.
|
||||
return query
|
||||
}
|
||||
|
||||
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
var (
|
||||
matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
)
|
||||
|
||||
func toSnakeCase(str string) string {
|
||||
for _, c := range str {
|
||||
@@ -5,9 +5,11 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
@@ -30,7 +32,7 @@ func TestFind(t *testing.T) {
|
||||
func TestFindById(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
job, err := r.FindById(5)
|
||||
job, err := r.FindById(getContext(t), 5)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -45,7 +47,19 @@ func TestFindById(t *testing.T) {
|
||||
func TestGetTags(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
tags, counts, err := r.CountTags(nil)
|
||||
const contextUserKey ContextKey = "user"
|
||||
contextUserValue := &schema.User{
|
||||
Username: "testuser",
|
||||
Projects: make([]string, 0),
|
||||
Roles: []string{"user"},
|
||||
AuthType: 0,
|
||||
AuthSource: 2,
|
||||
}
|
||||
|
||||
ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)
|
||||
|
||||
// Test Tag has Scope "global"
|
||||
tags, counts, err := r.CountTags(ctx)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ import (
|
||||
"github.com/golang-migrate/migrate/v4/source/iofs"
|
||||
)
|
||||
|
||||
const Version uint = 7
|
||||
const Version uint = 8
|
||||
|
||||
//go:embed migrations/*
|
||||
var migrationFiles embed.FS
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
ALTER TABLE job DROP energy;
|
||||
ALTER TABLE job DROP energy_footprint;
|
||||
ALTER TABLE job ADD COLUMN flops_any_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_used_max;
|
||||
ALTER TABLE job ADD COLUMN load_avg;
|
||||
ALTER TABLE job ADD COLUMN net_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN net_data_vol_total;
|
||||
ALTER TABLE job ADD COLUMN file_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN file_data_vol_total;
|
||||
|
||||
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
|
||||
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
|
||||
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
|
||||
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
|
||||
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
|
||||
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
|
||||
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
|
||||
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
|
||||
|
||||
ALTER TABLE job DROP footprint;
|
||||
137
internal/repository/migrations/sqlite3/08_add-footprint.up.sql
Normal file
137
internal/repository/migrations/sqlite3/08_add-footprint.up.sql
Normal file
@@ -0,0 +1,137 @@
|
||||
DROP INDEX job_stats;
|
||||
DROP INDEX job_by_user;
|
||||
DROP INDEX job_by_starttime;
|
||||
DROP INDEX job_by_job_id;
|
||||
DROP INDEX job_list;
|
||||
DROP INDEX job_list_user;
|
||||
DROP INDEX job_list_users;
|
||||
DROP INDEX job_list_users_start;
|
||||
|
||||
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
|
||||
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
|
||||
|
||||
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
|
||||
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
|
||||
|
||||
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
|
||||
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
|
||||
|
||||
ALTER TABLE job DROP flops_any_avg;
|
||||
ALTER TABLE job DROP mem_bw_avg;
|
||||
ALTER TABLE job DROP mem_used_max;
|
||||
ALTER TABLE job DROP load_avg;
|
||||
ALTER TABLE job DROP net_bw_avg;
|
||||
ALTER TABLE job DROP net_data_vol_total;
|
||||
ALTER TABLE job DROP file_bw_avg;
|
||||
ALTER TABLE job DROP file_data_vol_total;
|
||||
|
||||
-- Indices for: Single filters, combined filters, sorting, sorting with filters
|
||||
-- Cluster Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, partition);
|
||||
-- Cluster+Partition Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, partition, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, partition, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, partition, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, partition, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, partition, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, partition, energy);
|
||||
|
||||
-- Cluster+Partition+Jobstate Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, partition, job_state, user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, partition, job_state, project);
|
||||
-- Cluster+Partition+Jobstate Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, partition, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, partition, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, partition, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, partition, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, partition, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, partition, job_state, energy);
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||
-- Cluster+JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
|
||||
|
||||
-- User Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_user ON job (user);
|
||||
-- User Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (user, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (user, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (user, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (user, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (user, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (user, energy);
|
||||
|
||||
-- Project Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, user);
|
||||
-- Project Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
|
||||
|
||||
-- JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||
-- JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
|
||||
|
||||
-- ArrayJob Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||
|
||||
-- Sorting without active filters
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||
|
||||
-- Optimize DB index usage
|
||||
PRAGMA optimize;
|
||||
@@ -55,7 +55,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_, err := db.FindById(jobId)
|
||||
_, err := db.FindById(getContext(b), jobId)
|
||||
noErr(b, err)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -13,7 +13,7 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
@@ -41,8 +41,8 @@ var sortBy2column = map[model.SortByAggregate]string{
|
||||
func (r *JobRepository) buildCountQuery(
|
||||
filter []*model.JobFilter,
|
||||
kind string,
|
||||
col string) sq.SelectBuilder {
|
||||
|
||||
col string,
|
||||
) sq.SelectBuilder {
|
||||
var query sq.SelectBuilder
|
||||
|
||||
if col != "" {
|
||||
@@ -69,16 +69,16 @@ func (r *JobRepository) buildCountQuery(
|
||||
|
||||
func (r *JobRepository) buildStatsQuery(
|
||||
filter []*model.JobFilter,
|
||||
col string) sq.SelectBuilder {
|
||||
|
||||
col string,
|
||||
) sq.SelectBuilder {
|
||||
var query sq.SelectBuilder
|
||||
castType := r.getCastType()
|
||||
|
||||
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
||||
|
||||
if col != "" {
|
||||
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select(col, "COUNT(job.id) as totalJobs",
|
||||
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
|
||||
@@ -86,10 +86,9 @@ func (r *JobRepository) buildStatsQuery(
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
|
||||
).From("job").GroupBy(col)
|
||||
|
||||
).From("job").Join("user ON user.username = job.user").GroupBy(col)
|
||||
} else {
|
||||
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select("COUNT(job.id)",
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
|
||||
@@ -108,15 +107,15 @@ func (r *JobRepository) buildStatsQuery(
|
||||
return query
|
||||
}
|
||||
|
||||
func (r *JobRepository) getUserName(ctx context.Context, id string) string {
|
||||
user := GetUserFromContext(ctx)
|
||||
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
|
||||
if name != "" {
|
||||
return name
|
||||
} else {
|
||||
return "-"
|
||||
}
|
||||
}
|
||||
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
|
||||
// user := GetUserFromContext(ctx)
|
||||
// name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
|
||||
// if name != "" {
|
||||
// return name
|
||||
// } else {
|
||||
// return "-"
|
||||
// }
|
||||
// }
|
||||
|
||||
func (r *JobRepository) getCastType() string {
|
||||
var castType string
|
||||
@@ -138,8 +137,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
filter []*model.JobFilter,
|
||||
page *model.PageRequest,
|
||||
sortBy *model.SortByAggregate,
|
||||
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
|
||||
groupBy *model.Aggregate,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
col := groupBy2column[*groupBy]
|
||||
query := r.buildStatsQuery(filter, col)
|
||||
@@ -168,14 +167,20 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var name sql.NullString
|
||||
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
||||
if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
||||
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
|
||||
var personName string
|
||||
|
||||
if name.Valid {
|
||||
personName = name.String
|
||||
}
|
||||
|
||||
if jobs.Valid {
|
||||
totalJobs = int(jobs.Int64)
|
||||
@@ -206,11 +211,11 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
}
|
||||
|
||||
if col == "job.user" {
|
||||
name := r.getUserName(ctx, id.String)
|
||||
// name := r.getUserName(ctx, id.String)
|
||||
stats = append(stats,
|
||||
&model.JobsStatistics{
|
||||
ID: id.String,
|
||||
Name: name,
|
||||
Name: personName,
|
||||
TotalJobs: totalJobs,
|
||||
TotalWalltime: totalWalltime,
|
||||
TotalNodes: totalNodes,
|
||||
@@ -218,7 +223,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
TotalCores: totalCores,
|
||||
TotalCoreHours: totalCoreHours,
|
||||
TotalAccs: totalAccs,
|
||||
TotalAccHours: totalAccHours})
|
||||
TotalAccHours: totalAccHours,
|
||||
})
|
||||
} else {
|
||||
stats = append(stats,
|
||||
&model.JobsStatistics{
|
||||
@@ -230,7 +236,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
TotalCores: totalCores,
|
||||
TotalCoreHours: totalCoreHours,
|
||||
TotalAccs: totalAccs,
|
||||
TotalAccHours: totalAccHours})
|
||||
TotalAccHours: totalAccHours,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -241,8 +248,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
|
||||
func (r *JobRepository) JobsStats(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter) ([]*model.JobsStatistics, error) {
|
||||
|
||||
filter []*model.JobFilter,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
query := r.buildStatsQuery(filter, "")
|
||||
query, err := SecurityCheck(ctx, query)
|
||||
@@ -277,18 +284,36 @@ func (r *JobRepository) JobsStats(
|
||||
TotalWalltime: int(walltime.Int64),
|
||||
TotalNodeHours: totalNodeHours,
|
||||
TotalCoreHours: totalCoreHours,
|
||||
TotalAccHours: totalAccHours})
|
||||
TotalAccHours: totalAccHours,
|
||||
})
|
||||
}
|
||||
|
||||
log.Debugf("Timer JobStats %s", time.Since(start))
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
switch statType {
|
||||
case "avg":
|
||||
return stats.Avg
|
||||
case "max":
|
||||
return stats.Max
|
||||
case "min":
|
||||
return stats.Min
|
||||
default:
|
||||
log.Errorf("Unknown stat type %s", statType)
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0
|
||||
}
|
||||
|
||||
func (r *JobRepository) JobCountGrouped(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
|
||||
groupBy *model.Aggregate,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
col := groupBy2column[*groupBy]
|
||||
query := r.buildCountQuery(filter, "", col)
|
||||
@@ -315,7 +340,8 @@ func (r *JobRepository) JobCountGrouped(
|
||||
stats = append(stats,
|
||||
&model.JobsStatistics{
|
||||
ID: id.String,
|
||||
TotalJobs: int(cnt.Int64)})
|
||||
TotalJobs: int(cnt.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,8 +354,8 @@ func (r *JobRepository) AddJobCountGrouped(
|
||||
filter []*model.JobFilter,
|
||||
groupBy *model.Aggregate,
|
||||
stats []*model.JobsStatistics,
|
||||
kind string) ([]*model.JobsStatistics, error) {
|
||||
|
||||
kind string,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
col := groupBy2column[*groupBy]
|
||||
query := r.buildCountQuery(filter, kind, col)
|
||||
@@ -376,8 +402,8 @@ func (r *JobRepository) AddJobCount(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
stats []*model.JobsStatistics,
|
||||
kind string) ([]*model.JobsStatistics, error) {
|
||||
|
||||
kind string,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
query := r.buildCountQuery(filter, kind, "")
|
||||
query, err := SecurityCheck(ctx, query)
|
||||
@@ -420,7 +446,8 @@ func (r *JobRepository) AddJobCount(
|
||||
func (r *JobRepository) AddHistograms(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
|
||||
stat *model.JobsStatistics,
|
||||
) (*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
|
||||
castType := r.getCastType()
|
||||
@@ -459,7 +486,8 @@ func (r *JobRepository) AddMetricHistograms(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
metrics []string,
|
||||
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
|
||||
stat *model.JobsStatistics,
|
||||
) (*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
|
||||
// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
|
||||
@@ -491,8 +519,8 @@ func (r *JobRepository) AddMetricHistograms(
|
||||
func (r *JobRepository) jobsStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
value string,
|
||||
filters []*model.JobFilter) ([]*model.HistoPoint, error) {
|
||||
|
||||
filters []*model.JobFilter,
|
||||
) ([]*model.HistoPoint, error) {
|
||||
start := time.Now()
|
||||
query, qerr := SecurityCheck(ctx,
|
||||
sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
||||
@@ -528,36 +556,20 @@ func (r *JobRepository) jobsStatisticsHistogram(
|
||||
func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
metric string,
|
||||
filters []*model.JobFilter) (*model.MetricHistoPoints, error) {
|
||||
|
||||
var dbMetric string
|
||||
switch metric {
|
||||
case "cpu_load":
|
||||
dbMetric = "load_avg"
|
||||
case "flops_any":
|
||||
dbMetric = "flops_any_avg"
|
||||
case "mem_bw":
|
||||
dbMetric = "mem_bw_avg"
|
||||
case "mem_used":
|
||||
dbMetric = "mem_used_max"
|
||||
case "net_bw":
|
||||
dbMetric = "net_bw_avg"
|
||||
case "file_bw":
|
||||
dbMetric = "file_bw_avg"
|
||||
default:
|
||||
return nil, fmt.Errorf("%s not implemented", metric)
|
||||
}
|
||||
|
||||
filters []*model.JobFilter,
|
||||
) (*model.MetricHistoPoints, error) {
|
||||
// Get specific Peak or largest Peak
|
||||
var metricConfig *schema.MetricConfig
|
||||
var peak float64 = 0.0
|
||||
var unit string = ""
|
||||
var footprintStat string = ""
|
||||
|
||||
for _, f := range filters {
|
||||
if f.Cluster != nil {
|
||||
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
||||
peak = metricConfig.Peak
|
||||
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
|
||||
footprintStat = metricConfig.Footprint
|
||||
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
|
||||
}
|
||||
}
|
||||
@@ -572,23 +584,29 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
if unit == "" {
|
||||
unit = m.Unit.Prefix + m.Unit.Base
|
||||
}
|
||||
if footprintStat == "" {
|
||||
footprintStat = m.Footprint
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// log.Debugf("Metric %s: DB %s, Peak %f, Unit %s", metric, dbMetric, peak, unit)
|
||||
// log.Debugf("Metric %s, Peak %f, Unit %s, Aggregation %s", metric, peak, unit, aggreg)
|
||||
// Make bins, see https://jereze.com/code/sql-histogram/
|
||||
|
||||
start := time.Now()
|
||||
jm := fmt.Sprintf(`json_extract(footprint, "$.%s")`, (metric + "_" + footprintStat))
|
||||
|
||||
crossJoinQuery := sq.Select(
|
||||
fmt.Sprintf(`max(%s) as max`, dbMetric),
|
||||
fmt.Sprintf(`min(%s) as min`, dbMetric),
|
||||
fmt.Sprintf(`max(%s) as max`, jm),
|
||||
fmt.Sprintf(`min(%s) as min`, jm),
|
||||
).From("job").Where(
|
||||
fmt.Sprintf(`%s is not null`, dbMetric),
|
||||
"JSON_VALID(footprint)",
|
||||
).Where(
|
||||
fmt.Sprintf(`%s <= %f`, dbMetric, peak),
|
||||
fmt.Sprintf(`%s is not null`, jm),
|
||||
).Where(
|
||||
fmt.Sprintf(`%s <= %f`, jm, peak),
|
||||
)
|
||||
|
||||
crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery)
|
||||
@@ -607,16 +625,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
}
|
||||
|
||||
bins := 10
|
||||
binQuery := fmt.Sprintf(`CAST( (case when job.%s = value.max then value.max*0.999999999 else job.%s end - value.min) / (value.max - value.min) * %d as INTEGER )`, dbMetric, dbMetric, bins)
|
||||
binQuery := fmt.Sprintf(`CAST( (case when %s = value.max
|
||||
then value.max*0.999999999 else %s end - value.min) / (value.max -
|
||||
value.min) * %d as INTEGER )`, jm, jm, bins)
|
||||
|
||||
mainQuery := sq.Select(
|
||||
fmt.Sprintf(`%s + 1 as bin`, binQuery),
|
||||
fmt.Sprintf(`count(job.%s) as count`, dbMetric),
|
||||
fmt.Sprintf(`count(%s) as count`, jm),
|
||||
fmt.Sprintf(`CAST(((value.max / %d) * (%s )) as INTEGER ) as min`, bins, binQuery),
|
||||
fmt.Sprintf(`CAST(((value.max / %d) * (%s + 1 )) as INTEGER ) as max`, bins, binQuery),
|
||||
).From("job").CrossJoin(
|
||||
fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), crossJoinQueryArgs...,
|
||||
).Where(fmt.Sprintf(`job.%s is not null and job.%s <= %f`, dbMetric, dbMetric, peak))
|
||||
).Where(fmt.Sprintf(`%s is not null and %s <= %f`, jm, jm, peak))
|
||||
|
||||
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
|
||||
|
||||
@@ -641,14 +661,14 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
for rows.Next() {
|
||||
point := model.MetricHistoPoint{}
|
||||
if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil {
|
||||
log.Warnf("Error while scanning rows for %s", metric)
|
||||
log.Warnf("Error while scanning rows for %s", jm)
|
||||
return nil, err // Totally bricks cc-backend if returned and if all metrics requested?
|
||||
}
|
||||
|
||||
points = append(points, &point)
|
||||
}
|
||||
|
||||
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Data: points}
|
||||
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Stat: &footprintStat, Data: points}
|
||||
|
||||
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
|
||||
return &result, nil
|
||||
@@ -657,8 +677,8 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
metrics []string,
|
||||
filters []*model.JobFilter) []*model.MetricHistoPoints {
|
||||
|
||||
filters []*model.JobFilter,
|
||||
) []*model.MetricHistoPoints {
|
||||
// Get Jobs
|
||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
||||
if err != nil {
|
||||
@@ -681,7 +701,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||
continue
|
||||
}
|
||||
|
||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
log.Errorf("Error while loading averages for histogram: %s", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -14,7 +16,14 @@ import (
|
||||
)
|
||||
|
||||
// Add the tag with id `tagId` to the job with the database id `jobId`.
|
||||
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
func (r *JobRepository) AddTag(ctx context.Context, job int64, tag int64) ([]*schema.Tag, error) {
|
||||
|
||||
j, err := r.FindById(ctx, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
@@ -23,49 +32,62 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
tags, err := r.GetTags(ctx, &job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, tags)
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
}
|
||||
|
||||
// Removes a tag from a job
|
||||
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
|
||||
func (r *JobRepository) RemoveTag(ctx context.Context, job, tag int64) ([]*schema.Tag, error) {
|
||||
|
||||
j, err := r.FindById(ctx, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tag)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error adding tag with %s: %v", s, err)
|
||||
log.Errorf("Error removing tag with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
tags, err := r.GetTags(ctx, &job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, tags)
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
}
|
||||
|
||||
// CreateTag creates a new tag with the specified type and name and returns its database id.
|
||||
func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64, err error) {
|
||||
q := sq.Insert("tag").Columns("tag_type", "tag_name").Values(tagType, tagName)
|
||||
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||
|
||||
// Default to "Global" scope if none defined
|
||||
if tagScope == "" {
|
||||
tagScope = "global"
|
||||
}
|
||||
|
||||
q := sq.Insert("tag").Columns("tag_type", "tag_name", "tag_scope").Values(tagType, tagName, tagScope)
|
||||
|
||||
res, err := q.RunWith(r.stmtCache).Exec()
|
||||
if err != nil {
|
||||
@@ -77,9 +99,10 @@ func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64,
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
|
||||
func (r *JobRepository) CountTags(ctx context.Context) (tags []schema.Tag, counts map[string]int, err error) {
|
||||
// Fetch all Tags in DB for Display in Frontend Tag-View
|
||||
tags = make([]schema.Tag, 0, 100)
|
||||
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name FROM tag")
|
||||
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -89,16 +112,38 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
if err = xrows.StructScan(&t); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
tags = append(tags, t)
|
||||
|
||||
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||
readable, err := r.checkScopeAuth(ctx, "read", t.Scope)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if readable {
|
||||
tags = append(tags, t)
|
||||
}
|
||||
}
|
||||
|
||||
q := sq.Select("t.tag_name, count(jt.tag_id)").
|
||||
user := GetUserFromContext(ctx)
|
||||
|
||||
// Query and Count Jobs with attached Tags
|
||||
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
|
||||
From("tag t").
|
||||
LeftJoin("jobtag jt ON t.id = jt.tag_id").
|
||||
GroupBy("t.tag_name")
|
||||
|
||||
// Handle Scope Filtering
|
||||
scopeList := "\"global\""
|
||||
if user != nil {
|
||||
scopeList += ",\"" + user.Username + "\""
|
||||
}
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
scopeList += ",\"admin\""
|
||||
}
|
||||
q = q.Where("t.tag_scope IN (" + scopeList + ")")
|
||||
|
||||
// Handle Job Ownership
|
||||
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
|
||||
log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
|
||||
// log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
|
||||
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
|
||||
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
|
||||
// Build ("project1", "project2", ...) list of variable length directly in SQL string
|
||||
@@ -115,29 +160,45 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
counts = make(map[string]int)
|
||||
for rows.Next() {
|
||||
var tagName string
|
||||
var tagId int
|
||||
var count int
|
||||
if err = rows.Scan(&tagName, &count); err != nil {
|
||||
if err = rows.Scan(&tagName, &tagId, &count); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
counts[tagName] = count
|
||||
// Use tagId as second Map-Key component to differentiate tags with identical names
|
||||
counts[fmt.Sprint(tagName, tagId)] = count
|
||||
}
|
||||
err = rows.Err()
|
||||
|
||||
return
|
||||
return tags, counts, err
|
||||
}
|
||||
|
||||
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
|
||||
// If such a tag does not yet exist, it is created.
|
||||
func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName string) (tagId int64, err error) {
|
||||
tagId, exists := r.TagId(tagType, tagName)
|
||||
func (r *JobRepository) AddTagOrCreate(ctx context.Context, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||
|
||||
// Default to "Global" scope if none defined
|
||||
if tagScope == "" {
|
||||
tagScope = "global"
|
||||
}
|
||||
|
||||
writable, err := r.checkScopeAuth(ctx, "write", tagScope)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if !writable {
|
||||
return 0, fmt.Errorf("cannot write tag scope with current authorization")
|
||||
}
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName)
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := r.AddTag(jobId, tagId); err != nil {
|
||||
if _, err := r.AddTag(ctx, jobId, tagId); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -145,19 +206,19 @@ func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName stri
|
||||
}
|
||||
|
||||
// TagId returns the database id of the tag with the specified type and name.
|
||||
func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) {
|
||||
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
||||
exists = true
|
||||
if err := sq.Select("id").From("tag").
|
||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).
|
||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
|
||||
exists = false
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GetTags returns a list of all tags if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name").From("tag")
|
||||
// GetTags returns a list of all scoped tags if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) GetTags(ctx context.Context, job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||
if job != nil {
|
||||
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||
}
|
||||
@@ -172,7 +233,41 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
tags := make([]*schema.Tag, 0)
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||
readable, err := r.checkScopeAuth(ctx, "read", tag.Scope)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if readable {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||
if job != nil {
|
||||
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error get tags with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags := make([]*schema.Tag, 0)
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
@@ -181,3 +276,60 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
|
||||
// Import has no scope ctx, only import from metafile to DB (No recursive archive update required), only returns err
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error adding tag on import with %s: %v", s, err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) checkScopeAuth(ctx context.Context, operation string, scope string) (pass bool, err error) {
|
||||
user := GetUserFromContext(ctx)
|
||||
if user != nil {
|
||||
switch {
|
||||
case operation == "write" && scope == "admin":
|
||||
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case operation == "write" && scope == "global":
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case operation == "write" && scope == user.Username:
|
||||
return true, nil
|
||||
case operation == "read" && scope == "admin":
|
||||
return user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}), nil
|
||||
case operation == "read" && scope == "global":
|
||||
return true, nil
|
||||
case operation == "read" && scope == user.Username:
|
||||
return true, nil
|
||||
default:
|
||||
if operation == "read" || operation == "write" {
|
||||
// No acceptable scope: deny tag
|
||||
return false, nil
|
||||
} else {
|
||||
return false, fmt.Errorf("error while checking tag operation auth: unknown operation (%s)", operation)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false, fmt.Errorf("error while checking tag operation auth: no user in context")
|
||||
}
|
||||
}
|
||||
|
||||
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
@@ -6,7 +6,6 @@ package repository
|
||||
|
||||
import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
@@ -18,20 +17,12 @@ type Transaction struct {
|
||||
func (r *JobRepository) TransactionInit() (*Transaction, error) {
|
||||
var err error
|
||||
t := new(Transaction)
|
||||
// Inserts are bundled into transactions because in sqlite,
|
||||
// that speeds up inserts A LOT.
|
||||
|
||||
t.tx, err = r.DB.Beginx()
|
||||
if err != nil {
|
||||
log.Warn("Error while bundling transactions")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t.stmt, err = t.tx.PrepareNamed(NamedJobInsert)
|
||||
if err != nil {
|
||||
log.Warn("Error while preparing namedJobInsert")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
@@ -50,7 +41,6 @@ func (r *JobRepository) TransactionCommit(t *Transaction) error {
|
||||
return err
|
||||
}
|
||||
|
||||
t.stmt = t.tx.NamedStmt(t.stmt)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -59,14 +49,17 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
|
||||
log.Warn("Error while committing SQL transactions")
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, error) {
|
||||
res, err := t.stmt.Exec(job)
|
||||
func (r *JobRepository) TransactionAddNamed(
|
||||
t *Transaction,
|
||||
query string,
|
||||
args ...interface{},
|
||||
) (int64, error) {
|
||||
res, err := t.tx.NamedExec(query, args)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
log.Errorf("Named Exec failed: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -79,26 +72,19 @@ func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, e
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionAddTag(t *Transaction, tag *schema.Tag) (int64, error) {
|
||||
res, err := t.tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
|
||||
|
||||
res, err := t.tx.Exec(query, args...)
|
||||
if err != nil {
|
||||
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
|
||||
return 0, err
|
||||
}
|
||||
tagId, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
log.Errorf("TransactionAdd(), Exec() Error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return tagId, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionSetTag(t *Transaction, jobId int64, tagId int64) error {
|
||||
if _, err := t.tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, jobId, tagId); err != nil {
|
||||
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", jobId, tagId)
|
||||
return err
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return nil
|
||||
return id, nil
|
||||
}
|
||||
|
||||
@@ -72,7 +72,6 @@ func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
||||
}
|
||||
|
||||
func (r *UserRepository) GetLdapUsernames() ([]string, error) {
|
||||
|
||||
var users []string
|
||||
rows, err := r.DB.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
||||
if err != nil {
|
||||
@@ -131,8 +130,28 @@ func (r *UserRepository) AddUser(user *schema.User) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *UserRepository) DelUser(username string) error {
|
||||
func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
|
||||
// user contains updated info, apply to dbuser
|
||||
// TODO: Discuss updatable fields
|
||||
if dbUser.Name != user.Name {
|
||||
if _, err := sq.Update("user").Set("name", user.Name).Where("user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("error while updating name of user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Toggled until greenlit
|
||||
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
|
||||
// projects, _ := json.Marshal(user.Projects)
|
||||
// if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *UserRepository) DelUser(username string) error {
|
||||
_, err := r.DB.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting user '%s' from DB", username)
|
||||
@@ -143,7 +162,6 @@ func (r *UserRepository) DelUser(username string) error {
|
||||
}
|
||||
|
||||
func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||
|
||||
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
|
||||
if specialsOnly {
|
||||
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
|
||||
@@ -186,8 +204,8 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||
func (r *UserRepository) AddRole(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
queryrole string) error {
|
||||
|
||||
queryrole string,
|
||||
) error {
|
||||
newRole := strings.ToLower(queryrole)
|
||||
user, err := r.GetUser(username)
|
||||
if err != nil {
|
||||
@@ -198,15 +216,15 @@ func (r *UserRepository) AddRole(
|
||||
exists, valid := user.HasValidRole(newRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
|
||||
return fmt.Errorf("supplied role is no valid option : %v", newRole)
|
||||
}
|
||||
if exists {
|
||||
return fmt.Errorf("User %v already has role %v", username, newRole)
|
||||
return fmt.Errorf("user %v already has role %v", username, newRole)
|
||||
}
|
||||
|
||||
roles, _ := json.Marshal(append(user.Roles, newRole))
|
||||
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while adding new role for user '%s'", user.Username)
|
||||
log.Errorf("error while adding new role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@@ -223,14 +241,14 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
exists, valid := user.HasValidRole(oldRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
|
||||
return fmt.Errorf("supplied role is no valid option : %v", oldRole)
|
||||
}
|
||||
if !exists {
|
||||
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
|
||||
return fmt.Errorf("role already deleted for user '%v': %v", username, oldRole)
|
||||
}
|
||||
|
||||
if oldRole == schema.GetRoleString(schema.RoleManager) && len(user.Projects) != 0 {
|
||||
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||
return fmt.Errorf("cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||
}
|
||||
|
||||
var newroles []string
|
||||
@@ -240,7 +258,7 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
}
|
||||
}
|
||||
|
||||
var mroles, _ = json.Marshal(newroles)
|
||||
mroles, _ := json.Marshal(newroles)
|
||||
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while removing role for user '%s'", user.Username)
|
||||
return err
|
||||
@@ -251,15 +269,15 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
func (r *UserRepository) AddProject(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
project string) error {
|
||||
|
||||
project string,
|
||||
) error {
|
||||
user, err := r.GetUser(username)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleManager) {
|
||||
return fmt.Errorf("user '%s' is not a manager!", username)
|
||||
return fmt.Errorf("user '%s' is not a manager", username)
|
||||
}
|
||||
|
||||
if user.HasProject(project) {
|
||||
@@ -281,11 +299,11 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleManager) {
|
||||
return fmt.Errorf("user '%#v' is not a manager!", username)
|
||||
return fmt.Errorf("user '%#v' is not a manager", username)
|
||||
}
|
||||
|
||||
if !user.HasProject(project) {
|
||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
|
||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match", username, project)
|
||||
}
|
||||
|
||||
var exists bool
|
||||
@@ -298,7 +316,7 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
||||
}
|
||||
}
|
||||
|
||||
if exists == true {
|
||||
if exists {
|
||||
var result interface{}
|
||||
if len(newprojects) == 0 {
|
||||
result = "[]"
|
||||
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
@@ -81,6 +82,9 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
||||
|
||||
func setupJobRoute(i InfoType, r *http.Request) InfoType {
|
||||
i["id"] = mux.Vars(r)["id"]
|
||||
if config.Keys.EmissionConstant != 0 {
|
||||
i["emission"] = config.Keys.EmissionConstant
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
@@ -128,24 +132,41 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
|
||||
|
||||
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
|
||||
jobRepo := repository.GetJobRepository()
|
||||
user := repository.GetUserFromContext(r.Context())
|
||||
|
||||
tags, counts, err := jobRepo.CountTags(user)
|
||||
tags, counts, err := jobRepo.CountTags(r.Context())
|
||||
tagMap := make(map[string][]map[string]interface{})
|
||||
if err != nil {
|
||||
log.Warnf("GetTags failed: %s", err.Error())
|
||||
i["tagmap"] = tagMap
|
||||
return i
|
||||
}
|
||||
|
||||
for _, tag := range tags {
|
||||
tagItem := map[string]interface{}{
|
||||
"id": tag.ID,
|
||||
"name": tag.Name,
|
||||
"count": counts[tag.Name],
|
||||
// Reduces displayed tags for unauth'd users
|
||||
userAuthlevel := repository.GetUserFromContext(r.Context()).GetAuthLevel()
|
||||
// Uses tag.ID as second Map-Key component to differentiate tags with identical names
|
||||
if userAuthlevel >= 4 { // Support+ : Show tags for all scopes, regardless of count
|
||||
for _, tag := range tags {
|
||||
tagItem := map[string]interface{}{
|
||||
"id": tag.ID,
|
||||
"name": tag.Name,
|
||||
"scope": tag.Scope,
|
||||
"count": counts[fmt.Sprint(tag.Name, tag.ID)],
|
||||
}
|
||||
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||
}
|
||||
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||
}
|
||||
} else if userAuthlevel < 4 && userAuthlevel >= 2 { // User+ : Show global and admin scope only if at least 1 tag used, private scope regardless of count
|
||||
for _, tag := range tags {
|
||||
tagCount := counts[fmt.Sprint(tag.Name, tag.ID)]
|
||||
if ((tag.Scope == "global" || tag.Scope == "admin") && tagCount >= 1) || (tag.Scope != "global" && tag.Scope != "admin") {
|
||||
tagItem := map[string]interface{}{
|
||||
"id": tag.ID,
|
||||
"name": tag.Name,
|
||||
"scope": tag.Scope,
|
||||
"count": tagCount,
|
||||
}
|
||||
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||
}
|
||||
}
|
||||
} // auth < 2 return nothing for this route
|
||||
|
||||
i["tagmap"] = tagMap
|
||||
return i
|
||||
}
|
||||
@@ -238,7 +259,7 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
}
|
||||
if query.Get("startTime") != "" {
|
||||
parts := strings.Split(query.Get("startTime"), "-")
|
||||
if len(parts) == 2 {
|
||||
if len(parts) == 2 { // Time in seconds, from - to
|
||||
a, e1 := strconv.ParseInt(parts[0], 10, 64)
|
||||
b, e2 := strconv.ParseInt(parts[1], 10, 64)
|
||||
if e1 == nil && e2 == nil {
|
||||
@@ -247,6 +268,10 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||
"to": time.Unix(b, 0).Format(time.RFC3339),
|
||||
}
|
||||
}
|
||||
} else { // named range
|
||||
filterPresets["startTime"] = map[string]string{
|
||||
"range": query.Get("startTime"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -277,12 +302,13 @@ func SetupRoutes(router *mux.Router, buildInfo web.Build) {
|
||||
availableRoles, _ := schema.GetValidRolesMap(user)
|
||||
|
||||
page := web.Page{
|
||||
Title: title,
|
||||
User: *user,
|
||||
Roles: availableRoles,
|
||||
Build: buildInfo,
|
||||
Config: conf,
|
||||
Infos: infos,
|
||||
Title: title,
|
||||
User: *user,
|
||||
Roles: availableRoles,
|
||||
Build: buildInfo,
|
||||
Config: conf,
|
||||
Resampling: config.Keys.EnableResampling,
|
||||
Infos: infos,
|
||||
}
|
||||
|
||||
if route.Filter {
|
||||
|
||||
41
internal/taskManager/compressionService.go
Normal file
41
internal/taskManager/compressionService.go
Normal file
@@ -0,0 +1,41 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterCompressionService(compressOlderThan int) {
|
||||
log.Info("Register compression service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(05, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
var jobs []*schema.Job
|
||||
var err error
|
||||
|
||||
ar := archive.GetHandle()
|
||||
startTime := time.Now().Unix() - int64(compressOlderThan*24*3600)
|
||||
lastTime := ar.CompressLast(startTime)
|
||||
if startTime == lastTime {
|
||||
log.Info("Compression Service - Complete archive run")
|
||||
jobs, err = jobRepo.FindJobsBetween(0, startTime)
|
||||
|
||||
} else {
|
||||
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for compression jobs: %v", err)
|
||||
}
|
||||
ar.Compress(jobs)
|
||||
}))
|
||||
}
|
||||
36
internal/taskManager/ldapSyncService.go
Normal file
36
internal/taskManager/ldapSyncService.go
Normal file
@@ -0,0 +1,36 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterLdapSyncService(ds string) {
|
||||
interval, err := parseDuration(ds)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v",
|
||||
ds)
|
||||
return
|
||||
}
|
||||
|
||||
auth := auth.GetAuthInstance()
|
||||
|
||||
log.Info("Register LDAP sync service")
|
||||
s.NewJob(gocron.DurationJob(interval),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
t := time.Now()
|
||||
log.Printf("ldap sync started at %s", t.Format(time.RFC3339))
|
||||
if err := auth.LdapAuth.Sync(); err != nil {
|
||||
log.Errorf("ldap sync failed: %s", err.Error())
|
||||
}
|
||||
log.Print("ldap sync done")
|
||||
}))
|
||||
}
|
||||
67
internal/taskManager/retentionService.go
Normal file
67
internal/taskManager/retentionService.go
Normal file
@@ -0,0 +1,67 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterRetentionDeleteService(age int, includeDB bool) {
|
||||
log.Info("Register retention delete service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
startTime := time.Now().Unix() - int64(age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().CleanUp(jobs)
|
||||
|
||||
if includeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %s", err.Error())
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
func RegisterRetentionMoveService(age int, includeDB bool, location string) {
|
||||
log.Info("Register retention move service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
startTime := time.Now().Unix() - int64(age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().Move(jobs, location)
|
||||
|
||||
if includeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %v", err)
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %v", err)
|
||||
}
|
||||
}
|
||||
}))
|
||||
}
|
||||
27
internal/taskManager/stopJobsExceedTime.go
Normal file
27
internal/taskManager/stopJobsExceedTime.go
Normal file
@@ -0,0 +1,27 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterStopJobsExceedTime() {
|
||||
log.Info("Register undead jobs service")
|
||||
|
||||
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(03, 0, 0))),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
||||
}
|
||||
runtime.GC()
|
||||
}))
|
||||
}
|
||||
90
internal/taskManager/taskManager.go
Normal file
90
internal/taskManager/taskManager.go
Normal file
@@ -0,0 +1,90 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
s gocron.Scheduler
|
||||
jobRepo *repository.JobRepository
|
||||
)
|
||||
|
||||
func parseDuration(s string) (time.Duration, error) {
|
||||
interval, err := time.ParseDuration(s)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v",
|
||||
s)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if interval == 0 {
|
||||
log.Info("TaskManager: Sync interval is zero")
|
||||
}
|
||||
|
||||
return interval, nil
|
||||
}
|
||||
|
||||
func Start() {
|
||||
var err error
|
||||
jobRepo = repository.GetJobRepository()
|
||||
s, err = gocron.NewScheduler()
|
||||
if err != nil {
|
||||
log.Fatalf("Error while creating gocron scheduler: %s", err.Error())
|
||||
}
|
||||
|
||||
if config.Keys.StopJobsExceedingWalltime > 0 {
|
||||
RegisterStopJobsExceedTime()
|
||||
}
|
||||
|
||||
var cfg struct {
|
||||
Retention schema.Retention `json:"retention"`
|
||||
Compression int `json:"compression"`
|
||||
}
|
||||
cfg.Retention.IncludeDB = true
|
||||
|
||||
if err := json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
}
|
||||
|
||||
switch cfg.Retention.Policy {
|
||||
case "delete":
|
||||
RegisterRetentionDeleteService(
|
||||
cfg.Retention.Age,
|
||||
cfg.Retention.IncludeDB)
|
||||
case "move":
|
||||
RegisterRetentionMoveService(
|
||||
cfg.Retention.Age,
|
||||
cfg.Retention.IncludeDB,
|
||||
cfg.Retention.Location)
|
||||
}
|
||||
|
||||
if cfg.Compression > 0 {
|
||||
RegisterCompressionService(cfg.Compression)
|
||||
}
|
||||
|
||||
lc := config.Keys.LdapConfig
|
||||
|
||||
if lc != nil && lc.SyncInterval != "" {
|
||||
RegisterLdapSyncService(lc.SyncInterval)
|
||||
}
|
||||
|
||||
RegisterFootprintWorker()
|
||||
RegisterUpdateDurationWorker()
|
||||
|
||||
s.Start()
|
||||
}
|
||||
|
||||
func Shutdown() {
|
||||
s.Shutdown()
|
||||
}
|
||||
33
internal/taskManager/updateDurationService.go
Normal file
33
internal/taskManager/updateDurationService.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterUpdateDurationWorker() {
|
||||
var frequency string
|
||||
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.DurationWorker != "" {
|
||||
frequency = config.Keys.CronFrequency.DurationWorker
|
||||
} else {
|
||||
frequency = "5m"
|
||||
}
|
||||
d, _ := time.ParseDuration(frequency)
|
||||
log.Infof("Register Duration Update service with %s interval", frequency)
|
||||
|
||||
s.NewJob(gocron.DurationJob(d),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
start := time.Now()
|
||||
log.Printf("Update duration started at %s", start.Format(time.RFC3339))
|
||||
jobRepo.UpdateDuration()
|
||||
log.Printf("Update duration is done and took %s", time.Since(start))
|
||||
}))
|
||||
}
|
||||
143
internal/taskManager/updateFootprintService.go
Normal file
143
internal/taskManager/updateFootprintService.go
Normal file
@@ -0,0 +1,143 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package taskManager
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/go-co-op/gocron/v2"
|
||||
)
|
||||
|
||||
func RegisterFootprintWorker() {
|
||||
var frequency string
|
||||
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.FootprintWorker != "" {
|
||||
frequency = config.Keys.CronFrequency.FootprintWorker
|
||||
} else {
|
||||
frequency = "10m"
|
||||
}
|
||||
d, _ := time.ParseDuration(frequency)
|
||||
log.Infof("Register Footprint Update service with %s interval", frequency)
|
||||
|
||||
s.NewJob(gocron.DurationJob(d),
|
||||
gocron.NewTask(
|
||||
func() {
|
||||
s := time.Now()
|
||||
c := 0
|
||||
ce := 0
|
||||
cl := 0
|
||||
log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
|
||||
|
||||
t, err := jobRepo.TransactionInit()
|
||||
if err != nil {
|
||||
log.Errorf("Failed TransactionInit %v", err)
|
||||
}
|
||||
|
||||
for _, cluster := range archive.Clusters {
|
||||
jobs, err := jobRepo.FindRunningJobs(cluster.Name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||
|
||||
for _, job := range jobs {
|
||||
log.Debugf("Try job %d", job.JobID)
|
||||
cl++
|
||||
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background(), 0) // 0 Resolution-Value retrieves highest res
|
||||
if err != nil {
|
||||
log.Errorf("Error wile loading job data for footprint update: %v", err)
|
||||
ce++
|
||||
continue
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// This should never happen ?
|
||||
ce++
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
// Add values rounded to 2 digits
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
},
|
||||
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
|
||||
Min: (math.Round(min*100) / 100),
|
||||
Max: (math.Round(max*100) / 100),
|
||||
}
|
||||
}
|
||||
|
||||
// Init UpdateBuilder
|
||||
stmt := sq.Update("job")
|
||||
// Add SET queries
|
||||
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
|
||||
if err != nil {
|
||||
log.Errorf("Update job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
|
||||
ce++
|
||||
continue
|
||||
}
|
||||
stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta)
|
||||
if err != nil {
|
||||
log.Errorf("Update job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
|
||||
ce++
|
||||
continue
|
||||
}
|
||||
// Add WHERE Filter
|
||||
stmt = stmt.Where("job.id = ?", job.ID)
|
||||
|
||||
query, args, err := stmt.ToSql()
|
||||
if err != nil {
|
||||
log.Errorf("Failed in ToSQL conversion: %v", err)
|
||||
ce++
|
||||
continue
|
||||
}
|
||||
|
||||
// Args: JSON, JSON, ENERGY, JOBID
|
||||
jobRepo.TransactionAdd(t, query, args...)
|
||||
// if err := jobRepo.Execute(stmt); err != nil {
|
||||
// log.Errorf("Update job footprint (dbid: %d) failed at db execute: %s", job.ID, err.Error())
|
||||
// continue
|
||||
// }
|
||||
c++
|
||||
log.Debugf("Finish Job %d", job.JobID)
|
||||
}
|
||||
jobRepo.TransactionCommit(t)
|
||||
log.Debugf("Finish Cluster %s", cluster.Name)
|
||||
}
|
||||
jobRepo.TransactionEnd(t)
|
||||
log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
|
||||
}))
|
||||
}
|
||||
@@ -4,7 +4,13 @@
|
||||
// license that can be found in the LICENSE file.
|
||||
package util
|
||||
|
||||
import "golang.org/x/exp/constraints"
|
||||
import (
|
||||
"golang.org/x/exp/constraints"
|
||||
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
)
|
||||
|
||||
func Min[T constraints.Ordered](a, b T) T {
|
||||
if a < b {
|
||||
@@ -19,3 +25,36 @@ func Max[T constraints.Ordered](a, b T) T {
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func sortedCopy(input []float64) []float64 {
|
||||
sorted := make([]float64, len(input))
|
||||
copy(sorted, input)
|
||||
sort.Float64s(sorted)
|
||||
return sorted
|
||||
}
|
||||
|
||||
func Mean(input []float64) (float64, error) {
|
||||
if len(input) == 0 {
|
||||
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
|
||||
}
|
||||
sum := 0.0
|
||||
for _, n := range input {
|
||||
sum += n
|
||||
}
|
||||
return sum / float64(len(input)), nil
|
||||
}
|
||||
|
||||
func Median(input []float64) (median float64, err error) {
|
||||
c := sortedCopy(input)
|
||||
// Even numbers: add the two middle numbers, divide by two (use mean function)
|
||||
// Odd numbers: Use the middle number
|
||||
l := len(c)
|
||||
if l == 0 {
|
||||
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
|
||||
} else if l%2 == 0 {
|
||||
median, _ = Mean(c[l/2-1 : l/2+1])
|
||||
} else {
|
||||
median = c[l/2]
|
||||
}
|
||||
return median, nil
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ package archive
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
@@ -53,40 +54,48 @@ type JobContainer struct {
|
||||
}
|
||||
|
||||
var (
|
||||
initOnce sync.Once
|
||||
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||
ar ArchiveBackend
|
||||
useArchive bool
|
||||
)
|
||||
|
||||
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||
useArchive = !disableArchive
|
||||
var err error
|
||||
|
||||
var cfg struct {
|
||||
Kind string `json:"kind"`
|
||||
}
|
||||
initOnce.Do(func() {
|
||||
useArchive = !disableArchive
|
||||
|
||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
return err
|
||||
}
|
||||
var cfg struct {
|
||||
Kind string `json:"kind"`
|
||||
}
|
||||
|
||||
switch cfg.Kind {
|
||||
case "file":
|
||||
ar = &FsArchive{}
|
||||
// case "s3":
|
||||
// ar = &S3Archive{}
|
||||
default:
|
||||
return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
|
||||
}
|
||||
if err = json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
return
|
||||
}
|
||||
|
||||
version, err := ar.Init(rawConfig)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing archiveBackend")
|
||||
return err
|
||||
}
|
||||
log.Infof("Load archive version %d", version)
|
||||
switch cfg.Kind {
|
||||
case "file":
|
||||
ar = &FsArchive{}
|
||||
// case "s3":
|
||||
// ar = &S3Archive{}
|
||||
default:
|
||||
err = fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
|
||||
}
|
||||
|
||||
return initClusterConfig()
|
||||
var version uint64
|
||||
version, err = ar.Init(rawConfig)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing archiveBackend")
|
||||
return
|
||||
}
|
||||
log.Infof("Load archive version %d", version)
|
||||
|
||||
err = initClusterConfig()
|
||||
})
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func GetHandle() ArchiveBackend {
|
||||
@@ -162,8 +171,9 @@ func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
||||
jobMeta.Tags = make([]*schema.Tag, 0)
|
||||
for _, tag := range tags {
|
||||
jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
|
||||
Name: tag.Name,
|
||||
Type: tag.Type,
|
||||
Name: tag.Name,
|
||||
Type: tag.Type,
|
||||
Scope: tag.Scope,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -12,13 +12,16 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
var Clusters []*schema.Cluster
|
||||
var nodeLists map[string]map[string]NodeList
|
||||
var (
|
||||
Clusters []*schema.Cluster
|
||||
GlobalMetricList []*schema.GlobalMetricListItem
|
||||
nodeLists map[string]map[string]NodeList
|
||||
)
|
||||
|
||||
func initClusterConfig() error {
|
||||
|
||||
Clusters = []*schema.Cluster{}
|
||||
nodeLists = map[string]map[string]NodeList{}
|
||||
metricLookup := make(map[string]schema.GlobalMetricListItem)
|
||||
|
||||
for _, c := range ar.GetClusters() {
|
||||
|
||||
@@ -49,6 +52,59 @@ func initClusterConfig() error {
|
||||
if !mc.Scope.Valid() {
|
||||
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
|
||||
}
|
||||
|
||||
ml, ok := metricLookup[mc.Name]
|
||||
if !ok {
|
||||
metricLookup[mc.Name] = schema.GlobalMetricListItem{
|
||||
Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
|
||||
}
|
||||
ml = metricLookup[mc.Name]
|
||||
}
|
||||
availability := schema.ClusterSupport{Cluster: cluster.Name}
|
||||
scLookup := make(map[string]*schema.SubClusterConfig)
|
||||
|
||||
for _, scc := range mc.SubClusters {
|
||||
scLookup[scc.Name] = scc
|
||||
}
|
||||
|
||||
for _, sc := range cluster.SubClusters {
|
||||
newMetric := mc
|
||||
newMetric.SubClusters = nil
|
||||
|
||||
if cfg, ok := scLookup[sc.Name]; ok {
|
||||
if !cfg.Remove {
|
||||
availability.SubClusters = append(availability.SubClusters, sc.Name)
|
||||
newMetric.Peak = cfg.Peak
|
||||
newMetric.Normal = cfg.Normal
|
||||
newMetric.Caution = cfg.Caution
|
||||
newMetric.Alert = cfg.Alert
|
||||
newMetric.Footprint = cfg.Footprint
|
||||
newMetric.Energy = cfg.Energy
|
||||
newMetric.LowerIsBetter = cfg.LowerIsBetter
|
||||
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
|
||||
|
||||
if newMetric.Footprint != "" {
|
||||
sc.Footprint = append(sc.Footprint, newMetric.Name)
|
||||
ml.Footprint = newMetric.Footprint
|
||||
}
|
||||
if newMetric.Energy != "" {
|
||||
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
availability.SubClusters = append(availability.SubClusters, sc.Name)
|
||||
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
|
||||
|
||||
if newMetric.Footprint != "" {
|
||||
sc.Footprint = append(sc.Footprint, newMetric.Name)
|
||||
}
|
||||
if newMetric.Energy != "" {
|
||||
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
ml.Availability = append(metricLookup[mc.Name].Availability, availability)
|
||||
metricLookup[mc.Name] = ml
|
||||
}
|
||||
|
||||
Clusters = append(Clusters, cluster)
|
||||
@@ -67,11 +123,14 @@ func initClusterConfig() error {
|
||||
}
|
||||
}
|
||||
|
||||
for _, ml := range metricLookup {
|
||||
GlobalMetricList = append(GlobalMetricList, &ml)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func GetCluster(cluster string) *schema.Cluster {
|
||||
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
return c
|
||||
@@ -90,11 +149,10 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("Subcluster '%v' not found for cluster '%v', or cluster '%v' not configured!", subcluster, cluster, cluster)
|
||||
return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
|
||||
}
|
||||
|
||||
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
||||
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
for _, m := range c.MetricConfig {
|
||||
@@ -110,7 +168,6 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
||||
// AssignSubCluster sets the `job.subcluster` property of the job based
|
||||
// on its cluster and resources.
|
||||
func AssignSubCluster(job *schema.BaseJob) error {
|
||||
|
||||
cluster := GetCluster(job.Cluster)
|
||||
if cluster == nil {
|
||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
|
||||
@@ -146,7 +203,6 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
||||
}
|
||||
|
||||
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||
|
||||
for sc, nl := range nodeLists[cluster] {
|
||||
if nl != nil && nl.Contains(hostname) {
|
||||
return sc, nil
|
||||
@@ -164,3 +220,13 @@ func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||
|
||||
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
|
||||
}
|
||||
|
||||
func MetricIndex(mc []schema.MetricConfig, name string) (int, error) {
|
||||
for i, m := range mc {
|
||||
if m.Name == name {
|
||||
return i, nil
|
||||
}
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("unknown metric name %s", name)
|
||||
}
|
||||
|
||||
39
pkg/archive/clusterConfig_test.go
Normal file
39
pkg/archive/clusterConfig_test.go
Normal file
@@ -0,0 +1,39 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archive_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
)
|
||||
|
||||
func TestClusterConfig(t *testing.T) {
|
||||
if err := archive.Init(json.RawMessage("{\"kind\": \"file\",\"path\": \"testdata/archive\"}"), false); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
sc, err := archive.GetSubCluster("fritz", "spr1tb")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// spew.Dump(sc.MetricConfig)
|
||||
if len(sc.Footprint) != 3 {
|
||||
t.Fail()
|
||||
}
|
||||
if len(sc.MetricConfig) != 15 {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
for _, metric := range sc.MetricConfig {
|
||||
if metric.LowerIsBetter && metric.Name != "mem_used" {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
// spew.Dump(archive.GlobalMetricList)
|
||||
// t.Fail()
|
||||
}
|
||||
@@ -30,6 +30,7 @@ func TestInitNoJson(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitNotExists(t *testing.T) {
|
||||
var fsa FsArchive
|
||||
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/job-archive\"}"))
|
||||
@@ -50,7 +51,7 @@ func TestInit(t *testing.T) {
|
||||
if version != 1 {
|
||||
t.Fail()
|
||||
}
|
||||
if len(fsa.clusters) != 1 || fsa.clusters[0] != "emmy" {
|
||||
if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
@@ -133,7 +134,6 @@ func TestLoadJobData(t *testing.T) {
|
||||
}
|
||||
|
||||
func BenchmarkLoadJobData(b *testing.B) {
|
||||
|
||||
tmpdir := b.TempDir()
|
||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||
util.CopyDir("./testdata/archive/", jobarchive)
|
||||
@@ -157,7 +157,6 @@ func BenchmarkLoadJobData(b *testing.B) {
|
||||
}
|
||||
|
||||
func BenchmarkLoadJobDataCompressed(b *testing.B) {
|
||||
|
||||
tmpdir := b.TempDir()
|
||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||
util.CopyDir("./testdata/archive/", jobarchive)
|
||||
|
||||
@@ -9,8 +9,8 @@ import (
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
|
||||
|
||||
2772
pkg/archive/testdata/archive/alex/cluster.json
vendored
Normal file
2772
pkg/archive/testdata/archive/alex/cluster.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2246
pkg/archive/testdata/archive/fritz/cluster.json
vendored
Normal file
2246
pkg/archive/testdata/archive/fritz/cluster.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
123
pkg/resampler/resampler.go
Normal file
123
pkg/resampler/resampler.go
Normal file
@@ -0,0 +1,123 @@
|
||||
package resampler
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
func SimpleResampler(data []schema.Float, old_frequency int64, new_frequency int64) ([]schema.Float, error) {
|
||||
if old_frequency == 0 || new_frequency == 0 {
|
||||
return nil, errors.New("either old or new frequency is set to 0")
|
||||
}
|
||||
|
||||
if new_frequency%old_frequency != 0 {
|
||||
return nil, errors.New("new sampling frequency should be multiple of the old frequency")
|
||||
}
|
||||
|
||||
var step int = int(new_frequency / old_frequency)
|
||||
var new_data_length = len(data) / step
|
||||
|
||||
if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
|
||||
return data, nil
|
||||
}
|
||||
|
||||
new_data := make([]schema.Float, new_data_length)
|
||||
|
||||
for i := 0; i < new_data_length; i++ {
|
||||
new_data[i] = data[i*step]
|
||||
}
|
||||
|
||||
return new_data, nil
|
||||
}
|
||||
|
||||
// Inspired by one of the algorithms from https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
|
||||
// Adapted from https://github.com/haoel/downsampling/blob/master/core/lttb.go
|
||||
func LargestTriangleThreeBucket(data []schema.Float, old_frequency int, new_frequency int) ([]schema.Float, int, error) {
|
||||
|
||||
if old_frequency == 0 || new_frequency == 0 {
|
||||
return data, old_frequency, nil
|
||||
}
|
||||
|
||||
if new_frequency%old_frequency != 0 {
|
||||
return nil, 0, errors.New(fmt.Sprintf("new sampling frequency : %d should be multiple of the old frequency : %d", new_frequency, old_frequency))
|
||||
}
|
||||
|
||||
var step int = int(new_frequency / old_frequency)
|
||||
var new_data_length = len(data) / step
|
||||
|
||||
if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
|
||||
return data, old_frequency, nil
|
||||
}
|
||||
|
||||
new_data := make([]schema.Float, 0, new_data_length)
|
||||
|
||||
// Bucket size. Leave room for start and end data points
|
||||
bucketSize := float64(len(data)-2) / float64(new_data_length-2)
|
||||
|
||||
new_data = append(new_data, data[0]) // Always add the first point
|
||||
|
||||
// We have 3 pointers represent for
|
||||
// > bucketLow - the current bucket's beginning location
|
||||
// > bucketMiddle - the current bucket's ending location,
|
||||
// also the beginning location of next bucket
|
||||
// > bucketHight - the next bucket's ending location.
|
||||
bucketLow := 1
|
||||
bucketMiddle := int(math.Floor(bucketSize)) + 1
|
||||
|
||||
var prevMaxAreaPoint int
|
||||
|
||||
for i := 0; i < new_data_length-2; i++ {
|
||||
|
||||
bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1
|
||||
if bucketHigh >= len(data)-1 {
|
||||
bucketHigh = len(data) - 2
|
||||
}
|
||||
|
||||
// Calculate point average for next bucket (containing c)
|
||||
avgPointX, avgPointY := calculateAverageDataPoint(data[bucketMiddle:bucketHigh+1], int64(bucketMiddle))
|
||||
|
||||
// Get the range for current bucket
|
||||
currBucketStart := bucketLow
|
||||
currBucketEnd := bucketMiddle
|
||||
|
||||
// Point a
|
||||
pointX := prevMaxAreaPoint
|
||||
pointY := data[prevMaxAreaPoint]
|
||||
|
||||
maxArea := -1.0
|
||||
|
||||
var maxAreaPoint int
|
||||
flag_ := 0
|
||||
for ; currBucketStart < currBucketEnd; currBucketStart++ {
|
||||
|
||||
area := calculateTriangleArea(schema.Float(pointX), pointY, avgPointX, avgPointY, schema.Float(currBucketStart), data[currBucketStart])
|
||||
if area > maxArea {
|
||||
maxArea = area
|
||||
maxAreaPoint = currBucketStart
|
||||
}
|
||||
if math.IsNaN(float64(avgPointY)) {
|
||||
flag_ = 1
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if flag_ == 1 {
|
||||
new_data = append(new_data, schema.NaN) // Pick this point from the bucket
|
||||
|
||||
} else {
|
||||
new_data = append(new_data, data[maxAreaPoint]) // Pick this point from the bucket
|
||||
}
|
||||
prevMaxAreaPoint = maxAreaPoint // This MaxArea point is the next's prevMAxAreaPoint
|
||||
|
||||
//move to the next window
|
||||
bucketLow = bucketMiddle
|
||||
bucketMiddle = bucketHigh
|
||||
}
|
||||
|
||||
new_data = append(new_data, data[len(data)-1]) // Always add last
|
||||
|
||||
return new_data, new_frequency, nil
|
||||
}
|
||||
35
pkg/resampler/util.go
Normal file
35
pkg/resampler/util.go
Normal file
@@ -0,0 +1,35 @@
|
||||
package resampler
|
||||
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
func calculateTriangleArea(paX, paY, pbX, pbY, pcX, pcY schema.Float) float64 {
|
||||
area := ((paX-pcX)*(pbY-paY) - (paX-pbX)*(pcY-paY)) * 0.5
|
||||
return math.Abs(float64(area))
|
||||
}
|
||||
|
||||
func calculateAverageDataPoint(points []schema.Float, xStart int64) (avgX schema.Float, avgY schema.Float) {
|
||||
flag := 0
|
||||
for _, point := range points {
|
||||
avgX += schema.Float(xStart)
|
||||
avgY += point
|
||||
xStart++
|
||||
if math.IsNaN(float64(point)) {
|
||||
flag = 1
|
||||
}
|
||||
}
|
||||
|
||||
l := schema.Float(len(points))
|
||||
|
||||
avgX /= l
|
||||
avgY /= l
|
||||
|
||||
if flag == 1 {
|
||||
return avgX, schema.NaN
|
||||
} else {
|
||||
return avgX, avgY
|
||||
}
|
||||
}
|
||||
@@ -30,38 +30,47 @@ type MetricValue struct {
|
||||
}
|
||||
|
||||
type SubCluster struct {
|
||||
Name string `json:"name"`
|
||||
Nodes string `json:"nodes"`
|
||||
ProcessorType string `json:"processorType"`
|
||||
SocketsPerNode int `json:"socketsPerNode"`
|
||||
CoresPerSocket int `json:"coresPerSocket"`
|
||||
ThreadsPerCore int `json:"threadsPerCore"`
|
||||
FlopRateScalar MetricValue `json:"flopRateScalar"`
|
||||
FlopRateSimd MetricValue `json:"flopRateSimd"`
|
||||
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
|
||||
Topology Topology `json:"topology"`
|
||||
Name string `json:"name"`
|
||||
Nodes string `json:"nodes"`
|
||||
ProcessorType string `json:"processorType"`
|
||||
Topology Topology `json:"topology"`
|
||||
FlopRateScalar MetricValue `json:"flopRateScalar"`
|
||||
FlopRateSimd MetricValue `json:"flopRateSimd"`
|
||||
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
|
||||
MetricConfig []MetricConfig `json:"metricConfig,omitempty"`
|
||||
Footprint []string `json:"footprint,omitempty"`
|
||||
EnergyFootprint []string `json:"energyFootprint,omitempty"`
|
||||
SocketsPerNode int `json:"socketsPerNode"`
|
||||
CoresPerSocket int `json:"coresPerSocket"`
|
||||
ThreadsPerCore int `json:"threadsPerCore"`
|
||||
}
|
||||
|
||||
type SubClusterConfig struct {
|
||||
Name string `json:"name"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
Remove bool `json:"remove"`
|
||||
Name string `json:"name"`
|
||||
Footprint string `json:"footprint,omitempty"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
Remove bool `json:"remove"`
|
||||
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||
Energy string `json:"energy"`
|
||||
}
|
||||
|
||||
type MetricConfig struct {
|
||||
Name string `json:"name"`
|
||||
Unit Unit `json:"unit"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Aggregation string `json:"aggregation"`
|
||||
Timestep int `json:"timestep"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
||||
Unit Unit `json:"unit"`
|
||||
Name string `json:"name"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Aggregation string `json:"aggregation"`
|
||||
Footprint string `json:"footprint,omitempty"`
|
||||
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
||||
Peak float64 `json:"peak"`
|
||||
Normal float64 `json:"normal"`
|
||||
Caution float64 `json:"caution"`
|
||||
Alert float64 `json:"alert"`
|
||||
Timestep int `json:"timestep"`
|
||||
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||
Energy string `json:"energy"`
|
||||
}
|
||||
|
||||
type Cluster struct {
|
||||
@@ -70,14 +79,27 @@ type Cluster struct {
|
||||
SubClusters []*SubCluster `json:"subClusters"`
|
||||
}
|
||||
|
||||
type ClusterSupport struct {
|
||||
Cluster string `json:"cluster"`
|
||||
SubClusters []string `json:"subclusters"`
|
||||
}
|
||||
|
||||
type GlobalMetricListItem struct {
|
||||
Name string `json:"name"`
|
||||
Unit Unit `json:"unit"`
|
||||
Scope MetricScope `json:"scope"`
|
||||
Footprint string `json:"footprint,omitempty"`
|
||||
Availability []ClusterSupport `json:"availability"`
|
||||
}
|
||||
|
||||
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
|
||||
// hwthread is in that socket, add it to the list. If no hwthreads other than
|
||||
// those in the argument list are assigned to one of the sockets in the first
|
||||
// return value, return true as the second value. TODO: Optimize this, there
|
||||
// must be a more efficient way/algorithm.
|
||||
func (topo *Topology) GetSocketsFromHWThreads(
|
||||
hwthreads []int) (sockets []int, exclusive bool) {
|
||||
|
||||
hwthreads []int,
|
||||
) (sockets []int, exclusive bool) {
|
||||
socketsMap := map[int]int{}
|
||||
for _, hwthread := range hwthreads {
|
||||
for socket, hwthreadsInSocket := range topo.Socket {
|
||||
@@ -106,8 +128,8 @@ func (topo *Topology) GetSocketsFromHWThreads(
|
||||
// return value, return true as the second value. TODO: Optimize this, there
|
||||
// must be a more efficient way/algorithm.
|
||||
func (topo *Topology) GetCoresFromHWThreads(
|
||||
hwthreads []int) (cores []int, exclusive bool) {
|
||||
|
||||
hwthreads []int,
|
||||
) (cores []int, exclusive bool) {
|
||||
coresMap := map[int]int{}
|
||||
for _, hwthread := range hwthreads {
|
||||
for core, hwthreadsInCore := range topo.Core {
|
||||
@@ -136,8 +158,8 @@ func (topo *Topology) GetCoresFromHWThreads(
|
||||
// memory domains in the first return value, return true as the second value.
|
||||
// TODO: Optimize this, there must be a more efficient way/algorithm.
|
||||
func (topo *Topology) GetMemoryDomainsFromHWThreads(
|
||||
hwthreads []int) (memDoms []int, exclusive bool) {
|
||||
|
||||
hwthreads []int,
|
||||
) (memDoms []int, exclusive bool) {
|
||||
memDomsMap := map[int]int{}
|
||||
for _, hwthread := range hwthreads {
|
||||
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
|
||||
|
||||
@@ -24,8 +24,9 @@ type LdapConfig struct {
|
||||
}
|
||||
|
||||
type OpenIDConfig struct {
|
||||
Provider string `json:"provider"`
|
||||
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||
Provider string `json:"provider"`
|
||||
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
|
||||
}
|
||||
|
||||
type JWTAuthConfig struct {
|
||||
@@ -45,6 +46,9 @@ type JWTAuthConfig struct {
|
||||
|
||||
// Should an non-existent user be added to the DB based on the information in the token
|
||||
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||
|
||||
// Should an existent user be updated in the DB based on the information in the token
|
||||
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
|
||||
}
|
||||
|
||||
type IntRange struct {
|
||||
@@ -53,8 +57,9 @@ type IntRange struct {
|
||||
}
|
||||
|
||||
type TimeRange struct {
|
||||
From *time.Time `json:"from"`
|
||||
To *time.Time `json:"to"`
|
||||
Range string `json:"range,omitempty"` // Optional, e.g. 'last6h'
|
||||
From *time.Time `json:"from"`
|
||||
To *time.Time `json:"to"`
|
||||
}
|
||||
|
||||
type FilterRanges struct {
|
||||
@@ -76,6 +81,20 @@ type Retention struct {
|
||||
IncludeDB bool `json:"includeDB"`
|
||||
}
|
||||
|
||||
type ResampleConfig struct {
|
||||
// Trigger next zoom level at less than this many visible datapoints
|
||||
Trigger int `json:"trigger"`
|
||||
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
|
||||
Resolutions []int `json:"resolutions"`
|
||||
}
|
||||
|
||||
type CronFrequency struct {
|
||||
// Duration Update Worker [Defaults to '5m']
|
||||
DurationWorker string `json:"duration-worker"`
|
||||
// Metric- and Energy Footprint Update Worker [Defaults to '10m']
|
||||
FootprintWorker string `json:"footprint-worker"`
|
||||
}
|
||||
|
||||
// Format of the configuration (file). See below for the defaults.
|
||||
type ProgramConfig struct {
|
||||
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||
@@ -133,6 +152,9 @@ type ProgramConfig struct {
|
||||
// be provided! Most options here can be overwritten by the user.
|
||||
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
||||
|
||||
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
|
||||
EnableResampling *ResampleConfig `json:"enable-resampling"`
|
||||
|
||||
// Where to store MachineState files
|
||||
MachineStateDir string `json:"machine-state-dir"`
|
||||
|
||||
@@ -144,4 +166,11 @@ type ProgramConfig struct {
|
||||
|
||||
// Array of Clusters
|
||||
Clusters []*ClusterConfig `json:"clusters"`
|
||||
|
||||
// Energy Mix CO2 Emission Constant [g/kWh]
|
||||
// If entered, displays estimated CO2 emission for job based on jobs totalEnergy
|
||||
EmissionConstant int `json:"emission-constant"`
|
||||
|
||||
// Frequency of cron job workers
|
||||
CronFrequency *CronFrequency `json:"cron-frequency"`
|
||||
}
|
||||
|
||||
@@ -16,30 +16,33 @@ import (
|
||||
// Common subset of Job and JobMeta. Use one of those, not this type directly.
|
||||
|
||||
type BaseJob struct {
|
||||
// The unique identifier of a job
|
||||
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
||||
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
|
||||
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
|
||||
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
|
||||
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
|
||||
Partition string `json:"partition,omitempty" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
|
||||
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` // The unique identifier of an array job
|
||||
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
|
||||
// NumCores int32 `json:"numCores" db:"num_cores" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
|
||||
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
|
||||
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
|
||||
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
|
||||
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
|
||||
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"` // SMT threads used by job
|
||||
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
|
||||
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
|
||||
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
|
||||
Tags []*Tag `json:"tags,omitempty"` // List of tags
|
||||
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
|
||||
Resources []*Resource `json:"resources"` // Resources used by job
|
||||
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
|
||||
MetaData map[string]string `json:"metaData"` // Additional information about the job
|
||||
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
|
||||
Cluster string `json:"cluster" db:"cluster" example:"fritz"`
|
||||
SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
|
||||
Partition string `json:"partition,omitempty" db:"partition" example:"main"`
|
||||
Project string `json:"project" db:"project" example:"abcd200"`
|
||||
User string `json:"user" db:"user" example:"abcd100h"`
|
||||
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
|
||||
Tags []*Tag `json:"tags,omitempty"`
|
||||
RawEnergyFootprint []byte `json:"-" db:"energy_footprint"`
|
||||
RawFootprint []byte `json:"-" db:"footprint"`
|
||||
RawMetaData []byte `json:"-" db:"meta_data"`
|
||||
RawResources []byte `json:"-" db:"resources"`
|
||||
Resources []*Resource `json:"resources"`
|
||||
EnergyFootprint map[string]float64 `json:"energyFootprint"`
|
||||
Footprint map[string]float64 `json:"footprint"`
|
||||
MetaData map[string]string `json:"metaData"`
|
||||
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
|
||||
Energy float64 `json:"energy" db:"energy"`
|
||||
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
|
||||
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
|
||||
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
||||
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`
|
||||
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"`
|
||||
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
|
||||
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
|
||||
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
|
||||
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
|
||||
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
|
||||
}
|
||||
|
||||
// Job struct type
|
||||
@@ -49,19 +52,10 @@ type BaseJob struct {
|
||||
// Job model
|
||||
// @Description Information of a HPC job.
|
||||
type Job struct {
|
||||
// The unique identifier of a job in the database
|
||||
ID int64 `json:"id" db:"id"`
|
||||
StartTime time.Time `json:"startTime"`
|
||||
BaseJob
|
||||
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
|
||||
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
|
||||
MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64
|
||||
FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64
|
||||
MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64
|
||||
LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64
|
||||
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
|
||||
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
|
||||
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64
|
||||
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"` // FileDataVolTotal as Float64
|
||||
ID int64 `json:"id" db:"id"`
|
||||
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"`
|
||||
}
|
||||
|
||||
// JobMeta struct type
|
||||
@@ -88,11 +82,10 @@ type JobLinkResultList struct {
|
||||
// JobMeta model
|
||||
// @Description Meta data information of a HPC job.
|
||||
type JobMeta struct {
|
||||
// The unique identifier of a job in the database
|
||||
ID *int64 `json:"id,omitempty"`
|
||||
ID *int64 `json:"id,omitempty"`
|
||||
Statistics map[string]JobStatistics `json:"statistics"`
|
||||
BaseJob
|
||||
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0)
|
||||
Statistics map[string]JobStatistics `json:"statistics"` // Metric statistics of job
|
||||
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
|
||||
}
|
||||
|
||||
const (
|
||||
@@ -124,18 +117,19 @@ type JobStatistics struct {
|
||||
// Tag model
|
||||
// @Description Defines a tag using name and type.
|
||||
type Tag struct {
|
||||
ID int64 `json:"id" db:"id"` // The unique DB identifier of a tag
|
||||
Type string `json:"type" db:"tag_type" example:"Debug"` // Tag Type
|
||||
Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name
|
||||
Type string `json:"type" db:"tag_type" example:"Debug"`
|
||||
Name string `json:"name" db:"tag_name" example:"Testjob"`
|
||||
Scope string `json:"scope" db:"tag_scope" example:"global"`
|
||||
ID int64 `json:"id" db:"id"`
|
||||
}
|
||||
|
||||
// Resource model
|
||||
// @Description A resource used by a job
|
||||
type Resource struct {
|
||||
Hostname string `json:"hostname"` // Name of the host (= node)
|
||||
HWThreads []int `json:"hwthreads,omitempty"` // List of OS processor ids
|
||||
Accelerators []string `json:"accelerators,omitempty"` // List of of accelerator device ids
|
||||
Configuration string `json:"configuration,omitempty"` // The configuration options of the node
|
||||
Hostname string `json:"hostname"`
|
||||
Configuration string `json:"configuration,omitempty"`
|
||||
HWThreads []int `json:"hwthreads,omitempty"`
|
||||
Accelerators []string `json:"accelerators,omitempty"`
|
||||
}
|
||||
|
||||
type JobState string
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
"math"
|
||||
"sort"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
)
|
||||
|
||||
type JobData map[string]map[MetricScope]*JobMetric
|
||||
@@ -36,6 +38,7 @@ type MetricStatistics struct {
|
||||
|
||||
type StatsSeries struct {
|
||||
Mean []Float `json:"mean"`
|
||||
Median []Float `json:"median"`
|
||||
Min []Float `json:"min"`
|
||||
Max []Float `json:"max"`
|
||||
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
||||
@@ -121,6 +124,7 @@ func (jd *JobData) Size() int {
|
||||
if metric.StatisticsSeries != nil {
|
||||
n += len(metric.StatisticsSeries.Max)
|
||||
n += len(metric.StatisticsSeries.Mean)
|
||||
n += len(metric.StatisticsSeries.Median)
|
||||
n += len(metric.StatisticsSeries.Min)
|
||||
}
|
||||
|
||||
@@ -149,53 +153,74 @@ func (jm *JobMetric) AddStatisticsSeries() {
|
||||
}
|
||||
}
|
||||
|
||||
min, mean, max := make([]Float, n), make([]Float, n), make([]Float, n)
|
||||
// mean := make([]Float, n)
|
||||
min, median, max := make([]Float, n), make([]Float, n), make([]Float, n)
|
||||
i := 0
|
||||
for ; i < m; i++ {
|
||||
smin, ssum, smax := math.MaxFloat32, 0.0, -math.MaxFloat32
|
||||
seriesCount := len(jm.Series)
|
||||
// ssum := 0.0
|
||||
smin, smed, smax := math.MaxFloat32, make([]float64, seriesCount), -math.MaxFloat32
|
||||
notnan := 0
|
||||
for j := 0; j < len(jm.Series); j++ {
|
||||
for j := 0; j < seriesCount; j++ {
|
||||
x := float64(jm.Series[j].Data[i])
|
||||
if math.IsNaN(x) {
|
||||
continue
|
||||
}
|
||||
|
||||
notnan += 1
|
||||
ssum += x
|
||||
// ssum += x
|
||||
smed[j] = x
|
||||
smin = math.Min(smin, x)
|
||||
smax = math.Max(smax, x)
|
||||
}
|
||||
|
||||
if notnan < 3 {
|
||||
min[i] = NaN
|
||||
mean[i] = NaN
|
||||
// mean[i] = NaN
|
||||
median[i] = NaN
|
||||
max[i] = NaN
|
||||
} else {
|
||||
min[i] = Float(smin)
|
||||
mean[i] = Float(ssum / float64(notnan))
|
||||
// mean[i] = Float(ssum / float64(notnan))
|
||||
max[i] = Float(smax)
|
||||
|
||||
medianRaw, err := util.Median(smed)
|
||||
if err != nil {
|
||||
median[i] = NaN
|
||||
} else {
|
||||
median[i] = Float(medianRaw)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for ; i < n; i++ {
|
||||
min[i] = NaN
|
||||
mean[i] = NaN
|
||||
// mean[i] = NaN
|
||||
median[i] = NaN
|
||||
max[i] = NaN
|
||||
}
|
||||
|
||||
if smooth {
|
||||
for i := 2; i < len(mean)-2; i++ {
|
||||
for i := 2; i < len(median)-2; i++ {
|
||||
if min[i].IsNaN() {
|
||||
continue
|
||||
}
|
||||
|
||||
min[i] = (min[i-2] + min[i-1] + min[i] + min[i+1] + min[i+2]) / 5
|
||||
max[i] = (max[i-2] + max[i-1] + max[i] + max[i+1] + max[i+2]) / 5
|
||||
mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
|
||||
// mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
|
||||
// Reduce Median further
|
||||
smoothRaw := []float64{float64(median[i-2]), float64(median[i-1]), float64(median[i]), float64(median[i+1]), float64(median[i+2])}
|
||||
smoothMedian, err := util.Median(smoothRaw)
|
||||
if err != nil {
|
||||
median[i] = NaN
|
||||
} else {
|
||||
median[i] = Float(smoothMedian)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
jm.StatisticsSeries = &StatsSeries{Mean: mean, Min: min, Max: max}
|
||||
jm.StatisticsSeries = &StatsSeries{Median: median, Min: min, Max: max} // Mean: mean
|
||||
}
|
||||
|
||||
func (jd *JobData) AddNodeScope(metric string) bool {
|
||||
|
||||
@@ -1,284 +1,327 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedfs://cluster.schema.json",
|
||||
"title": "HPC cluster description",
|
||||
"description": "Meta data information of a HPC cluster",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "The unique identifier of a cluster",
|
||||
"type": "string"
|
||||
},
|
||||
"metricConfig": {
|
||||
"description": "Metric specifications",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "Metric name",
|
||||
"type": "string"
|
||||
},
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"scope": {
|
||||
"description": "Native measurement resolution",
|
||||
"type": "string"
|
||||
},
|
||||
"timestep": {
|
||||
"description": "Frequency of timeseries points",
|
||||
"type": "integer"
|
||||
},
|
||||
"aggregation": {
|
||||
"description": "How the metric is aggregated",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"sum",
|
||||
"avg"
|
||||
]
|
||||
},
|
||||
"peak": {
|
||||
"description": "Metric peak threshold (Upper metric limit)",
|
||||
"type": "number"
|
||||
},
|
||||
"normal": {
|
||||
"description": "Metric normal threshold",
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"description": "Metric caution threshold (Suspicious but does not require immediate action)",
|
||||
"type": "number"
|
||||
},
|
||||
"alert": {
|
||||
"description": "Metric alert threshold (Requires immediate action)",
|
||||
"type": "number"
|
||||
},
|
||||
"subClusters": {
|
||||
"description": "Array of cluster hardware partition metric thresholds",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "Hardware partition name",
|
||||
"type": "string"
|
||||
},
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"alert": {
|
||||
"type": "number"
|
||||
},
|
||||
"remove": {
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"unit",
|
||||
"scope",
|
||||
"timestep",
|
||||
"aggregation",
|
||||
"peak",
|
||||
"normal",
|
||||
"caution",
|
||||
"alert"
|
||||
]
|
||||
},
|
||||
"minItems": 1
|
||||
},
|
||||
"subClusters": {
|
||||
"description": "Array of cluster hardware partitions",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "Hardware partition name",
|
||||
"type": "string"
|
||||
},
|
||||
"processorType": {
|
||||
"description": "Processor type",
|
||||
"type": "string"
|
||||
},
|
||||
"socketsPerNode": {
|
||||
"description": "Number of sockets per node",
|
||||
"type": "integer"
|
||||
},
|
||||
"coresPerSocket": {
|
||||
"description": "Number of cores per socket",
|
||||
"type": "integer"
|
||||
},
|
||||
"threadsPerCore": {
|
||||
"description": "Number of SMT threads per core",
|
||||
"type": "integer"
|
||||
},
|
||||
"flopRateScalar": {
|
||||
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"description": "Theoretical node peak memory bandwidth in GB/s",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodes": {
|
||||
"description": "Node list expression",
|
||||
"type": "string"
|
||||
},
|
||||
"topology": {
|
||||
"description": "Node topology",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"node": {
|
||||
"description": "HwTread lists of node",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"socket": {
|
||||
"description": "HwTread lists of sockets",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memoryDomain": {
|
||||
"description": "HwTread lists of memory domains",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"die": {
|
||||
"description": "HwTread lists of dies",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"core": {
|
||||
"description": "HwTread lists of cores",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"accelerators": {
|
||||
"type": "array",
|
||||
"description": "List of of accelerator devices",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique device id"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The accelerator type",
|
||||
"enum": [
|
||||
"Nvidia GPU",
|
||||
"AMD GPU",
|
||||
"Intel GPU"
|
||||
]
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The accelerator model"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"type",
|
||||
"model"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node",
|
||||
"socket",
|
||||
"memoryDomain"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"nodes",
|
||||
"topology",
|
||||
"processorType",
|
||||
"socketsPerNode",
|
||||
"coresPerSocket",
|
||||
"threadsPerCore",
|
||||
"flopRateScalar",
|
||||
"flopRateSimd",
|
||||
"memoryBandwidth"
|
||||
]
|
||||
},
|
||||
"minItems": 1
|
||||
}
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedfs://cluster.schema.json",
|
||||
"title": "HPC cluster description",
|
||||
"description": "Meta data information of a HPC cluster",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "The unique identifier of a cluster",
|
||||
"type": "string"
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"metricConfig",
|
||||
"subClusters"
|
||||
]
|
||||
"metricConfig": {
|
||||
"description": "Metric specifications",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "Metric name",
|
||||
"type": "string"
|
||||
},
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"scope": {
|
||||
"description": "Native measurement resolution",
|
||||
"type": "string"
|
||||
},
|
||||
"timestep": {
|
||||
"description": "Frequency of timeseries points",
|
||||
"type": "integer"
|
||||
},
|
||||
"aggregation": {
|
||||
"description": "How the metric is aggregated",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"sum",
|
||||
"avg"
|
||||
]
|
||||
},
|
||||
"footprint": {
|
||||
"description": "Is it a footprint metric and what type",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"avg",
|
||||
"max",
|
||||
"min"
|
||||
]
|
||||
},
|
||||
"energy": {
|
||||
"description": "Is it used to calculate job energy",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"power",
|
||||
"energy"
|
||||
]
|
||||
},
|
||||
"lowerIsBetter": {
|
||||
"description": "Is lower better.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"peak": {
|
||||
"description": "Metric peak threshold (Upper metric limit)",
|
||||
"type": "number"
|
||||
},
|
||||
"normal": {
|
||||
"description": "Metric normal threshold",
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"description": "Metric caution threshold (Suspicious but does not require immediate action)",
|
||||
"type": "number"
|
||||
},
|
||||
"alert": {
|
||||
"description": "Metric alert threshold (Requires immediate action)",
|
||||
"type": "number"
|
||||
},
|
||||
"subClusters": {
|
||||
"description": "Array of cluster hardware partition metric thresholds",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "Hardware partition name",
|
||||
"type": "string"
|
||||
},
|
||||
"footprint": {
|
||||
"description": "Is it a footprint metric and what type. Overwrite global setting",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"avg",
|
||||
"max",
|
||||
"min"
|
||||
]
|
||||
},
|
||||
"energy": {
|
||||
"description": "Is it used to calculate job energy. Overwrite global",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"power",
|
||||
"energy"
|
||||
]
|
||||
},
|
||||
"lowerIsBetter": {
|
||||
"description": "Is lower better. Overwrite global",
|
||||
"type": "boolean"
|
||||
},
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"alert": {
|
||||
"type": "number"
|
||||
},
|
||||
"remove": {
|
||||
"description": "Remove this metric for this subcluster",
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"unit",
|
||||
"scope",
|
||||
"timestep",
|
||||
"aggregation",
|
||||
"peak",
|
||||
"normal",
|
||||
"caution",
|
||||
"alert"
|
||||
]
|
||||
},
|
||||
"minItems": 1
|
||||
},
|
||||
"subClusters": {
|
||||
"description": "Array of cluster hardware partitions",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"description": "Hardware partition name",
|
||||
"type": "string"
|
||||
},
|
||||
"processorType": {
|
||||
"description": "Processor type",
|
||||
"type": "string"
|
||||
},
|
||||
"socketsPerNode": {
|
||||
"description": "Number of sockets per node",
|
||||
"type": "integer"
|
||||
},
|
||||
"coresPerSocket": {
|
||||
"description": "Number of cores per socket",
|
||||
"type": "integer"
|
||||
},
|
||||
"threadsPerCore": {
|
||||
"description": "Number of SMT threads per core",
|
||||
"type": "integer"
|
||||
},
|
||||
"flopRateScalar": {
|
||||
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"description": "Theoretical node peak memory bandwidth in GB/s",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedfs://unit.schema.json"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodes": {
|
||||
"description": "Node list expression",
|
||||
"type": "string"
|
||||
},
|
||||
"topology": {
|
||||
"description": "Node topology",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"node": {
|
||||
"description": "HwTread lists of node",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"socket": {
|
||||
"description": "HwTread lists of sockets",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memoryDomain": {
|
||||
"description": "HwTread lists of memory domains",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"die": {
|
||||
"description": "HwTread lists of dies",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"core": {
|
||||
"description": "HwTread lists of cores",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"accelerators": {
|
||||
"type": "array",
|
||||
"description": "List of of accelerator devices",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique device id"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The accelerator type",
|
||||
"enum": [
|
||||
"Nvidia GPU",
|
||||
"AMD GPU",
|
||||
"Intel GPU"
|
||||
]
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The accelerator model"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"type",
|
||||
"model"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node",
|
||||
"socket",
|
||||
"memoryDomain"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"nodes",
|
||||
"topology",
|
||||
"processorType",
|
||||
"socketsPerNode",
|
||||
"coresPerSocket",
|
||||
"threadsPerCore",
|
||||
"flopRateScalar",
|
||||
"flopRateSimd",
|
||||
"memoryBandwidth"
|
||||
]
|
||||
},
|
||||
"minItems": 1
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"metricConfig",
|
||||
"subClusters"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -424,6 +424,27 @@
|
||||
"plot_general_colorscheme",
|
||||
"plot_list_selectedMetrics"
|
||||
]
|
||||
},
|
||||
"enable-resampling": {
|
||||
"description": "Enable dynamic zoom in frontend metric plots.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trigger": {
|
||||
"description": "Trigger next zoom level at less than this many visible datapoints.",
|
||||
"type": "integer"
|
||||
},
|
||||
"resolutions": {
|
||||
"description": "Array of resampling target resolutions, in seconds.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"trigger",
|
||||
"resolutions"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
|
||||
581
web/frontend/package-lock.json
generated
581
web/frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -7,25 +7,25 @@
|
||||
"dev": "rollup -c -w"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@rollup/plugin-commonjs": "^25.0.7",
|
||||
"@rollup/plugin-node-resolve": "^15.2.3",
|
||||
"@rollup/plugin-commonjs": "^25.0.8",
|
||||
"@rollup/plugin-node-resolve": "^15.3.0",
|
||||
"@rollup/plugin-terser": "^0.4.4",
|
||||
"@timohausmann/quadtree-js": "^1.2.6",
|
||||
"rollup": "^4.12.1",
|
||||
"rollup": "^4.24.0",
|
||||
"rollup-plugin-css-only": "^4.5.2",
|
||||
"rollup-plugin-svelte": "^7.1.6",
|
||||
"svelte": "^4.2.12"
|
||||
"rollup-plugin-svelte": "^7.2.2",
|
||||
"svelte": "^4.2.19"
|
||||
},
|
||||
"dependencies": {
|
||||
"@rollup/plugin-replace": "^5.0.5",
|
||||
"@sveltestrap/sveltestrap": "^6.2.6",
|
||||
"@urql/svelte": "^4.1.0",
|
||||
"chart.js": "^4.4.2",
|
||||
"@rollup/plugin-replace": "^5.0.7",
|
||||
"@sveltestrap/sveltestrap": "^6.2.7",
|
||||
"@urql/svelte": "^4.2.1",
|
||||
"chart.js": "^4.4.5",
|
||||
"date-fns": "^2.30.0",
|
||||
"graphql": "^16.8.1",
|
||||
"mathjs": "^12.4.0",
|
||||
"graphql": "^16.9.0",
|
||||
"mathjs": "^12.4.3",
|
||||
"svelte-chartjs": "^3.1.5",
|
||||
"uplot": "^1.6.30",
|
||||
"uplot": "^1.6.31",
|
||||
"wonka": "^6.3.4"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,11 @@
|
||||
<!--
|
||||
@component Main analysis view component
|
||||
|
||||
Properties:
|
||||
- `filterPresets Object`: Optional predefined filter values
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { init, convert2uplot } from "./utils.js";
|
||||
import { getContext, onMount } from "svelte";
|
||||
import {
|
||||
queryStore,
|
||||
@@ -15,14 +21,18 @@
|
||||
Table,
|
||||
Icon,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import Filters from "./filters/Filters.svelte";
|
||||
import PlotSelection from "./PlotSelection.svelte";
|
||||
import Histogram from "./plots/Histogram.svelte";
|
||||
import Pie, { colors } from "./plots/Pie.svelte";
|
||||
import { binsFromFootprint } from "./utils.js";
|
||||
import ScatterPlot from "./plots/Scatter.svelte";
|
||||
import PlotTable from "./PlotTable.svelte";
|
||||
import RooflineHeatmap from "./plots/RooflineHeatmap.svelte";
|
||||
import {
|
||||
init,
|
||||
convert2uplot,
|
||||
binsFromFootprint,
|
||||
} from "./generic/utils.js";
|
||||
import PlotSelection from "./analysis/PlotSelection.svelte";
|
||||
import Filters from "./generic/Filters.svelte";
|
||||
import PlotGrid from "./generic/PlotGrid.svelte";
|
||||
import Histogram from "./generic/plots/Histogram.svelte";
|
||||
import Pie, { colors } from "./generic/plots/Pie.svelte";
|
||||
import ScatterPlot from "./generic/plots/Scatter.svelte";
|
||||
import RooflineHeatmap from "./generic/plots/RooflineHeatmap.svelte";
|
||||
|
||||
const { query: initq } = init();
|
||||
|
||||
@@ -45,11 +55,13 @@
|
||||
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
||||
let jobFilters = [];
|
||||
let rooflineMaxY;
|
||||
let colWidth1, colWidth2, colWidth3, colWidth4;
|
||||
let colWidth1, colWidth2;
|
||||
let numBins = 50;
|
||||
let maxY = -1;
|
||||
|
||||
const initialized = getContext("initialized");
|
||||
const globalMetrics = getContext("globalMetrics");
|
||||
const ccconfig = getContext("cc-config");
|
||||
const metricConfig = getContext("metrics");
|
||||
|
||||
let metricsInHistograms = ccconfig.analysis_view_histogramMetrics,
|
||||
metricsInScatterplots = ccconfig.analysis_view_scatterPlotMetrics;
|
||||
@@ -268,10 +280,23 @@
|
||||
}
|
||||
}
|
||||
|
||||
let availableMetrics = [];
|
||||
let metricUnits = {};
|
||||
let metricScopes = {};
|
||||
function loadMetrics(isInitialized) {
|
||||
if (!isInitialized) return
|
||||
availableMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster.name))]
|
||||
for (let sm of availableMetrics) {
|
||||
metricUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
|
||||
metricScopes[sm.name] = sm?.scope
|
||||
}
|
||||
}
|
||||
|
||||
$: loadMetrics($initialized)
|
||||
$: updateEntityConfiguration(groupSelection.key);
|
||||
$: updateCategoryConfiguration(sortSelection.key);
|
||||
|
||||
onMount(() => filterComponent.update());
|
||||
onMount(() => filterComponent.updateFilters());
|
||||
</script>
|
||||
|
||||
<Row>
|
||||
@@ -280,12 +305,12 @@
|
||||
<Spinner />
|
||||
</Col>
|
||||
{/if}
|
||||
<Col xs="auto">
|
||||
<Col xs="auto" class="mb-2 mb-lg-0">
|
||||
{#if $initq.error}
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
{:else if cluster}
|
||||
<PlotSelection
|
||||
availableMetrics={cluster.metricConfig.map((mc) => mc.name)}
|
||||
availableMetrics={availableMetrics.map((av) => av.name)}
|
||||
bind:metricsInHistograms
|
||||
bind:metricsInScatterplots
|
||||
/>
|
||||
@@ -297,7 +322,7 @@
|
||||
{filterPresets}
|
||||
disableClusterSelection={true}
|
||||
startTimeQuickSelect={true}
|
||||
on:update={({ detail }) => {
|
||||
on:update-filters={({ detail }) => {
|
||||
jobFilters = detail.filters;
|
||||
}}
|
||||
/>
|
||||
@@ -430,7 +455,7 @@
|
||||
width={colWidth2}
|
||||
height={300}
|
||||
tiles={$rooflineQuery.data.rooflineHeatmap}
|
||||
cluster={cluster.subClusters.length == 1
|
||||
subCluster={cluster.subClusters.length == 1
|
||||
? cluster.subClusters[0]
|
||||
: null}
|
||||
maxY={rooflineMaxY}
|
||||
@@ -440,36 +465,30 @@
|
||||
{/if}
|
||||
</Col>
|
||||
<Col>
|
||||
<div bind:clientWidth={colWidth3}>
|
||||
{#key $statsQuery.data.stats[0].histDuration}
|
||||
<Histogram
|
||||
width={colWidth3}
|
||||
height={300}
|
||||
data={convert2uplot($statsQuery.data.stats[0].histDuration)}
|
||||
title="Duration Distribution"
|
||||
xlabel="Current Runtimes"
|
||||
xunit="Hours"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
{#key $statsQuery.data.stats[0].histDuration}
|
||||
<Histogram
|
||||
height={300}
|
||||
data={convert2uplot($statsQuery.data.stats[0].histDuration)}
|
||||
title="Duration Distribution"
|
||||
xlabel="Current Runtimes"
|
||||
xunit="Hours"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</Col>
|
||||
<Col>
|
||||
<div bind:clientWidth={colWidth4}>
|
||||
{#key $statsQuery.data.stats[0].histNumCores}
|
||||
<Histogram
|
||||
width={colWidth4}
|
||||
height={300}
|
||||
data={convert2uplot($statsQuery.data.stats[0].histNumCores)}
|
||||
title="Number of Cores Distribution"
|
||||
xlabel="Allocated Cores"
|
||||
xunit="Cores"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
{#key $statsQuery.data.stats[0].histNumCores}
|
||||
<Histogram
|
||||
height={300}
|
||||
data={convert2uplot($statsQuery.data.stats[0].histNumCores)}
|
||||
title="Number of Cores Distribution"
|
||||
xlabel="Allocated Cores"
|
||||
xunit="Cores"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</Col>
|
||||
</Row>
|
||||
{/if}
|
||||
@@ -498,15 +517,14 @@
|
||||
</Row>
|
||||
<Row>
|
||||
<Col>
|
||||
<PlotTable
|
||||
<PlotGrid
|
||||
let:item
|
||||
let:width
|
||||
renderFor="analysis"
|
||||
items={metricsInHistograms.map((metric) => ({
|
||||
metric,
|
||||
...binsFromFootprint(
|
||||
$footprintsQuery.data.footprints.timeWeights,
|
||||
metricConfig(cluster.name, metric)?.scope,
|
||||
metricScopes[metric],
|
||||
$footprintsQuery.data.footprints.metrics.find(
|
||||
(f) => f.metric == metric,
|
||||
).data,
|
||||
@@ -517,30 +535,14 @@
|
||||
>
|
||||
<Histogram
|
||||
data={convert2uplot(item.bins)}
|
||||
{width}
|
||||
height={250}
|
||||
usesBins={true}
|
||||
title="Average Distribution of '{item.metric}'"
|
||||
xlabel={`${item.metric} bin maximum ${
|
||||
(metricConfig(cluster.name, item.metric)?.unit?.prefix
|
||||
? "[" + metricConfig(cluster.name, item.metric)?.unit?.prefix
|
||||
: "") +
|
||||
(metricConfig(cluster.name, item.metric)?.unit?.base
|
||||
? metricConfig(cluster.name, item.metric)?.unit?.base + "]"
|
||||
: "")
|
||||
}`}
|
||||
xunit={`${
|
||||
(metricConfig(cluster.name, item.metric)?.unit?.prefix
|
||||
? metricConfig(cluster.name, item.metric)?.unit?.prefix
|
||||
: "") +
|
||||
(metricConfig(cluster.name, item.metric)?.unit?.base
|
||||
? metricConfig(cluster.name, item.metric)?.unit?.base
|
||||
: "")
|
||||
}`}
|
||||
xlabel={`${item.metric} bin maximum [${metricUnits[item.metric]}]`}
|
||||
xunit={`${metricUnits[item.metric]}`}
|
||||
ylabel="Normalized Hours"
|
||||
yunit="Hours"
|
||||
/>
|
||||
</PlotTable>
|
||||
</PlotGrid>
|
||||
</Col>
|
||||
</Row>
|
||||
<br />
|
||||
@@ -558,7 +560,7 @@
|
||||
</Row>
|
||||
<Row>
|
||||
<Col>
|
||||
<PlotTable
|
||||
<PlotGrid
|
||||
let:item
|
||||
let:width
|
||||
renderFor="analysis"
|
||||
@@ -578,27 +580,13 @@
|
||||
{width}
|
||||
height={250}
|
||||
color={"rgba(0, 102, 204, 0.33)"}
|
||||
xLabel={`${item.m1} [${
|
||||
(metricConfig(cluster.name, item.m1)?.unit?.prefix
|
||||
? metricConfig(cluster.name, item.m1)?.unit?.prefix
|
||||
: "") +
|
||||
(metricConfig(cluster.name, item.m1)?.unit?.base
|
||||
? metricConfig(cluster.name, item.m1)?.unit?.base
|
||||
: "")
|
||||
}]`}
|
||||
yLabel={`${item.m2} [${
|
||||
(metricConfig(cluster.name, item.m2)?.unit?.prefix
|
||||
? metricConfig(cluster.name, item.m2)?.unit?.prefix
|
||||
: "") +
|
||||
(metricConfig(cluster.name, item.m2)?.unit?.base
|
||||
? metricConfig(cluster.name, item.m2)?.unit?.base
|
||||
: "")
|
||||
}]`}
|
||||
xLabel={`${item.m1} [${metricUnits[item.m1]}]`}
|
||||
yLabel={`${item.m2} [${metricUnits[item.m2]}]`}
|
||||
X={item.f1}
|
||||
Y={item.f2}
|
||||
S={$footprintsQuery.data.footprints.timeWeights.nodeHours}
|
||||
/>
|
||||
</PlotTable>
|
||||
</PlotGrid>
|
||||
</Col>
|
||||
</Row>
|
||||
{/if}
|
||||
|
||||
@@ -1,16 +1,20 @@
|
||||
<script>
|
||||
import { getContext } from "svelte";
|
||||
import { init } from "./utils.js";
|
||||
import { Card, CardHeader, CardTitle } from "@sveltestrap/sveltestrap";
|
||||
<!--
|
||||
@component Main Config Option Component, Wrapper for admin and user sub-components
|
||||
|
||||
import PlotSettings from "./config/PlotSettings.svelte";
|
||||
Properties:
|
||||
- `ìsAdmin Bool!`: Is currently logged in user admin authority
|
||||
- `isApi Bool!`: Is currently logged in user api authority
|
||||
- `username String!`: Empty string if auth. is disabled, otherwise the username as string
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { Card, CardHeader, CardTitle } from "@sveltestrap/sveltestrap";
|
||||
import UserSettings from "./config/UserSettings.svelte";
|
||||
import AdminSettings from "./config/AdminSettings.svelte";
|
||||
|
||||
const { query: initq } = init();
|
||||
|
||||
const ccconfig = getContext("cc-config");
|
||||
|
||||
export let isAdmin;
|
||||
export let isApi;
|
||||
export let username;
|
||||
</script>
|
||||
|
||||
{#if isAdmin == true}
|
||||
@@ -24,7 +28,7 @@
|
||||
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle class="mb-1">Plotting Options</CardTitle>
|
||||
<CardTitle class="mb-1">User Options</CardTitle>
|
||||
</CardHeader>
|
||||
<PlotSettings config={ccconfig} />
|
||||
<UserSettings {username} {isApi}/>
|
||||
</Card>
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
@component Main navbar component; handles view display based on user roles
|
||||
|
||||
Properties:
|
||||
- `username String`: Empty string if auth. is disabled, otherwise the username as string
|
||||
- `authlevel Number`: The current users authentication level
|
||||
- `clusters [String]`: List of cluster names
|
||||
- `roles [Number]`: Enum containing available roles
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
Icon,
|
||||
@@ -10,13 +20,13 @@
|
||||
DropdownToggle,
|
||||
DropdownMenu,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import NavbarLinks from "./NavbarLinks.svelte";
|
||||
import NavbarTools from "./NavbarTools.svelte";
|
||||
import NavbarLinks from "./header/NavbarLinks.svelte";
|
||||
import NavbarTools from "./header/NavbarTools.svelte";
|
||||
|
||||
export let username; // empty string if auth. is disabled, otherwise the username as string
|
||||
export let authlevel; // Integer
|
||||
export let clusters; // array of names
|
||||
export let roles; // Role Enum-Like
|
||||
export let username;
|
||||
export let authlevel;
|
||||
export let clusters;
|
||||
export let roles;
|
||||
|
||||
let isOpen = false;
|
||||
let screenSize;
|
||||
@@ -30,14 +40,19 @@
|
||||
usersTitle.set(3, "Managed Users");
|
||||
usersTitle.set(4, "Users");
|
||||
usersTitle.set(5, "Users");
|
||||
const projectsTitle = new Map();
|
||||
projectsTitle.set(3, "Managed Projects");
|
||||
projectsTitle.set(4, "Projects");
|
||||
projectsTitle.set(5, "Projects");
|
||||
|
||||
const views = [
|
||||
{
|
||||
title: "My Jobs",
|
||||
requiredRole: roles.user,
|
||||
href: `/monitoring/user/${username}`,
|
||||
icon: "bar-chart-line-fill",
|
||||
icon: "bar-chart-line",
|
||||
perCluster: false,
|
||||
listOptions: false,
|
||||
menu: "none",
|
||||
},
|
||||
{
|
||||
@@ -46,23 +61,8 @@
|
||||
href: `/monitoring/jobs/`,
|
||||
icon: "card-list",
|
||||
perCluster: false,
|
||||
menu: "none",
|
||||
},
|
||||
{
|
||||
title: usersTitle.get(authlevel),
|
||||
requiredRole: roles.manager,
|
||||
href: "/monitoring/users/",
|
||||
icon: "people-fill",
|
||||
perCluster: false,
|
||||
menu: "Groups",
|
||||
},
|
||||
{
|
||||
title: "Projects",
|
||||
requiredRole: roles.support,
|
||||
href: "/monitoring/projects/",
|
||||
icon: "folder",
|
||||
perCluster: false,
|
||||
menu: "Groups",
|
||||
listOptions: false,
|
||||
menu: "Jobs",
|
||||
},
|
||||
{
|
||||
title: "Tags",
|
||||
@@ -70,31 +70,53 @@
|
||||
href: "/monitoring/tags/",
|
||||
icon: "tags",
|
||||
perCluster: false,
|
||||
listOptions: false,
|
||||
menu: "Jobs",
|
||||
},
|
||||
{
|
||||
title: usersTitle.get(authlevel),
|
||||
requiredRole: roles.manager,
|
||||
href: "/monitoring/users/",
|
||||
icon: "people",
|
||||
perCluster: true,
|
||||
listOptions: true,
|
||||
menu: "Groups",
|
||||
},
|
||||
{
|
||||
title: projectsTitle.get(authlevel),
|
||||
requiredRole: roles.manager,
|
||||
href: "/monitoring/projects/",
|
||||
icon: "journals",
|
||||
perCluster: true,
|
||||
listOptions: true,
|
||||
menu: "Groups",
|
||||
},
|
||||
{
|
||||
title: "Nodes",
|
||||
requiredRole: roles.admin,
|
||||
href: "/monitoring/systems/",
|
||||
icon: "hdd-rack",
|
||||
perCluster: true,
|
||||
listOptions: false,
|
||||
menu: "Info",
|
||||
},
|
||||
{
|
||||
title: "Status",
|
||||
requiredRole: roles.admin,
|
||||
href: "/monitoring/status/",
|
||||
icon: "clipboard-data",
|
||||
perCluster: true,
|
||||
listOptions: false,
|
||||
menu: "Info",
|
||||
},
|
||||
{
|
||||
title: "Analysis",
|
||||
requiredRole: roles.support,
|
||||
href: "/monitoring/analysis/",
|
||||
icon: "graph-up",
|
||||
perCluster: true,
|
||||
menu: "Stats",
|
||||
},
|
||||
{
|
||||
title: "Nodes",
|
||||
requiredRole: roles.admin,
|
||||
href: "/monitoring/systems/",
|
||||
icon: "cpu",
|
||||
perCluster: true,
|
||||
menu: "Groups",
|
||||
},
|
||||
{
|
||||
title: "Status",
|
||||
requiredRole: roles.admin,
|
||||
href: "/monitoring/status/",
|
||||
icon: "cpu",
|
||||
perCluster: true,
|
||||
menu: "Stats",
|
||||
listOptions: false,
|
||||
menu: "Info",
|
||||
},
|
||||
];
|
||||
</script>
|
||||
@@ -122,24 +144,27 @@
|
||||
<NavbarLinks
|
||||
{clusters}
|
||||
links={views.filter(
|
||||
(item) => item.requiredRole <= authlevel && item.menu != "Stats",
|
||||
(item) => item.requiredRole <= authlevel && item.menu != "Info",
|
||||
)}
|
||||
/>
|
||||
<Dropdown nav>
|
||||
<DropdownToggle nav caret>
|
||||
<Icon name="graph-up" />
|
||||
Stats
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
<NavbarLinks
|
||||
{clusters}
|
||||
links={views.filter(
|
||||
(item) =>
|
||||
item.requiredRole <= authlevel && item.menu == "Stats",
|
||||
)}
|
||||
/>
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{#if authlevel >= 4} <!-- Support+ Info Menu-->
|
||||
<Dropdown nav>
|
||||
<DropdownToggle nav caret>
|
||||
<Icon name="graph-up" />
|
||||
Info
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
<NavbarLinks
|
||||
{clusters}
|
||||
direction="right"
|
||||
links={views.filter(
|
||||
(item) =>
|
||||
item.requiredRole <= authlevel && item.menu == "Info",
|
||||
)}
|
||||
/>
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{/if}
|
||||
{:else}
|
||||
<NavbarLinks
|
||||
{clusters}
|
||||
@@ -147,21 +172,54 @@
|
||||
(item) => item.requiredRole <= authlevel && item.menu == "none",
|
||||
)}
|
||||
/>
|
||||
{#each Array("Groups", "Stats") as menu}
|
||||
{#if authlevel >= 2} <!-- User+ Job Menu -->
|
||||
<Dropdown nav>
|
||||
<DropdownToggle nav caret>
|
||||
{menu}
|
||||
Jobs
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
<NavbarLinks
|
||||
{clusters}
|
||||
direction="right"
|
||||
links={views.filter(
|
||||
(item) => item.requiredRole <= authlevel && item.menu == menu,
|
||||
(item) => item.requiredRole <= authlevel && item.menu == 'Jobs',
|
||||
)}
|
||||
/>
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{/each}
|
||||
{/if}
|
||||
{#if authlevel >= 3} <!-- Manager+ Group Lists Menu-->
|
||||
<Dropdown nav>
|
||||
<DropdownToggle nav caret>
|
||||
Groups
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
<NavbarLinks
|
||||
{clusters}
|
||||
direction="right"
|
||||
links={views.filter(
|
||||
(item) => item.requiredRole <= authlevel && item.menu == 'Groups',
|
||||
)}
|
||||
/>
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{/if}
|
||||
{#if authlevel >= 4} <!-- Support+ Info Menu-->
|
||||
<Dropdown nav>
|
||||
<DropdownToggle nav caret>
|
||||
Info
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
<NavbarLinks
|
||||
{clusters}
|
||||
direction="right"
|
||||
links={views.filter(
|
||||
(item) => item.requiredRole <= authlevel && item.menu == 'Info',
|
||||
)}
|
||||
/>
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{/if}
|
||||
{/if}
|
||||
</Nav>
|
||||
<NavbarTools {username} {authlevel} {roles} {screenSize} />
|
||||
|
||||
@@ -1,11 +1,19 @@
|
||||
<!--
|
||||
@component Main single job display component; displays plots for every metric as well as various information
|
||||
|
||||
Properties:
|
||||
- `dbid Number`: The jobs DB ID
|
||||
- `username String`: Empty string if auth. is disabled, otherwise the username as string
|
||||
- `authlevel Number`: The current users authentication level
|
||||
- `roles [Number]`: Enum containing available roles
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
init,
|
||||
groupByScope,
|
||||
fetchMetricsStore,
|
||||
checkMetricDisabled,
|
||||
transformDataForRoofline,
|
||||
} from "./utils.js";
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
@@ -17,137 +25,147 @@
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
Button,
|
||||
Icon,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import PlotTable from "./PlotTable.svelte";
|
||||
import Metric from "./Metric.svelte";
|
||||
import Polar from "./plots/Polar.svelte";
|
||||
import Roofline from "./plots/Roofline.svelte";
|
||||
import JobInfo from "./joblist/JobInfo.svelte";
|
||||
import TagManagement from "./TagManagement.svelte";
|
||||
import MetricSelection from "./MetricSelection.svelte";
|
||||
import StatsTable from "./StatsTable.svelte";
|
||||
import JobFootprint from "./JobFootprint.svelte";
|
||||
import { getContext } from "svelte";
|
||||
import {
|
||||
init,
|
||||
groupByScope,
|
||||
checkMetricDisabled,
|
||||
transformDataForRoofline,
|
||||
} from "./generic/utils.js";
|
||||
import Metric from "./job/Metric.svelte";
|
||||
import StatsTable from "./job/StatsTable.svelte";
|
||||
import JobSummary from "./job/JobSummary.svelte";
|
||||
import EnergySummary from "./job/EnergySummary.svelte";
|
||||
import ConcurrentJobs from "./generic/helper/ConcurrentJobs.svelte";
|
||||
import PlotGrid from "./generic/PlotGrid.svelte";
|
||||
import Roofline from "./generic/plots/Roofline.svelte";
|
||||
import JobInfo from "./generic/joblist/JobInfo.svelte";
|
||||
import MetricSelection from "./generic/select/MetricSelection.svelte";
|
||||
|
||||
export let dbid;
|
||||
export let username;
|
||||
export let authlevel;
|
||||
export let roles;
|
||||
|
||||
const accMetrics = [
|
||||
"acc_utilization",
|
||||
"acc_mem_used",
|
||||
"acc_power",
|
||||
"nv_mem_util",
|
||||
"nv_sm_clock",
|
||||
"nv_temp",
|
||||
];
|
||||
let accNodeOnly;
|
||||
// Setup General
|
||||
|
||||
const ccconfig = getContext("cc-config")
|
||||
|
||||
let isMetricsSelectionOpen = false,
|
||||
selectedMetrics = [],
|
||||
selectedScopes = [];
|
||||
|
||||
let plots = {},
|
||||
roofWidth,
|
||||
statsTable
|
||||
|
||||
let missingMetrics = [],
|
||||
missingHosts = [],
|
||||
somethingMissing = false;
|
||||
|
||||
// Setup GQL
|
||||
// First: Add Job Query to init function -> Only requires DBID as argument, received via URL-ID
|
||||
// Second: Trigger jobMetrics query with now received jobInfos (scopes: from job metadata, selectedMetrics: from config or all, job: from url-id)
|
||||
|
||||
const { query: initq } = init(`
|
||||
job(id: "${dbid}") {
|
||||
id, jobId, user, project, cluster, startTime,
|
||||
duration, numNodes, numHWThreads, numAcc,
|
||||
duration, numNodes, numHWThreads, numAcc, energy,
|
||||
SMT, exclusive, partition, subCluster, arrayJobId,
|
||||
monitoringStatus, state, walltime,
|
||||
tags { id, type, name },
|
||||
tags { id, type, scope, name },
|
||||
resources { hostname, hwthreads, accelerators },
|
||||
metaData,
|
||||
userData { name, email },
|
||||
concurrentJobs { items { id, jobId }, count, listQuery },
|
||||
flopsAnyAvg, memBwAvg, loadAvg
|
||||
footprint { name, stat, value },
|
||||
energyFootprint { hardware, metric, value }
|
||||
}
|
||||
`);
|
||||
|
||||
const ccconfig = getContext("cc-config"),
|
||||
clusters = getContext("clusters"),
|
||||
metrics = getContext("metrics");
|
||||
const client = getContextClient();
|
||||
const query = gql`
|
||||
query ($dbid: ID!, $selectedMetrics: [String!]!, $selectedScopes: [MetricScope!]!) {
|
||||
jobMetrics(id: $dbid, metrics: $selectedMetrics, scopes: $selectedScopes) {
|
||||
name
|
||||
scope
|
||||
metric {
|
||||
unit {
|
||||
prefix
|
||||
base
|
||||
}
|
||||
timestep
|
||||
statisticsSeries {
|
||||
min
|
||||
mean
|
||||
median
|
||||
max
|
||||
}
|
||||
series {
|
||||
hostname
|
||||
id
|
||||
data
|
||||
statistics {
|
||||
min
|
||||
avg
|
||||
max
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
`;
|
||||
|
||||
let isMetricsSelectionOpen = false,
|
||||
selectedMetrics = [],
|
||||
showFootprint = true,
|
||||
isFetched = new Set();
|
||||
const [jobMetrics, startFetching] = fetchMetricsStore();
|
||||
$: jobMetrics = queryStore({
|
||||
client: client,
|
||||
query: query,
|
||||
variables: { dbid, selectedMetrics, selectedScopes },
|
||||
});
|
||||
|
||||
// Handle Job Query on Init -> is not executed anymore
|
||||
getContext("on-init")(() => {
|
||||
let job = $initq.data.job;
|
||||
if (!job) return;
|
||||
|
||||
selectedMetrics =
|
||||
ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
|
||||
clusters
|
||||
.find((c) => c.name == job.cluster)
|
||||
.metricConfig.map((mc) => mc.name);
|
||||
|
||||
showFootprint =
|
||||
ccconfig[`job_view_showFootprint`]
|
||||
|
||||
let toFetch = new Set([
|
||||
const pendingMetrics = [
|
||||
"flops_any",
|
||||
"mem_bw",
|
||||
...selectedMetrics,
|
||||
...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
|
||||
$initq.data.globalMetrics.reduce((names, gm) => {
|
||||
if (gm.availability.find((av) => av.cluster === job.cluster)) {
|
||||
names.push(gm.name);
|
||||
}
|
||||
return names;
|
||||
}, [])
|
||||
),
|
||||
...(ccconfig[`job_view_polarPlotMetrics:${job.cluster}`] ||
|
||||
ccconfig[`job_view_polarPlotMetrics`]),
|
||||
ccconfig[`job_view_polarPlotMetrics`]
|
||||
),
|
||||
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
|
||||
ccconfig[`job_view_nodestats_selectedMetrics`]),
|
||||
]);
|
||||
ccconfig[`job_view_nodestats_selectedMetrics`]
|
||||
),
|
||||
];
|
||||
|
||||
// Select default Scopes to load: Check before if accelerator metrics are not on accelerator scope by default
|
||||
accNodeOnly = [...toFetch].some(function (m) {
|
||||
if (accMetrics.includes(m)) {
|
||||
const mc = metrics(job.cluster, m);
|
||||
return mc.scope !== "accelerator";
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
// Select default Scopes to load: Check before if any metric has accelerator scope by default
|
||||
const accScopeDefault = [...pendingMetrics].some(function (m) {
|
||||
const cluster = $initq.data.clusters.find((c) => c.name == job.cluster);
|
||||
const subCluster = cluster.subClusters.find((sc) => sc.name == job.subCluster);
|
||||
return subCluster.metricConfig.find((smc) => smc.name == m)?.scope === "accelerator";
|
||||
});
|
||||
|
||||
if (job.numAcc === 0 || accNodeOnly === true) {
|
||||
// No Accels or Accels on Node Scope
|
||||
startFetching(
|
||||
job,
|
||||
[...toFetch],
|
||||
job.numNodes > 2 ? ["node"] : ["node", "socket", "core"],
|
||||
);
|
||||
} else {
|
||||
// Accels and not on node scope
|
||||
startFetching(
|
||||
job,
|
||||
[...toFetch],
|
||||
job.numNodes > 2
|
||||
? ["node", "accelerator"]
|
||||
: ["node", "accelerator", "socket", "core"],
|
||||
);
|
||||
const pendingScopes = ["node"]
|
||||
if (accScopeDefault) pendingScopes.push("accelerator")
|
||||
if (job.numNodes === 1) {
|
||||
pendingScopes.push("socket")
|
||||
pendingScopes.push("core")
|
||||
}
|
||||
|
||||
isFetched = toFetch;
|
||||
selectedMetrics = [...new Set(pendingMetrics)];
|
||||
selectedScopes = [...new Set(pendingScopes)];
|
||||
});
|
||||
|
||||
const lazyFetchMoreMetrics = () => {
|
||||
let notYetFetched = new Set();
|
||||
for (let m of selectedMetrics) {
|
||||
if (!isFetched.has(m)) {
|
||||
notYetFetched.add(m);
|
||||
isFetched.add(m);
|
||||
}
|
||||
}
|
||||
|
||||
if (notYetFetched.size > 0)
|
||||
startFetching(
|
||||
$initq.data.job,
|
||||
[...notYetFetched],
|
||||
$initq.data.job.numNodes > 2 ? ["node"] : ["node", "core"],
|
||||
);
|
||||
};
|
||||
|
||||
// Fetch more data once required:
|
||||
$: if ($initq.data && $jobMetrics.data && selectedMetrics)
|
||||
lazyFetchMoreMetrics();
|
||||
|
||||
let plots = {},
|
||||
jobTags,
|
||||
statsTable,
|
||||
jobFootprint;
|
||||
|
||||
// Interactive Document Title
|
||||
$: document.title = $initq.fetching
|
||||
? "Loading..."
|
||||
: $initq.error
|
||||
@@ -155,15 +173,15 @@
|
||||
: `Job ${$initq.data.job.jobId} - ClusterCockpit`;
|
||||
|
||||
// Find out what metrics or hosts are missing:
|
||||
let missingMetrics = [],
|
||||
missingHosts = [],
|
||||
somethingMissing = false;
|
||||
$: if ($initq.data && $jobMetrics.data) {
|
||||
$: if ($initq?.data && $jobMetrics?.data?.jobMetrics) {
|
||||
let job = $initq.data.job,
|
||||
metrics = $jobMetrics.data.jobMetrics,
|
||||
metricNames = clusters
|
||||
.find((c) => c.name == job.cluster)
|
||||
.metricConfig.map((mc) => mc.name);
|
||||
metricNames = $initq.data.globalMetrics.reduce((names, gm) => {
|
||||
if (gm.availability.find((av) => av.cluster === job.cluster)) {
|
||||
names.push(gm.name);
|
||||
}
|
||||
return names;
|
||||
}, []);
|
||||
|
||||
// Metric not found in JobMetrics && Metric not explicitly disabled in config or deselected: Was expected, but is Missing
|
||||
missingMetrics = metricNames.filter(
|
||||
@@ -192,6 +210,7 @@
|
||||
somethingMissing = missingMetrics.length > 0 || missingHosts.length > 0;
|
||||
}
|
||||
|
||||
// Helper
|
||||
const orderAndMap = (grouped, selectedMetrics) =>
|
||||
selectedMetrics.map((metric) => ({
|
||||
metric: metric,
|
||||
@@ -204,124 +223,134 @@
|
||||
}));
|
||||
</script>
|
||||
|
||||
<Row>
|
||||
<Col>
|
||||
<Row class="mb-3">
|
||||
<!-- Column 1: Job Info, Job Tags, Concurrent Jobs, Admin Message if found-->
|
||||
<Col xs={12} md={6} xl={3} class="mb-3 mb-xxl-0">
|
||||
{#if $initq.error}
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
{:else if $initq.data}
|
||||
<JobInfo job={$initq.data.job} {jobTags} />
|
||||
<Card class="overflow-auto" style="height: 400px;">
|
||||
<TabContent> <!-- on:tab={(e) => (status = e.detail)} -->
|
||||
{#if $initq.data?.job?.metaData?.message}
|
||||
<TabPane tabId="admin-msg" tab="Admin Note" active>
|
||||
<CardBody>
|
||||
<Card body class="mb-2" color="warning">
|
||||
<h5>Job {$initq.data?.job?.jobId} ({$initq.data?.job?.cluster})</h5>
|
||||
The following note was added by administrators:
|
||||
</Card>
|
||||
<Card body>
|
||||
{@html $initq.data.job.metaData.message}
|
||||
</Card>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
{/if}
|
||||
<TabPane tabId="meta-info" tab="Job Info" active={$initq.data?.job?.metaData?.message?false:true}>
|
||||
<CardBody class="pb-2">
|
||||
<JobInfo job={$initq.data.job} {username} {authlevel} {roles} showTagedit/>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
|
||||
<TabPane tabId="shared-jobs">
|
||||
<span slot="tab">
|
||||
{$initq.data.job.concurrentJobs.items.length} Concurrent Jobs
|
||||
</span>
|
||||
<CardBody>
|
||||
<ConcurrentJobs cJobs={$initq.data.job.concurrentJobs} showLinks={(authlevel > roles.manager)}/>
|
||||
</CardBody>
|
||||
</TabPane>
|
||||
{/if}
|
||||
</TabContent>
|
||||
</Card>
|
||||
{:else}
|
||||
<Spinner secondary />
|
||||
{/if}
|
||||
</Col>
|
||||
{#if $jobMetrics.data && showFootprint}
|
||||
{#key $jobMetrics.data}
|
||||
<Col>
|
||||
<JobFootprint
|
||||
bind:this={jobFootprint}
|
||||
job={$initq.data.job}
|
||||
jobMetrics={$jobMetrics.data.jobMetrics}
|
||||
/>
|
||||
</Col>
|
||||
{/key}
|
||||
{/if}
|
||||
{#if $jobMetrics.data && $initq.data}
|
||||
{#if $initq.data.job.concurrentJobs != null && $initq.data.job.concurrentJobs.items.length != 0}
|
||||
{#if authlevel > roles.manager}
|
||||
<Col>
|
||||
<h5>
|
||||
Concurrent Jobs <Icon
|
||||
name="info-circle"
|
||||
style="cursor:help;"
|
||||
title="Shared jobs running on the same node with overlapping runtimes"
|
||||
/>
|
||||
</h5>
|
||||
<ul>
|
||||
<li>
|
||||
<a
|
||||
href="/monitoring/jobs/?{$initq.data.job.concurrentJobs
|
||||
.listQuery}"
|
||||
target="_blank">See All</a
|
||||
>
|
||||
</li>
|
||||
{#each $initq.data.job.concurrentJobs.items as pjob, index}
|
||||
<li>
|
||||
<a href="/monitoring/job/{pjob.id}" target="_blank"
|
||||
>{pjob.jobId}</a
|
||||
>
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
</Col>
|
||||
{:else}
|
||||
<Col>
|
||||
<h5>
|
||||
{$initq.data.job.concurrentJobs.items.length} Concurrent Jobs
|
||||
</h5>
|
||||
<p>
|
||||
Number of shared jobs on the same node with overlapping runtimes.
|
||||
</p>
|
||||
</Col>
|
||||
{/if}
|
||||
{/if}
|
||||
<Col>
|
||||
<Polar
|
||||
metrics={ccconfig[
|
||||
`job_view_polarPlotMetrics:${$initq.data.job.cluster}`
|
||||
] || ccconfig[`job_view_polarPlotMetrics`]}
|
||||
cluster={$initq.data.job.cluster}
|
||||
jobMetrics={$jobMetrics.data.jobMetrics}
|
||||
/>
|
||||
</Col>
|
||||
<Col>
|
||||
<Roofline
|
||||
renderTime={true}
|
||||
cluster={clusters
|
||||
.find((c) => c.name == $initq.data.job.cluster)
|
||||
.subClusters.find((sc) => sc.name == $initq.data.job.subCluster)}
|
||||
data={transformDataForRoofline(
|
||||
$jobMetrics.data?.jobMetrics?.find(
|
||||
(m) => m.name == "flops_any" && m.scope == "node",
|
||||
)?.metric,
|
||||
$jobMetrics.data?.jobMetrics?.find(
|
||||
(m) => m.name == "mem_bw" && m.scope == "node",
|
||||
)?.metric,
|
||||
)}
|
||||
/>
|
||||
</Col>
|
||||
{:else}
|
||||
<Col />
|
||||
<Col />
|
||||
{/if}
|
||||
</Row>
|
||||
<Row class="mb-3">
|
||||
<Col xs="auto">
|
||||
{#if $initq.data}
|
||||
<TagManagement job={$initq.data.job} bind:jobTags />
|
||||
{/if}
|
||||
</Col>
|
||||
<Col xs="auto">
|
||||
{#if $initq.data}
|
||||
<Button outline on:click={() => (isMetricsSelectionOpen = true)}>
|
||||
<Icon name="graph-up" /> Metrics
|
||||
</Button>
|
||||
{/if}
|
||||
</Col>
|
||||
</Row>
|
||||
<Row>
|
||||
<Col>
|
||||
{#if $jobMetrics.error}
|
||||
{#if $initq.data.job.monitoringStatus == 0 || $initq.data.job.monitoringStatus == 2}
|
||||
<Card body color="warning">Not monitored or archiving failed</Card>
|
||||
<br />
|
||||
{/if}
|
||||
<Card body color="danger">{$jobMetrics.error.message}</Card>
|
||||
{:else if $jobMetrics.fetching}
|
||||
|
||||
<!-- Column 2: Job Footprint, Polar Representation, Heuristic Summary -->
|
||||
<Col xs={12} md={6} xl={4} xxl={3} class="mb-3 mb-xxl-0">
|
||||
{#if $initq.error}
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
{:else if $initq?.data && $jobMetrics?.data}
|
||||
<JobSummary job={$initq.data.job} jobMetrics={$jobMetrics.data.jobMetrics}/>
|
||||
{:else}
|
||||
<Spinner secondary />
|
||||
{:else if $jobMetrics.data && $initq.data}
|
||||
<PlotTable
|
||||
{/if}
|
||||
</Col>
|
||||
|
||||
<!-- Column 3: Job Roofline; If footprint Enabled: full width, else half width -->
|
||||
<Col xs={12} md={12} xl={5} xxl={6}>
|
||||
{#if $initq.error || $jobMetrics.error}
|
||||
<Card body color="danger">
|
||||
<p>Initq Error: {$initq.error?.message}</p>
|
||||
<p>jobMetrics Error: {$jobMetrics.error?.message}</p>
|
||||
</Card>
|
||||
{:else if $initq?.data && $jobMetrics?.data}
|
||||
<Card style="height: 400px;">
|
||||
<div bind:clientWidth={roofWidth}>
|
||||
<Roofline
|
||||
allowSizeChange={true}
|
||||
width={roofWidth}
|
||||
renderTime={true}
|
||||
subCluster={$initq.data.clusters
|
||||
.find((c) => c.name == $initq.data.job.cluster)
|
||||
.subClusters.find((sc) => sc.name == $initq.data.job.subCluster)}
|
||||
data={transformDataForRoofline(
|
||||
$jobMetrics.data?.jobMetrics?.find(
|
||||
(m) => m.name == "flops_any" && m.scope == "node",
|
||||
)?.metric,
|
||||
$jobMetrics.data?.jobMetrics?.find(
|
||||
(m) => m.name == "mem_bw" && m.scope == "node",
|
||||
)?.metric,
|
||||
)}
|
||||
/>
|
||||
</div>
|
||||
</Card>
|
||||
{:else}
|
||||
<Spinner secondary />
|
||||
{/if}
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
{#if $initq?.data && $initq.data.job.energyFootprint.length != 0}
|
||||
<Row class="mb-3">
|
||||
<Col>
|
||||
<EnergySummary jobId={$initq.data.job.jobId} jobEnergy={$initq.data.job.energy} jobEnergyFootprint={$initq.data.job.energyFootprint}/>
|
||||
</Col>
|
||||
</Row>
|
||||
{/if}
|
||||
|
||||
<Card class="mb-3">
|
||||
<CardBody>
|
||||
<Row class="mb-2">
|
||||
{#if $initq.data}
|
||||
<Col xs="auto">
|
||||
<Button outline on:click={() => (isMetricsSelectionOpen = true)} color="primary">
|
||||
Select Metrics
|
||||
</Button>
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
<hr class="mb-2"/>
|
||||
|
||||
{#if $jobMetrics.error}
|
||||
<Row class="mt-2">
|
||||
<Col>
|
||||
{#if $initq.data.job.monitoringStatus == 0 || $initq.data.job.monitoringStatus == 2}
|
||||
<Card body color="warning">Not monitored or archiving failed</Card>
|
||||
<br />
|
||||
{/if}
|
||||
<Card body color="danger">{$jobMetrics.error.message}</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $jobMetrics.fetching}
|
||||
<Row class="mt-2">
|
||||
<Col>
|
||||
<Spinner secondary />
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $initq?.data && $jobMetrics?.data?.jobMetrics}
|
||||
<PlotGrid
|
||||
let:item
|
||||
let:width
|
||||
renderFor="job"
|
||||
items={orderAndMap(
|
||||
groupByScope($jobMetrics.data.jobMetrics),
|
||||
@@ -335,90 +364,95 @@
|
||||
on:more-loaded={({ detail }) => statsTable.moreLoaded(detail)}
|
||||
job={$initq.data.job}
|
||||
metricName={item.metric}
|
||||
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
|
||||
nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope}
|
||||
rawData={item.data.map((x) => x.metric)}
|
||||
scopes={item.data.map((x) => x.scope)}
|
||||
{width}
|
||||
isShared={$initq.data.job.exclusive != 1}
|
||||
/>
|
||||
{:else}
|
||||
<Card body color="warning"
|
||||
<Card body color="warning" class="mt-2"
|
||||
>No dataset returned for <code>{item.metric}</code></Card
|
||||
>
|
||||
{/if}
|
||||
</PlotTable>
|
||||
</PlotGrid>
|
||||
{/if}
|
||||
</Col>
|
||||
</Row>
|
||||
<Row class="mt-2">
|
||||
</CardBody>
|
||||
</Card>
|
||||
|
||||
<Row class="mb-3">
|
||||
<Col>
|
||||
{#if $initq.data}
|
||||
<TabContent>
|
||||
{#if somethingMissing}
|
||||
<TabPane tabId="resources" tab="Resources" active={somethingMissing}>
|
||||
<div style="margin: 10px;">
|
||||
<Card color="warning">
|
||||
<CardHeader>
|
||||
<CardTitle>Missing Metrics/Resources</CardTitle>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
{#if missingMetrics.length > 0}
|
||||
<p>
|
||||
No data at all is available for the metrics: {missingMetrics.join(
|
||||
", ",
|
||||
)}
|
||||
</p>
|
||||
{/if}
|
||||
{#if missingHosts.length > 0}
|
||||
<p>Some metrics are missing for the following hosts:</p>
|
||||
<ul>
|
||||
{#each missingHosts as missing}
|
||||
<li>
|
||||
{missing.hostname}: {missing.metrics.join(", ")}
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{/if}
|
||||
</CardBody>
|
||||
</Card>
|
||||
<Card>
|
||||
<TabContent>
|
||||
{#if somethingMissing}
|
||||
<TabPane tabId="resources" tab="Resources" active={somethingMissing}>
|
||||
<div style="margin: 10px;">
|
||||
<Card color="warning">
|
||||
<CardHeader>
|
||||
<CardTitle>Missing Metrics/Resources</CardTitle>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
{#if missingMetrics.length > 0}
|
||||
<p>
|
||||
No data at all is available for the metrics: {missingMetrics.join(
|
||||
", ",
|
||||
)}
|
||||
</p>
|
||||
{/if}
|
||||
{#if missingHosts.length > 0}
|
||||
<p>Some metrics are missing for the following hosts:</p>
|
||||
<ul>
|
||||
{#each missingHosts as missing}
|
||||
<li>
|
||||
{missing.hostname}: {missing.metrics.join(", ")}
|
||||
</li>
|
||||
{/each}
|
||||
</ul>
|
||||
{/if}
|
||||
</CardBody>
|
||||
</Card>
|
||||
</div>
|
||||
</TabPane>
|
||||
{/if}
|
||||
<TabPane
|
||||
tabId="stats"
|
||||
tab="Statistics Table"
|
||||
class="overflow-x-auto"
|
||||
active={!somethingMissing}
|
||||
>
|
||||
{#if $jobMetrics?.data?.jobMetrics}
|
||||
{#key $jobMetrics.data.jobMetrics}
|
||||
<StatsTable
|
||||
bind:this={statsTable}
|
||||
job={$initq.data.job}
|
||||
jobMetrics={$jobMetrics.data.jobMetrics}
|
||||
/>
|
||||
{/key}
|
||||
{/if}
|
||||
</TabPane>
|
||||
<TabPane tabId="job-script" tab="Job Script">
|
||||
<div class="pre-wrapper">
|
||||
{#if $initq.data.job.metaData?.jobScript}
|
||||
<pre><code>{$initq.data.job.metaData?.jobScript}</code></pre>
|
||||
{:else}
|
||||
<Card body color="warning">No job script available</Card>
|
||||
{/if}
|
||||
</div>
|
||||
</TabPane>
|
||||
{/if}
|
||||
<TabPane
|
||||
tabId="stats"
|
||||
tab="Statistics Table"
|
||||
active={!somethingMissing}
|
||||
>
|
||||
{#if $jobMetrics.data}
|
||||
{#key $jobMetrics.data}
|
||||
<StatsTable
|
||||
bind:this={statsTable}
|
||||
job={$initq.data.job}
|
||||
jobMetrics={$jobMetrics.data.jobMetrics}
|
||||
/>
|
||||
{/key}
|
||||
{/if}
|
||||
</TabPane>
|
||||
<TabPane tabId="job-script" tab="Job Script">
|
||||
<div class="pre-wrapper">
|
||||
{#if $initq.data.job.metaData?.jobScript}
|
||||
<pre><code>{$initq.data.job.metaData?.jobScript}</code></pre>
|
||||
{:else}
|
||||
<Card body color="warning">No job script available</Card>
|
||||
{/if}
|
||||
</div>
|
||||
</TabPane>
|
||||
<TabPane tabId="slurm-info" tab="Slurm Info">
|
||||
<div class="pre-wrapper">
|
||||
{#if $initq.data.job.metaData?.slurmInfo}
|
||||
<pre><code>{$initq.data.job.metaData?.slurmInfo}</code></pre>
|
||||
{:else}
|
||||
<Card body color="warning"
|
||||
>No additional slurm information available</Card
|
||||
>
|
||||
{/if}
|
||||
</div>
|
||||
</TabPane>
|
||||
</TabContent>
|
||||
<TabPane tabId="slurm-info" tab="Slurm Info">
|
||||
<div class="pre-wrapper">
|
||||
{#if $initq.data.job.metaData?.slurmInfo}
|
||||
<pre><code>{$initq.data.job.metaData?.slurmInfo}</code></pre>
|
||||
{:else}
|
||||
<Card body color="warning"
|
||||
>No additional slurm information available</Card
|
||||
>
|
||||
{/if}
|
||||
</div>
|
||||
</TabPane>
|
||||
</TabContent>
|
||||
</Card>
|
||||
{/if}
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
@@ -1,268 +0,0 @@
|
||||
<script context="module">
|
||||
export function findJobThresholds(job, metricConfig, subClusterConfig) {
|
||||
if (!job || !metricConfig || !subClusterConfig) {
|
||||
console.warn("Argument missing for findJobThresholds!");
|
||||
return null;
|
||||
}
|
||||
|
||||
const subclusterThresholds = metricConfig.subClusters.find(
|
||||
(sc) => sc.name == subClusterConfig.name,
|
||||
);
|
||||
const defaultThresholds = {
|
||||
peak: subclusterThresholds
|
||||
? subclusterThresholds.peak
|
||||
: metricConfig.peak,
|
||||
normal: subclusterThresholds
|
||||
? subclusterThresholds.normal
|
||||
: metricConfig.normal,
|
||||
caution: subclusterThresholds
|
||||
? subclusterThresholds.caution
|
||||
: metricConfig.caution,
|
||||
alert: subclusterThresholds
|
||||
? subclusterThresholds.alert
|
||||
: metricConfig.alert,
|
||||
};
|
||||
|
||||
// Job_Exclusivity does not matter, only aggregation
|
||||
if (metricConfig.aggregation === "avg") {
|
||||
return defaultThresholds;
|
||||
} else if (metricConfig.aggregation === "sum") {
|
||||
const jobFraction =
|
||||
job.numHWThreads / subClusterConfig.topology.node.length;
|
||||
return {
|
||||
peak: round(defaultThresholds.peak * jobFraction, 0),
|
||||
normal: round(defaultThresholds.normal * jobFraction, 0),
|
||||
caution: round(defaultThresholds.caution * jobFraction, 0),
|
||||
alert: round(defaultThresholds.alert * jobFraction, 0),
|
||||
};
|
||||
} else {
|
||||
console.warn(
|
||||
"Missing or unkown aggregation mode (sum/avg) for metric:",
|
||||
metricConfig,
|
||||
);
|
||||
return defaultThresholds;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<script>
|
||||
import { getContext } from "svelte";
|
||||
import {
|
||||
Card,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
CardBody,
|
||||
Progress,
|
||||
Icon,
|
||||
Tooltip,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { mean, round } from "mathjs";
|
||||
|
||||
export let job;
|
||||
export let jobMetrics;
|
||||
export let view = "job";
|
||||
export let width = "auto";
|
||||
|
||||
const clusters = getContext("clusters");
|
||||
const subclusterConfig = clusters
|
||||
.find((c) => c.name == job.cluster)
|
||||
.subClusters.find((sc) => sc.name == job.subCluster);
|
||||
|
||||
const footprintMetrics =
|
||||
job.numAcc !== 0
|
||||
? job.exclusive !== 1 // GPU
|
||||
? ["acc_utilization", "acc_mem_used", "nv_sm_clock", "nv_mem_util"] // Shared
|
||||
: ["acc_utilization", "acc_mem_used", "nv_sm_clock", "nv_mem_util"] // Exclusive
|
||||
: (job.exclusive !== 1) // CPU Only
|
||||
? ["flops_any", "mem_used"] // Shared
|
||||
: ["cpu_load", "flops_any", "mem_used", "mem_bw"]; // Exclusive
|
||||
|
||||
const footprintData = footprintMetrics.map((fm) => {
|
||||
// Unit
|
||||
const fmc = getContext("metrics")(job.cluster, fm);
|
||||
let unit = "";
|
||||
if (fmc?.unit?.base) unit = fmc.unit.prefix + fmc.unit.base;
|
||||
|
||||
// Threshold / -Differences
|
||||
const fmt = findJobThresholds(job, fmc, subclusterConfig);
|
||||
if (fm === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0);
|
||||
|
||||
// Value: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
|
||||
// Exclusivity does not matter
|
||||
let mv = 0.0;
|
||||
if (fmc.aggregation === "avg") {
|
||||
if (fm === "cpu_load" && job.loadAvg !== 0) {
|
||||
mv = round(job.loadAvg, 2);
|
||||
} else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
|
||||
mv = round(job.flopsAnyAvg, 2);
|
||||
} else if (fm === "mem_bw" && job.memBwAvg !== 0) {
|
||||
mv = round(job.memBwAvg, 2);
|
||||
} else {
|
||||
// Calculate Avg from jobMetrics
|
||||
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
|
||||
if (jm?.metric?.statisticsSeries) {
|
||||
const noNan = jm.metric.statisticsSeries.mean.filter(function (val) {
|
||||
return val != null;
|
||||
});
|
||||
mv = round(mean(noNan), 2);
|
||||
} else if (jm?.metric?.series?.length > 1) {
|
||||
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
|
||||
mv = round(mean(avgs), 2);
|
||||
} else if (jm?.metric?.series) {
|
||||
mv = round(jm.metric.series[0].statistics.avg, 2);
|
||||
}
|
||||
}
|
||||
} else if (fmc.aggregation === "sum") {
|
||||
// Calculate Sum from jobMetrics: Sum all node averages
|
||||
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
|
||||
if (jm?.metric?.series?.length > 1) { // More than 1 node
|
||||
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
|
||||
mv = round(avgs.reduce((a, b) => a + b, 0));
|
||||
} else if (jm?.metric?.series) {
|
||||
mv = round(jm.metric.series[0].statistics.avg, 2);
|
||||
}
|
||||
} else {
|
||||
console.warn(
|
||||
"Missing or unkown aggregation mode (sum/avg) for metric:",
|
||||
metricConfig,
|
||||
);
|
||||
}
|
||||
|
||||
// Define basic data
|
||||
const fmBase = {
|
||||
name: fm,
|
||||
unit: unit,
|
||||
avg: mv,
|
||||
max: fmt.peak,
|
||||
};
|
||||
|
||||
if (evalFootprint(fm, mv, fmt, "alert")) {
|
||||
return {
|
||||
...fmBase,
|
||||
color: "danger",
|
||||
message: `Metric average way ${fm === "mem_used" ? "above" : "below"} expected normal thresholds.`,
|
||||
impact: 3,
|
||||
};
|
||||
} else if (evalFootprint(fm, mv, fmt, "caution")) {
|
||||
return {
|
||||
...fmBase,
|
||||
color: "warning",
|
||||
message: `Metric average ${fm === "mem_used" ? "above" : "below"} expected normal thresholds.`,
|
||||
impact: 2,
|
||||
};
|
||||
} else if (evalFootprint(fm, mv, fmt, "normal")) {
|
||||
return {
|
||||
...fmBase,
|
||||
color: "success",
|
||||
message: "Metric average within expected thresholds.",
|
||||
impact: 1,
|
||||
};
|
||||
} else if (evalFootprint(fm, mv, fmt, "peak")) {
|
||||
return {
|
||||
...fmBase,
|
||||
color: "info",
|
||||
message:
|
||||
"Metric average above expected normal thresholds: Check for artifacts recommended.",
|
||||
impact: 0,
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
...fmBase,
|
||||
color: "secondary",
|
||||
message:
|
||||
"Metric average above expected peak threshold: Check for artifacts!",
|
||||
impact: -1,
|
||||
};
|
||||
}
|
||||
});
|
||||
|
||||
function evalFootprint(metric, mean, thresholds, level) {
|
||||
// mem_used has inverse logic regarding threshold levels, notify levels triggered if mean > threshold
|
||||
switch (level) {
|
||||
case "peak":
|
||||
if (metric === "mem_used")
|
||||
return false; // mem_used over peak -> return false to trigger impact -1
|
||||
else return mean <= thresholds.peak && mean > thresholds.normal;
|
||||
case "alert":
|
||||
if (metric === "mem_used")
|
||||
return mean <= thresholds.peak && mean >= thresholds.alert;
|
||||
else return mean <= thresholds.alert && mean >= 0;
|
||||
case "caution":
|
||||
if (metric === "mem_used")
|
||||
return mean < thresholds.alert && mean >= thresholds.caution;
|
||||
else return mean <= thresholds.caution && mean > thresholds.alert;
|
||||
case "normal":
|
||||
if (metric === "mem_used")
|
||||
return mean < thresholds.caution && mean >= 0;
|
||||
else return mean <= thresholds.normal && mean > thresholds.caution;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<Card class="h-auto mt-1" style="width: {width}px;">
|
||||
{#if view === "job"}
|
||||
<CardHeader>
|
||||
<CardTitle class="mb-0 d-flex justify-content-center">
|
||||
Core Metrics Footprint
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
{/if}
|
||||
<CardBody>
|
||||
{#each footprintData as fpd, index}
|
||||
<div class="mb-1 d-flex justify-content-between">
|
||||
<div> <b>{fpd.name}</b></div>
|
||||
<!-- For symmetry, see below ...-->
|
||||
<div
|
||||
class="cursor-help d-inline-flex"
|
||||
id={`footprint-${job.jobId}-${index}`}
|
||||
>
|
||||
<div class="mx-1">
|
||||
<!-- Alerts Only -->
|
||||
{#if fpd.impact === 3 || fpd.impact === -1}
|
||||
<Icon name="exclamation-triangle-fill" class="text-danger" />
|
||||
{:else if fpd.impact === 2}
|
||||
<Icon name="exclamation-triangle" class="text-warning" />
|
||||
{/if}
|
||||
<!-- Emoji for all states-->
|
||||
{#if fpd.impact === 3}
|
||||
<Icon name="emoji-frown" class="text-danger" />
|
||||
{:else if fpd.impact === 2}
|
||||
<Icon name="emoji-neutral" class="text-warning" />
|
||||
{:else if fpd.impact === 1}
|
||||
<Icon name="emoji-smile" class="text-success" />
|
||||
{:else if fpd.impact === 0}
|
||||
<Icon name="emoji-laughing" class="text-info" />
|
||||
{:else if fpd.impact === -1}
|
||||
<Icon name="emoji-dizzy" class="text-danger" />
|
||||
{/if}
|
||||
</div>
|
||||
<div>
|
||||
<!-- Print Values -->
|
||||
{fpd.avg} / {fpd.max}
|
||||
{fpd.unit} <!-- To increase margin to tooltip: No other way manageable ... -->
|
||||
</div>
|
||||
</div>
|
||||
<Tooltip
|
||||
target={`footprint-${job.jobId}-${index}`}
|
||||
placement="right"
|
||||
offset={[0, 20]}>{fpd.message}</Tooltip
|
||||
>
|
||||
</div>
|
||||
<div class="mb-2">
|
||||
<Progress value={fpd.avg} max={fpd.max} color={fpd.color} />
|
||||
</div>
|
||||
{/each}
|
||||
{#if job?.metaData?.message}
|
||||
<hr class="mt-1 mb-2" />
|
||||
{@html job.metaData.message}
|
||||
{/if}
|
||||
</CardBody>
|
||||
</Card>
|
||||
|
||||
<style>
|
||||
.cursor-help {
|
||||
cursor: help;
|
||||
}
|
||||
</style>
|
||||
@@ -1,20 +1,30 @@
|
||||
<script>
|
||||
<!--
|
||||
@component Main job list component
|
||||
|
||||
Properties:
|
||||
- `filterPresets Object?`: Optional predefined filter values [Default: {}]
|
||||
- `authlevel Number`: The current users authentication level
|
||||
- `roles [Number]`: Enum containing available roles
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { onMount, getContext } from "svelte";
|
||||
import { init } from "./utils.js";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
Button,
|
||||
ButtonGroup,
|
||||
Icon,
|
||||
Card,
|
||||
Spinner,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import Filters from "./filters/Filters.svelte";
|
||||
import JobList from "./joblist/JobList.svelte";
|
||||
import Refresher from "./joblist/Refresher.svelte";
|
||||
import Sorting from "./joblist/SortSelection.svelte";
|
||||
import MetricSelection from "./MetricSelection.svelte";
|
||||
import TextFilter from "./filters/TextFilter.svelte";
|
||||
import { init } from "./generic/utils.js";
|
||||
import Filters from "./generic/Filters.svelte";
|
||||
import JobList from "./generic/JobList.svelte";
|
||||
import TextFilter from "./generic/helper/TextFilter.svelte";
|
||||
import Refresher from "./generic/helper/Refresher.svelte";
|
||||
import Sorting from "./generic/select/SortSelection.svelte";
|
||||
import MetricSelection from "./generic/select/MetricSelection.svelte";
|
||||
|
||||
const { query: initq } = init();
|
||||
|
||||
@@ -27,7 +37,7 @@
|
||||
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
||||
let jobList,
|
||||
matchedJobs = null;
|
||||
let sorting = { field: "startTime", order: "DESC" },
|
||||
let sorting = { field: "startTime", type: "col", order: "DESC" },
|
||||
isSortingOpen = false,
|
||||
isMetricsSelectionOpen = false;
|
||||
let metrics = filterPresets.cluster
|
||||
@@ -43,69 +53,77 @@
|
||||
// The filterPresets are handled by the Filters component,
|
||||
// so we need to wait for it to be ready before we can start a query.
|
||||
// This is also why JobList component starts out with a paused query.
|
||||
onMount(() => filterComponent.update());
|
||||
onMount(() => filterComponent.updateFilters());
|
||||
</script>
|
||||
|
||||
<Row>
|
||||
{#if $initq.fetching}
|
||||
<Col xs="auto">
|
||||
<!-- ROW1: Status-->
|
||||
{#if $initq.fetching}
|
||||
<Row class="mb-3">
|
||||
<Col>
|
||||
<Spinner />
|
||||
</Col>
|
||||
{:else if $initq.error}
|
||||
<Col xs="auto">
|
||||
</Row>
|
||||
{:else if $initq.error}
|
||||
<Row class="mb-3">
|
||||
<Col>
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
<Row>
|
||||
<Col xs="auto">
|
||||
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
|
||||
<Icon name="sort-up" /> Sorting
|
||||
</Button>
|
||||
<Button
|
||||
outline
|
||||
color="primary"
|
||||
on:click={() => (isMetricsSelectionOpen = true)}
|
||||
>
|
||||
<Icon name="graph-up" /> Metrics
|
||||
</Button>
|
||||
<Button disabled outline
|
||||
>{matchedJobs == null ? "Loading..." : `${matchedJobs} jobs`}</Button
|
||||
>
|
||||
</Row>
|
||||
{/if}
|
||||
|
||||
<!-- ROW2: Tools-->
|
||||
<Row cols={{ xs: 1, md: 2, lg: 4}} class="mb-3">
|
||||
<Col lg="2" class="mb-2 mb-lg-0">
|
||||
<ButtonGroup class="w-100">
|
||||
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
|
||||
<Icon name="sort-up" /> Sorting
|
||||
</Button>
|
||||
<Button
|
||||
outline
|
||||
color="primary"
|
||||
on:click={() => (isMetricsSelectionOpen = true)}
|
||||
>
|
||||
<Icon name="graph-up" /> Metrics
|
||||
</Button>
|
||||
</ButtonGroup>
|
||||
</Col>
|
||||
<Col xs="auto">
|
||||
<Col lg="4" xl="{(presetProject !== '') ? 5 : 6}" class="mb-1 mb-lg-0">
|
||||
<Filters
|
||||
{filterPresets}
|
||||
{matchedJobs}
|
||||
bind:this={filterComponent}
|
||||
on:update={({ detail }) => {
|
||||
on:update-filters={({ detail }) => {
|
||||
selectedCluster = detail.filters[0]?.cluster
|
||||
? detail.filters[0].cluster.eq
|
||||
: null;
|
||||
jobList.update(detail.filters);
|
||||
jobList.queryJobs(detail.filters);
|
||||
}}
|
||||
/>
|
||||
</Col>
|
||||
|
||||
<Col xs="3" style="margin-left: auto;">
|
||||
<Col lg="3" xl="{(presetProject !== '') ? 3 : 2}" class="mb-2 mb-lg-0">
|
||||
<TextFilter
|
||||
{presetProject}
|
||||
bind:authlevel
|
||||
bind:roles
|
||||
on:update={({ detail }) => filterComponent.update(detail)}
|
||||
on:set-filter={({ detail }) => filterComponent.updateFilters(detail)}
|
||||
/>
|
||||
</Col>
|
||||
<Col xs="2">
|
||||
<Refresher on:reload={() => jobList.refresh()} />
|
||||
<Col lg="3" xl="2" class="mb-1 mb-lg-0">
|
||||
<Refresher on:refresh={() => {
|
||||
jobList.refreshJobs()
|
||||
jobList.refreshAllMetrics()
|
||||
}} />
|
||||
</Col>
|
||||
</Row>
|
||||
<br />
|
||||
|
||||
<!-- ROW3: Job List-->
|
||||
<Row>
|
||||
<Col>
|
||||
<JobList
|
||||
bind:this={jobList}
|
||||
bind:metrics
|
||||
bind:sorting
|
||||
bind:matchedJobs
|
||||
bind:this={jobList}
|
||||
bind:showFootprint
|
||||
/>
|
||||
</Col>
|
||||
@@ -119,5 +137,5 @@
|
||||
bind:metrics
|
||||
bind:isOpen={isMetricsSelectionOpen}
|
||||
bind:showFootprint
|
||||
view="list"
|
||||
footprintSelect={true}
|
||||
/>
|
||||
|
||||
@@ -1,9 +1,13 @@
|
||||
<!--
|
||||
@component List of users or projects
|
||||
@component Main component for listing users or projects
|
||||
|
||||
Properties:
|
||||
- `type String?`: The type of list ['USER' || 'PROJECT']
|
||||
- `filterPresets Object?`: Optional predefined filter values [Default: {}]
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { onMount } from "svelte";
|
||||
import { init } from "./utils.js";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
@@ -15,9 +19,17 @@
|
||||
InputGroup,
|
||||
Input,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import Filters from "./filters/Filters.svelte";
|
||||
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
||||
import { scramble, scrambleNames } from "./joblist/JobInfo.svelte";
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient,
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
init,
|
||||
scramble,
|
||||
scrambleNames,
|
||||
} from "./generic/utils.js";
|
||||
import Filters from "./generic/Filters.svelte";
|
||||
|
||||
const {} = init();
|
||||
|
||||
@@ -28,15 +40,9 @@
|
||||
if (filterPresets?.startTime == null) {
|
||||
if (filterPresets == null) filterPresets = {};
|
||||
|
||||
const lastMonth = new Date(
|
||||
Date.now() - 30 * 24 * 60 * 60 * 1000,
|
||||
).toISOString();
|
||||
const now = new Date(Date.now()).toISOString();
|
||||
filterPresets.startTime = {
|
||||
from: lastMonth,
|
||||
to: now,
|
||||
range: "last30d",
|
||||
text: "Last 30 Days",
|
||||
url: "last30d",
|
||||
};
|
||||
}
|
||||
|
||||
@@ -89,11 +95,11 @@
|
||||
return stats.filter((u) => u.id.includes(nameFilter)).sort(cmp);
|
||||
}
|
||||
|
||||
onMount(() => filterComponent.update());
|
||||
onMount(() => filterComponent.updateFilters());
|
||||
</script>
|
||||
|
||||
<Row>
|
||||
<Col xs="auto">
|
||||
<Row cols={{ xs: 1, md: 2}}>
|
||||
<Col xs="12" md="5" lg="4" xl="3" class="mb-2 mb-md-0">
|
||||
<InputGroup>
|
||||
<Button disabled outline>
|
||||
Search {type.toLowerCase()}s
|
||||
@@ -107,13 +113,13 @@
|
||||
/>
|
||||
</InputGroup>
|
||||
</Col>
|
||||
<Col xs="auto">
|
||||
<Col xs="12" md="7" lg="8" xl="9">
|
||||
<Filters
|
||||
bind:this={filterComponent}
|
||||
{filterPresets}
|
||||
startTimeQuickSelect={true}
|
||||
menuText="Only {type.toLowerCase()}s with jobs that match the filters will show up"
|
||||
on:update={({ detail }) => {
|
||||
on:update-filters={({ detail }) => {
|
||||
jobFilters = detail.filters;
|
||||
}}
|
||||
/>
|
||||
@@ -123,10 +129,11 @@
|
||||
<thead>
|
||||
<tr>
|
||||
<th scope="col">
|
||||
{{
|
||||
USER: "Username",
|
||||
PROJECT: "Project Name",
|
||||
}[type]}
|
||||
{#if type === 'USER'}
|
||||
Username
|
||||
{:else if type === 'PROJECT'}
|
||||
Project Name
|
||||
{/if}
|
||||
<Button
|
||||
color={sorting.field == "id" ? "primary" : "light"}
|
||||
size="sm"
|
||||
|
||||
@@ -1,147 +0,0 @@
|
||||
<script>
|
||||
import { getContext, createEventDispatcher } from "svelte";
|
||||
import Timeseries from "./plots/MetricPlot.svelte";
|
||||
import {
|
||||
InputGroup,
|
||||
InputGroupText,
|
||||
Spinner,
|
||||
Card,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { fetchMetrics, minScope } from "./utils";
|
||||
|
||||
export let job;
|
||||
export let metricName;
|
||||
export let scopes;
|
||||
export let width;
|
||||
export let rawData;
|
||||
export let isShared = false;
|
||||
|
||||
const dispatch = createEventDispatcher();
|
||||
const cluster = getContext("clusters").find(
|
||||
(cluster) => cluster.name == job.cluster,
|
||||
);
|
||||
const subCluster = cluster.subClusters.find(
|
||||
(subCluster) => subCluster.name == job.subCluster,
|
||||
);
|
||||
const metricConfig = cluster.metricConfig.find(
|
||||
(metricConfig) => metricConfig.name == metricName,
|
||||
);
|
||||
|
||||
let selectedHost = null,
|
||||
plot,
|
||||
fetching = false,
|
||||
error = null;
|
||||
let selectedScope = minScope(scopes);
|
||||
|
||||
let statsPattern = /(.*)-stats$/
|
||||
let statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
|
||||
let selectedScopeIndex
|
||||
|
||||
$: availableScopes = scopes;
|
||||
$: patternMatches = statsPattern.exec(selectedScope)
|
||||
$: if (!patternMatches) {
|
||||
selectedScopeIndex = scopes.findIndex((s) => s == selectedScope);
|
||||
} else {
|
||||
selectedScopeIndex = scopes.findIndex((s) => s == patternMatches[1]);
|
||||
}
|
||||
$: data = rawData[selectedScopeIndex];
|
||||
$: series = data?.series.filter(
|
||||
(series) => selectedHost == null || series.hostname == selectedHost,
|
||||
);
|
||||
|
||||
let from = null,
|
||||
to = null;
|
||||
export function setTimeRange(f, t) {
|
||||
(from = f), (to = t);
|
||||
}
|
||||
|
||||
$: if (plot != null) plot.setTimeRange(from, to);
|
||||
|
||||
export async function loadMore() {
|
||||
fetching = true;
|
||||
let response = await fetchMetrics(job, [metricName], ["core"]);
|
||||
fetching = false;
|
||||
|
||||
if (response.error) {
|
||||
error = response.error;
|
||||
return;
|
||||
}
|
||||
|
||||
for (let jm of response.data.jobMetrics) {
|
||||
if (jm.scope != "node") {
|
||||
scopes = [...scopes, jm.scope];
|
||||
rawData.push(jm.metric);
|
||||
statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
|
||||
selectedScope = jm.scope;
|
||||
selectedScopeIndex = scopes.findIndex((s) => s == jm.scope);
|
||||
dispatch("more-loaded", jm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$: if (selectedScope == "load-more") loadMore();
|
||||
</script>
|
||||
|
||||
<InputGroup>
|
||||
<InputGroupText style="min-width: 150px;">
|
||||
{metricName} ({(metricConfig?.unit?.prefix
|
||||
? metricConfig.unit.prefix
|
||||
: "") + (metricConfig?.unit?.base ? metricConfig.unit.base : "")})
|
||||
</InputGroupText>
|
||||
<select class="form-select" bind:value={selectedScope}>
|
||||
{#each availableScopes as scope, index}
|
||||
<option value={scope}>{scope}</option>
|
||||
{#if statsSeries[index]}
|
||||
<option value={scope + '-stats'}>stats series ({scope})</option>
|
||||
{/if}
|
||||
{/each}
|
||||
{#if availableScopes.length == 1 && metricConfig?.scope != "node"}
|
||||
<option value={"load-more"}>Load more...</option>
|
||||
{/if}
|
||||
</select>
|
||||
{#if job.resources.length > 1}
|
||||
<select class="form-select" bind:value={selectedHost} disabled={patternMatches}>
|
||||
<option value={null}>All Hosts</option>
|
||||
{#each job.resources as { hostname }}
|
||||
<option value={hostname}>{hostname}</option>
|
||||
{/each}
|
||||
</select>
|
||||
{/if}
|
||||
</InputGroup>
|
||||
{#key series}
|
||||
{#if fetching == true}
|
||||
<Spinner />
|
||||
{:else if error != null}
|
||||
<Card body color="danger">{error.message}</Card>
|
||||
{:else if series != null && !patternMatches}
|
||||
<Timeseries
|
||||
bind:this={plot}
|
||||
{width}
|
||||
height={300}
|
||||
{cluster}
|
||||
{subCluster}
|
||||
timestep={data.timestep}
|
||||
scope={selectedScope}
|
||||
metric={metricName}
|
||||
{series}
|
||||
{isShared}
|
||||
resources={job.resources}
|
||||
/>
|
||||
{:else if statsSeries[selectedScopeIndex] != null && patternMatches}
|
||||
<Timeseries
|
||||
bind:this={plot}
|
||||
{width}
|
||||
height={300}
|
||||
{cluster}
|
||||
{subCluster}
|
||||
timestep={data.timestep}
|
||||
scope={selectedScope}
|
||||
metric={metricName}
|
||||
{series}
|
||||
{isShared}
|
||||
resources={job.resources}
|
||||
statisticsSeries={statsSeries[selectedScopeIndex]}
|
||||
useStatsSeries={!!statsSeries[selectedScopeIndex]}
|
||||
/>
|
||||
{/if}
|
||||
{/key}
|
||||
@@ -1,38 +0,0 @@
|
||||
<script>
|
||||
import {
|
||||
Icon,
|
||||
NavLink,
|
||||
Dropdown,
|
||||
DropdownToggle,
|
||||
DropdownMenu,
|
||||
DropdownItem,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
|
||||
export let clusters; // array of names
|
||||
export let links; // array of nav links
|
||||
</script>
|
||||
|
||||
{#each links as item}
|
||||
{#if !item.perCluster}
|
||||
<NavLink href={item.href} active={window.location.pathname == item.href}
|
||||
><Icon name={item.icon} /> {item.title}</NavLink
|
||||
>
|
||||
{:else}
|
||||
<Dropdown nav inNavbar>
|
||||
<DropdownToggle nav caret>
|
||||
<Icon name={item.icon} />
|
||||
{item.title}
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
{#each clusters as cluster}
|
||||
<DropdownItem
|
||||
href={item.href + cluster.name}
|
||||
active={window.location.pathname == item.href + cluster.name}
|
||||
>
|
||||
{cluster.name}
|
||||
</DropdownItem>
|
||||
{/each}
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{/if}
|
||||
{/each}
|
||||
@@ -1,20 +1,38 @@
|
||||
<!--
|
||||
@component System-View subcomponent; renders all current metrics for specified node
|
||||
|
||||
Properties:
|
||||
- `cluster String`: Currently selected cluster
|
||||
- `hostname String`: Currently selected host (== node)
|
||||
- `from Date?`: Custom Time Range selection 'from' [Default: null]
|
||||
- `to Date?`: Custom Time Range selection 'to' [Default: null]
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { init, checkMetricDisabled } from "./utils.js";
|
||||
import { getContext } from "svelte";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
Input,
|
||||
InputGroup,
|
||||
InputGroupText,
|
||||
Icon,
|
||||
Spinner,
|
||||
Card,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
||||
import TimeSelection from "./filters/TimeSelection.svelte";
|
||||
import Refresher from "./joblist/Refresher.svelte";
|
||||
import PlotTable from "./PlotTable.svelte";
|
||||
import MetricPlot from "./plots/MetricPlot.svelte";
|
||||
import { getContext } from "svelte";
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient,
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
init,
|
||||
checkMetricDisabled,
|
||||
} from "./generic/utils.js";
|
||||
import PlotGrid from "./generic/PlotGrid.svelte";
|
||||
import MetricPlot from "./generic/plots/MetricPlot.svelte";
|
||||
import TimeSelection from "./generic/select/TimeSelection.svelte";
|
||||
import Refresher from "./generic/helper/Refresher.svelte";
|
||||
|
||||
export let cluster;
|
||||
export let hostname;
|
||||
@@ -26,9 +44,11 @@
|
||||
if (from == null || to == null) {
|
||||
to = new Date(Date.now());
|
||||
from = new Date(to.getTime());
|
||||
from.setMinutes(from.getMinutes() - 30);
|
||||
from.setHours(from.getHours() - 12);
|
||||
}
|
||||
|
||||
const initialized = getContext("initialized")
|
||||
const globalMetrics = getContext("globalMetrics")
|
||||
const ccconfig = getContext("cc-config");
|
||||
const clusters = getContext("clusters");
|
||||
const client = getContextClient();
|
||||
@@ -71,18 +91,13 @@
|
||||
},
|
||||
});
|
||||
|
||||
let itemsPerPage = ccconfig.plot_list_jobsPerPage;
|
||||
let page = 1;
|
||||
let paging = { itemsPerPage, page };
|
||||
let sorting = { field: "startTime", order: "DESC" };
|
||||
$: filter = [
|
||||
|
||||
const paging = { itemsPerPage: 50, page: 1 };
|
||||
const sorting = { field: "startTime", type: "col", order: "DESC" };
|
||||
const filter = [
|
||||
{ cluster: { eq: cluster } },
|
||||
{ node: { contains: hostname } },
|
||||
{ state: ["running"] },
|
||||
// {startTime: {
|
||||
// from: from.toISOString(),
|
||||
// to: to.toISOString()
|
||||
// }}
|
||||
];
|
||||
|
||||
const nodeJobsQuery = gql`
|
||||
@@ -92,10 +107,6 @@
|
||||
$paging: PageRequest!
|
||||
) {
|
||||
jobs(filter: $filter, order: $sorting, page: $paging) {
|
||||
# items {
|
||||
# id
|
||||
# jobId
|
||||
# }
|
||||
count
|
||||
}
|
||||
}
|
||||
@@ -107,66 +118,65 @@
|
||||
variables: { paging, sorting, filter },
|
||||
});
|
||||
|
||||
let metricUnits = {};
|
||||
$: if ($nodeMetricsData.data) {
|
||||
let thisCluster = clusters.find((c) => c.name == cluster);
|
||||
if (thisCluster) {
|
||||
for (let metric of thisCluster.metricConfig) {
|
||||
if (metric.unit.prefix || metric.unit.base) {
|
||||
metricUnits[metric.name] =
|
||||
"(" +
|
||||
(metric.unit.prefix ? metric.unit.prefix : "") +
|
||||
(metric.unit.base ? metric.unit.base : "") +
|
||||
")";
|
||||
} else {
|
||||
// If no unit defined: Omit Unit Display
|
||||
metricUnits[metric.name] = "";
|
||||
}
|
||||
}
|
||||
let systemUnits = {};
|
||||
function loadUnits(isInitialized) {
|
||||
if (!isInitialized) return
|
||||
const systemMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))]
|
||||
for (let sm of systemMetrics) {
|
||||
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
|
||||
}
|
||||
}
|
||||
|
||||
const dateToUnixEpoch = (rfc3339) => Math.floor(Date.parse(rfc3339) / 1000);
|
||||
$: loadUnits($initialized)
|
||||
</script>
|
||||
|
||||
<Row>
|
||||
<Row cols={{ xs: 2, lg: 4 }}>
|
||||
{#if $initq.error}
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
{:else if $initq.fetching}
|
||||
<Spinner />
|
||||
{:else}
|
||||
<!-- Node Col -->
|
||||
<Col>
|
||||
<InputGroup>
|
||||
<InputGroupText><Icon name="hdd" /></InputGroupText>
|
||||
<InputGroupText>{hostname} ({cluster})</InputGroupText>
|
||||
<InputGroupText>Selected Node</InputGroupText>
|
||||
<Input style="background-color: white;"type="text" value="{hostname} ({cluster})" disabled/>
|
||||
</InputGroup>
|
||||
</Col>
|
||||
<!-- Time Col -->
|
||||
<Col>
|
||||
<TimeSelection bind:from bind:to />
|
||||
</Col>
|
||||
<!-- Concurrent Col -->
|
||||
<Col class="mt-2 mt-lg-0">
|
||||
{#if $nodeJobsData.fetching}
|
||||
<Spinner />
|
||||
{:else if $nodeJobsData.data}
|
||||
Currently running jobs on this node: {$nodeJobsData.data.jobs.count}
|
||||
[
|
||||
<a
|
||||
href="/monitoring/jobs/?cluster={cluster}&state=running&node={hostname}"
|
||||
target="_blank">View in Job List</a
|
||||
> ]
|
||||
<InputGroup>
|
||||
<InputGroupText><Icon name="activity" /></InputGroupText>
|
||||
<InputGroupText>Activity</InputGroupText>
|
||||
<Input style="background-color: white;"type="text" value="{$nodeJobsData.data.jobs.count} Jobs" disabled/>
|
||||
<a title="Show jobs running on this node" href="/monitoring/jobs/?cluster={cluster}&state=running&node={hostname}" target="_blank" class="btn btn-outline-secondary" role="button" aria-disabled="true">
|
||||
<Icon name="view-list" /> Show List
|
||||
</a>
|
||||
</InputGroup>
|
||||
{:else}
|
||||
<Input type="text" disabled>
|
||||
No currently running jobs.
|
||||
</Input>
|
||||
{/if}
|
||||
</Col>
|
||||
<Col>
|
||||
<!-- Refresh Col-->
|
||||
<Col class="mt-2 mt-lg-0">
|
||||
<Refresher
|
||||
on:reload={() => {
|
||||
on:refresh={() => {
|
||||
const diff = Date.now() - to;
|
||||
from = new Date(from.getTime() + diff);
|
||||
to = new Date(to.getTime() + diff);
|
||||
}}
|
||||
/>
|
||||
</Col>
|
||||
<Col>
|
||||
<TimeSelection bind:from bind:to />
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
<br />
|
||||
@@ -177,9 +187,8 @@
|
||||
{:else if $nodeMetricsData.fetching || $initq.fetching}
|
||||
<Spinner />
|
||||
{:else}
|
||||
<PlotTable
|
||||
<PlotGrid
|
||||
let:item
|
||||
let:width
|
||||
renderFor="node"
|
||||
itemsPerRow={ccconfig.plot_view_plotsPerRow}
|
||||
items={$nodeMetricsData.data.nodeMetrics[0].metrics
|
||||
@@ -195,18 +204,15 @@
|
||||
>
|
||||
<h4 style="text-align: center; padding-top:15px;">
|
||||
{item.name}
|
||||
{metricUnits[item.name]}
|
||||
{systemUnits[item.name] ? "(" + systemUnits[item.name] + ")" : ""}
|
||||
</h4>
|
||||
{#if item.disabled === false && item.metric}
|
||||
<MetricPlot
|
||||
{width}
|
||||
height={300}
|
||||
metric={item.name}
|
||||
timestep={item.metric.timestep}
|
||||
cluster={clusters.find((c) => c.name == cluster)}
|
||||
subCluster={$nodeMetricsData.data.nodeMetrics[0].subCluster}
|
||||
series={item.metric.series}
|
||||
resources={[{ hostname: hostname }]}
|
||||
forNode={true}
|
||||
/>
|
||||
{:else if item.disabled === true && item.metric}
|
||||
@@ -224,7 +230,7 @@
|
||||
>No dataset returned for <code>{item.name}</code></Card
|
||||
>
|
||||
{/if}
|
||||
</PlotTable>
|
||||
</PlotGrid>
|
||||
{/if}
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
<script>
|
||||
<!--
|
||||
@component Main cluster status view component; renders current system-usage information
|
||||
|
||||
Properties:
|
||||
- `cluster String`: The cluster to show status information for
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { getContext } from "svelte";
|
||||
import Refresher from "./joblist/Refresher.svelte";
|
||||
import Roofline from "./plots/Roofline.svelte";
|
||||
import Pie, { colors } from "./plots/Pie.svelte";
|
||||
import Histogram from "./plots/Histogram.svelte";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
@@ -17,29 +20,32 @@
|
||||
Icon,
|
||||
Button,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import {
|
||||
init,
|
||||
convert2uplot,
|
||||
transformPerNodeDataForRoofline,
|
||||
} from "./utils.js";
|
||||
import { scaleNumbers } from "./units.js";
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient,
|
||||
mutationStore,
|
||||
} from "@urql/svelte";
|
||||
import PlotTable from "./PlotTable.svelte";
|
||||
import HistogramSelection from "./HistogramSelection.svelte";
|
||||
import {
|
||||
init,
|
||||
convert2uplot,
|
||||
transformPerNodeDataForRoofline,
|
||||
} from "./generic/utils.js";
|
||||
import { scaleNumbers } from "./generic/units.js";
|
||||
import PlotGrid from "./generic/PlotGrid.svelte";
|
||||
import Roofline from "./generic/plots/Roofline.svelte";
|
||||
import Pie, { colors } from "./generic/plots/Pie.svelte";
|
||||
import Histogram from "./generic/plots/Histogram.svelte";
|
||||
import Refresher from "./generic/helper/Refresher.svelte";
|
||||
import HistogramSelection from "./generic/select/HistogramSelection.svelte";
|
||||
|
||||
const { query: initq } = init();
|
||||
const ccconfig = getContext("cc-config");
|
||||
|
||||
export let cluster;
|
||||
|
||||
let plotWidths = [],
|
||||
colWidth1,
|
||||
colWidth2;
|
||||
let plotWidths = [];
|
||||
let colWidth;
|
||||
let from = new Date(Date.now() - 5 * 60 * 1000),
|
||||
to = new Date(Date.now());
|
||||
const topOptions = [
|
||||
@@ -146,7 +152,7 @@
|
||||
`,
|
||||
variables: {
|
||||
cluster: cluster,
|
||||
metrics: ["flops_any", "mem_bw"],
|
||||
metrics: ["flops_any", "mem_bw"], // Fixed names for roofline and status bars
|
||||
from: from.toISOString(),
|
||||
to: to.toISOString(),
|
||||
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
|
||||
@@ -331,7 +337,7 @@
|
||||
<Col class="mt-2 mt-md-0">
|
||||
<Refresher
|
||||
initially={120}
|
||||
on:reload={() => {
|
||||
on:refresh={() => {
|
||||
from = new Date(Date.now() - 5 * 60 * 1000);
|
||||
to = new Date(Date.now());
|
||||
}}
|
||||
@@ -442,7 +448,7 @@
|
||||
allowSizeChange={true}
|
||||
width={plotWidths[i] - 10}
|
||||
height={300}
|
||||
cluster={subCluster}
|
||||
subCluster={subCluster}
|
||||
data={transformPerNodeDataForRoofline(
|
||||
$mainQuery.data.nodeMetrics.filter(
|
||||
(data) => data.subCluster == subCluster.name,
|
||||
@@ -461,7 +467,7 @@
|
||||
|
||||
<Row cols={{ lg: 4, md: 2, sm: 1 }}>
|
||||
<Col class="p-2">
|
||||
<div bind:clientWidth={colWidth1}>
|
||||
<div bind:clientWidth={colWidth}>
|
||||
<h4 class="text-center">
|
||||
Top Users on {cluster.charAt(0).toUpperCase() + cluster.slice(1)}
|
||||
</h4>
|
||||
@@ -472,7 +478,7 @@
|
||||
<Card body color="danger">{$topUserQuery.error.message}</Card>
|
||||
{:else}
|
||||
<Pie
|
||||
size={colWidth1}
|
||||
size={colWidth}
|
||||
sliceLabel={topUserSelection.label}
|
||||
quantities={$topUserQuery.data.topUser.map(
|
||||
(tu) => tu[topUserSelection.key],
|
||||
@@ -532,7 +538,7 @@
|
||||
<Card body color="danger">{$topProjectQuery.error.message}</Card>
|
||||
{:else}
|
||||
<Pie
|
||||
size={colWidth1}
|
||||
size={colWidth}
|
||||
sliceLabel={topProjectSelection.label}
|
||||
quantities={$topProjectQuery.data.topProjects.map(
|
||||
(tp) => tp[topProjectSelection.key],
|
||||
@@ -584,25 +590,21 @@
|
||||
<hr class="my-2" />
|
||||
<Row cols={{ lg: 2, md: 1 }}>
|
||||
<Col class="p-2">
|
||||
<div bind:clientWidth={colWidth2}>
|
||||
{#key $mainQuery.data.stats}
|
||||
<Histogram
|
||||
data={convert2uplot($mainQuery.data.stats[0].histDuration)}
|
||||
width={colWidth2 - 25}
|
||||
title="Duration Distribution"
|
||||
xlabel="Current Runtimes"
|
||||
xunit="Hours"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
{#key $mainQuery.data.stats}
|
||||
<Histogram
|
||||
data={convert2uplot($mainQuery.data.stats[0].histDuration)}
|
||||
title="Duration Distribution"
|
||||
xlabel="Current Runtimes"
|
||||
xunit="Hours"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</Col>
|
||||
<Col class="p-2">
|
||||
{#key $mainQuery.data.stats}
|
||||
<Histogram
|
||||
data={convert2uplot($mainQuery.data.stats[0].histNumNodes)}
|
||||
width={colWidth2 - 25}
|
||||
title="Number of Nodes Distribution"
|
||||
xlabel="Allocated Nodes"
|
||||
xunit="Nodes"
|
||||
@@ -614,25 +616,21 @@
|
||||
</Row>
|
||||
<Row cols={{ lg: 2, md: 1 }}>
|
||||
<Col class="p-2">
|
||||
<div bind:clientWidth={colWidth2}>
|
||||
{#key $mainQuery.data.stats}
|
||||
<Histogram
|
||||
data={convert2uplot($mainQuery.data.stats[0].histNumCores)}
|
||||
width={colWidth2 - 25}
|
||||
title="Number of Cores Distribution"
|
||||
xlabel="Allocated Cores"
|
||||
xunit="Cores"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
{#key $mainQuery.data.stats}
|
||||
<Histogram
|
||||
data={convert2uplot($mainQuery.data.stats[0].histNumCores)}
|
||||
title="Number of Cores Distribution"
|
||||
xlabel="Allocated Cores"
|
||||
xunit="Cores"
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</Col>
|
||||
<Col class="p-2">
|
||||
{#key $mainQuery.data.stats}
|
||||
<Histogram
|
||||
data={convert2uplot($mainQuery.data.stats[0].histNumAccs)}
|
||||
width={colWidth2 - 25}
|
||||
title="Number of Accelerators Distribution"
|
||||
xlabel="Allocated Accs"
|
||||
xunit="Accs"
|
||||
@@ -644,31 +642,24 @@
|
||||
</Row>
|
||||
<hr class="my-2" />
|
||||
{#if metricsInHistograms}
|
||||
<Row cols={1}>
|
||||
<Col>
|
||||
{#key $mainQuery.data.stats[0].histMetrics}
|
||||
<PlotTable
|
||||
let:item
|
||||
let:width
|
||||
renderFor="user"
|
||||
items={$mainQuery.data.stats[0].histMetrics}
|
||||
itemsPerRow={2}
|
||||
>
|
||||
<Histogram
|
||||
data={convert2uplot(item.data)}
|
||||
usesBins={true}
|
||||
{width}
|
||||
height={250}
|
||||
title="Distribution of '{item.metric}' averages"
|
||||
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
|
||||
xunit={item.unit}
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
</PlotTable>
|
||||
{/key}
|
||||
</Col>
|
||||
</Row>
|
||||
{#key $mainQuery.data.stats[0].histMetrics}
|
||||
<PlotGrid
|
||||
let:item
|
||||
renderFor="user"
|
||||
items={$mainQuery.data.stats[0].histMetrics}
|
||||
itemsPerRow={2}
|
||||
>
|
||||
<Histogram
|
||||
data={convert2uplot(item.data)}
|
||||
usesBins={true}
|
||||
title="Distribution of '{item.metric}' averages"
|
||||
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
|
||||
xunit={item.unit}
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
</PlotGrid>
|
||||
{/key}
|
||||
{/if}
|
||||
{/if}
|
||||
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
<!--
|
||||
@component Main cluster metric status view component; renders current state of metrics / nodes
|
||||
|
||||
Properties:
|
||||
- `cluster String`: The cluster to show status information for
|
||||
- `from Date?`: Custom Time Range selection 'from' [Default: null]
|
||||
- `to Date?`: Custom Time Range selection 'to' [Default: null]
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { init, checkMetricDisabled } from "./utils.js";
|
||||
import Refresher from "./joblist/Refresher.svelte";
|
||||
import { getContext } from "svelte";
|
||||
import {
|
||||
Row,
|
||||
Col,
|
||||
@@ -11,11 +19,19 @@
|
||||
Spinner,
|
||||
Card,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
||||
import TimeSelection from "./filters/TimeSelection.svelte";
|
||||
import PlotTable from "./PlotTable.svelte";
|
||||
import MetricPlot from "./plots/MetricPlot.svelte";
|
||||
import { getContext } from "svelte";
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient,
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
init,
|
||||
checkMetricDisabled,
|
||||
} from "./generic/utils.js";
|
||||
import PlotGrid from "./generic/PlotGrid.svelte";
|
||||
import MetricPlot from "./generic/plots/MetricPlot.svelte";
|
||||
import TimeSelection from "./generic/select/TimeSelection.svelte";
|
||||
import Refresher from "./generic/helper/Refresher.svelte";
|
||||
|
||||
export let cluster;
|
||||
export let from = null;
|
||||
@@ -26,14 +42,14 @@
|
||||
if (from == null || to == null) {
|
||||
to = new Date(Date.now());
|
||||
from = new Date(to.getTime());
|
||||
from.setMinutes(from.getMinutes() - 30);
|
||||
from.setHours(from.getHours() - 12);
|
||||
}
|
||||
|
||||
const clusters = getContext("clusters");
|
||||
const initialized = getContext("initialized");
|
||||
const ccconfig = getContext("cc-config");
|
||||
const metricConfig = getContext("metrics");
|
||||
const clusters = getContext("clusters");
|
||||
const globalMetrics = getContext("globalMetrics");
|
||||
|
||||
let plotHeight = 300;
|
||||
let hostnameFilter = "";
|
||||
let selectedMetric = ccconfig.system_view_selectedMetric;
|
||||
|
||||
@@ -80,57 +96,27 @@
|
||||
},
|
||||
});
|
||||
|
||||
let metricUnits = {};
|
||||
$: if ($nodesQuery.data) {
|
||||
let thisCluster = clusters.find((c) => c.name == cluster);
|
||||
if (thisCluster) {
|
||||
for (let metric of thisCluster.metricConfig) {
|
||||
if (metric.unit.prefix || metric.unit.base) {
|
||||
metricUnits[metric.name] =
|
||||
"(" +
|
||||
(metric.unit.prefix ? metric.unit.prefix : "") +
|
||||
(metric.unit.base ? metric.unit.base : "") +
|
||||
")";
|
||||
} else {
|
||||
// If no unit defined: Omit Unit Display
|
||||
metricUnits[metric.name] = "";
|
||||
}
|
||||
}
|
||||
let systemMetrics = [];
|
||||
let systemUnits = {};
|
||||
function loadMetrics(isInitialized) {
|
||||
if (!isInitialized) return
|
||||
systemMetrics = [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))]
|
||||
for (let sm of systemMetrics) {
|
||||
systemUnits[sm.name] = (sm?.unit?.prefix ? sm.unit.prefix : "") + (sm?.unit?.base ? sm.unit.base : "")
|
||||
}
|
||||
}
|
||||
|
||||
$: loadMetrics($initialized)
|
||||
|
||||
</script>
|
||||
|
||||
<Row>
|
||||
<Row cols={{ xs: 2, lg: 4 }}>
|
||||
{#if $initq.error}
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
{:else if $initq.fetching}
|
||||
<Spinner />
|
||||
{:else}
|
||||
<Col>
|
||||
<Refresher
|
||||
on:reload={() => {
|
||||
const diff = Date.now() - to;
|
||||
from = new Date(from.getTime() + diff);
|
||||
to = new Date(to.getTime() + diff);
|
||||
}}
|
||||
/>
|
||||
</Col>
|
||||
<Col>
|
||||
<TimeSelection bind:from bind:to />
|
||||
</Col>
|
||||
<Col>
|
||||
<InputGroup>
|
||||
<InputGroupText><Icon name="graph-up" /></InputGroupText>
|
||||
<InputGroupText>Metric</InputGroupText>
|
||||
<select class="form-select" bind:value={selectedMetric}>
|
||||
{#each clusters.find((c) => c.name == cluster).metricConfig as metric}
|
||||
<option value={metric.name}
|
||||
>{metric.name} {metricUnits[metric.name]}</option
|
||||
>
|
||||
{/each}
|
||||
</select>
|
||||
</InputGroup>
|
||||
</Col>
|
||||
<!-- Node Col-->
|
||||
<Col>
|
||||
<InputGroup>
|
||||
<InputGroupText><Icon name="hdd" /></InputGroupText>
|
||||
@@ -142,77 +128,105 @@
|
||||
/>
|
||||
</InputGroup>
|
||||
</Col>
|
||||
<!-- Range Col-->
|
||||
<Col>
|
||||
<TimeSelection bind:from bind:to />
|
||||
</Col>
|
||||
<!-- Metric Col-->
|
||||
<Col class="mt-2 mt-lg-0">
|
||||
<InputGroup>
|
||||
<InputGroupText><Icon name="graph-up" /></InputGroupText>
|
||||
<InputGroupText>Metric</InputGroupText>
|
||||
<select class="form-select" bind:value={selectedMetric}>
|
||||
{#each systemMetrics as metric}
|
||||
<option value={metric.name}
|
||||
>{metric.name} {systemUnits[metric.name] ? "("+systemUnits[metric.name]+")" : ""}</option
|
||||
>
|
||||
{/each}
|
||||
</select>
|
||||
</InputGroup>
|
||||
</Col>
|
||||
<!-- Refresh Col-->
|
||||
<Col class="mt-2 mt-lg-0">
|
||||
<Refresher
|
||||
on:refresh={() => {
|
||||
const diff = Date.now() - to;
|
||||
from = new Date(from.getTime() + diff);
|
||||
to = new Date(to.getTime() + diff);
|
||||
}}
|
||||
/>
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
<br />
|
||||
<Row>
|
||||
<Col>
|
||||
{#if $nodesQuery.error}
|
||||
{#if $nodesQuery.error}
|
||||
<Row>
|
||||
<Col>
|
||||
<Card body color="danger">{$nodesQuery.error.message}</Card>
|
||||
{:else if $nodesQuery.fetching || $initq.fetching}
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $nodesQuery.fetching || $initq.fetching}
|
||||
<Row>
|
||||
<Col>
|
||||
<Spinner />
|
||||
{:else}
|
||||
<PlotTable
|
||||
let:item
|
||||
let:width
|
||||
renderFor="systems"
|
||||
itemsPerRow={ccconfig.plot_view_plotsPerRow}
|
||||
items={$nodesQuery.data.nodeMetrics
|
||||
.filter(
|
||||
(h) =>
|
||||
h.host.includes(hostnameFilter) &&
|
||||
h.metrics.some(
|
||||
(m) => m.name == selectedMetric && m.scope == "node",
|
||||
),
|
||||
)
|
||||
.map((h) => ({
|
||||
host: h.host,
|
||||
subCluster: h.subCluster,
|
||||
data: h.metrics.find(
|
||||
(m) => m.name == selectedMetric && m.scope == "node",
|
||||
),
|
||||
disabled: checkMetricDisabled(
|
||||
selectedMetric,
|
||||
cluster,
|
||||
h.subCluster,
|
||||
),
|
||||
}))
|
||||
.sort((a, b) => a.host.localeCompare(b.host))}
|
||||
</Col>
|
||||
</Row>
|
||||
{:else}
|
||||
<PlotGrid
|
||||
let:item
|
||||
renderFor="systems"
|
||||
itemsPerRow={ccconfig.plot_view_plotsPerRow}
|
||||
items={$nodesQuery.data.nodeMetrics
|
||||
.filter(
|
||||
(h) =>
|
||||
h.host.includes(hostnameFilter) &&
|
||||
h.metrics.some(
|
||||
(m) => m.name == selectedMetric && m.scope == "node",
|
||||
),
|
||||
)
|
||||
.map((h) => ({
|
||||
host: h.host,
|
||||
subCluster: h.subCluster,
|
||||
data: h.metrics.find(
|
||||
(m) => m.name == selectedMetric && m.scope == "node",
|
||||
),
|
||||
disabled: checkMetricDisabled(
|
||||
selectedMetric,
|
||||
cluster,
|
||||
h.subCluster,
|
||||
),
|
||||
}))
|
||||
.sort((a, b) => a.host.localeCompare(b.host))}
|
||||
>
|
||||
<h4 style="width: 100%; text-align: center;">
|
||||
<a
|
||||
style="display: block;padding-top: 15px;"
|
||||
href="/monitoring/node/{cluster}/{item.host}"
|
||||
>{item.host} ({item.subCluster})</a
|
||||
>
|
||||
</h4>
|
||||
{#if item.disabled === false && item.data}
|
||||
<MetricPlot
|
||||
timestep={item.data.metric.timestep}
|
||||
series={item.data.metric.series}
|
||||
metric={item.data.name}
|
||||
cluster={clusters.find((c) => c.name == cluster)}
|
||||
subCluster={item.subCluster}
|
||||
forNode={true}
|
||||
/>
|
||||
{:else if item.disabled === true && item.data}
|
||||
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="info"
|
||||
>Metric disabled for subcluster <code
|
||||
>{selectedMetric}:{item.subCluster}</code
|
||||
></Card
|
||||
>
|
||||
{:else}
|
||||
<Card
|
||||
style="margin-left: 2rem;margin-right: 2rem;"
|
||||
body
|
||||
color="warning"
|
||||
>No dataset returned for <code>{selectedMetric}</code></Card
|
||||
>
|
||||
<h4 style="width: 100%; text-align: center;">
|
||||
<a
|
||||
style="display: block;padding-top: 15px;"
|
||||
href="/monitoring/node/{cluster}/{item.host}"
|
||||
>{item.host} ({item.subCluster})</a
|
||||
>
|
||||
</h4>
|
||||
{#if item.disabled === false && item.data}
|
||||
<MetricPlot
|
||||
{width}
|
||||
height={plotHeight}
|
||||
timestep={item.data.metric.timestep}
|
||||
series={item.data.metric.series}
|
||||
metric={item.data.name}
|
||||
cluster={clusters.find((c) => c.name == cluster)}
|
||||
subCluster={item.subCluster}
|
||||
resources={[{ hostname: item.host }]}
|
||||
forNode={true}
|
||||
/>
|
||||
{:else if item.disabled === true && item.data}
|
||||
<Card style="margin-left: 2rem;margin-right: 2rem;" body color="info"
|
||||
>Metric disabled for subcluster <code
|
||||
>{selectedMetric}:{item.subCluster}</code
|
||||
></Card
|
||||
>
|
||||
{:else}
|
||||
<Card
|
||||
style="margin-left: 2rem;margin-right: 2rem;"
|
||||
body
|
||||
color="warning"
|
||||
>No dataset returned for <code>{selectedMetric}</code></Card
|
||||
>
|
||||
{/if}
|
||||
</PlotTable>
|
||||
{/if}
|
||||
</Col>
|
||||
</Row>
|
||||
</PlotGrid>
|
||||
{/if}
|
||||
|
||||
@@ -1,234 +0,0 @@
|
||||
<script>
|
||||
import { getContext } from "svelte";
|
||||
import { gql, getContextClient, mutationStore } from "@urql/svelte";
|
||||
import {
|
||||
Icon,
|
||||
Button,
|
||||
ListGroupItem,
|
||||
Spinner,
|
||||
Modal,
|
||||
Input,
|
||||
ModalBody,
|
||||
ModalHeader,
|
||||
ModalFooter,
|
||||
Alert,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { fuzzySearchTags } from "./utils.js";
|
||||
import Tag from "./Tag.svelte";
|
||||
|
||||
export let job;
|
||||
export let jobTags = job.tags;
|
||||
|
||||
let allTags = getContext("tags"),
|
||||
initialized = getContext("initialized");
|
||||
let newTagType = "",
|
||||
newTagName = "";
|
||||
let filterTerm = "";
|
||||
let pendingChange = false;
|
||||
let isOpen = false;
|
||||
|
||||
const client = getContextClient();
|
||||
|
||||
const createTagMutation = ({ type, name }) => {
|
||||
return mutationStore({
|
||||
client: client,
|
||||
query: gql`
|
||||
mutation ($type: String!, $name: String!) {
|
||||
createTag(type: $type, name: $name) {
|
||||
id
|
||||
type
|
||||
name
|
||||
}
|
||||
}
|
||||
`,
|
||||
variables: { type, name },
|
||||
});
|
||||
};
|
||||
|
||||
const addTagsToJobMutation = ({ job, tagIds }) => {
|
||||
return mutationStore({
|
||||
client: client,
|
||||
query: gql`
|
||||
mutation ($job: ID!, $tagIds: [ID!]!) {
|
||||
addTagsToJob(job: $job, tagIds: $tagIds) {
|
||||
id
|
||||
type
|
||||
name
|
||||
}
|
||||
}
|
||||
`,
|
||||
variables: { job, tagIds },
|
||||
});
|
||||
};
|
||||
|
||||
const removeTagsFromJobMutation = ({ job, tagIds }) => {
|
||||
return mutationStore({
|
||||
client: client,
|
||||
query: gql`
|
||||
mutation ($job: ID!, $tagIds: [ID!]!) {
|
||||
removeTagsFromJob(job: $job, tagIds: $tagIds) {
|
||||
id
|
||||
type
|
||||
name
|
||||
}
|
||||
}
|
||||
`,
|
||||
variables: { job, tagIds },
|
||||
});
|
||||
};
|
||||
|
||||
let allTagsFiltered; // $initialized is in there because when it becomes true, allTags is initailzed.
|
||||
$: allTagsFiltered = ($initialized, fuzzySearchTags(filterTerm, allTags));
|
||||
|
||||
$: {
|
||||
newTagType = "";
|
||||
newTagName = "";
|
||||
let parts = filterTerm.split(":").map((s) => s.trim());
|
||||
if (parts.length == 2 && parts.every((s) => s.length > 0)) {
|
||||
newTagType = parts[0];
|
||||
newTagName = parts[1];
|
||||
}
|
||||
}
|
||||
|
||||
function isNewTag(type, name) {
|
||||
for (let tag of allTagsFiltered)
|
||||
if (tag.type == type && tag.name == name) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
function createTag(type, name) {
|
||||
pendingChange = true;
|
||||
createTagMutation({ type: type, name: name }).subscribe((res) => {
|
||||
if (res.fetching === false && !res.error) {
|
||||
pendingChange = false;
|
||||
allTags = [...allTags, res.data.createTag];
|
||||
newTagType = "";
|
||||
newTagName = "";
|
||||
addTagToJob(res.data.createTag);
|
||||
} else if (res.fetching === false && res.error) {
|
||||
throw res.error;
|
||||
// console.log('Error on subscription: ' + res.error)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function addTagToJob(tag) {
|
||||
pendingChange = tag.id;
|
||||
addTagsToJobMutation({ job: job.id, tagIds: [tag.id] }).subscribe((res) => {
|
||||
if (res.fetching === false && !res.error) {
|
||||
jobTags = job.tags = res.data.addTagsToJob;
|
||||
pendingChange = false;
|
||||
} else if (res.fetching === false && res.error) {
|
||||
throw res.error;
|
||||
// console.log('Error on subscription: ' + res.error)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function removeTagFromJob(tag) {
|
||||
pendingChange = tag.id;
|
||||
removeTagsFromJobMutation({ job: job.id, tagIds: [tag.id] }).subscribe(
|
||||
(res) => {
|
||||
if (res.fetching === false && !res.error) {
|
||||
jobTags = job.tags = res.data.removeTagsFromJob;
|
||||
pendingChange = false;
|
||||
} else if (res.fetching === false && res.error) {
|
||||
throw res.error;
|
||||
// console.log('Error on subscription: ' + res.error)
|
||||
}
|
||||
},
|
||||
);
|
||||
}
|
||||
</script>
|
||||
|
||||
<Modal {isOpen} toggle={() => (isOpen = !isOpen)}>
|
||||
<ModalHeader>
|
||||
Manage Tags
|
||||
{#if pendingChange !== false}
|
||||
<Spinner size="sm" secondary />
|
||||
{:else}
|
||||
<Icon name="tags" />
|
||||
{/if}
|
||||
</ModalHeader>
|
||||
<ModalBody>
|
||||
<Input
|
||||
style="width: 100%;"
|
||||
type="text"
|
||||
placeholder="Search Tags"
|
||||
bind:value={filterTerm}
|
||||
/>
|
||||
|
||||
<br />
|
||||
|
||||
<Alert color="info">
|
||||
Search using "<code>type: name</code>". If no tag matches your search, a
|
||||
button for creating a new one will appear.
|
||||
</Alert>
|
||||
|
||||
<ul class="list-group">
|
||||
{#each allTagsFiltered as tag}
|
||||
<ListGroupItem>
|
||||
<Tag {tag} />
|
||||
|
||||
<span style="float: right;">
|
||||
{#if pendingChange === tag.id}
|
||||
<Spinner size="sm" secondary />
|
||||
{:else if job.tags.find((t) => t.id == tag.id)}
|
||||
<Button
|
||||
size="sm"
|
||||
outline
|
||||
color="danger"
|
||||
on:click={() => removeTagFromJob(tag)}
|
||||
>
|
||||
<Icon name="x" />
|
||||
</Button>
|
||||
{:else}
|
||||
<Button
|
||||
size="sm"
|
||||
outline
|
||||
color="success"
|
||||
on:click={() => addTagToJob(tag)}
|
||||
>
|
||||
<Icon name="plus" />
|
||||
</Button>
|
||||
{/if}
|
||||
</span>
|
||||
</ListGroupItem>
|
||||
{:else}
|
||||
<ListGroupItem disabled>
|
||||
<i>No tags matching</i>
|
||||
</ListGroupItem>
|
||||
{/each}
|
||||
</ul>
|
||||
<br />
|
||||
{#if newTagType && newTagName && isNewTag(newTagType, newTagName)}
|
||||
<Button
|
||||
outline
|
||||
color="success"
|
||||
on:click={(e) => (
|
||||
e.preventDefault(), createTag(newTagType, newTagName)
|
||||
)}
|
||||
>
|
||||
Create & Add Tag:
|
||||
<Tag tag={{ type: newTagType, name: newTagName }} clickable={false} />
|
||||
</Button>
|
||||
{:else if allTagsFiltered.length == 0}
|
||||
<Alert>Search Term is not a valid Tag (<code>type: name</code>)</Alert>
|
||||
{/if}
|
||||
</ModalBody>
|
||||
<ModalFooter>
|
||||
<Button color="primary" on:click={() => (isOpen = false)}>Close</Button>
|
||||
</ModalFooter>
|
||||
</Modal>
|
||||
|
||||
<Button outline on:click={() => (isOpen = true)}>
|
||||
Manage Tags <Icon name="tags" />
|
||||
</Button>
|
||||
|
||||
<style>
|
||||
ul.list-group {
|
||||
max-height: 450px;
|
||||
margin-bottom: 10px;
|
||||
overflow: scroll;
|
||||
}
|
||||
</style>
|
||||
@@ -1,26 +1,43 @@
|
||||
<!--
|
||||
@component Main user jobs list display component; displays job list and additional information for a given user
|
||||
|
||||
Properties:
|
||||
- `user Object`: The GraphQL user object
|
||||
- `filterPresets Object`: Optional predefined filter values
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { onMount, getContext } from "svelte";
|
||||
import { init, convert2uplot } from "./utils.js";
|
||||
import {
|
||||
Table,
|
||||
Row,
|
||||
Col,
|
||||
Button,
|
||||
ButtonGroup,
|
||||
Icon,
|
||||
Card,
|
||||
Spinner,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
||||
import Filters from "./filters/Filters.svelte";
|
||||
import TextFilter from "./filters/TextFilter.svelte"
|
||||
import JobList from "./joblist/JobList.svelte";
|
||||
import Sorting from "./joblist/SortSelection.svelte";
|
||||
import Refresher from "./joblist/Refresher.svelte";
|
||||
import Histogram from "./plots/Histogram.svelte";
|
||||
import MetricSelection from "./MetricSelection.svelte";
|
||||
import HistogramSelection from "./HistogramSelection.svelte";
|
||||
import PlotTable from "./PlotTable.svelte";
|
||||
import { scramble, scrambleNames } from "./joblist/JobInfo.svelte";
|
||||
import {
|
||||
queryStore,
|
||||
gql,
|
||||
getContextClient,
|
||||
} from "@urql/svelte";
|
||||
import {
|
||||
init,
|
||||
convert2uplot,
|
||||
scramble,
|
||||
scrambleNames,
|
||||
} from "./generic/utils.js";
|
||||
import JobList from "./generic/JobList.svelte";
|
||||
import Filters from "./generic/Filters.svelte";
|
||||
import PlotGrid from "./generic/PlotGrid.svelte";
|
||||
import Histogram from "./generic/plots/Histogram.svelte";
|
||||
import MetricSelection from "./generic/select/MetricSelection.svelte";
|
||||
import HistogramSelection from "./generic/select/HistogramSelection.svelte";
|
||||
import Sorting from "./generic/select/SortSelection.svelte";
|
||||
import TextFilter from "./generic/helper/TextFilter.svelte"
|
||||
import Refresher from "./generic/helper/Refresher.svelte";
|
||||
|
||||
const { query: initq } = init();
|
||||
|
||||
@@ -32,14 +49,12 @@
|
||||
let filterComponent; // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
||||
let jobList;
|
||||
let jobFilters = [];
|
||||
let sorting = { field: "startTime", order: "DESC" },
|
||||
let matchedJobs = 0;
|
||||
let sorting = { field: "startTime", type: "col", order: "DESC" },
|
||||
isSortingOpen = false;
|
||||
let metrics = ccconfig.plot_list_selectedMetrics,
|
||||
isMetricsSelectionOpen = false;
|
||||
let w1,
|
||||
w2,
|
||||
histogramHeight = 250,
|
||||
isHistogramSelectionOpen = false;
|
||||
let isHistogramSelectionOpen = false;
|
||||
let selectedCluster = filterPresets?.cluster ? filterPresets.cluster : null;
|
||||
let showFootprint = filterPresets.cluster
|
||||
? !!ccconfig[`plot_list_showFootprint:${filterPresets.cluster}`]
|
||||
@@ -70,6 +85,7 @@
|
||||
histMetrics {
|
||||
metric
|
||||
unit
|
||||
stat
|
||||
data {
|
||||
min
|
||||
max
|
||||
@@ -83,66 +99,70 @@
|
||||
variables: { jobFilters, metricsInHistograms },
|
||||
});
|
||||
|
||||
onMount(() => filterComponent.update());
|
||||
onMount(() => filterComponent.updateFilters());
|
||||
</script>
|
||||
|
||||
<Row>
|
||||
{#if $initq.fetching}
|
||||
<!-- ROW1: Status-->
|
||||
{#if $initq.fetching}
|
||||
<Row>
|
||||
<Col>
|
||||
<Spinner />
|
||||
</Col>
|
||||
{:else if $initq.error}
|
||||
<Col xs="auto">
|
||||
</Row>
|
||||
{:else if $initq.error}
|
||||
<Row>
|
||||
<Col>
|
||||
<Card body color="danger">{$initq.error.message}</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
{/if}
|
||||
|
||||
<Col xs="auto">
|
||||
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
|
||||
<Icon name="sort-up" /> Sorting
|
||||
</Button>
|
||||
|
||||
<Button
|
||||
outline
|
||||
color="primary"
|
||||
on:click={() => (isMetricsSelectionOpen = true)}
|
||||
>
|
||||
<Icon name="graph-up" /> Metrics
|
||||
</Button>
|
||||
|
||||
<Button
|
||||
outline
|
||||
color="secondary"
|
||||
on:click={() => (isHistogramSelectionOpen = true)}
|
||||
>
|
||||
<Icon name="bar-chart-line" /> Select Histograms
|
||||
</Button>
|
||||
<!-- ROW2: Tools-->
|
||||
<Row cols={{ xs: 1, md: 2, lg: 4}} class="mb-3">
|
||||
<Col lg="2" class="mb-2 mb-lg-0">
|
||||
<ButtonGroup class="w-100">
|
||||
<Button outline color="primary" on:click={() => (isSortingOpen = true)}>
|
||||
<Icon name="sort-up" /> Sorting
|
||||
</Button>
|
||||
<Button
|
||||
outline
|
||||
color="primary"
|
||||
on:click={() => (isMetricsSelectionOpen = true)}
|
||||
>
|
||||
<Icon name="graph-up" /> Metrics
|
||||
</Button>
|
||||
</ButtonGroup>
|
||||
</Col>
|
||||
<Col xs="auto">
|
||||
<Col lg="4" xl="6" class="mb-1 mb-lg-0">
|
||||
<Filters
|
||||
{filterPresets}
|
||||
{matchedJobs}
|
||||
startTimeQuickSelect={true}
|
||||
bind:this={filterComponent}
|
||||
on:update={({ detail }) => {
|
||||
on:update-filters={({ detail }) => {
|
||||
jobFilters = [...detail.filters, { user: { eq: user.username } }];
|
||||
selectedCluster = jobFilters[0]?.cluster
|
||||
? jobFilters[0].cluster.eq
|
||||
: null;
|
||||
jobList.update(jobFilters);
|
||||
jobList.queryJobs(jobFilters);
|
||||
}}
|
||||
/>
|
||||
</Col>
|
||||
<Col xs="auto" style="margin-left: auto;">
|
||||
<Col lg="3" xl="2" class="mb-2 mb-lg-0">
|
||||
<TextFilter
|
||||
on:update={({ detail }) => filterComponent.update(detail)}
|
||||
on:set-filter={({ detail }) => filterComponent.updateFilters(detail)}
|
||||
/>
|
||||
</Col>
|
||||
<Col xs="auto">
|
||||
<Refresher on:reload={() => jobList.refresh()} />
|
||||
<Col lg="3" xl="2" class="mb-1 mb-lg-0">
|
||||
<Refresher on:refresh={() => {
|
||||
jobList.refreshJobs()
|
||||
jobList.refreshAllMetrics()
|
||||
}} />
|
||||
</Col>
|
||||
</Row>
|
||||
<br />
|
||||
<Row>
|
||||
|
||||
<!-- ROW3: Base Information-->
|
||||
<Row cols={{ xs: 1, md: 3}} class="mb-2">
|
||||
{#if $stats.error}
|
||||
<Col>
|
||||
<Card body color="danger">{$stats.error.message}</Card>
|
||||
@@ -152,7 +172,7 @@
|
||||
<Spinner secondary />
|
||||
</Col>
|
||||
{:else}
|
||||
<Col xs="4">
|
||||
<Col>
|
||||
<Table>
|
||||
<tbody>
|
||||
<tr>
|
||||
@@ -190,12 +210,10 @@
|
||||
</tbody>
|
||||
</Table>
|
||||
</Col>
|
||||
<div class="col-4 text-center" bind:clientWidth={w1}>
|
||||
<Col class="px-1">
|
||||
{#key $stats.data.jobsStatistics[0].histDuration}
|
||||
<Histogram
|
||||
data={convert2uplot($stats.data.jobsStatistics[0].histDuration)}
|
||||
width={w1 - 25}
|
||||
height={histogramHeight}
|
||||
title="Duration Distribution"
|
||||
xlabel="Current Runtimes"
|
||||
xunit="Hours"
|
||||
@@ -203,13 +221,11 @@
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
<div class="col-4 text-center" bind:clientWidth={w2}>
|
||||
</Col>
|
||||
<Col class="px-1">
|
||||
{#key $stats.data.jobsStatistics[0].histNumNodes}
|
||||
<Histogram
|
||||
data={convert2uplot($stats.data.jobsStatistics[0].histNumNodes)}
|
||||
width={w2 - 25}
|
||||
height={histogramHeight}
|
||||
title="Number of Nodes Distribution"
|
||||
xlabel="Allocated Nodes"
|
||||
xunit="Nodes"
|
||||
@@ -217,50 +233,74 @@
|
||||
yunit="Jobs"
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
{#if metricsInHistograms}
|
||||
<Row>
|
||||
{#if $stats.error}
|
||||
|
||||
<!-- ROW4+5: Selectable Histograms -->
|
||||
<Row cols={{ xs: 1, md: 5}}>
|
||||
<Col>
|
||||
<Button
|
||||
outline
|
||||
color="secondary"
|
||||
on:click={() => (isHistogramSelectionOpen = true)}
|
||||
>
|
||||
<Icon name="bar-chart-line" /> Select Histograms
|
||||
</Button>
|
||||
</Col>
|
||||
</Row>
|
||||
{#if metricsInHistograms?.length > 0}
|
||||
{#if $stats.error}
|
||||
<Row>
|
||||
<Col>
|
||||
<Card body color="danger">{$stats.error.message}</Card>
|
||||
</Col>
|
||||
{:else if !$stats.data}
|
||||
</Row>
|
||||
{:else if !$stats.data}
|
||||
<Row>
|
||||
<Col>
|
||||
<Spinner secondary />
|
||||
</Col>
|
||||
{:else}
|
||||
<Col>
|
||||
{#key $stats.data.jobsStatistics[0].histMetrics}
|
||||
<PlotTable
|
||||
let:item
|
||||
let:width
|
||||
renderFor="user"
|
||||
items={$stats.data.jobsStatistics[0].histMetrics}
|
||||
itemsPerRow={3}
|
||||
>
|
||||
<Histogram
|
||||
data={convert2uplot(item.data)}
|
||||
usesBins={true}
|
||||
{width}
|
||||
height={250}
|
||||
title="Distribution of '{item.metric}' averages"
|
||||
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
|
||||
xunit={item.unit}
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
</PlotTable>
|
||||
{/key}
|
||||
</Col>
|
||||
{/if}
|
||||
</Row>
|
||||
{:else}
|
||||
<hr class="my-2"/>
|
||||
{#key $stats.data.jobsStatistics[0].histMetrics}
|
||||
<PlotGrid
|
||||
let:item
|
||||
renderFor="user"
|
||||
items={$stats.data.jobsStatistics[0].histMetrics}
|
||||
itemsPerRow={3}
|
||||
>
|
||||
<Histogram
|
||||
data={convert2uplot(item.data)}
|
||||
usesBins={true}
|
||||
title="Distribution of '{item.metric} ({item.stat})' footprints"
|
||||
xlabel={`${item.metric} bin maximum ${item?.unit ? `[${item.unit}]` : ``}`}
|
||||
xunit={item.unit}
|
||||
ylabel="Number of Jobs"
|
||||
yunit="Jobs"
|
||||
/>
|
||||
</PlotGrid>
|
||||
{/key}
|
||||
{/if}
|
||||
{:else}
|
||||
<Row class="mt-2">
|
||||
<Col>
|
||||
<Card body>No footprint histograms selected.</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
{/if}
|
||||
<br />
|
||||
<Row>
|
||||
|
||||
<!-- ROW6: JOB LIST-->
|
||||
<Row class="mt-3">
|
||||
<Col>
|
||||
<JobList bind:metrics bind:sorting bind:this={jobList} bind:showFootprint />
|
||||
<JobList
|
||||
bind:this={jobList}
|
||||
bind:matchedJobs
|
||||
bind:metrics
|
||||
bind:sorting
|
||||
bind:showFootprint
|
||||
/>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
@@ -272,7 +312,7 @@
|
||||
bind:metrics
|
||||
bind:isOpen={isMetricsSelectionOpen}
|
||||
bind:showFootprint
|
||||
view="list"
|
||||
footprintSelect={true}
|
||||
/>
|
||||
|
||||
<HistogramSelection
|
||||
|
||||
@@ -1,65 +0,0 @@
|
||||
<script>
|
||||
import { Icon, InputGroup, InputGroupText } from "@sveltestrap/sveltestrap";
|
||||
|
||||
export let timeseriesPlots;
|
||||
|
||||
let windowSize = 100; // Goes from 0 to 100
|
||||
let windowPosition = 50; // Goes from 0 to 100
|
||||
|
||||
function updatePlots() {
|
||||
let ws = windowSize / (100 * 2),
|
||||
wp = windowPosition / 100;
|
||||
let from = wp - ws,
|
||||
to = wp + ws;
|
||||
Object.values(timeseriesPlots).forEach((plot) =>
|
||||
plot.setTimeRange(from, to),
|
||||
);
|
||||
}
|
||||
|
||||
// Rendering a big job can take a long time, so we
|
||||
// throttle the rerenders to every 100ms here.
|
||||
let timeoutId = null;
|
||||
function requestUpdatePlots() {
|
||||
if (timeoutId != null) window.cancelAnimationFrame(timeoutId);
|
||||
|
||||
timeoutId = window.requestAnimationFrame(() => {
|
||||
updatePlots();
|
||||
timeoutId = null;
|
||||
}, 100);
|
||||
}
|
||||
|
||||
$: requestUpdatePlots(windowSize, windowPosition);
|
||||
</script>
|
||||
|
||||
<div>
|
||||
<InputGroup>
|
||||
<InputGroupText>
|
||||
<Icon name="zoom-in" />
|
||||
</InputGroupText>
|
||||
<InputGroupText>
|
||||
Window Size:
|
||||
<input
|
||||
style="margin: 0em 0em 0em 1em"
|
||||
type="range"
|
||||
bind:value={windowSize}
|
||||
min="1"
|
||||
max="100"
|
||||
step="1"
|
||||
/>
|
||||
<span style="width: 5em;">
|
||||
({windowSize}%)
|
||||
</span>
|
||||
</InputGroupText>
|
||||
<InputGroupText>
|
||||
Window Position:
|
||||
<input
|
||||
style="margin: 0em 0em 0em 1em"
|
||||
type="range"
|
||||
bind:value={windowPosition}
|
||||
min="0"
|
||||
max="100"
|
||||
step="1"
|
||||
/>
|
||||
</InputGroupText>
|
||||
</InputGroup>
|
||||
</div>
|
||||
@@ -6,7 +6,8 @@ filterPresets.cluster = cluster
|
||||
new Analysis({
|
||||
target: document.getElementById('svelte-app'),
|
||||
props: {
|
||||
filterPresets: filterPresets
|
||||
filterPresets: filterPresets,
|
||||
cluster: cluster
|
||||
},
|
||||
context: new Map([
|
||||
['cc-config', clusterCockpitConfig]
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
<!--
|
||||
@component Analysis-View subcomponent; allows selection for normalized histograms and scatterplots
|
||||
|
||||
Properties:
|
||||
- `availableMetrics [String]`: Available metrics in selected cluster
|
||||
- `metricsInHistograms [String]`: The currently selected metrics to display as histogram
|
||||
- `metricsInScatterplots [[String, String]]`: The currently selected metrics to display as scatterplot
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
Modal,
|
||||
@@ -41,7 +50,6 @@
|
||||
}).subscribe((res) => {
|
||||
if (res.fetching === false && res.error) {
|
||||
throw res.error;
|
||||
// console.log('Error on subscription: ' + res.error)
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -4,9 +4,12 @@ import Config from './Config.root.svelte'
|
||||
new Config({
|
||||
target: document.getElementById('svelte-app'),
|
||||
props: {
|
||||
isAdmin: isAdmin
|
||||
isAdmin: isAdmin,
|
||||
isApi: isApi,
|
||||
username: username
|
||||
},
|
||||
context: new Map([
|
||||
['cc-config', clusterCockpitConfig]
|
||||
['cc-config', clusterCockpitConfig],
|
||||
['resampling', resampleConfig]
|
||||
])
|
||||
})
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
<!--
|
||||
@component Admin settings wrapper
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { Row, Col } from "@sveltestrap/sveltestrap";
|
||||
import { onMount } from "svelte";
|
||||
@@ -11,7 +15,7 @@
|
||||
let roles = [];
|
||||
|
||||
function getUserList() {
|
||||
fetch("/api/users/?via-ldap=false¬-just-user=true")
|
||||
fetch("/config/users/?via-ldap=false¬-just-user=true")
|
||||
.then((res) => res.json())
|
||||
.then((usersRaw) => {
|
||||
users = usersRaw;
|
||||
@@ -19,7 +23,7 @@
|
||||
}
|
||||
|
||||
function getValidRoles() {
|
||||
fetch("/api/roles/")
|
||||
fetch("/config/roles/")
|
||||
.then((res) => res.json())
|
||||
.then((rolesRaw) => {
|
||||
roles = rolesRaw;
|
||||
@@ -47,7 +51,5 @@
|
||||
<Col>
|
||||
<EditProject on:reload={getUserList} />
|
||||
</Col>
|
||||
<Col>
|
||||
<Options />
|
||||
</Col>
|
||||
<Options />
|
||||
</Row>
|
||||
|
||||
@@ -1,552 +0,0 @@
|
||||
<script>
|
||||
import {
|
||||
Button,
|
||||
Table,
|
||||
Row,
|
||||
Col,
|
||||
Card,
|
||||
CardTitle,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { fade } from "svelte/transition";
|
||||
|
||||
export let config;
|
||||
|
||||
let message = { msg: "", target: "", color: "#d63384" };
|
||||
let displayMessage = false;
|
||||
|
||||
const colorschemes = {
|
||||
Default: [
|
||||
"#00bfff",
|
||||
"#0000ff",
|
||||
"#ff00ff",
|
||||
"#ff0000",
|
||||
"#ff8000",
|
||||
"#ffff00",
|
||||
"#80ff00",
|
||||
],
|
||||
Autumn: [
|
||||
"rgb(255,0,0)",
|
||||
"rgb(255,11,0)",
|
||||
"rgb(255,20,0)",
|
||||
"rgb(255,30,0)",
|
||||
"rgb(255,41,0)",
|
||||
"rgb(255,50,0)",
|
||||
"rgb(255,60,0)",
|
||||
"rgb(255,71,0)",
|
||||
"rgb(255,80,0)",
|
||||
"rgb(255,90,0)",
|
||||
"rgb(255,101,0)",
|
||||
"rgb(255,111,0)",
|
||||
"rgb(255,120,0)",
|
||||
"rgb(255,131,0)",
|
||||
"rgb(255,141,0)",
|
||||
"rgb(255,150,0)",
|
||||
"rgb(255,161,0)",
|
||||
"rgb(255,171,0)",
|
||||
"rgb(255,180,0)",
|
||||
"rgb(255,190,0)",
|
||||
"rgb(255,201,0)",
|
||||
"rgb(255,210,0)",
|
||||
"rgb(255,220,0)",
|
||||
"rgb(255,231,0)",
|
||||
"rgb(255,240,0)",
|
||||
"rgb(255,250,0)",
|
||||
],
|
||||
Beach: [
|
||||
"rgb(0,252,0)",
|
||||
"rgb(0,233,0)",
|
||||
"rgb(0,212,0)",
|
||||
"rgb(0,189,0)",
|
||||
"rgb(0,169,0)",
|
||||
"rgb(0,148,0)",
|
||||
"rgb(0,129,4)",
|
||||
"rgb(0,145,46)",
|
||||
"rgb(0,162,90)",
|
||||
"rgb(0,180,132)",
|
||||
"rgb(29,143,136)",
|
||||
"rgb(73,88,136)",
|
||||
"rgb(115,32,136)",
|
||||
"rgb(81,9,64)",
|
||||
"rgb(124,51,23)",
|
||||
"rgb(162,90,0)",
|
||||
"rgb(194,132,0)",
|
||||
"rgb(220,171,0)",
|
||||
"rgb(231,213,0)",
|
||||
"rgb(0,0,13)",
|
||||
"rgb(0,0,55)",
|
||||
"rgb(0,0,92)",
|
||||
"rgb(0,0,127)",
|
||||
"rgb(0,0,159)",
|
||||
"rgb(0,0,196)",
|
||||
"rgb(0,0,233)",
|
||||
],
|
||||
BlueRed: [
|
||||
"rgb(0,0,131)",
|
||||
"rgb(0,0,168)",
|
||||
"rgb(0,0,208)",
|
||||
"rgb(0,0,247)",
|
||||
"rgb(0,27,255)",
|
||||
"rgb(0,67,255)",
|
||||
"rgb(0,108,255)",
|
||||
"rgb(0,148,255)",
|
||||
"rgb(0,187,255)",
|
||||
"rgb(0,227,255)",
|
||||
"rgb(8,255,247)",
|
||||
"rgb(48,255,208)",
|
||||
"rgb(87,255,168)",
|
||||
"rgb(127,255,127)",
|
||||
"rgb(168,255,87)",
|
||||
"rgb(208,255,48)",
|
||||
"rgb(247,255,8)",
|
||||
"rgb(255,224,0)",
|
||||
"rgb(255,183,0)",
|
||||
"rgb(255,143,0)",
|
||||
"rgb(255,104,0)",
|
||||
"rgb(255,64,0)",
|
||||
"rgb(255,23,0)",
|
||||
"rgb(238,0,0)",
|
||||
"rgb(194,0,0)",
|
||||
"rgb(150,0,0)",
|
||||
],
|
||||
Rainbow: [
|
||||
"rgb(125,0,255)",
|
||||
"rgb(85,0,255)",
|
||||
"rgb(39,0,255)",
|
||||
"rgb(0,6,255)",
|
||||
"rgb(0,51,255)",
|
||||
"rgb(0,97,255)",
|
||||
"rgb(0,141,255)",
|
||||
"rgb(0,187,255)",
|
||||
"rgb(0,231,255)",
|
||||
"rgb(0,255,233)",
|
||||
"rgb(0,255,189)",
|
||||
"rgb(0,255,143)",
|
||||
"rgb(0,255,99)",
|
||||
"rgb(0,255,53)",
|
||||
"rgb(0,255,9)",
|
||||
"rgb(37,255,0)",
|
||||
"rgb(83,255,0)",
|
||||
"rgb(127,255,0)",
|
||||
"rgb(173,255,0)",
|
||||
"rgb(217,255,0)",
|
||||
"rgb(255,248,0)",
|
||||
"rgb(255,203,0)",
|
||||
"rgb(255,159,0)",
|
||||
"rgb(255,113,0)",
|
||||
"rgb(255,69,0)",
|
||||
"rgb(255,23,0)",
|
||||
],
|
||||
Binary: [
|
||||
"rgb(215,215,215)",
|
||||
"rgb(206,206,206)",
|
||||
"rgb(196,196,196)",
|
||||
"rgb(185,185,185)",
|
||||
"rgb(176,176,176)",
|
||||
"rgb(166,166,166)",
|
||||
"rgb(155,155,155)",
|
||||
"rgb(145,145,145)",
|
||||
"rgb(136,136,136)",
|
||||
"rgb(125,125,125)",
|
||||
"rgb(115,115,115)",
|
||||
"rgb(106,106,106)",
|
||||
"rgb(95,95,95)",
|
||||
"rgb(85,85,85)",
|
||||
"rgb(76,76,76)",
|
||||
"rgb(66,66,66)",
|
||||
"rgb(55,55,55)",
|
||||
"rgb(46,46,46)",
|
||||
"rgb(36,36,36)",
|
||||
"rgb(25,25,25)",
|
||||
"rgb(16,16,16)",
|
||||
"rgb(6,6,6)",
|
||||
],
|
||||
GistEarth: [
|
||||
"rgb(0,0,0)",
|
||||
"rgb(2,7,117)",
|
||||
"rgb(9,30,118)",
|
||||
"rgb(16,53,120)",
|
||||
"rgb(23,73,122)",
|
||||
"rgb(31,93,124)",
|
||||
"rgb(39,110,125)",
|
||||
"rgb(47,126,127)",
|
||||
"rgb(51,133,119)",
|
||||
"rgb(57,138,106)",
|
||||
"rgb(62,145,94)",
|
||||
"rgb(66,150,82)",
|
||||
"rgb(74,157,71)",
|
||||
"rgb(97,162,77)",
|
||||
"rgb(121,168,83)",
|
||||
"rgb(136,173,85)",
|
||||
"rgb(153,176,88)",
|
||||
"rgb(170,180,92)",
|
||||
"rgb(185,182,94)",
|
||||
"rgb(189,173,99)",
|
||||
"rgb(192,164,101)",
|
||||
"rgb(203,169,124)",
|
||||
"rgb(215,178,149)",
|
||||
"rgb(226,192,176)",
|
||||
"rgb(238,212,204)",
|
||||
"rgb(248,236,236)",
|
||||
],
|
||||
BlueWaves: [
|
||||
"rgb(83,0,215)",
|
||||
"rgb(43,6,108)",
|
||||
"rgb(9,16,16)",
|
||||
"rgb(8,32,25)",
|
||||
"rgb(0,50,8)",
|
||||
"rgb(27,64,66)",
|
||||
"rgb(69,67,178)",
|
||||
"rgb(115,62,210)",
|
||||
"rgb(155,50,104)",
|
||||
"rgb(178,43,41)",
|
||||
"rgb(180,51,34)",
|
||||
"rgb(161,78,87)",
|
||||
"rgb(124,117,187)",
|
||||
"rgb(78,155,203)",
|
||||
"rgb(34,178,85)",
|
||||
"rgb(4,176,2)",
|
||||
"rgb(9,152,27)",
|
||||
"rgb(4,118,2)",
|
||||
"rgb(34,92,85)",
|
||||
"rgb(78,92,203)",
|
||||
"rgb(124,127,187)",
|
||||
"rgb(161,187,87)",
|
||||
"rgb(180,248,34)",
|
||||
"rgb(178,220,41)",
|
||||
"rgb(155,217,104)",
|
||||
"rgb(115,254,210)",
|
||||
],
|
||||
BlueGreenRedYellow: [
|
||||
"rgb(0,0,0)",
|
||||
"rgb(0,0,20)",
|
||||
"rgb(0,0,41)",
|
||||
"rgb(0,0,62)",
|
||||
"rgb(0,25,83)",
|
||||
"rgb(0,57,101)",
|
||||
"rgb(0,87,101)",
|
||||
"rgb(0,118,101)",
|
||||
"rgb(0,150,101)",
|
||||
"rgb(0,150,69)",
|
||||
"rgb(0,148,37)",
|
||||
"rgb(0,141,6)",
|
||||
"rgb(60,120,0)",
|
||||
"rgb(131,87,0)",
|
||||
"rgb(180,25,0)",
|
||||
"rgb(203,13,0)",
|
||||
"rgb(208,36,0)",
|
||||
"rgb(213,60,0)",
|
||||
"rgb(219,83,0)",
|
||||
"rgb(224,106,0)",
|
||||
"rgb(229,129,0)",
|
||||
"rgb(233,152,0)",
|
||||
"rgb(238,176,0)",
|
||||
"rgb(243,199,0)",
|
||||
"rgb(248,222,0)",
|
||||
"rgb(254,245,0)",
|
||||
],
|
||||
};
|
||||
|
||||
async function handleSettingSubmit(selector, target) {
|
||||
let form = document.querySelector(selector);
|
||||
let formData = new FormData(form);
|
||||
try {
|
||||
const res = await fetch(form.action, { method: "POST", body: formData });
|
||||
if (res.ok) {
|
||||
let text = await res.text();
|
||||
popMessage(text, target, "#048109");
|
||||
} else {
|
||||
let text = await res.text();
|
||||
// console.log(res.statusText)
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
popMessage(err, target, "#d63384");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
function popMessage(response, restarget, rescolor) {
|
||||
message = { msg: response, target: restarget, color: rescolor };
|
||||
displayMessage = true;
|
||||
setTimeout(function () {
|
||||
displayMessage = false;
|
||||
}, 3500);
|
||||
}
|
||||
</script>
|
||||
|
||||
<Row cols={4} class="p-2 g-2">
|
||||
<!-- LINE WIDTH -->
|
||||
<Col
|
||||
><Card class="h-100">
|
||||
<!-- Important: Function with arguments needs to be event-triggered like on:submit={() => functionName('Some','Args')} OR no arguments and like this: on:submit={functionName} -->
|
||||
<form
|
||||
id="line-width-form"
|
||||
method="post"
|
||||
action="/api/configuration/"
|
||||
class="card-body"
|
||||
on:submit|preventDefault={() =>
|
||||
handleSettingSubmit("#line-width-form", "lw")}
|
||||
>
|
||||
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Line Width</div>
|
||||
<!-- Expand If-Clause for clarity once -->
|
||||
{#if displayMessage && message.target == "lw"}
|
||||
<div style="margin-left: auto; font-size: 0.9em;">
|
||||
<code style="color: {message.color};" out:fade>
|
||||
Update: {message.msg}
|
||||
</code>
|
||||
</div>
|
||||
{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plot_general_lineWidth" />
|
||||
<div class="mb-3">
|
||||
<label for="value" class="form-label">Line Width</label>
|
||||
<input
|
||||
type="number"
|
||||
class="form-control"
|
||||
id="lwvalue"
|
||||
name="value"
|
||||
aria-describedby="lineWidthHelp"
|
||||
value={config.plot_general_lineWidth}
|
||||
min="1"
|
||||
/>
|
||||
<div id="lineWidthHelp" class="form-text">
|
||||
Width of the lines in the timeseries plots.
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card></Col
|
||||
>
|
||||
|
||||
<!-- PLOTS PER ROW -->
|
||||
<Col
|
||||
><Card class="h-100">
|
||||
<form
|
||||
id="plots-per-row-form"
|
||||
method="post"
|
||||
action="/api/configuration/"
|
||||
class="card-body"
|
||||
on:submit|preventDefault={() =>
|
||||
handleSettingSubmit("#plots-per-row-form", "ppr")}
|
||||
>
|
||||
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Plots per Row</div>
|
||||
{#if displayMessage && message.target == "ppr"}<div
|
||||
style="margin-left: auto; font-size: 0.9em;"
|
||||
>
|
||||
<code style="color: {message.color};" out:fade
|
||||
>Update: {message.msg}</code
|
||||
>
|
||||
</div>{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plot_view_plotsPerRow" />
|
||||
<div class="mb-3">
|
||||
<label for="value" class="form-label">Plots per Row</label>
|
||||
<input
|
||||
type="number"
|
||||
class="form-control"
|
||||
id="pprvalue"
|
||||
name="value"
|
||||
aria-describedby="plotsperrowHelp"
|
||||
value={config.plot_view_plotsPerRow}
|
||||
min="1"
|
||||
/>
|
||||
<div id="plotsperrowHelp" class="form-text">
|
||||
How many plots to show next to each other on pages such as
|
||||
/monitoring/job/, /monitoring/system/...
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card></Col
|
||||
>
|
||||
|
||||
<!-- BACKGROUND -->
|
||||
<Col
|
||||
><Card class="h-100">
|
||||
<form
|
||||
id="backgrounds-form"
|
||||
method="post"
|
||||
action="/api/configuration/"
|
||||
class="card-body"
|
||||
on:submit|preventDefault={() =>
|
||||
handleSettingSubmit("#backgrounds-form", "bg")}
|
||||
>
|
||||
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Colored Backgrounds</div>
|
||||
{#if displayMessage && message.target == "bg"}<div
|
||||
style="margin-left: auto; font-size: 0.9em;"
|
||||
>
|
||||
<code style="color: {message.color};" out:fade
|
||||
>Update: {message.msg}</code
|
||||
>
|
||||
</div>{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plot_general_colorBackground" />
|
||||
<div class="mb-3">
|
||||
<div>
|
||||
{#if config.plot_general_colorBackground}
|
||||
<input type="radio" id="true" name="value" value="true" checked />
|
||||
{:else}
|
||||
<input type="radio" id="true" name="value" value="true" />
|
||||
{/if}
|
||||
<label for="true">Yes</label>
|
||||
</div>
|
||||
<div>
|
||||
{#if config.plot_general_colorBackground}
|
||||
<input type="radio" id="false" name="value" value="false" />
|
||||
{:else}
|
||||
<input
|
||||
type="radio"
|
||||
id="false"
|
||||
name="value"
|
||||
value="false"
|
||||
checked
|
||||
/>
|
||||
{/if}
|
||||
<label for="false">No</label>
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card></Col
|
||||
>
|
||||
|
||||
<!-- PAGING -->
|
||||
<Col
|
||||
><Card class="h-100">
|
||||
<form
|
||||
id="paging-form"
|
||||
method="post"
|
||||
action="/api/configuration/"
|
||||
class="card-body"
|
||||
on:submit|preventDefault={() =>
|
||||
handleSettingSubmit("#paging-form", "pag")}
|
||||
>
|
||||
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Paging Type</div>
|
||||
{#if displayMessage && message.target == "pag"}<div
|
||||
style="margin-left: auto; font-size: 0.9em;"
|
||||
>
|
||||
<code style="color: {message.color};" out:fade
|
||||
>Update: {message.msg}</code
|
||||
>
|
||||
</div>{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="job_list_usePaging" />
|
||||
<div class="mb-3">
|
||||
<div>
|
||||
{#if config.job_list_usePaging}
|
||||
<input type="radio" id="true" name="value" value="true" checked />
|
||||
{:else}
|
||||
<input type="radio" id="true" name="value" value="true" />
|
||||
{/if}
|
||||
<label for="true">Paging with selectable count of jobs.</label>
|
||||
</div>
|
||||
<div>
|
||||
{#if config.job_list_usePaging}
|
||||
<input type="radio" id="false" name="value" value="false" />
|
||||
{:else}
|
||||
<input
|
||||
type="radio"
|
||||
id="false"
|
||||
name="value"
|
||||
value="false"
|
||||
checked
|
||||
/>
|
||||
{/if}
|
||||
<label for="false">Continuous scroll iteratively adding 10 jobs.</label>
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card></Col
|
||||
>
|
||||
</Row>
|
||||
|
||||
<Row cols={1} class="p-2 g-2">
|
||||
<!-- COLORSCHEME -->
|
||||
<Col
|
||||
><Card>
|
||||
<form
|
||||
id="colorscheme-form"
|
||||
method="post"
|
||||
action="/api/configuration/"
|
||||
class="card-body"
|
||||
>
|
||||
<!-- Svelte 'class' directive only on DOMs directly, normal 'class="xxx"' does not work, so style-array it is. -->
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Color Scheme for Timeseries Plots</div>
|
||||
{#if displayMessage && message.target == "cs"}<div
|
||||
style="margin-left: auto; font-size: 0.9em;"
|
||||
>
|
||||
<code style="color: {message.color};" out:fade
|
||||
>Update: {message.msg}</code
|
||||
>
|
||||
</div>{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plot_general_colorscheme" />
|
||||
<Table hover>
|
||||
<tbody>
|
||||
{#each Object.entries(colorschemes) as [name, rgbrow]}
|
||||
<tr>
|
||||
<th scope="col">{name}</th>
|
||||
<td>
|
||||
{#if rgbrow.join(",") == config.plot_general_colorscheme}
|
||||
<input
|
||||
type="radio"
|
||||
name="value"
|
||||
value={JSON.stringify(rgbrow)}
|
||||
checked
|
||||
on:click={() =>
|
||||
handleSettingSubmit("#colorscheme-form", "cs")}
|
||||
/>
|
||||
{:else}
|
||||
<input
|
||||
type="radio"
|
||||
name="value"
|
||||
value={JSON.stringify(rgbrow)}
|
||||
on:click={() =>
|
||||
handleSettingSubmit("#colorscheme-form", "cs")}
|
||||
/>
|
||||
{/if}
|
||||
</td>
|
||||
<td>
|
||||
{#each rgbrow as rgb}
|
||||
<span class="color-dot" style="background-color: {rgb};"
|
||||
></span>
|
||||
{/each}
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</Table>
|
||||
</form>
|
||||
</Card></Col
|
||||
>
|
||||
</Row>
|
||||
|
||||
<style>
|
||||
.color-dot {
|
||||
height: 10px;
|
||||
width: 10px;
|
||||
border-radius: 50%;
|
||||
display: inline-block;
|
||||
}
|
||||
</style>
|
||||
54
web/frontend/src/config/UserSettings.svelte
Normal file
54
web/frontend/src/config/UserSettings.svelte
Normal file
@@ -0,0 +1,54 @@
|
||||
<!--
|
||||
@component User settings wrapper
|
||||
|
||||
Properties:
|
||||
- `username String!`: Empty string if auth. is disabled, otherwise the username as string
|
||||
- `isApi Bool!`: Is currently logged in user api authority
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { getContext } from "svelte";
|
||||
import UserOptions from "./user/UserOptions.svelte";
|
||||
import PlotRenderOptions from "./user/PlotRenderOptions.svelte";
|
||||
import PlotColorScheme from "./user/PlotColorScheme.svelte";
|
||||
|
||||
export let username
|
||||
export let isApi
|
||||
|
||||
const ccconfig = getContext("cc-config");
|
||||
let message = { msg: "", target: "", color: "#d63384" };
|
||||
let displayMessage = false;
|
||||
|
||||
async function handleSettingSubmit(event) {
|
||||
const selector = event.detail.selector
|
||||
const target = event.detail.target
|
||||
let form = document.querySelector(selector);
|
||||
let formData = new FormData(form);
|
||||
try {
|
||||
const res = await fetch(form.action, { method: "POST", body: formData });
|
||||
if (res.ok) {
|
||||
let text = await res.text();
|
||||
popMessage(text, target, "#048109");
|
||||
} else {
|
||||
let text = await res.text();
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
popMessage(err, target, "#d63384");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
function popMessage(response, restarget, rescolor) {
|
||||
message = { msg: response, target: restarget, color: rescolor };
|
||||
displayMessage = true;
|
||||
setTimeout(function () {
|
||||
displayMessage = false;
|
||||
}, 3500);
|
||||
}
|
||||
</script>
|
||||
|
||||
<UserOptions config={ccconfig} {username} {isApi} bind:message bind:displayMessage on:update-config={(e) => handleSettingSubmit(e)}/>
|
||||
<PlotRenderOptions config={ccconfig} bind:message bind:displayMessage on:update-config={(e) => handleSettingSubmit(e)}/>
|
||||
<PlotColorScheme config={ccconfig} bind:message bind:displayMessage on:update-config={(e) => handleSettingSubmit(e)}/>
|
||||
@@ -1,4 +1,14 @@
|
||||
<script>
|
||||
<!--
|
||||
@component User creation form card
|
||||
|
||||
Properties:
|
||||
- `roles [String]!`: List of roles used in app as strings
|
||||
|
||||
Events:
|
||||
- `reload`: Trigger upstream reload of user list after user creation
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { Button, Card, CardTitle } from "@sveltestrap/sveltestrap";
|
||||
import { createEventDispatcher } from "svelte";
|
||||
import { fade } from "svelte/transition";
|
||||
@@ -8,7 +18,7 @@
|
||||
let message = { msg: "", color: "#d63384" };
|
||||
let displayMessage = false;
|
||||
|
||||
export let roles = [];
|
||||
export let roles;
|
||||
|
||||
async function handleUserSubmit() {
|
||||
let form = document.querySelector("#create-user-form");
|
||||
@@ -23,7 +33,6 @@
|
||||
form.reset();
|
||||
} else {
|
||||
let text = await res.text();
|
||||
// console.log(res.statusText)
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
@@ -48,7 +57,7 @@
|
||||
<form
|
||||
id="create-user-form"
|
||||
method="post"
|
||||
action="/api/users/"
|
||||
action="/config/users/"
|
||||
class="card-body"
|
||||
on:submit|preventDefault={handleUserSubmit}
|
||||
>
|
||||
|
||||
@@ -1,3 +1,10 @@
|
||||
<!--
|
||||
@component User managed project edit form card
|
||||
|
||||
Events:
|
||||
- `reload`: Trigger upstream reload of user list after project update
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { Card, CardTitle, CardBody } from "@sveltestrap/sveltestrap";
|
||||
import { createEventDispatcher } from "svelte";
|
||||
@@ -22,7 +29,7 @@
|
||||
formData.append("add-project", project);
|
||||
|
||||
try {
|
||||
const res = await fetch(`/api/user/${username}`, {
|
||||
const res = await fetch(`/config/user/${username}`, {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
@@ -32,7 +39,6 @@
|
||||
reloadUserList();
|
||||
} else {
|
||||
let text = await res.text();
|
||||
// console.log(res.statusText)
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
@@ -54,7 +60,7 @@
|
||||
formData.append("remove-project", project);
|
||||
|
||||
try {
|
||||
const res = await fetch(`/api/user/${username}`, {
|
||||
const res = await fetch(`/config/user/${username}`, {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
@@ -64,7 +70,6 @@
|
||||
reloadUserList();
|
||||
} else {
|
||||
let text = await res.text();
|
||||
// console.log(res.statusText)
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
@component User role edit form card
|
||||
|
||||
Properties:
|
||||
- `roles [String]!`: List of roles used in app as strings
|
||||
|
||||
Events:
|
||||
- `reload`: Trigger upstream reload of user list after role edit
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { Card, CardTitle, CardBody } from "@sveltestrap/sveltestrap";
|
||||
import { createEventDispatcher } from "svelte";
|
||||
@@ -8,7 +18,7 @@
|
||||
let message = { msg: "", color: "#d63384" };
|
||||
let displayMessage = false;
|
||||
|
||||
export let roles = [];
|
||||
export let roles;
|
||||
|
||||
async function handleAddRole() {
|
||||
const username = document.querySelector("#role-username").value;
|
||||
@@ -24,7 +34,7 @@
|
||||
formData.append("add-role", role);
|
||||
|
||||
try {
|
||||
const res = await fetch(`/api/user/${username}`, {
|
||||
const res = await fetch(`/config/user/${username}`, {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
@@ -34,7 +44,6 @@
|
||||
reloadUserList();
|
||||
} else {
|
||||
let text = await res.text();
|
||||
// console.log(res.statusText)
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
@@ -56,7 +65,7 @@
|
||||
formData.append("remove-role", role);
|
||||
|
||||
try {
|
||||
const res = await fetch(`/api/user/${username}`, {
|
||||
const res = await fetch(`/config/user/${username}`, {
|
||||
method: "POST",
|
||||
body: formData,
|
||||
});
|
||||
@@ -66,7 +75,6 @@
|
||||
reloadUserList();
|
||||
} else {
|
||||
let text = await res.text();
|
||||
// console.log(res.statusText)
|
||||
throw new Error("Response Code " + res.status + "-> " + text);
|
||||
}
|
||||
} catch (err) {
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
<!--
|
||||
@component Admin option select card
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { onMount } from "svelte";
|
||||
import { Card, CardBody, CardTitle } from "@sveltestrap/sveltestrap";
|
||||
import { getContext, onMount } from "svelte";
|
||||
import { Col, Card, CardBody, CardTitle } from "@sveltestrap/sveltestrap";
|
||||
|
||||
let scrambled;
|
||||
|
||||
const resampleConfig = getContext("resampling");
|
||||
|
||||
onMount(() => {
|
||||
scrambled = window.localStorage.getItem("cc-scramble-names") != null;
|
||||
});
|
||||
@@ -19,16 +25,30 @@
|
||||
}
|
||||
</script>
|
||||
|
||||
<Card class="h-100">
|
||||
<CardBody>
|
||||
<CardTitle class="mb-3">Scramble Names / Presentation Mode</CardTitle>
|
||||
<input
|
||||
type="checkbox"
|
||||
id="scramble-names-checkbox"
|
||||
style="margin-right: 1em;"
|
||||
on:click={handleScramble}
|
||||
bind:checked={scrambled}
|
||||
/>
|
||||
Active?
|
||||
</CardBody>
|
||||
</Card>
|
||||
<Col>
|
||||
<Card class="h-100">
|
||||
<CardBody>
|
||||
<CardTitle class="mb-3">Scramble Names / Presentation Mode</CardTitle>
|
||||
<input
|
||||
type="checkbox"
|
||||
id="scramble-names-checkbox"
|
||||
style="margin-right: 1em;"
|
||||
on:click={handleScramble}
|
||||
bind:checked={scrambled}
|
||||
/>
|
||||
Active?
|
||||
</CardBody>
|
||||
</Card>
|
||||
</Col>
|
||||
|
||||
{#if resampleConfig}
|
||||
<Col>
|
||||
<Card class="h-100">
|
||||
<CardBody>
|
||||
<CardTitle class="mb-3">Metric Plot Resampling</CardTitle>
|
||||
<p>Triggered at {resampleConfig.trigger} datapoints.</p>
|
||||
<p>Configured resolutions: {resampleConfig.resolutions}</p>
|
||||
</CardBody>
|
||||
</Card>
|
||||
</Col>
|
||||
{/if}
|
||||
|
||||
@@ -1,3 +1,13 @@
|
||||
<!--
|
||||
@component User management table
|
||||
|
||||
Properties:
|
||||
- `users [Object]?`: List of users
|
||||
|
||||
Events:
|
||||
- `reload`: Trigger upstream reload of user list
|
||||
-->
|
||||
|
||||
<script>
|
||||
import {
|
||||
Button,
|
||||
@@ -20,7 +30,7 @@
|
||||
if (confirm("Are you sure?")) {
|
||||
let formData = new FormData();
|
||||
formData.append("username", username);
|
||||
fetch("/api/users/", { method: "DELETE", body: formData }).then((res) => {
|
||||
fetch("/config/users/", { method: "DELETE", body: formData }).then((res) => {
|
||||
if (res.status == 200) {
|
||||
reloadUserList();
|
||||
} else {
|
||||
|
||||
@@ -1,18 +1,25 @@
|
||||
<!--
|
||||
@component User data row for table
|
||||
|
||||
Properties:
|
||||
- `user Object!`: User Object
|
||||
- {username: String, name: String, roles: [String], projects: String, email: String}
|
||||
-->
|
||||
|
||||
<script>
|
||||
import { Button } from "@sveltestrap/sveltestrap";
|
||||
import { fetchJwt } from "../../generic/utils.js"
|
||||
|
||||
export let user;
|
||||
let jwt = "";
|
||||
|
||||
let jwt = "";
|
||||
function getUserJwt(username) {
|
||||
fetch(`/api/jwt/?username=${username}`)
|
||||
.then((res) => res.text())
|
||||
.then((text) => {
|
||||
jwt = text;
|
||||
navigator.clipboard
|
||||
.writeText(text)
|
||||
.catch((reason) => console.error(reason));
|
||||
});
|
||||
const p = fetchJwt(username);
|
||||
p.then((content) => {
|
||||
jwt = content
|
||||
}).catch((error) => {
|
||||
console.error(`Could not get JWT: ${error}`);
|
||||
});
|
||||
}
|
||||
</script>
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user