19 Commits

Author SHA1 Message Date
Jan Eitzinger
9489ebc7d6 Merge pull request #320 from ClusterCockpit/hotfix
Fixes for Bugfix Release 1.4.2
2024-12-19 14:51:07 +01:00
2a5c525193 Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2024-12-19 11:12:50 +01:00
9e2d981c60 Add notice about footprint to ReleaseNotes 2024-12-19 11:12:40 +01:00
Christoph Kluge
53dfe9e4f5 fix: footprint peak is default if footprint stat is avg 2024-12-19 11:00:12 +01:00
48e95fbdb0 Prepare release 1.4.2 2024-12-19 06:34:35 +01:00
fd94d85edf Compute duration for running jobs on the fly 2024-12-19 06:24:08 +01:00
f2d1a85afb Reformat json schema files 2024-12-19 06:14:35 +01:00
0bdbcb8bab Use persisted duration for running jobs
Fixes #318
2024-12-19 05:55:31 +01:00
Christoph Kluge
7b91a819be add workaround for clipboard button 2024-12-18 16:40:49 +01:00
bc89025924 Revert to blocking startJob REST api
Fixes #316
2024-12-18 11:45:56 +01:00
Jan Eitzinger
16bcaef4c3 Merge pull request #319 from ClusterCockpit/dependabot/go_modules/golang.org/x/crypto-0.31.0
Bump golang.org/x/crypto from 0.29.0 to 0.31.0
2024-12-18 07:27:19 +01:00
dependabot[bot]
fcbfa451f2 Bump golang.org/x/crypto from 0.29.0 to 0.31.0
Bumps [golang.org/x/crypto](https://github.com/golang/crypto) from 0.29.0 to 0.31.0.
- [Commits](https://github.com/golang/crypto/compare/v0.29.0...v0.31.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-12-18 06:22:10 +00:00
Christoph Kluge
559ce53ca4 Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2024-12-17 15:14:27 +01:00
Christoph Kluge
ee2c5b58d7 fix: add missing sorting parameter to REST API call and test 2024-12-17 15:14:24 +01:00
Jan Eitzinger
d98d998106 Merge pull request #315 from ClusterCockpit/hotfix
Prepare Bugfix release 1.4.1
2024-12-10 16:54:17 +01:00
212c45e070 Prepare bug fix release 1.4.1 2024-12-10 16:45:05 +01:00
143fa9b6ed Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2024-12-10 16:36:22 +01:00
4849928288 Rename old column name for user
Fixes #314
2024-12-10 16:35:43 +01:00
Christoph Kluge
9248ee8868 fix: fix renamed column reference in searchbar workflow 2024-12-09 11:06:12 +01:00
25 changed files with 1197 additions and 1284 deletions

View File

@@ -2,7 +2,7 @@ TARGET = ./cc-backend
VAR = ./var VAR = ./var
CFG = config.json .env CFG = config.json .env
FRONTEND = ./web/frontend FRONTEND = ./web/frontend
VERSION = 1.4.0 VERSION = 1.4.2
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development') GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S") CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}' LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'

View File

@@ -1,8 +1,8 @@
# `cc-backend` version 1.4.0 # `cc-backend` version 1.4.2
Supports job archive version 2 and database version 8. Supports job archive version 2 and database version 8.
This is a minor release of `cc-backend`, the API backend and frontend This is a small bug fix release of `cc-backend`, the API backend and frontend
implementation of ClusterCockpit. implementation of ClusterCockpit.
For release specific notes visit the [ClusterCockpit Documentation](https://clustercockpit.org/docs/release/). For release specific notes visit the [ClusterCockpit Documentation](https://clustercockpit.org/docs/release/).
@@ -12,7 +12,8 @@ For release specific notes visit the [ClusterCockpit Documentation](https://clus
migration might require several hours! migration might require several hours!
- You need to adapt the `cluster.json` configuration files in the job-archive, - You need to adapt the `cluster.json` configuration files in the job-archive,
add new required attributes to the metric list and after that edit add new required attributes to the metric list and after that edit
`./job-archive/version.txt` to version 2. `./job-archive/version.txt` to version 2. Only metrics that have the footprint
attribute set can be filtered and show up in the footprint UI and polar plot.
- Continuous scrolling is default now in all job lists. You can change this back - Continuous scrolling is default now in all job lists. You can change this back
to paging globally, also every user can configure to use paging or continuous to paging globally, also every user can configure to use paging or continuous
scrolling individually. scrolling individually.

View File

@@ -14,7 +14,7 @@ var (
func cliInit() { func cliInit() {
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env") flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)") flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap") flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling") flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)") flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI") flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")

View File

@@ -112,7 +112,7 @@ func main() {
if flagInit { if flagInit {
initEnv() initEnv()
fmt.Print("Succesfully setup environment!\n") fmt.Print("Successfully setup environment!\n")
fmt.Print("Please review config.json and .env and adjust it to your needs.\n") fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
fmt.Print("Add your job-archive at ./var/job-archive.\n") fmt.Print("Add your job-archive at ./var/job-archive.\n")
os.Exit(0) os.Exit(0)

View File

@@ -25,7 +25,6 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated" "github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig" "github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv" "github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
@@ -314,9 +313,6 @@ func serverShutdown() {
// First shut down the server gracefully (waiting for all ongoing requests) // First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background()) server.Shutdown(context.Background())
// Then, wait for any async jobStarts still pending...
repository.WaitForJobStart()
// Then, wait for any async archivings still pending... // Then, wait for any async archivings still pending...
archiver.WaitForArchiving() archiver.WaitForArchiving()
} }

8
go.mod
View File

@@ -26,7 +26,7 @@ require (
github.com/swaggo/http-swagger v1.3.4 github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.16.4 github.com/swaggo/swag v1.16.4
github.com/vektah/gqlparser/v2 v2.5.20 github.com/vektah/gqlparser/v2 v2.5.20
golang.org/x/crypto v0.29.0 golang.org/x/crypto v0.31.0
golang.org/x/exp v0.0.0-20240707233637-46b078467d37 golang.org/x/exp v0.0.0-20240707233637-46b078467d37
golang.org/x/oauth2 v0.21.0 golang.org/x/oauth2 v0.21.0
) )
@@ -79,9 +79,9 @@ require (
go.uber.org/atomic v1.11.0 // indirect go.uber.org/atomic v1.11.0 // indirect
golang.org/x/mod v0.22.0 // indirect golang.org/x/mod v0.22.0 // indirect
golang.org/x/net v0.31.0 // indirect golang.org/x/net v0.31.0 // indirect
golang.org/x/sync v0.9.0 // indirect golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.27.0 // indirect golang.org/x/sys v0.28.0 // indirect
golang.org/x/text v0.20.0 // indirect golang.org/x/text v0.21.0 // indirect
golang.org/x/tools v0.27.0 // indirect golang.org/x/tools v0.27.0 // indirect
google.golang.org/protobuf v1.35.2 // indirect google.golang.org/protobuf v1.35.2 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect

16
go.sum
View File

@@ -238,8 +238,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/exp v0.0.0-20240707233637-46b078467d37 h1:uLDX+AfeFCct3a2C7uIWBKMJIR3CJMhcgfrUAqjRK6w= golang.org/x/exp v0.0.0-20240707233637-46b078467d37 h1:uLDX+AfeFCct3a2C7uIWBKMJIR3CJMhcgfrUAqjRK6w=
golang.org/x/exp v0.0.0-20240707233637-46b078467d37/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/exp v0.0.0-20240707233637-46b078467d37/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
@@ -262,8 +262,8 @@ golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbht
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -273,8 +273,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -287,8 +287,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=

View File

@@ -1,5 +1,5 @@
[Unit] [Unit]
Description=ClusterCockpit Web Server (Go edition) Description=ClusterCockpit Web Server
Documentation=https://github.com/ClusterCockpit/cc-backend Documentation=https://github.com/ClusterCockpit/cc-backend
Wants=network-online.target Wants=network-online.target
After=network-online.target After=network-online.target

View File

@@ -249,9 +249,6 @@ func TestRestApi(t *testing.T) {
if response.StatusCode != http.StatusCreated { if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String()) t.Fatal(response.Status, recorder.Body.String())
} }
time.Sleep(1 * time.Second)
resolver := graph.GetResolverInstance() resolver := graph.GetResolverInstance()
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime) job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil { if err != nil {

View File

@@ -123,18 +123,8 @@ func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) {
} }
} }
// StartJobApiResponse model // DefaultApiResponse model
type StartJobApiResponse struct { type DefaultJobApiResponse struct {
Message string `json:"msg"`
}
// DeleteJobApiResponse model
type DeleteJobApiResponse struct {
Message string `json:"msg"`
}
// UpdateUserApiResponse model
type UpdateUserApiResponse struct {
Message string `json:"msg"` Message string `json:"msg"`
} }
@@ -341,7 +331,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
withMetadata := false withMetadata := false
filter := &model.JobFilter{} filter := &model.JobFilter{}
page := &model.PageRequest{ItemsPerPage: 25, Page: 1} page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc} order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
for key, vals := range r.URL.Query() { for key, vals := range r.URL.Query() {
switch key { switch key {
@@ -790,6 +780,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
return return
} }
// acquire lock to avoid race conditions between API calls
var unlockOnce sync.Once
api.RepositoryMutex.Lock()
defer unlockOnce.Do(api.RepositoryMutex.Unlock)
// Check if combination of (job_id, cluster_id, start_time) already exists: // Check if combination of (job_id, cluster_id, start_time) already exists:
jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil) jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
if err != nil && err != sql.ErrNoRows { if err != nil && err != sql.ErrNoRows {
@@ -804,12 +799,27 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
} }
} }
repository.TriggerJobStart(repository.JobWithUser{Job: &req, User: repository.GetUserFromContext(r.Context())}) id, err := api.JobRepository.Start(&req)
if err != nil {
handleError(fmt.Errorf("insert into database failed: %w", err), http.StatusInternalServerError, rw)
return
}
// unlock here, adding Tags can be async
unlockOnce.Do(api.RepositoryMutex.Unlock)
for _, tag := range req.Tags {
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
return
}
}
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
rw.Header().Add("Content-Type", "application/json") rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusCreated) rw.WriteHeader(http.StatusCreated)
json.NewEncoder(rw).Encode(StartJobApiResponse{ json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: fmt.Sprintf("Successfully triggered job start"), Message: "success",
}) })
} }
@@ -892,7 +902,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
} }
rw.Header().Add("Content-Type", "application/json") rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK) rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{ json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: fmt.Sprintf("Successfully deleted job %s", id), Message: fmt.Sprintf("Successfully deleted job %s", id),
}) })
} }
@@ -943,7 +953,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
rw.Header().Add("Content-Type", "application/json") rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK) rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{ json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: fmt.Sprintf("Successfully deleted job %d", job.ID), Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
}) })
} }
@@ -987,7 +997,7 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "application/json") rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusOK) rw.WriteHeader(http.StatusOK)
json.NewEncoder(rw).Encode(DeleteJobApiResponse{ json.NewEncoder(rw).Encode(DefaultJobApiResponse{
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt), Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
}) })
} }

View File

@@ -36,10 +36,7 @@ func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag,
// ConcurrentJobs is the resolver for the concurrentJobs field. // ConcurrentJobs is the resolver for the concurrentJobs field.
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) { func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
if obj.State == schema.JobStateRunning { // FIXME: Make the hardcoded duration configurable
obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
}
if obj.Exclusive != 1 && obj.Duration > 600 { if obj.Exclusive != 1 && obj.Duration > 600 {
return r.Repo.FindConcurrentJobs(ctx, obj) return r.Repo.FindConcurrentJobs(ctx, obj)
} }

View File

@@ -82,8 +82,6 @@ func Connect(driver string, db string) {
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
startJobStartWorker()
}) })
} }

View File

@@ -79,12 +79,9 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
} }
job.RawFootprint = nil job.RawFootprint = nil
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err
// }
job.StartTime = time.Unix(job.StartTimeUnix, 0) job.StartTime = time.Unix(job.StartTimeUnix, 0)
if job.Duration == 0 && job.State == schema.JobStateRunning { // Always ensure accurate duration for running jobs
if job.State == schema.JobStateRunning {
job.Duration = int32(time.Since(job.StartTime).Seconds()) job.Duration = int32(time.Since(job.StartTime).Seconds())
} }
@@ -308,17 +305,17 @@ func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm
return searchterm, "", "", "" return searchterm, "", "", ""
} else { // Has to have letters and logged-in user for other guesses } else { // Has to have letters and logged-in user for other guesses
if user != nil { if user != nil {
// Find username in jobs (match) // Find username by username in job table (match)
uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false) uresult, _ := r.FindColumnValue(user, searchterm, "job", "hpc_user", "hpc_user", false)
if uresult != "" { if uresult != "" {
return "", uresult, "", "" return "", uresult, "", ""
} }
// Find username by name (like) // Find username by real name in hpc_user table (like)
nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true) nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true)
if nresult != "" { if nresult != "" {
return "", nresult, "", "" return "", nresult, "", ""
} }
// Find projectId in jobs (match) // Find projectId by projectId in job table (match)
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false) presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
if presult != "" { if presult != "" {
return "", "", presult, "" return "", "", presult, ""
@@ -457,6 +454,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
return subclusters, nil return subclusters, nil
} }
// FIXME: Set duration to requested walltime?
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error { func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now() start := time.Now()
res, err := sq.Update("job"). res, err := sq.Update("job").

View File

@@ -170,8 +170,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = buildTimeCondition("job.start_time", filter.StartTime, query) query = buildTimeCondition("job.start_time", filter.StartTime, query)
} }
if filter.Duration != nil { if filter.Duration != nil {
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs. query = buildIntCondition("job.duration", filter.Duration, query)
query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
} }
if filter.MinRunningFor != nil { if filter.MinRunningFor != nil {
now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs. now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.

View File

@@ -1,83 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// JobWithUser bundles one queued job start request: the job metadata to
// insert and the user that accompanies it.
type JobWithUser struct {
// Job is the metadata the worker passes to the job repository's Start call.
Job *schema.JobMeta
// User is forwarded to AddTagOrCreate when the job's tags are attached.
User *schema.User
}
// jobStartPending counts job start requests that have been queued by
// TriggerJobStart but not yet finished by the worker.
var jobStartPending sync.WaitGroup

// jobStartChannel is the queue read by jobStartWorker. It stays nil until
// startJobStartWorker has been called.
var jobStartChannel chan JobWithUser
// startJobStartWorker allocates the buffered job start queue and launches the
// goroutine that drains it.
func startJobStartWorker() {
	// Number of start requests that can be queued before senders block.
	const queueCapacity = 128

	jobStartChannel = make(chan JobWithUser, queueCapacity)
	go jobStartWorker()
}
// Archiving worker thread
func jobStartWorker() {
for {
select {
case req, ok := <-jobStartChannel:
if !ok {
break
}
jobRepo := GetJobRepository()
var id int64
for i := 0; i < 5; i++ {
var err error
id, err = jobRepo.Start(req.Job)
if err != nil {
log.Errorf("Attempt %d: insert into database failed: %v", i, err)
} else {
break
}
time.Sleep(1 * time.Second)
}
for _, tag := range req.Job.Tags {
if _, err := jobRepo.AddTagOrCreate(req.User, id,
tag.Type, tag.Name, tag.Scope); err != nil {
log.Errorf("adding tag to new job %d failed: %v", id, err)
}
}
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d",
id, req.Job.Cluster, req.Job.JobID, req.Job.User, req.Job.StartTime)
jobStartPending.Done()
}
}
}
// TriggerJobStart queues req for asynchronous insertion by the job start
// worker. The request is counted in jobStartPending before it is sent, so
// WaitForJobStart also waits for it. The send blocks while the queue is full.
func TriggerJobStart(req JobWithUser) {
	// The queue only exists after startJobStartWorker has run.
	if nil == jobStartChannel {
		log.Fatal("Cannot start Job without jobStart channel. Did you Start the worker?")
	}

	jobStartPending.Add(1)
	jobStartChannel <- req
}
// WaitForJobStart blocks until every job start request counted in
// jobStartPending has been marked done by the worker.
func WaitForJobStart() {
// Only waits on the WaitGroup; the job start channel is not closed here.
jobStartPending.Wait()
}

View File

@@ -111,7 +111,7 @@ func BenchmarkDB_QueryJobs(b *testing.B) {
user := "mppi133h" user := "mppi133h"
filter.User = &model.StringInput{Eq: &user} filter.User = &model.StringInput{Eq: &user}
page := &model.PageRequest{ItemsPerPage: 50, Page: 1} page := &model.PageRequest{ItemsPerPage: 50, Page: 1}
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc} order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
b.Run("QueryJobs", func(b *testing.B) { b.Run("QueryJobs", func(b *testing.B) {
db := setup(b) db := setup(b)

View File

@@ -73,7 +73,7 @@ func (r *UserRepository) GetUser(username string) (*schema.User, error) {
func (r *UserRepository) GetLdapUsernames() ([]string, error) { func (r *UserRepository) GetLdapUsernames() ([]string, error) {
var users []string var users []string
rows, err := r.DB.Query(`SELECT username FROM hpc_user WHERE user.ldap = 1`) rows, err := r.DB.Query(`SELECT username FROM hpc_user WHERE hpc_user.ldap = 1`)
if err != nil { if err != nil {
log.Warn("Error while querying usernames") log.Warn("Error while querying usernames")
return nil, err return nil, err

View File

@@ -182,6 +182,7 @@ func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
return i return i
} }
// FIXME: Lots of redundant code. Needs refactoring
func buildFilterPresets(query url.Values) map[string]interface{} { func buildFilterPresets(query url.Values) map[string]interface{} {
filterPresets := map[string]interface{}{} filterPresets := map[string]interface{}{}

View File

@@ -1,490 +1,490 @@
{ {
"$schema": "http://json-schema.org/draft/2020-12/schema", "$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://job-data.schema.json", "$id": "embedfs://job-data.schema.json",
"title": "Job metric data list", "title": "Job metric data list",
"description": "Collection of metric data of a HPC job", "description": "Collection of metric data of a HPC job",
"type": "object", "type": "object",
"properties": { "properties": {
"mem_used": { "mem_used": {
"description": "Memory capacity used", "description": "Memory capacity used",
"type": "object", "type": "object",
"properties": { "properties": {
"node": { "node": {
"$ref": "embedfs://job-metric-data.schema.json" "$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"flops_any": {
"description": "Total flop rate with DP flops scaled up",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"mem_bw": {
"description": "Main memory bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"net_bw": {
"description": "Total fast interconnect network bandwidth",
"type": "object",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ipc": {
"description": "Instructions executed per cycle",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_user": {
"description": "CPU user active core utilization",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_load": {
"description": "CPU requested core utilization (load 1m)",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"flops_dp": {
"description": "Double precision flop rate",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"flops_sp": {
"description": "Single precision flops rate",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"vectorization_ratio": {
"description": "Fraction of arithmetic instructions using SIMD instructions",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_power": {
"description": "CPU power consumption",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"mem_power": {
"description": "Memory power consumption",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"acc_utilization": {
"description": "GPU utilization",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"acc_mem_used": {
"description": "GPU memory capacity used",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"acc_power": {
"description": "GPU power consumption",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"clock": {
"description": "Average core frequency",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"filesystems": {
"description": "Array of filesystems",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string",
"enum": [
"nfs",
"lustre",
"gpfs",
"nvme",
"ssd",
"hdd",
"beegfs"
]
},
"read_bw": {
"description": "File system read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"write_bw": {
"description": "File system write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"read_req": {
"description": "File system read requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"write_req": {
"description": "File system write requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"inodes": {
"description": "File system write requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"accesses": {
"description": "File system open and close",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"fsync": {
"description": "File system fsync",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"create": {
"description": "File system create",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"open": {
"description": "File system open",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"close": {
"description": "File system close",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"seek": {
"description": "File system seek",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
}
},
"required": [
"name",
"type",
"read_bw",
"write_bw"
]
},
"minItems": 1
} }
},
"required": [
"node"
]
}, },
"ic_rcv_packets": { "flops_any": {
"description": "Network interconnect read packets", "description": "Total flop rate with DP flops scaled up",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"mem_bw": {
"description": "Main memory bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"net_bw": {
"description": "Total fast interconnect network bandwidth",
"type": "object",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ipc": {
"description": "Instructions executed per cycle",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_user": {
"description": "CPU user active core utilization",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_load": {
"description": "CPU requested core utilization (load 1m)",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"flops_dp": {
"description": "Double precision flop rate",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"flops_sp": {
"description": "Single precision flops rate",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"vectorization_ratio": {
"description": "Fraction of arithmetic instructions using SIMD instructions",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_power": {
"description": "CPU power consumption",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"mem_power": {
"description": "Memory power consumption",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"acc_utilization": {
"description": "GPU utilization",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"acc_mem_used": {
"description": "GPU memory capacity used",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"acc_power": {
"description": "GPU power consumption",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"clock": {
"description": "Average core frequency",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"filesystems": {
"description": "Array of filesystems",
"type": "array",
"items": {
"type": "object",
"properties": { "properties": {
"node": { "name": {
"type": "string"
},
"type": {
"type": "string",
"enum": [
"nfs",
"lustre",
"gpfs",
"nvme",
"ssd",
"hdd",
"beegfs"
]
},
"read_bw": {
"description": "File system read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json" "$ref": "embedfs://job-metric-data.schema.json"
} }
},
"required": [
"node"
]
},
"write_bw": {
"description": "File system write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"read_req": {
"description": "File system read requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"write_req": {
"description": "File system write requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"inodes": {
"description": "File system write requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"accesses": {
"description": "File system open and close",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"fsync": {
"description": "File system fsync",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"create": {
"description": "File system create",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"open": {
"description": "File system open",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"close": {
"description": "File system close",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"seek": {
"description": "File system seek",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
}
}, },
"required": [ "required": [
"node" "name",
] "type",
}, "read_bw",
"ic_send_packets": { "write_bw"
"description": "Network interconnect send packet",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ic_read_bw": {
"description": "Network interconnect read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ic_write_bw": {
"description": "Network interconnect write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
] ]
},
"minItems": 1
}
},
"ic_rcv_packets": {
"description": "Network interconnect read packets",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
}, },
"required": [ "required": [
"cpu_user", "node"
"cpu_load",
"mem_used",
"flops_any",
"mem_bw",
"net_bw",
"filesystems"
] ]
},
"ic_send_packets": {
"description": "Network interconnect send packet",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ic_read_bw": {
"description": "Network interconnect read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ic_write_bw": {
"description": "Network interconnect write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"required": [
"cpu_user",
"cpu_load",
"mem_used",
"flops_any",
"mem_bw",
"net_bw",
"filesystems"
]
} }

View File

@@ -1,351 +1,351 @@
{ {
"$schema": "http://json-schema.org/draft/2020-12/schema", "$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://job-meta.schema.json", "$id": "embedfs://job-meta.schema.json",
"title": "Job meta data", "title": "Job meta data",
"description": "Meta data information of a HPC job", "description": "Meta data information of a HPC job",
"type": "object", "type": "object",
"properties": { "properties": {
"jobId": { "jobId": {
"description": "The unique identifier of a job", "description": "The unique identifier of a job",
"type": "integer" "type": "integer"
}, },
"user": { "user": {
"description": "The unique identifier of a user", "description": "The unique identifier of a user",
"type": "string"
},
"project": {
"description": "The unique identifier of a project",
"type": "string"
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string"
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string"
},
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer"
},
"numNodes": {
"description": "Number of nodes used",
"type": "integer",
"exclusiveMinimum": 0
},
"numHwthreads": {
"description": "Number of HWThreads used",
"type": "integer",
"exclusiveMinimum": 0
},
"numAcc": {
"description": "Number of accelerators used",
"type": "integer",
"exclusiveMinimum": 0
},
"exclusive": {
"description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user",
"type": "integer",
"minimum": 0,
"maximum": 2
},
"monitoringStatus": {
"description": "State of monitoring system during job run",
"type": "integer"
},
"smt": {
"description": "SMT threads used by job",
"type": "integer"
},
"walltime": {
"description": "Requested walltime of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"out_of_memory",
"timeout"
]
},
"startTime": {
"description": "Start epoch time stamp in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"duration": {
"description": "Duration of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"type": "object",
"properties": {
"hostname": {
"type": "string" "type": "string"
}, },
"project": { "hwthreads": {
"description": "The unique identifier of a project", "type": "array",
"type": "string" "description": "List of OS processor ids",
}, "items": {
"cluster": { "type": "integer"
"description": "The unique identifier of a cluster", }
"type": "string" },
}, "accelerators": {
"subCluster": { "type": "array",
"description": "The unique identifier of a sub cluster", "description": "List of accelerator device ids",
"type": "string" "items": {
}, "type": "string"
"partition": { }
"description": "The Slurm partition to which the job was submitted", },
"type": "string" "configuration": {
},
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer"
},
"numNodes": {
"description": "Number of nodes used",
"type": "integer",
"exclusiveMinimum": 0
},
"numHwthreads": {
"description": "Number of HWThreads used",
"type": "integer",
"exclusiveMinimum": 0
},
"numAcc": {
"description": "Number of accelerators used",
"type": "integer",
"exclusiveMinimum": 0
},
"exclusive": {
"description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user",
"type": "integer",
"minimum": 0,
"maximum": 2
},
"monitoringStatus": {
"description": "State of monitoring system during job run",
"type": "integer"
},
"smt": {
"description": "SMT threads used by job",
"type": "integer"
},
"walltime": {
"description": "Requested walltime of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"jobState": {
"description": "Final state of job",
"type": "string", "type": "string",
"enum": [ "description": "The configuration options of the node"
"completed", }
"failed",
"cancelled",
"stopped",
"out_of_memory",
"timeout"
]
}, },
"startTime": { "required": [
"description": "Start epoch time stamp in seconds", "hostname"
"type": "integer", ],
"exclusiveMinimum": 0 "minItems": 1
}
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"properties": {
"jobScript": {
"type": "string",
"description": "The batch script of the job"
}, },
"duration": { "jobName": {
"description": "Duration of job in seconds", "type": "string",
"type": "integer", "description": "Slurm Job name"
"exclusiveMinimum": 0
}, },
"resources": { "slurmInfo": {
"description": "Resources used by job", "type": "string",
"type": "array", "description": "Additional slurm infos as shown by scontrol show job"
"items": { }
"type": "object", }
"properties": { },
"hostname": { "tags": {
"type": "string" "description": "List of tags",
}, "type": "array",
"hwthreads": { "items": {
"type": "array", "type": "object",
"description": "List of OS processor ids", "properties": {
"items": { "name": {
"type": "integer" "type": "string"
} },
}, "type": {
"accelerators": { "type": "string"
"type": "array", }
"description": "List of accelerator device ids",
"items": {
"type": "string"
}
},
"configuration": {
"type": "string",
"description": "The configuration options of the node"
}
},
"required": [
"hostname"
],
"minItems": 1
}
}, },
"metaData": { "required": [
"description": "Additional information about the job", "name",
"type"
]
},
"uniqueItems": true
},
"statistics": {
"description": "Job statistic data",
"type": "object",
"properties": {
"mem_used": {
"description": "Memory capacity used (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"cpu_load": {
"description": "CPU requested core utilization (load 1m) (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"flops_any": {
"description": "Total flop rate with DP flops scaled up (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"mem_bw": {
"description": "Main memory bandwidth (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"net_bw": {
"description": "Total fast interconnect network bandwidth (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"file_bw": {
"description": "Total file IO bandwidth (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ipc": {
"description": "Instructions executed per cycle",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"cpu_user": {
"description": "CPU user active core utilization",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"flops_dp": {
"description": "Double precision flop rate",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"flops_sp": {
"description": "Single precision flops rate",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"rapl_power": {
"description": "CPU power consumption",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_used": {
"description": "GPU utilization",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_mem_used": {
"description": "GPU memory capacity used",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_power": {
"description": "GPU power consumption",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"clock": {
"description": "Average core frequency",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_rcv_packets": {
"description": "Network interconnect read packets",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_send_packets": {
"description": "Network interconnect send packet",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_read_bw": {
"description": "Network interconnect read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_write_bw": {
"description": "Network interconnect write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"filesystems": {
"description": "Array of filesystems",
"type": "array",
"items": {
"type": "object", "type": "object",
"properties": { "properties": {
"jobScript": { "name": {
"type": "string", "type": "string"
"description": "The batch script of the job" },
}, "type": {
"jobName": { "type": "string",
"type": "string", "enum": [
"description": "Slurm Job name" "nfs",
}, "lustre",
"slurmInfo": { "gpfs",
"type": "string", "nvme",
"description": "Additional slurm infos as shown by scontrol show job" "ssd",
} "hdd",
} "beegfs"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string"
}
},
"required": [
"name",
"type"
] ]
}, },
"uniqueItems": true "read_bw": {
}, "description": "File system read bandwidth",
"statistics": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Job statistic data", },
"type": "object", "write_bw": {
"properties": { "description": "File system write bandwidth",
"mem_used": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Memory capacity used (required)", },
"$ref": "embedfs://job-metric-statistics.schema.json" "read_req": {
}, "description": "File system read requests",
"cpu_load": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "CPU requested core utilization (load 1m) (required)", },
"$ref": "embedfs://job-metric-statistics.schema.json" "write_req": {
}, "description": "File system write requests",
"flops_any": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Total flop rate with DP flops scaled up (required)", },
"$ref": "embedfs://job-metric-statistics.schema.json" "inodes": {
}, "description": "File system write requests",
"mem_bw": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Main memory bandwidth (required)", },
"$ref": "embedfs://job-metric-statistics.schema.json" "accesses": {
}, "description": "File system open and close",
"net_bw": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Total fast interconnect network bandwidth (required)", },
"$ref": "embedfs://job-metric-statistics.schema.json" "fsync": {
}, "description": "File system fsync",
"file_bw": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Total file IO bandwidth (required)", },
"$ref": "embedfs://job-metric-statistics.schema.json" "create": {
}, "description": "File system create",
"ipc": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Instructions executed per cycle", },
"$ref": "embedfs://job-metric-statistics.schema.json" "open": {
}, "description": "File system open",
"cpu_user": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "CPU user active core utilization", },
"$ref": "embedfs://job-metric-statistics.schema.json" "close": {
}, "description": "File system close",
"flops_dp": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Double precision flop rate", },
"$ref": "embedfs://job-metric-statistics.schema.json" "seek": {
}, "description": "File system seek",
"flops_sp": { "$ref": "embedfs://job-metric-statistics.schema.json"
"description": "Single precision flops rate", }
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"rapl_power": {
"description": "CPU power consumption",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_used": {
"description": "GPU utilization",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_mem_used": {
"description": "GPU memory capacity used",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_power": {
"description": "GPU power consumption",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"clock": {
"description": "Average core frequency",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_rcv_packets": {
"description": "Network interconnect read packets",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_send_packets": {
"description": "Network interconnect send packet",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_read_bw": {
"description": "Network interconnect read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_write_bw": {
"description": "Network interconnect write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"filesystems": {
"description": "Array of filesystems",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string",
"enum": [
"nfs",
"lustre",
"gpfs",
"nvme",
"ssd",
"hdd",
"beegfs"
]
},
"read_bw": {
"description": "File system read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"write_bw": {
"description": "File system write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"read_req": {
"description": "File system read requests",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"write_req": {
"description": "File system write requests",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"inodes": {
"description": "File system write requests",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"accesses": {
"description": "File system open and close",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"fsync": {
"description": "File system fsync",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"create": {
"description": "File system create",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"open": {
"description": "File system open",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"close": {
"description": "File system close",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"seek": {
"description": "File system seek",
"$ref": "embedfs://job-metric-statistics.schema.json"
}
},
"required": [
"name",
"type",
"read_bw",
"write_bw"
]
},
"minItems": 1
}
}, },
"required": [ "required": [
"cpu_user", "name",
"cpu_load", "type",
"mem_used", "read_bw",
"flops_any", "write_bw"
"mem_bw"
] ]
},
"minItems": 1
} }
}, },
"required": [ "required": [
"jobId", "cpu_user",
"user", "cpu_load",
"project", "mem_used",
"cluster", "flops_any",
"subCluster", "mem_bw"
"numNodes", ]
"exclusive", }
"startTime", },
"jobState", "required": [
"duration", "jobId",
"resources", "user",
"statistics" "project",
] "cluster",
"subCluster",
"numNodes",
"exclusive",
"startTime",
"jobState",
"duration",
"resources",
"statistics"
]
} }

View File

@@ -1,216 +1,216 @@
{ {
"$schema": "http://json-schema.org/draft/2020-12/schema", "$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://job-metric-data.schema.json", "$id": "embedfs://job-metric-data.schema.json",
"title": "Job metric data", "title": "Job metric data",
"description": "Metric data of a HPC job", "description": "Metric data of a HPC job",
"type": "object", "type": "object",
"properties": { "properties": {
"unit": { "unit": {
"description": "Metric unit", "description": "Metric unit",
"$ref": "embedfs://unit.schema.json" "$ref": "embedfs://unit.schema.json"
},
"timestep": {
"description": "Measurement interval in seconds",
"type": "integer"
},
"thresholds": {
"description": "Metric thresholds for specific system",
"type": "object",
"properties": {
"peak": {
"type": "number"
},
"normal": {
"type": "number"
},
"caution": {
"type": "number"
},
"alert": {
"type": "number"
}
}
},
"statisticsSeries": {
"type": "object",
"description": "Statistics series across topology",
"properties": {
"min": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"max": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"mean": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"percentiles": {
"type": "object",
"properties": {
"10": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"20": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"30": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"40": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"50": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"60": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"70": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"80": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"90": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"25": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"75": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
}
}
}
}
},
"series": {
"type": "array",
"items": {
"type": "object",
"properties": {
"hostname": {
"type": "string"
},
"id": {
"type": "string"
},
"statistics": {
"type": "object",
"description": "Statistics across time dimension",
"properties": {
"avg": {
"description": "Series average",
"type": "number",
"minimum": 0
},
"min": {
"description": "Series minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Series maximum",
"type": "number",
"minimum": 0
}
},
"required": [
"avg",
"min",
"max"
]
},
"data": {
"type": "array",
"contains": {
"type": "number",
"minimum": 0
},
"minItems": 1
}
},
"required": [
"hostname",
"statistics",
"data"
]
}
}
}, },
"required": [ "timestep": {
"unit", "description": "Measurement interval in seconds",
"timestep", "type": "integer"
"series" },
] "thresholds": {
"description": "Metric thresholds for specific system",
"type": "object",
"properties": {
"peak": {
"type": "number"
},
"normal": {
"type": "number"
},
"caution": {
"type": "number"
},
"alert": {
"type": "number"
}
}
},
"statisticsSeries": {
"type": "object",
"description": "Statistics series across topology",
"properties": {
"min": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"max": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"mean": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"percentiles": {
"type": "object",
"properties": {
"10": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"20": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"30": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"40": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"50": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"60": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"70": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"80": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"90": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"25": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"75": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
}
}
}
}
},
"series": {
"type": "array",
"items": {
"type": "object",
"properties": {
"hostname": {
"type": "string"
},
"id": {
"type": "string"
},
"statistics": {
"type": "object",
"description": "Statistics across time dimension",
"properties": {
"avg": {
"description": "Series average",
"type": "number",
"minimum": 0
},
"min": {
"description": "Series minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Series maximum",
"type": "number",
"minimum": 0
}
},
"required": [
"avg",
"min",
"max"
]
},
"data": {
"type": "array",
"contains": {
"type": "number",
"minimum": 0
},
"minItems": 1
}
},
"required": [
"hostname",
"statistics",
"data"
]
}
}
},
"required": [
"unit",
"timestep",
"series"
]
} }

View File

@@ -1,34 +1,34 @@
{ {
"$schema": "http://json-schema.org/draft/2020-12/schema", "$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://job-metric-statistics.schema.json", "$id": "embedfs://job-metric-statistics.schema.json",
"title": "Job statistics", "title": "Job statistics",
"description": "Format specification for job metric statistics", "description": "Format specification for job metric statistics",
"type": "object", "type": "object",
"properties": { "properties": {
"unit": { "unit": {
"description": "Metric unit", "description": "Metric unit",
"$ref": "embedfs://unit.schema.json" "$ref": "embedfs://unit.schema.json"
},
"avg": {
"description": "Job metric average",
"type": "number",
"minimum": 0
},
"min": {
"description": "Job metric minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Job metric maximum",
"type": "number",
"minimum": 0
}
}, },
"required": [ "avg": {
"unit", "description": "Job metric average",
"avg", "type": "number",
"min", "minimum": 0
"max" },
] "min": {
"description": "Job metric minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Job metric maximum",
"type": "number",
"minimum": 0
}
},
"required": [
"unit",
"avg",
"min",
"max"
]
} }

View File

@@ -1,40 +1,40 @@
{ {
"$schema": "http://json-schema.org/draft/2020-12/schema", "$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://unit.schema.json", "$id": "embedfs://unit.schema.json",
"title": "Metric unit", "title": "Metric unit",
"description": "Format specification for job metric units", "description": "Format specification for job metric units",
"type": "object", "type": "object",
"properties": { "properties": {
"base": { "base": {
"description": "Metric base unit", "description": "Metric base unit",
"type": "string", "type": "string",
"enum": [ "enum": [
"B", "B",
"F", "F",
"B/s", "B/s",
"F/s", "F/s",
"CPI", "CPI",
"IPC", "IPC",
"Hz", "Hz",
"W", "W",
"°C", "°C",
"" ""
] ]
},
"prefix": {
"description": "Unit prefix",
"type": "string",
"enum": [
"K",
"M",
"G",
"T",
"P",
"E"
]
}
}, },
"required": [ "prefix": {
"base" "description": "Unit prefix",
] "type": "string",
"enum": [
"K",
"M",
"G",
"T",
"P",
"E"
]
}
},
"required": [
"base"
]
} }

View File

@@ -9,12 +9,11 @@
--> -->
<script context="module"> <script context="module">
function findJobThresholds(job, metricConfig) { function findJobThresholds(job, stat, metricConfig) {
if (!job || !metricConfig) { if (!job || !metricConfig || !stat) {
console.warn("Argument missing for findJobThresholds!"); console.warn("Argument missing for findJobThresholds!");
return null; return null;
} }
// metricConfig is on subCluster-Level // metricConfig is on subCluster-Level
const defaultThresholds = { const defaultThresholds = {
peak: metricConfig.peak, peak: metricConfig.peak,
@@ -22,13 +21,13 @@
caution: metricConfig.caution, caution: metricConfig.caution,
alert: metricConfig.alert alert: metricConfig.alert
}; };
/* /*
NEW: Footprints should be comparable: Always use Unchanged Single Node Thresholds, except for shared jobs. Footprints should be comparable:
HW Clocks, HW Temperatures and File/Net IO Thresholds will be scaled down too, even if they are independent. Always use unchanged single node thresholds for exclusive jobs and "avg" Footprints.
'jf.stats' is one of: avg, min, max -> Always relative to one nodes' thresholds as configured. For shared jobs, scale thresholds by the fraction of the job's HWThreads to the node's HWThreads.
'stat' is one of: avg, min, max
*/ */
if (job.exclusive === 1) { if (job.exclusive === 1 || stat === "avg") {
return defaultThresholds return defaultThresholds
} else { } else {
const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster) const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster)
@@ -40,29 +39,6 @@
alert: round(defaultThresholds.alert * jobFraction, 0), alert: round(defaultThresholds.alert * jobFraction, 0),
}; };
} }
/* OLD: Based on Metric Aggregation Setting
// Job_Exclusivity does not matter, only aggregation
if (metricConfig.aggregation === "avg") {
return defaultThresholds;
} else if (metricConfig.aggregation === "sum") {
const topol = getContext("getHardwareTopology")(job.cluster, job.subCluster)
const jobFraction = job.numHWThreads / topol.node.length;
return {
peak: round(defaultThresholds.peak * jobFraction, 0),
normal: round(defaultThresholds.normal * jobFraction, 0),
caution: round(defaultThresholds.caution * jobFraction, 0),
alert: round(defaultThresholds.alert * jobFraction, 0),
};
} else {
console.warn(
"Missing or unkown aggregation mode (sum/avg) for metric:",
metricConfig,
);
return defaultThresholds;
}
*/
} }
</script> </script>
@@ -93,7 +69,7 @@
const unit = (fmc?.unit?.prefix ? fmc.unit.prefix : "") + (fmc?.unit?.base ? fmc.unit.base : "") const unit = (fmc?.unit?.prefix ? fmc.unit.prefix : "") + (fmc?.unit?.base ? fmc.unit.base : "")
// Threshold / -Differences // Threshold / -Differences
const fmt = findJobThresholds(job, fmc); const fmt = findJobThresholds(job, jf.stat, fmc);
if (jf.name === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0); if (jf.name === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0);
// Define basic data -> Value: Use as Provided // Define basic data -> Value: Use as Provided

View File

@@ -7,7 +7,7 @@
--> -->
<script> <script>
import { Badge, Button, Icon } from "@sveltestrap/sveltestrap"; import { Badge, Button, Icon, Tooltip } from "@sveltestrap/sveltestrap";
import { scrambleNames, scramble } from "../utils.js"; import { scrambleNames, scramble } from "../utils.js";
import Tag from "../helper/Tag.svelte"; import Tag from "../helper/Tag.svelte";
import TagManagement from "../helper/TagManagement.svelte"; import TagManagement from "../helper/TagManagement.svelte";
@@ -42,12 +42,30 @@
let displayCheck = false; let displayCheck = false;
function clipJobId(jid) { function clipJobId(jid) {
displayCheck = true; displayCheck = true;
navigator.clipboard // Navigator clipboard api needs a secure context (https)
.writeText(jid) if (navigator.clipboard && window.isSecureContext) {
.catch((reason) => console.error(reason)); navigator.clipboard
setTimeout(function () { .writeText(jid)
displayCheck = false; .catch((reason) => console.error(reason));
}, 1500); } else {
// Workaround: Create, Fill, And Copy Content of Textarea
const textArea = document.createElement("textarea");
textArea.value = jid;
textArea.style.position = "absolute";
textArea.style.left = "-999999px";
document.body.prepend(textArea);
textArea.select();
try {
document.execCommand('copy');
} catch (error) {
console.error(error);
} finally {
textArea.remove();
}
}
setTimeout(function () {
displayCheck = false;
}, 1000);
} }
</script> </script>
@@ -58,13 +76,18 @@
<a href="/monitoring/job/{job.id}" target="_blank">{job.jobId}</a> <a href="/monitoring/job/{job.id}" target="_blank">{job.jobId}</a>
({job.cluster}) ({job.cluster})
</span> </span>
<Button outline color="secondary" size="sm" title="Copy JobID to Clipboard" on:click={clipJobId(job.jobId)} > <Button id={`${job.cluster}-${job.jobId}-clipboard`} outline color="secondary" size="sm" on:click={clipJobId(job.jobId)} >
{#if displayCheck} {#if displayCheck}
<Icon name="clipboard2-check-fill"/> Copied <Icon name="clipboard2-check-fill"/>
{:else} {:else}
<Icon name="clipboard2"/> Job ID <Icon name="clipboard2"/>
{/if} {/if}
</Button> </Button>
<Tooltip
target={`${job.cluster}-${job.jobId}-clipboard`}
placement="right">
{ displayCheck ? 'Copied!' : 'Copy Job ID to Clipboard' }
</Tooltip>
</span> </span>
{#if job.metaData?.jobName} {#if job.metaData?.jobName}
{#if job.metaData?.jobName.length <= 25} {#if job.metaData?.jobName.length <= 25}