mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-23 16:17:30 +01:00
Compare commits
397 Commits
v1.3.1
...
ccfront-de
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
24d43f3540 | ||
|
|
e376f97547 | ||
|
|
f2428d3cb3 | ||
|
|
2fdac85d31 | ||
|
|
b731395689 | ||
|
|
07405e3466 | ||
|
|
c0443cbec2 | ||
|
|
633bd42036 | ||
|
|
998ef8d834 | ||
|
|
c25b076ca9 | ||
|
|
f43379f365 | ||
|
|
58e678d72c | ||
|
|
1b043838ea | ||
|
|
f7a67c72bf | ||
|
|
c5476d08fa | ||
|
|
8af92b1557 | ||
|
|
eaa826bb8a | ||
|
|
140b3c371d | ||
|
|
2bd7c8d51e | ||
|
|
1e63cdbcda | ||
|
|
dd470d49ec | ||
|
|
95d8062b00 | ||
|
5266644725
|
|||
|
81d9e96552
|
|||
|
|
4ec9f06114 | ||
|
0033e9f6c0
|
|||
|
571652c314
|
|||
|
|
7ec233e18a | ||
|
|
13c9a12336 | ||
|
|
83d472ecd6 | ||
|
|
c21da6512a | ||
|
|
4b4374e0df | ||
|
|
407276a04d | ||
|
|
64f60905b4 | ||
|
|
9e6072fed2 | ||
|
|
a3e5c424fd | ||
|
|
6683a350aa | ||
|
|
05bfa9b546 | ||
|
|
735988decb | ||
|
|
d0580592be | ||
|
|
817076bdbf | ||
|
|
736236e9ca | ||
|
|
3f4114c51b | ||
|
|
5c2c493c56 | ||
|
|
2c383ebea1 | ||
|
|
91e73450cf | ||
|
|
e55798944e | ||
|
|
5ea11a5ad2 | ||
|
|
2a3383e9e6 | ||
|
|
e871703724 | ||
|
|
1ee367d7be | ||
|
|
bce536b9b4 | ||
|
|
7c9182e0b0 | ||
|
|
aa915d639d | ||
|
|
9489ebc7d6 | ||
|
2a5c525193
|
|||
|
9e2d981c60
|
|||
|
|
53dfe9e4f5 | ||
|
48e95fbdb0
|
|||
|
fd94d85edf
|
|||
|
f2d1a85afb
|
|||
|
0bdbcb8bab
|
|||
|
|
7b91a819be | ||
| bc89025924 | |||
|
|
16bcaef4c3 | ||
|
|
fcbfa451f2 | ||
|
|
559ce53ca4 | ||
|
|
ee2c5b58d7 | ||
|
|
d98d998106 | ||
|
212c45e070
|
|||
|
143fa9b6ed
|
|||
|
4849928288
|
|||
|
|
9248ee8868 | ||
|
|
1616d96732 | ||
| 0bbedd1600 | |||
|
|
c7e49644d8 | ||
|
010c903c74
|
|||
|
e4d12e3537
|
|||
|
051cc8384e
|
|||
|
49a94170d2
|
|||
|
|
42e8e37bd4 | ||
|
|
5d2c350ce2 | ||
|
|
85dc0362c1 | ||
|
|
01c06728eb | ||
|
|
257250714d | ||
|
|
3b769c3059 | ||
|
|
a7395ed45b | ||
|
|
ab07c7928f | ||
|
|
b0c0d15505 | ||
|
|
fcf50790da | ||
|
|
1e43654607 | ||
|
|
4fecbe820d | ||
|
|
763c9dfa6b | ||
|
9de5879786
|
|||
|
|
9396e7492c | ||
|
3ac3415178
|
|||
|
1aae1c59d0
|
|||
|
907e80a01c
|
|||
|
|
8a10b69716 | ||
|
|
1a3cf7edd6 | ||
|
|
76d0fc979b | ||
|
|
a42d8ece35 | ||
|
|
93377f53fc | ||
|
|
c853d74ba0 | ||
|
|
0b9f74f4f4 | ||
|
|
5da6baf828 | ||
|
5766945006
|
|||
|
a53d473b58
|
|||
|
|
d1207ad80e | ||
|
|
e2efe71b33 | ||
|
|
2aef6ed9c0 | ||
|
|
fcb6db0603 | ||
| 01b1136316 | |||
|
|
2512fe9e75 | ||
|
|
f89b5cd2ec | ||
|
|
ab284ed208 | ||
|
|
00a578657c | ||
|
|
38ce40ae7d | ||
| e1be6c7138 | |||
| 28539e60b0 | |||
|
adb11b3ed0
|
|||
|
|
f1e6dedd44 | ||
|
|
8ea1454c06 | ||
| 81b8d578f2 | |||
|
|
16b11db39c | ||
| 0d923cc920 | |||
| c523e93564 | |||
| d588798ea1 | |||
| a11f165f2a | |||
|
|
d4f487d554 | ||
|
|
93d5a0e532 | ||
|
|
00ddc462d2 | ||
|
|
5f4a74f8ba | ||
|
|
a8eff6fbd1 | ||
|
|
baa7367ebe | ||
|
|
69f8a34aac | ||
|
|
21b3a67988 | ||
|
|
d89574ce73 | ||
| ddeac6b9d9 | |||
| 17906ec0eb | |||
|
|
311c088d3d | ||
| a2584d6083 | |||
| 35bd7739c6 | |||
| 7f43c88a39 | |||
|
|
fc1c54a141 | ||
|
|
2af111c584 | ||
| c093cca8b1 | |||
|
|
2bb1b78ba4 | ||
| 3ab26172c4 | |||
| cdd45ce88b | |||
|
210a7d3136
|
|||
|
92ec64d80f
|
|||
|
ff37f71fdb
|
|||
|
6056341525
|
|||
|
|
075612f5bd | ||
| 1a87ed8210 | |||
|
|
c05ffeb16d | ||
| ee3710c5ed | |||
| 4327c4b1f7 | |||
| 492e56a098 | |||
| f0257a2784 | |||
| ec1ead89ab | |||
|
|
ae53e87aba | ||
|
|
939dd2320a | ||
|
|
2c8b73e2e2 | ||
|
|
eabc6212ea | ||
|
|
c120d6517f | ||
|
|
597ee1dad7 | ||
|
|
c4a901504d | ||
|
|
f5cc5d07fd | ||
|
|
8a0e6c921c | ||
|
|
bf1bff9ace | ||
|
|
06f24e988f | ||
|
|
ae327f545e | ||
|
|
35012b18c5 | ||
|
|
9688bad622 | ||
|
|
447b8d3372 | ||
|
|
01102cb9b0 | ||
|
|
934d1a6114 | ||
|
|
6f74c8cb77 | ||
|
|
63b9e619a4 | ||
|
|
82e28f26d7 | ||
|
|
ca9fd96baa | ||
|
|
39b22267d6 | ||
|
|
60d7984d66 | ||
|
|
33d219d2ac | ||
|
|
85a77e05af | ||
|
|
3dfeabcec6 | ||
|
|
673fdc443c | ||
|
|
2f6e5a7648 | ||
|
|
2cbe8e9517 | ||
|
|
2f0460d6ec | ||
|
|
37f4ed7770 | ||
|
|
e3104c61cb | ||
|
|
bc434ee8cb | ||
|
|
f4102b948e | ||
|
|
ed991de11a | ||
|
|
322e161064 | ||
|
|
1adc741cc2 | ||
|
|
4eff87bbf7 | ||
|
|
fc6970d08a | ||
|
|
f616c7e1c6 | ||
|
|
89ec749172 | ||
|
|
182f0f2c64 | ||
|
|
e3681495ce | ||
|
|
37415fa261 | ||
|
|
7243dbe763 | ||
|
|
0ff5c4bedd | ||
|
|
f047f89ad5 | ||
|
|
0eb0aa1d3b | ||
|
|
6019891591 | ||
|
|
615281601c | ||
|
|
82baf5d384 | ||
|
|
6fe93ecb7e | ||
|
|
b3222f3523 | ||
|
|
3b94863521 | ||
|
|
582dc8bf46 | ||
|
|
a9868fd275 | ||
|
|
218e56576a | ||
|
|
c50e79375a | ||
|
|
dcb8308f35 | ||
|
|
183b310696 | ||
|
|
c7d0c86d52 | ||
|
|
48225662b1 | ||
|
|
f53fc088ec | ||
|
|
05517fcbcd | ||
|
|
18af51b0a4 | ||
|
|
ede3da7a87 | ||
|
|
8e3327ef6a | ||
|
|
827f6daabc | ||
|
|
2567442321 | ||
|
|
9cf5478519 | ||
|
|
e5275311c2 | ||
|
|
21e4870e4c | ||
|
|
beba7c8d2e | ||
|
|
fe35313305 | ||
|
|
d7a8bbf40b | ||
|
|
f1893c596e | ||
|
|
6367c1ab4d | ||
|
|
9579887fc4 | ||
|
|
e29be2f140 | ||
|
|
2736b5d1ef | ||
|
|
ff52fb16b6 | ||
|
|
ccbf3867e1 | ||
|
|
f0de422c6e | ||
|
|
64cc19b252 | ||
|
|
26226009f0 | ||
|
|
d10e09da02 | ||
|
|
00a2e58fee | ||
|
|
b1cb45dfe6 | ||
|
|
a2951d1f05 | ||
|
|
c0b1e97602 | ||
|
|
71621a9dc4 | ||
|
|
b3ed2afebe | ||
|
|
704620baff | ||
|
|
8feb805167 | ||
|
|
065b32755a | ||
|
|
1b5f4bff2c | ||
|
|
8e1c5a485f | ||
| 5fa6c9db35 | |||
| 5482b9be2c | |||
|
|
7400273b0a | ||
|
|
0b7cdde4a0 | ||
|
|
d5382aec4f | ||
|
|
df484dc816 | ||
|
|
7ea4086807 | ||
|
|
b04bf6a951 | ||
| 7c33dcf630 | |||
| 5e65e21f0b | |||
| 53ca38ce53 | |||
|
|
398e3c1b91 | ||
| 508978d586 | |||
| e267481f71 | |||
|
|
193bee5ac8 | ||
| f58efa2871 | |||
| 6568b6d723 | |||
|
|
4b1b34d8a7 | ||
| 39c09f8565 | |||
|
|
275a77807e | ||
|
|
6443541a79 | ||
|
|
5eb6f7d307 | ||
|
|
bce2a66177 | ||
|
|
7602641909 | ||
|
|
54f3a261c5 | ||
|
|
906bac965f | ||
|
|
4ec1de6900 | ||
|
|
8ded131666 | ||
| 47b14f932e | |||
|
|
838ebb3f69 | ||
| c459724114 | |||
| b0c9d1164d | |||
| 7c51d88501 | |||
| 5b03cf826b | |||
| f305863616 | |||
| db5809d522 | |||
|
|
83df6f015c | ||
| e7231b0e13 | |||
|
|
cff60eb51c | ||
|
f914a312f5
|
|||
| 56ebb301ca | |||
|
|
a59df12595 | ||
|
|
5cc7fc6ccb | ||
|
|
55027cb630 | ||
|
|
036eba68e1 | ||
|
|
d34e0d9348 | ||
|
|
31765ce0ef | ||
|
|
9fe7cdca92 | ||
|
|
adc3502b6b | ||
|
|
95fe369648 | ||
|
|
01845a0cb7 | ||
|
|
708eaf4178 | ||
|
|
d629a58712 | ||
|
|
90886b63d6 | ||
|
|
084f89fa32 | ||
|
|
ceb3a095d8 | ||
|
|
1758275f11 | ||
|
|
e74e506ffe | ||
|
|
599a36466a | ||
|
|
613e128cab | ||
|
|
e4f8022b7a | ||
|
|
5603c41900 | ||
| a8a27c9b51 | |||
|
|
b70de5a4be | ||
|
|
b1fd07cd30 | ||
|
|
6ab2e02fe6 | ||
|
|
5535c5780c | ||
|
|
49e0a2c055 | ||
|
5e074dad10
|
|||
|
d6a88896d0
|
|||
|
5c99f5f8bb
|
|||
|
e1faba0ff2
|
|||
|
ba2f406bc0
|
|||
|
9b6db4684a
|
|||
|
|
561fd41d5d | ||
|
|
ce9995dac7 | ||
|
|
0afaea9513 | ||
|
|
9b5c6e3164 | ||
|
|
e6ebec8c1e | ||
|
|
2551921ed6 | ||
|
|
e02575aad7 | ||
|
|
ff3502c87a | ||
|
|
017f9b2140 | ||
|
|
c80d3a6958 | ||
|
|
3ca1127685 | ||
|
|
18369da5bc | ||
|
|
e65100cdc8 | ||
|
|
6a1cb51c2f | ||
|
c4d93e492b
|
|||
|
c2f72f72ac
|
|||
|
721b6b2afa
|
|||
|
b6f011c669
|
|||
|
801607fc16
|
|||
|
01a4d33514
|
|||
|
e348ec74fd
|
|||
|
0458675608
|
|||
|
c61ffce0e9
|
|||
|
68a97dc980
|
|||
|
a07d167390
|
|||
|
|
a8721dcc69 | ||
|
|
68cf952ac6 | ||
|
|
e14d6a81fe | ||
|
|
a4912893a8 | ||
|
0adfb631ef
|
|||
|
b64ce1f67f
|
|||
|
e8e3b1595d
|
|||
|
f1427d5272
|
|||
|
|
bf6b87d65c | ||
|
|
0240997257 | ||
|
|
f1e341f0b9 | ||
|
a54acb8c42
|
|||
|
c6ede67589
|
|||
|
|
11176da5d8 | ||
|
|
0a604336c4 | ||
|
|
be9df7649f | ||
|
|
63fb923995 | ||
|
|
3afe40083d | ||
|
|
9d4767539c | ||
|
ac9bba8b5b
|
|||
|
80c46bea7f
|
|||
|
|
614f694777 | ||
|
|
1072d7b449 | ||
|
1b70596735
|
|||
|
|
61eebc9fbd | ||
|
b05909969f
|
|||
|
bd89ce7cc9
|
|||
|
130613b717
|
|||
|
b3c1f39a0e
|
|||
|
97c807cd33
|
|||
|
aede5f71ec
|
|||
|
786770f56a
|
|||
|
|
74d4f00784 | ||
|
d61c4235dc
|
|||
|
e8794b8c79
|
|||
|
552da005dc
|
|||
|
|
51452d2e68 | ||
|
5c5484b4d2
|
|||
|
|
684cb5a376 |
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
|||||||
- name: Install Go
|
- name: Install Go
|
||||||
uses: actions/setup-go@v4
|
uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: 1.20.x
|
go-version: 1.22.x
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
- name: Build, Vet & Test
|
- name: Build, Vet & Test
|
||||||
|
|||||||
20
.gitignore
vendored
20
.gitignore
vendored
@@ -1,19 +1,23 @@
|
|||||||
/cc-backend
|
/cc-backend
|
||||||
|
|
||||||
/var/job-archive
|
|
||||||
/var/*.db
|
|
||||||
/var/machine-state
|
|
||||||
|
|
||||||
/.env
|
/.env
|
||||||
/config.json
|
/config.json
|
||||||
|
|
||||||
|
/var/job-archive
|
||||||
|
/var/machine-state
|
||||||
|
/var/job.db-shm
|
||||||
|
/var/job.db-wal
|
||||||
|
/var/*.db
|
||||||
|
/var/*.txt
|
||||||
|
|
||||||
/web/frontend/public/build
|
/web/frontend/public/build
|
||||||
/web/frontend/node_modules
|
/web/frontend/node_modules
|
||||||
/.vscode/*
|
|
||||||
/archive-migration
|
/archive-migration
|
||||||
/archive-manager
|
/archive-manager
|
||||||
var/job.db-shm
|
|
||||||
var/job.db-wal
|
|
||||||
|
|
||||||
|
/internal/repository/testdata/job.db-shm
|
||||||
|
/internal/repository/testdata/job.db-wal
|
||||||
|
|
||||||
|
/.vscode/*
|
||||||
dist/
|
dist/
|
||||||
*.db
|
*.db
|
||||||
|
|||||||
@@ -34,19 +34,6 @@ builds:
|
|||||||
main: ./tools/archive-manager
|
main: ./tools/archive-manager
|
||||||
tags:
|
tags:
|
||||||
- static_build
|
- static_build
|
||||||
- env:
|
|
||||||
- CGO_ENABLED=0
|
|
||||||
goos:
|
|
||||||
- linux
|
|
||||||
goarch:
|
|
||||||
- amd64
|
|
||||||
goamd64:
|
|
||||||
- v3
|
|
||||||
id: "archive-migration"
|
|
||||||
binary: archive-migration
|
|
||||||
main: ./tools/archive-migration
|
|
||||||
tags:
|
|
||||||
- static_build
|
|
||||||
- env:
|
- env:
|
||||||
- CGO_ENABLED=0
|
- CGO_ENABLED=0
|
||||||
goos:
|
goos:
|
||||||
@@ -70,7 +57,7 @@ archives:
|
|||||||
{{- else }}{{ .Arch }}{{ end }}
|
{{- else }}{{ .Arch }}{{ end }}
|
||||||
{{- if .Arm }}v{{ .Arm }}{{ end }}
|
{{- if .Arm }}v{{ .Arm }}{{ end }}
|
||||||
checksum:
|
checksum:
|
||||||
name_template: 'checksums.txt'
|
name_template: "checksums.txt"
|
||||||
snapshot:
|
snapshot:
|
||||||
name_template: "{{ incpatch .Version }}-next"
|
name_template: "{{ incpatch .Version }}-next"
|
||||||
changelog:
|
changelog:
|
||||||
@@ -100,7 +87,7 @@ changelog:
|
|||||||
release:
|
release:
|
||||||
draft: false
|
draft: false
|
||||||
footer: |
|
footer: |
|
||||||
Supports job archive version 1 and database version 6.
|
Supports job archive version 2 and database version 8.
|
||||||
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
|
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
|
||||||
|
|
||||||
# vim: set ts=2 sw=2 tw=0 fo=cnqoj
|
# vim: set ts=2 sw=2 tw=0 fo=cnqoj
|
||||||
|
|||||||
31
Makefile
31
Makefile
@@ -2,7 +2,7 @@ TARGET = ./cc-backend
|
|||||||
VAR = ./var
|
VAR = ./var
|
||||||
CFG = config.json .env
|
CFG = config.json .env
|
||||||
FRONTEND = ./web/frontend
|
FRONTEND = ./web/frontend
|
||||||
VERSION = 1.3.1
|
VERSION = 1.4.2
|
||||||
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
||||||
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
|
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
|
||||||
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
|
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
|
||||||
@@ -24,11 +24,21 @@ SVELTE_COMPONENTS = status \
|
|||||||
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
|
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
|
||||||
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||||
$(wildcard $(FRONTEND)/src/*.js) \
|
$(wildcard $(FRONTEND)/src/*.js) \
|
||||||
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
|
$(wildcard $(FRONTEND)/src/analysis/*.svelte) \
|
||||||
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
|
$(wildcard $(FRONTEND)/src/config/*.svelte) \
|
||||||
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
|
$(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/*.js) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/header/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/job/*.svelte)
|
||||||
|
|
||||||
.PHONY: clean distclean test tags frontend $(TARGET)
|
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
|
||||||
|
|
||||||
.NOTPARALLEL:
|
.NOTPARALLEL:
|
||||||
|
|
||||||
@@ -40,6 +50,15 @@ frontend:
|
|||||||
$(info ===> BUILD frontend)
|
$(info ===> BUILD frontend)
|
||||||
cd web/frontend && npm install && npm run build
|
cd web/frontend && npm install && npm run build
|
||||||
|
|
||||||
|
swagger:
|
||||||
|
$(info ===> GENERATE swagger)
|
||||||
|
@go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api
|
||||||
|
@mv ./api/docs.go ./internal/api/docs.go
|
||||||
|
|
||||||
|
graphql:
|
||||||
|
$(info ===> GENERATE graphql)
|
||||||
|
@go run github.com/99designs/gqlgen
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(info ===> CLEAN)
|
$(info ===> CLEAN)
|
||||||
@go clean
|
@go clean
|
||||||
@@ -63,7 +82,7 @@ tags:
|
|||||||
@ctags -R
|
@ctags -R
|
||||||
|
|
||||||
$(VAR):
|
$(VAR):
|
||||||
@mkdir $(VAR)
|
@mkdir -p $(VAR)
|
||||||
|
|
||||||
config.json:
|
config.json:
|
||||||
$(info ===> Initialize config.json file)
|
$(info ===> Initialize config.json file)
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ cd ./cc-backend
|
|||||||
./startDemo.sh
|
./startDemo.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also try the demo using the lates release binary.
|
You can also try the demo using the latest release binary.
|
||||||
Create a folder and put the release binary `cc-backend` into this folder.
|
Create a folder and put the release binary `cc-backend` into this folder.
|
||||||
Execute the following steps:
|
Execute the following steps:
|
||||||
|
|
||||||
@@ -88,7 +88,9 @@ Analysis, Systems and Status views).
|
|||||||
There is a Makefile to automate the build of cc-backend. The Makefile supports
|
There is a Makefile to automate the build of cc-backend. The Makefile supports
|
||||||
the following targets:
|
the following targets:
|
||||||
|
|
||||||
* `make`: Initialize `var` directory and build svelte frontend and backend binary. Note that there is no proper prerequesite handling. Any change of frontend source files will result in a complete rebuild.
|
* `make`: Initialize `var` directory and build svelte frontend and backend
|
||||||
|
binary. Note that there is no proper prerequisite handling. Any change of
|
||||||
|
frontend source files will result in a complete rebuild.
|
||||||
* `make clean`: Clean go build cache and remove binary.
|
* `make clean`: Clean go build cache and remove binary.
|
||||||
* `make test`: Run the tests that are also run in the GitHub workflow setup.
|
* `make test`: Run the tests that are also run in the GitHub workflow setup.
|
||||||
|
|
||||||
@@ -147,8 +149,6 @@ contains Go packages that can be used by other projects.
|
|||||||
Additional command line helper tools.
|
Additional command line helper tools.
|
||||||
* [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
|
* [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
|
||||||
Commands for getting infos about and existing job archive.
|
Commands for getting infos about and existing job archive.
|
||||||
* [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration)
|
|
||||||
Tool to migrate from previous to current job archive version.
|
|
||||||
* [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
|
* [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
|
||||||
Tool to convert external pubkey for use in `cc-backend`.
|
Tool to convert external pubkey for use in `cc-backend`.
|
||||||
* [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
|
* [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
|
||||||
|
|||||||
@@ -1,11 +1,46 @@
|
|||||||
# `cc-backend` version 1.3.1
|
# `cc-backend` version 1.4.2
|
||||||
|
|
||||||
Supports job archive version 1 and database version 7.
|
Supports job archive version 2 and database version 8.
|
||||||
|
|
||||||
This is a bugfix release of `cc-backend`, the API backend and frontend
|
This is a small bug fix release of `cc-backend`, the API backend and frontend
|
||||||
implementation of ClusterCockpit.
|
implementation of ClusterCockpit.
|
||||||
For release specific notes visit the [ClusterCockpit Documentation](https://clusterockpit.org/docs/release/).
|
For release specific notes visit the [ClusterCockpit Documentation](https://clusterockpit.org/docs/release/).
|
||||||
|
|
||||||
## Breaking changes
|
## Breaking changes
|
||||||
|
|
||||||
None
|
- You need to perform a database migration. Depending on your database size the
|
||||||
|
migration might require several hours!
|
||||||
|
- You need to adapt the `cluster.json` configuration files in the job-archive,
|
||||||
|
add new required attributes to the metric list and after that edit
|
||||||
|
`./job-archive/version.txt` to version 2. Only metrics that have the footprint
|
||||||
|
attribute set can be filtered and show up in the footprint UI and polar plot.
|
||||||
|
- Continuous scrolling is default now in all job lists. You can change this back
|
||||||
|
to paging globally, also every user can configure to use paging or continuous
|
||||||
|
scrolling individually.
|
||||||
|
- Tags have a scope now. Existing tags will get global scope in the database
|
||||||
|
migration.
|
||||||
|
|
||||||
|
## New features
|
||||||
|
|
||||||
|
- Tags have a scope now. Tags created by a basic user are only visible by that
|
||||||
|
user. Tags created by an admin/support role can be configured to be visible by
|
||||||
|
all users (global scope) or only be admin/support role.
|
||||||
|
- Re-sampling support for running (requires a recent `cc-metric-store`) and
|
||||||
|
archived jobs. This greatly speeds up loading of large or very long jobs. You
|
||||||
|
need to add the new configuration key `enable-resampling` to the `config.json`
|
||||||
|
file.
|
||||||
|
- For finished jobs a total job energy is shown in the job view.
|
||||||
|
- Continuous scrolling in job lists is default now.
|
||||||
|
- All database queries (especially for sqlite) were optimized resulting in
|
||||||
|
dramatically faster load times.
|
||||||
|
- A performance and energy footprint can be freely configured on a per
|
||||||
|
subcluster base. One can filter for footprint statistics for running and
|
||||||
|
finished jobs.
|
||||||
|
|
||||||
|
## Known issues
|
||||||
|
|
||||||
|
- Currently energy footprint metrics of type energy are ignored for calculating
|
||||||
|
total energy.
|
||||||
|
- Resampling for running jobs only works with cc-metric-store
|
||||||
|
- With energy footprint metrics of type power the unit is ignored and it is
|
||||||
|
assumed the metric has the unit Watt.
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ type Job {
|
|||||||
numNodes: Int!
|
numNodes: Int!
|
||||||
numHWThreads: Int!
|
numHWThreads: Int!
|
||||||
numAcc: Int!
|
numAcc: Int!
|
||||||
|
energy: Float!
|
||||||
SMT: Int!
|
SMT: Int!
|
||||||
exclusive: Int!
|
exclusive: Int!
|
||||||
partition: String!
|
partition: String!
|
||||||
@@ -27,12 +28,8 @@ type Job {
|
|||||||
tags: [Tag!]!
|
tags: [Tag!]!
|
||||||
resources: [Resource!]!
|
resources: [Resource!]!
|
||||||
concurrentJobs: JobLinkResultList
|
concurrentJobs: JobLinkResultList
|
||||||
|
footprint: [FootprintValue]
|
||||||
memUsedMax: Float
|
energyFootprint: [EnergyFootprintValue]
|
||||||
flopsAnyAvg: Float
|
|
||||||
memBwAvg: Float
|
|
||||||
loadAvg: Float
|
|
||||||
|
|
||||||
metaData: Any
|
metaData: Any
|
||||||
userData: User
|
userData: User
|
||||||
}
|
}
|
||||||
@@ -45,7 +42,6 @@ type JobLink {
|
|||||||
type Cluster {
|
type Cluster {
|
||||||
name: String!
|
name: String!
|
||||||
partitions: [String!]! # Slurm partitions
|
partitions: [String!]! # Slurm partitions
|
||||||
metricConfig: [MetricConfig!]!
|
|
||||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,9 +57,24 @@ type SubCluster {
|
|||||||
flopRateSimd: MetricValue!
|
flopRateSimd: MetricValue!
|
||||||
memoryBandwidth: MetricValue!
|
memoryBandwidth: MetricValue!
|
||||||
topology: Topology!
|
topology: Topology!
|
||||||
|
metricConfig: [MetricConfig!]!
|
||||||
|
footprint: [String!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type FootprintValue {
|
||||||
|
name: String!
|
||||||
|
stat: String!
|
||||||
|
value: Float!
|
||||||
|
}
|
||||||
|
|
||||||
|
type EnergyFootprintValue {
|
||||||
|
hardware: String!
|
||||||
|
metric: String!
|
||||||
|
value: Float!
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricValue {
|
type MetricValue {
|
||||||
|
name: String
|
||||||
unit: Unit!
|
unit: Unit!
|
||||||
value: Float!
|
value: Float!
|
||||||
}
|
}
|
||||||
@@ -102,6 +113,7 @@ type MetricConfig {
|
|||||||
normal: Float
|
normal: Float
|
||||||
caution: Float!
|
caution: Float!
|
||||||
alert: Float!
|
alert: Float!
|
||||||
|
lowerIsBetter: Boolean
|
||||||
subClusters: [SubClusterConfig!]!
|
subClusters: [SubClusterConfig!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,6 +121,7 @@ type Tag {
|
|||||||
id: ID!
|
id: ID!
|
||||||
type: String!
|
type: String!
|
||||||
name: String!
|
name: String!
|
||||||
|
scope: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Resource {
|
type Resource {
|
||||||
@@ -151,6 +164,7 @@ type MetricStatistics {
|
|||||||
|
|
||||||
type StatsSeries {
|
type StatsSeries {
|
||||||
mean: [NullableFloat!]!
|
mean: [NullableFloat!]!
|
||||||
|
median: [NullableFloat!]!
|
||||||
min: [NullableFloat!]!
|
min: [NullableFloat!]!
|
||||||
max: [NullableFloat!]!
|
max: [NullableFloat!]!
|
||||||
}
|
}
|
||||||
@@ -180,6 +194,28 @@ type NodeMetrics {
|
|||||||
metrics: [JobMetricWithName!]!
|
metrics: [JobMetricWithName!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodesResultList {
|
||||||
|
items: [NodeMetrics!]!
|
||||||
|
offset: Int
|
||||||
|
limit: Int
|
||||||
|
count: Int
|
||||||
|
totalNodes: Int
|
||||||
|
hasNextPage: Boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
type ClusterSupport {
|
||||||
|
cluster: String!
|
||||||
|
subClusters: [String!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type GlobalMetricListItem {
|
||||||
|
name: String!
|
||||||
|
unit: Unit!
|
||||||
|
scope: MetricScope!
|
||||||
|
footprint: String
|
||||||
|
availability: [ClusterSupport!]!
|
||||||
|
}
|
||||||
|
|
||||||
type Count {
|
type Count {
|
||||||
name: String!
|
name: String!
|
||||||
count: Int!
|
count: Int!
|
||||||
@@ -191,27 +227,34 @@ type User {
|
|||||||
email: String!
|
email: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
input MetricStatItem {
|
||||||
|
metricName: String!
|
||||||
|
range: FloatRange!
|
||||||
|
}
|
||||||
|
|
||||||
type Query {
|
type Query {
|
||||||
clusters: [Cluster!]! # List of all clusters
|
clusters: [Cluster!]! # List of all clusters
|
||||||
tags: [Tag!]! # List of all tags
|
tags: [Tag!]! # List of all tags
|
||||||
|
globalMetrics: [GlobalMetricListItem!]!
|
||||||
|
|
||||||
user(username: String!): User
|
user(username: String!): User
|
||||||
allocatedNodes(cluster: String!): [Count!]!
|
allocatedNodes(cluster: String!): [Count!]!
|
||||||
|
|
||||||
job(id: ID!): Job
|
job(id: ID!): Job
|
||||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
|
||||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||||
|
|
||||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||||
jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
|
jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]!
|
||||||
|
|
||||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
||||||
|
|
||||||
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
||||||
|
nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Mutation {
|
type Mutation {
|
||||||
createTag(type: String!, name: String!): Tag!
|
createTag(type: String!, name: String!, scope: String!): Tag!
|
||||||
deleteTag(id: ID!): ID!
|
deleteTag(id: ID!): ID!
|
||||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
@@ -220,7 +263,7 @@ type Mutation {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type IntRangeOutput { from: Int!, to: Int! }
|
type IntRangeOutput { from: Int!, to: Int! }
|
||||||
type TimeRangeOutput { from: Time!, to: Time! }
|
type TimeRangeOutput { range: String, from: Time!, to: Time! }
|
||||||
|
|
||||||
input JobFilter {
|
input JobFilter {
|
||||||
tags: [ID!]
|
tags: [ID!]
|
||||||
@@ -232,6 +275,7 @@ input JobFilter {
|
|||||||
cluster: StringInput
|
cluster: StringInput
|
||||||
partition: StringInput
|
partition: StringInput
|
||||||
duration: IntRange
|
duration: IntRange
|
||||||
|
energy: FloatRange
|
||||||
|
|
||||||
minRunningFor: Int
|
minRunningFor: Int
|
||||||
|
|
||||||
@@ -241,17 +285,14 @@ input JobFilter {
|
|||||||
|
|
||||||
startTime: TimeRange
|
startTime: TimeRange
|
||||||
state: [JobState!]
|
state: [JobState!]
|
||||||
flopsAnyAvg: FloatRange
|
metricStats: [MetricStatItem!]
|
||||||
memBwAvg: FloatRange
|
|
||||||
loadAvg: FloatRange
|
|
||||||
memUsedMax: FloatRange
|
|
||||||
|
|
||||||
exclusive: Int
|
exclusive: Int
|
||||||
node: StringInput
|
node: StringInput
|
||||||
}
|
}
|
||||||
|
|
||||||
input OrderByInput {
|
input OrderByInput {
|
||||||
field: String!
|
field: String!
|
||||||
|
type: String!,
|
||||||
order: SortDirectionEnum! = ASC
|
order: SortDirectionEnum! = ASC
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -270,8 +311,12 @@ input StringInput {
|
|||||||
}
|
}
|
||||||
|
|
||||||
input IntRange { from: Int!, to: Int! }
|
input IntRange { from: Int!, to: Int! }
|
||||||
input FloatRange { from: Float!, to: Float! }
|
input TimeRange { range: String, from: Time, to: Time }
|
||||||
input TimeRange { from: Time, to: Time }
|
|
||||||
|
input FloatRange {
|
||||||
|
from: Float!
|
||||||
|
to: Float!
|
||||||
|
}
|
||||||
|
|
||||||
type JobResultList {
|
type JobResultList {
|
||||||
items: [Job!]!
|
items: [Job!]!
|
||||||
@@ -295,6 +340,7 @@ type HistoPoint {
|
|||||||
type MetricHistoPoints {
|
type MetricHistoPoints {
|
||||||
metric: String!
|
metric: String!
|
||||||
unit: String!
|
unit: String!
|
||||||
|
stat: String
|
||||||
data: [MetricHistoPoint!]
|
data: [MetricHistoPoint!]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
333
api/swagger.json
333
api/swagger.json
@@ -202,7 +202,7 @@
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DeleteJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -272,7 +272,7 @@
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DeleteJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -342,7 +342,7 @@
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DeleteJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -487,7 +487,7 @@
|
|||||||
"201": {
|
"201": {
|
||||||
"description": "Job added successfully",
|
"description": "Job added successfully",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.StartJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -581,89 +581,7 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"422": {
|
"422": {
|
||||||
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
|
"description": "Unprocessable Entity: job has already been stopped",
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"500": {
|
|
||||||
"description": "Internal Server Error",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"/jobs/stop_job/{id}": {
|
|
||||||
"post": {
|
|
||||||
"security": [
|
|
||||||
{
|
|
||||||
"ApiKeyAuth": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"description": "Job to stop is specified by database ID. Only stopTime and final state are required in request body.\nReturns full job resource information according to 'JobMeta' scheme.",
|
|
||||||
"consumes": [
|
|
||||||
"application/json"
|
|
||||||
],
|
|
||||||
"produces": [
|
|
||||||
"application/json"
|
|
||||||
],
|
|
||||||
"tags": [
|
|
||||||
"Job add and modify"
|
|
||||||
],
|
|
||||||
"summary": "Marks job as completed and triggers archiving",
|
|
||||||
"parameters": [
|
|
||||||
{
|
|
||||||
"type": "integer",
|
|
||||||
"description": "Database ID of Job",
|
|
||||||
"name": "id",
|
|
||||||
"in": "path",
|
|
||||||
"required": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description": "stopTime and final state in request body",
|
|
||||||
"name": "request",
|
|
||||||
"in": "body",
|
|
||||||
"required": true,
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.StopJobApiRequest"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"responses": {
|
|
||||||
"200": {
|
|
||||||
"description": "Job resource",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"400": {
|
|
||||||
"description": "Bad Request",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"401": {
|
|
||||||
"description": "Unauthorized",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"403": {
|
|
||||||
"description": "Forbidden",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"404": {
|
|
||||||
"description": "Resource not found",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"422": {
|
|
||||||
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
|
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
}
|
}
|
||||||
@@ -684,7 +602,7 @@
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
|
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nTag Scope for frontend visibility will default to \"global\" if none entered, other options: \"admin\" or specific username.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
|
||||||
"consumes": [
|
"consumes": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@@ -909,6 +827,72 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/notice/": {
|
||||||
|
"post": {
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"ApiKeyAuth": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Modifies the content of notice.txt, shown as notice box on the homepage.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
|
||||||
|
"consumes": [
|
||||||
|
"multipart/form-data"
|
||||||
|
],
|
||||||
|
"produces": [
|
||||||
|
"text/plain"
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
"User"
|
||||||
|
],
|
||||||
|
"summary": "Updates or empties the notice box content",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Priority 1: New content to display",
|
||||||
|
"name": "new-content",
|
||||||
|
"in": "formData"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Success Response Message",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Unauthorized",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"description": "Forbidden",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"422": {
|
||||||
|
"description": "Unprocessable Entity: The user could not be updated",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Internal Server Error",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/user/{id}": {
|
"/user/{id}": {
|
||||||
"post": {
|
"post": {
|
||||||
"security": [
|
"security": [
|
||||||
@@ -1277,6 +1261,11 @@
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "Testjob"
|
"example": "Testjob"
|
||||||
},
|
},
|
||||||
|
"scope": {
|
||||||
|
"description": "Tag Scope for Frontend Display",
|
||||||
|
"type": "string",
|
||||||
|
"example": "global"
|
||||||
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"description": "Tag Type",
|
"description": "Tag Type",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@@ -1284,6 +1273,14 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"api.DefaultJobApiResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"msg": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"api.DeleteJobApiRequest": {
|
"api.DeleteJobApiRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
@@ -1307,14 +1304,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"api.DeleteJobApiResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"msg": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"api.EditMetaRequest": {
|
"api.EditMetaRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -1401,15 +1390,6 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"api.StartJobApiResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"id": {
|
|
||||||
"description": "Database ID of new job",
|
|
||||||
"type": "integer"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"api.StopJobApiRequest": {
|
"api.StopJobApiRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
@@ -1418,17 +1398,14 @@
|
|||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"cluster": {
|
"cluster": {
|
||||||
"description": "Cluster of job",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "fritz"
|
"example": "fritz"
|
||||||
},
|
},
|
||||||
"jobId": {
|
"jobId": {
|
||||||
"description": "Cluster Job ID of job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"jobState": {
|
"jobState": {
|
||||||
"description": "Final job state",
|
|
||||||
"allOf": [
|
"allOf": [
|
||||||
{
|
{
|
||||||
"$ref": "#/definitions/schema.JobState"
|
"$ref": "#/definitions/schema.JobState"
|
||||||
@@ -1437,12 +1414,10 @@
|
|||||||
"example": "completed"
|
"example": "completed"
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"description": "Start Time of job as epoch",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 1649723812
|
"example": 1649723812
|
||||||
},
|
},
|
||||||
"stopTime": {
|
"stopTime": {
|
||||||
"description": "Stop Time of job as epoch",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 1649763839
|
"example": 1649763839
|
||||||
}
|
}
|
||||||
@@ -1487,12 +1462,10 @@
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"arrayJobId": {
|
"arrayJobId": {
|
||||||
"description": "The unique identifier of an array job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"cluster": {
|
"cluster": {
|
||||||
"description": "The unique identifier of a cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "fritz"
|
"example": "fritz"
|
||||||
},
|
},
|
||||||
@@ -1500,33 +1473,39 @@
|
|||||||
"$ref": "#/definitions/schema.JobLinkResultList"
|
"$ref": "#/definitions/schema.JobLinkResultList"
|
||||||
},
|
},
|
||||||
"duration": {
|
"duration": {
|
||||||
"description": "Duration of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 43200
|
"example": 43200
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"energyFootprint": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"exclusive": {
|
"exclusive": {
|
||||||
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 2,
|
"maximum": 2,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
"flopsAnyAvg": {
|
"footprint": {
|
||||||
"description": "FlopsAnyAvg as Float64",
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"id": {
|
"id": {
|
||||||
"description": "The unique identifier of a job in the database",
|
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
"jobId": {
|
"jobId": {
|
||||||
"description": "The unique identifier of a job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"jobState": {
|
"jobState": {
|
||||||
"description": "Final state of job",
|
|
||||||
"enum": [
|
"enum": [
|
||||||
"completed",
|
"completed",
|
||||||
"failed",
|
"failed",
|
||||||
@@ -1542,95 +1521,69 @@
|
|||||||
],
|
],
|
||||||
"example": "completed"
|
"example": "completed"
|
||||||
},
|
},
|
||||||
"loadAvg": {
|
|
||||||
"description": "LoadAvg as Float64",
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"memBwAvg": {
|
|
||||||
"description": "MemBwAvg as Float64",
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"memUsedMax": {
|
|
||||||
"description": "MemUsedMax as Float64",
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"metaData": {
|
"metaData": {
|
||||||
"description": "Additional information about the job",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"monitoringStatus": {
|
"monitoringStatus": {
|
||||||
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 3,
|
"maximum": 3,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
"numAcc": {
|
"numAcc": {
|
||||||
"description": "Number of accelerators used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"numHwthreads": {
|
"numHwthreads": {
|
||||||
"description": "NumCores int32 `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` // Number of HWThreads used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 20
|
"example": 20
|
||||||
},
|
},
|
||||||
"numNodes": {
|
"numNodes": {
|
||||||
"description": "Number of nodes used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"partition": {
|
"partition": {
|
||||||
"description": "The Slurm partition to which the job was submitted",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"project": {
|
"project": {
|
||||||
"description": "The unique identifier of a project",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd200"
|
"example": "abcd200"
|
||||||
},
|
},
|
||||||
"resources": {
|
"resources": {
|
||||||
"description": "Resources used by job",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Resource"
|
"$ref": "#/definitions/schema.Resource"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"smt": {
|
"smt": {
|
||||||
"description": "SMT threads used by job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 4
|
"example": 4
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"description": "Start time as 'time.Time' data type",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"subCluster": {
|
"subCluster": {
|
||||||
"description": "The unique identifier of a sub cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"tags": {
|
"tags": {
|
||||||
"description": "List of tags",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Tag"
|
"$ref": "#/definitions/schema.Tag"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"user": {
|
"user": {
|
||||||
"description": "The unique identifier of a user",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd100h"
|
"example": "abcd100h"
|
||||||
},
|
},
|
||||||
"walltime": {
|
"walltime": {
|
||||||
"description": "Requested walltime of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 86400
|
"example": 86400
|
||||||
@@ -1667,12 +1620,10 @@
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"arrayJobId": {
|
"arrayJobId": {
|
||||||
"description": "The unique identifier of an array job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"cluster": {
|
"cluster": {
|
||||||
"description": "The unique identifier of a cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "fritz"
|
"example": "fritz"
|
||||||
},
|
},
|
||||||
@@ -1680,29 +1631,39 @@
|
|||||||
"$ref": "#/definitions/schema.JobLinkResultList"
|
"$ref": "#/definitions/schema.JobLinkResultList"
|
||||||
},
|
},
|
||||||
"duration": {
|
"duration": {
|
||||||
"description": "Duration of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 43200
|
"example": 43200
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"energyFootprint": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"exclusive": {
|
"exclusive": {
|
||||||
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 2,
|
"maximum": 2,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"id": {
|
"id": {
|
||||||
"description": "The unique identifier of a job in the database",
|
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
"jobId": {
|
"jobId": {
|
||||||
"description": "The unique identifier of a job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"jobState": {
|
"jobState": {
|
||||||
"description": "Final state of job",
|
|
||||||
"enum": [
|
"enum": [
|
||||||
"completed",
|
"completed",
|
||||||
"failed",
|
"failed",
|
||||||
@@ -1719,91 +1680,76 @@
|
|||||||
"example": "completed"
|
"example": "completed"
|
||||||
},
|
},
|
||||||
"metaData": {
|
"metaData": {
|
||||||
"description": "Additional information about the job",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"monitoringStatus": {
|
"monitoringStatus": {
|
||||||
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 3,
|
"maximum": 3,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
"numAcc": {
|
"numAcc": {
|
||||||
"description": "Number of accelerators used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"numHwthreads": {
|
"numHwthreads": {
|
||||||
"description": "NumCores int32 `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` // Number of HWThreads used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 20
|
"example": 20
|
||||||
},
|
},
|
||||||
"numNodes": {
|
"numNodes": {
|
||||||
"description": "Number of nodes used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"partition": {
|
"partition": {
|
||||||
"description": "The Slurm partition to which the job was submitted",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"project": {
|
"project": {
|
||||||
"description": "The unique identifier of a project",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd200"
|
"example": "abcd200"
|
||||||
},
|
},
|
||||||
"resources": {
|
"resources": {
|
||||||
"description": "Resources used by job",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Resource"
|
"$ref": "#/definitions/schema.Resource"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"smt": {
|
"smt": {
|
||||||
"description": "SMT threads used by job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 4
|
"example": 4
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"description": "Start epoch time stamp in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 1649723812
|
"example": 1649723812
|
||||||
},
|
},
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"description": "Metric statistics of job",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"$ref": "#/definitions/schema.JobStatistics"
|
"$ref": "#/definitions/schema.JobStatistics"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"subCluster": {
|
"subCluster": {
|
||||||
"description": "The unique identifier of a sub cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"tags": {
|
"tags": {
|
||||||
"description": "List of tags",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Tag"
|
"$ref": "#/definitions/schema.Tag"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"user": {
|
"user": {
|
||||||
"description": "The unique identifier of a user",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd100h"
|
"example": "abcd100h"
|
||||||
},
|
},
|
||||||
"walltime": {
|
"walltime": {
|
||||||
"description": "Requested walltime of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 86400
|
"example": 86400
|
||||||
@@ -1892,6 +1838,15 @@
|
|||||||
"caution": {
|
"caution": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"lowerIsBetter": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@@ -1969,22 +1924,18 @@
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"accelerators": {
|
"accelerators": {
|
||||||
"description": "List of of accelerator device ids",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"configuration": {
|
"configuration": {
|
||||||
"description": "The configuration options of the node",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"hostname": {
|
"hostname": {
|
||||||
"description": "Name of the host (= node)",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"hwthreads": {
|
"hwthreads": {
|
||||||
"description": "List of OS processor ids",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
@@ -2027,6 +1978,12 @@
|
|||||||
"type": "number"
|
"type": "number"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"median": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"min": {
|
"min": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
@@ -2050,15 +2007,33 @@
|
|||||||
"coresPerSocket": {
|
"coresPerSocket": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
|
"energyFootprint": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"flopRateScalar": {
|
"flopRateScalar": {
|
||||||
"$ref": "#/definitions/schema.MetricValue"
|
"$ref": "#/definitions/schema.MetricValue"
|
||||||
},
|
},
|
||||||
"flopRateSimd": {
|
"flopRateSimd": {
|
||||||
"$ref": "#/definitions/schema.MetricValue"
|
"$ref": "#/definitions/schema.MetricValue"
|
||||||
},
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"memoryBandwidth": {
|
"memoryBandwidth": {
|
||||||
"$ref": "#/definitions/schema.MetricValue"
|
"$ref": "#/definitions/schema.MetricValue"
|
||||||
},
|
},
|
||||||
|
"metricConfig": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.MetricConfig"
|
||||||
|
}
|
||||||
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@@ -2088,6 +2063,15 @@
|
|||||||
"caution": {
|
"caution": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"lowerIsBetter": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@@ -2107,16 +2091,17 @@
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"id": {
|
"id": {
|
||||||
"description": "The unique DB identifier of a tag",
|
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"description": "Tag Name",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "Testjob"
|
"example": "Testjob"
|
||||||
},
|
},
|
||||||
|
"scope": {
|
||||||
|
"type": "string",
|
||||||
|
"example": "global"
|
||||||
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"description": "Tag Type",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "Debug"
|
"example": "Debug"
|
||||||
}
|
}
|
||||||
|
|||||||
259
api/swagger.yaml
259
api/swagger.yaml
@@ -23,11 +23,20 @@ definitions:
|
|||||||
description: Tag Name
|
description: Tag Name
|
||||||
example: Testjob
|
example: Testjob
|
||||||
type: string
|
type: string
|
||||||
|
scope:
|
||||||
|
description: Tag Scope for Frontend Display
|
||||||
|
example: global
|
||||||
|
type: string
|
||||||
type:
|
type:
|
||||||
description: Tag Type
|
description: Tag Type
|
||||||
example: Debug
|
example: Debug
|
||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
|
api.DefaultJobApiResponse:
|
||||||
|
properties:
|
||||||
|
msg:
|
||||||
|
type: string
|
||||||
|
type: object
|
||||||
api.DeleteJobApiRequest:
|
api.DeleteJobApiRequest:
|
||||||
properties:
|
properties:
|
||||||
cluster:
|
cluster:
|
||||||
@@ -45,11 +54,6 @@ definitions:
|
|||||||
required:
|
required:
|
||||||
- jobId
|
- jobId
|
||||||
type: object
|
type: object
|
||||||
api.DeleteJobApiResponse:
|
|
||||||
properties:
|
|
||||||
msg:
|
|
||||||
type: string
|
|
||||||
type: object
|
|
||||||
api.EditMetaRequest:
|
api.EditMetaRequest:
|
||||||
properties:
|
properties:
|
||||||
key:
|
key:
|
||||||
@@ -108,33 +112,22 @@ definitions:
|
|||||||
scope:
|
scope:
|
||||||
$ref: '#/definitions/schema.MetricScope'
|
$ref: '#/definitions/schema.MetricScope'
|
||||||
type: object
|
type: object
|
||||||
api.StartJobApiResponse:
|
|
||||||
properties:
|
|
||||||
id:
|
|
||||||
description: Database ID of new job
|
|
||||||
type: integer
|
|
||||||
type: object
|
|
||||||
api.StopJobApiRequest:
|
api.StopJobApiRequest:
|
||||||
properties:
|
properties:
|
||||||
cluster:
|
cluster:
|
||||||
description: Cluster of job
|
|
||||||
example: fritz
|
example: fritz
|
||||||
type: string
|
type: string
|
||||||
jobId:
|
jobId:
|
||||||
description: Cluster Job ID of job
|
|
||||||
example: 123000
|
example: 123000
|
||||||
type: integer
|
type: integer
|
||||||
jobState:
|
jobState:
|
||||||
allOf:
|
allOf:
|
||||||
- $ref: '#/definitions/schema.JobState'
|
- $ref: '#/definitions/schema.JobState'
|
||||||
description: Final job state
|
|
||||||
example: completed
|
example: completed
|
||||||
startTime:
|
startTime:
|
||||||
description: Start Time of job as epoch
|
|
||||||
example: 1649723812
|
example: 1649723812
|
||||||
type: integer
|
type: integer
|
||||||
stopTime:
|
stopTime:
|
||||||
description: Stop Time of job as epoch
|
|
||||||
example: 1649763839
|
example: 1649763839
|
||||||
type: integer
|
type: integer
|
||||||
required:
|
required:
|
||||||
@@ -167,42 +160,40 @@ definitions:
|
|||||||
description: Information of a HPC job.
|
description: Information of a HPC job.
|
||||||
properties:
|
properties:
|
||||||
arrayJobId:
|
arrayJobId:
|
||||||
description: The unique identifier of an array job
|
|
||||||
example: 123000
|
example: 123000
|
||||||
type: integer
|
type: integer
|
||||||
cluster:
|
cluster:
|
||||||
description: The unique identifier of a cluster
|
|
||||||
example: fritz
|
example: fritz
|
||||||
type: string
|
type: string
|
||||||
concurrentJobs:
|
concurrentJobs:
|
||||||
$ref: '#/definitions/schema.JobLinkResultList'
|
$ref: '#/definitions/schema.JobLinkResultList'
|
||||||
duration:
|
duration:
|
||||||
description: Duration of job in seconds (Min > 0)
|
|
||||||
example: 43200
|
example: 43200
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
|
energy:
|
||||||
|
type: number
|
||||||
|
energyFootprint:
|
||||||
|
additionalProperties:
|
||||||
|
type: number
|
||||||
|
type: object
|
||||||
exclusive:
|
exclusive:
|
||||||
description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
|
|
||||||
of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
|
|
||||||
jobs of same user'
|
|
||||||
example: 1
|
example: 1
|
||||||
maximum: 2
|
maximum: 2
|
||||||
minimum: 0
|
minimum: 0
|
||||||
type: integer
|
type: integer
|
||||||
flopsAnyAvg:
|
footprint:
|
||||||
description: FlopsAnyAvg as Float64
|
additionalProperties:
|
||||||
type: number
|
type: number
|
||||||
|
type: object
|
||||||
id:
|
id:
|
||||||
description: The unique identifier of a job in the database
|
|
||||||
type: integer
|
type: integer
|
||||||
jobId:
|
jobId:
|
||||||
description: The unique identifier of a job
|
|
||||||
example: 123000
|
example: 123000
|
||||||
type: integer
|
type: integer
|
||||||
jobState:
|
jobState:
|
||||||
allOf:
|
allOf:
|
||||||
- $ref: '#/definitions/schema.JobState'
|
- $ref: '#/definitions/schema.JobState'
|
||||||
description: Final state of job
|
|
||||||
enum:
|
enum:
|
||||||
- completed
|
- completed
|
||||||
- failed
|
- failed
|
||||||
@@ -211,79 +202,53 @@ definitions:
|
|||||||
- timeout
|
- timeout
|
||||||
- out_of_memory
|
- out_of_memory
|
||||||
example: completed
|
example: completed
|
||||||
loadAvg:
|
|
||||||
description: LoadAvg as Float64
|
|
||||||
type: number
|
|
||||||
memBwAvg:
|
|
||||||
description: MemBwAvg as Float64
|
|
||||||
type: number
|
|
||||||
memUsedMax:
|
|
||||||
description: MemUsedMax as Float64
|
|
||||||
type: number
|
|
||||||
metaData:
|
metaData:
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
type: string
|
type: string
|
||||||
description: Additional information about the job
|
|
||||||
type: object
|
type: object
|
||||||
monitoringStatus:
|
monitoringStatus:
|
||||||
description: 'State of monitoring system during job run: 0 - Disabled, 1 -
|
|
||||||
Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
|
|
||||||
example: 1
|
example: 1
|
||||||
maximum: 3
|
maximum: 3
|
||||||
minimum: 0
|
minimum: 0
|
||||||
type: integer
|
type: integer
|
||||||
numAcc:
|
numAcc:
|
||||||
description: Number of accelerators used (Min > 0)
|
|
||||||
example: 2
|
example: 2
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
numHwthreads:
|
numHwthreads:
|
||||||
description: NumCores int32 `json:"numCores" db:"num_cores"
|
|
||||||
example:"20" minimum:"1"` //
|
|
||||||
Number of HWThreads used (Min > 0)
|
|
||||||
example: 20
|
example: 20
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
numNodes:
|
numNodes:
|
||||||
description: Number of nodes used (Min > 0)
|
|
||||||
example: 2
|
example: 2
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
partition:
|
partition:
|
||||||
description: The Slurm partition to which the job was submitted
|
|
||||||
example: main
|
example: main
|
||||||
type: string
|
type: string
|
||||||
project:
|
project:
|
||||||
description: The unique identifier of a project
|
|
||||||
example: abcd200
|
example: abcd200
|
||||||
type: string
|
type: string
|
||||||
resources:
|
resources:
|
||||||
description: Resources used by job
|
|
||||||
items:
|
items:
|
||||||
$ref: '#/definitions/schema.Resource'
|
$ref: '#/definitions/schema.Resource'
|
||||||
type: array
|
type: array
|
||||||
smt:
|
smt:
|
||||||
description: SMT threads used by job
|
|
||||||
example: 4
|
example: 4
|
||||||
type: integer
|
type: integer
|
||||||
startTime:
|
startTime:
|
||||||
description: Start time as 'time.Time' data type
|
|
||||||
type: string
|
type: string
|
||||||
subCluster:
|
subCluster:
|
||||||
description: The unique identifier of a sub cluster
|
|
||||||
example: main
|
example: main
|
||||||
type: string
|
type: string
|
||||||
tags:
|
tags:
|
||||||
description: List of tags
|
|
||||||
items:
|
items:
|
||||||
$ref: '#/definitions/schema.Tag'
|
$ref: '#/definitions/schema.Tag'
|
||||||
type: array
|
type: array
|
||||||
user:
|
user:
|
||||||
description: The unique identifier of a user
|
|
||||||
example: abcd100h
|
example: abcd100h
|
||||||
type: string
|
type: string
|
||||||
walltime:
|
walltime:
|
||||||
description: Requested walltime of job in seconds (Min > 0)
|
|
||||||
example: 86400
|
example: 86400
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
@@ -308,39 +273,40 @@ definitions:
|
|||||||
description: Meta data information of a HPC job.
|
description: Meta data information of a HPC job.
|
||||||
properties:
|
properties:
|
||||||
arrayJobId:
|
arrayJobId:
|
||||||
description: The unique identifier of an array job
|
|
||||||
example: 123000
|
example: 123000
|
||||||
type: integer
|
type: integer
|
||||||
cluster:
|
cluster:
|
||||||
description: The unique identifier of a cluster
|
|
||||||
example: fritz
|
example: fritz
|
||||||
type: string
|
type: string
|
||||||
concurrentJobs:
|
concurrentJobs:
|
||||||
$ref: '#/definitions/schema.JobLinkResultList'
|
$ref: '#/definitions/schema.JobLinkResultList'
|
||||||
duration:
|
duration:
|
||||||
description: Duration of job in seconds (Min > 0)
|
|
||||||
example: 43200
|
example: 43200
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
|
energy:
|
||||||
|
type: number
|
||||||
|
energyFootprint:
|
||||||
|
additionalProperties:
|
||||||
|
type: number
|
||||||
|
type: object
|
||||||
exclusive:
|
exclusive:
|
||||||
description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
|
|
||||||
of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
|
|
||||||
jobs of same user'
|
|
||||||
example: 1
|
example: 1
|
||||||
maximum: 2
|
maximum: 2
|
||||||
minimum: 0
|
minimum: 0
|
||||||
type: integer
|
type: integer
|
||||||
|
footprint:
|
||||||
|
additionalProperties:
|
||||||
|
type: number
|
||||||
|
type: object
|
||||||
id:
|
id:
|
||||||
description: The unique identifier of a job in the database
|
|
||||||
type: integer
|
type: integer
|
||||||
jobId:
|
jobId:
|
||||||
description: The unique identifier of a job
|
|
||||||
example: 123000
|
example: 123000
|
||||||
type: integer
|
type: integer
|
||||||
jobState:
|
jobState:
|
||||||
allOf:
|
allOf:
|
||||||
- $ref: '#/definitions/schema.JobState'
|
- $ref: '#/definitions/schema.JobState'
|
||||||
description: Final state of job
|
|
||||||
enum:
|
enum:
|
||||||
- completed
|
- completed
|
||||||
- failed
|
- failed
|
||||||
@@ -352,74 +318,56 @@ definitions:
|
|||||||
metaData:
|
metaData:
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
type: string
|
type: string
|
||||||
description: Additional information about the job
|
|
||||||
type: object
|
type: object
|
||||||
monitoringStatus:
|
monitoringStatus:
|
||||||
description: 'State of monitoring system during job run: 0 - Disabled, 1 -
|
|
||||||
Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
|
|
||||||
example: 1
|
example: 1
|
||||||
maximum: 3
|
maximum: 3
|
||||||
minimum: 0
|
minimum: 0
|
||||||
type: integer
|
type: integer
|
||||||
numAcc:
|
numAcc:
|
||||||
description: Number of accelerators used (Min > 0)
|
|
||||||
example: 2
|
example: 2
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
numHwthreads:
|
numHwthreads:
|
||||||
description: NumCores int32 `json:"numCores" db:"num_cores"
|
|
||||||
example:"20" minimum:"1"` //
|
|
||||||
Number of HWThreads used (Min > 0)
|
|
||||||
example: 20
|
example: 20
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
numNodes:
|
numNodes:
|
||||||
description: Number of nodes used (Min > 0)
|
|
||||||
example: 2
|
example: 2
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
partition:
|
partition:
|
||||||
description: The Slurm partition to which the job was submitted
|
|
||||||
example: main
|
example: main
|
||||||
type: string
|
type: string
|
||||||
project:
|
project:
|
||||||
description: The unique identifier of a project
|
|
||||||
example: abcd200
|
example: abcd200
|
||||||
type: string
|
type: string
|
||||||
resources:
|
resources:
|
||||||
description: Resources used by job
|
|
||||||
items:
|
items:
|
||||||
$ref: '#/definitions/schema.Resource'
|
$ref: '#/definitions/schema.Resource'
|
||||||
type: array
|
type: array
|
||||||
smt:
|
smt:
|
||||||
description: SMT threads used by job
|
|
||||||
example: 4
|
example: 4
|
||||||
type: integer
|
type: integer
|
||||||
startTime:
|
startTime:
|
||||||
description: Start epoch time stamp in seconds (Min > 0)
|
|
||||||
example: 1649723812
|
example: 1649723812
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
statistics:
|
statistics:
|
||||||
additionalProperties:
|
additionalProperties:
|
||||||
$ref: '#/definitions/schema.JobStatistics'
|
$ref: '#/definitions/schema.JobStatistics'
|
||||||
description: Metric statistics of job
|
|
||||||
type: object
|
type: object
|
||||||
subCluster:
|
subCluster:
|
||||||
description: The unique identifier of a sub cluster
|
|
||||||
example: main
|
example: main
|
||||||
type: string
|
type: string
|
||||||
tags:
|
tags:
|
||||||
description: List of tags
|
|
||||||
items:
|
items:
|
||||||
$ref: '#/definitions/schema.Tag'
|
$ref: '#/definitions/schema.Tag'
|
||||||
type: array
|
type: array
|
||||||
user:
|
user:
|
||||||
description: The unique identifier of a user
|
|
||||||
example: abcd100h
|
example: abcd100h
|
||||||
type: string
|
type: string
|
||||||
walltime:
|
walltime:
|
||||||
description: Requested walltime of job in seconds (Min > 0)
|
|
||||||
example: 86400
|
example: 86400
|
||||||
minimum: 1
|
minimum: 1
|
||||||
type: integer
|
type: integer
|
||||||
@@ -486,6 +434,12 @@ definitions:
|
|||||||
type: number
|
type: number
|
||||||
caution:
|
caution:
|
||||||
type: number
|
type: number
|
||||||
|
energy:
|
||||||
|
type: string
|
||||||
|
footprint:
|
||||||
|
type: string
|
||||||
|
lowerIsBetter:
|
||||||
|
type: boolean
|
||||||
name:
|
name:
|
||||||
type: string
|
type: string
|
||||||
normal:
|
normal:
|
||||||
@@ -541,18 +495,14 @@ definitions:
|
|||||||
description: A resource used by a job
|
description: A resource used by a job
|
||||||
properties:
|
properties:
|
||||||
accelerators:
|
accelerators:
|
||||||
description: List of of accelerator device ids
|
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
type: array
|
type: array
|
||||||
configuration:
|
configuration:
|
||||||
description: The configuration options of the node
|
|
||||||
type: string
|
type: string
|
||||||
hostname:
|
hostname:
|
||||||
description: Name of the host (= node)
|
|
||||||
type: string
|
type: string
|
||||||
hwthreads:
|
hwthreads:
|
||||||
description: List of OS processor ids
|
|
||||||
items:
|
items:
|
||||||
type: integer
|
type: integer
|
||||||
type: array
|
type: array
|
||||||
@@ -580,6 +530,10 @@ definitions:
|
|||||||
items:
|
items:
|
||||||
type: number
|
type: number
|
||||||
type: array
|
type: array
|
||||||
|
median:
|
||||||
|
items:
|
||||||
|
type: number
|
||||||
|
type: array
|
||||||
min:
|
min:
|
||||||
items:
|
items:
|
||||||
type: number
|
type: number
|
||||||
@@ -595,12 +549,24 @@ definitions:
|
|||||||
properties:
|
properties:
|
||||||
coresPerSocket:
|
coresPerSocket:
|
||||||
type: integer
|
type: integer
|
||||||
|
energyFootprint:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
flopRateScalar:
|
flopRateScalar:
|
||||||
$ref: '#/definitions/schema.MetricValue'
|
$ref: '#/definitions/schema.MetricValue'
|
||||||
flopRateSimd:
|
flopRateSimd:
|
||||||
$ref: '#/definitions/schema.MetricValue'
|
$ref: '#/definitions/schema.MetricValue'
|
||||||
|
footprint:
|
||||||
|
items:
|
||||||
|
type: string
|
||||||
|
type: array
|
||||||
memoryBandwidth:
|
memoryBandwidth:
|
||||||
$ref: '#/definitions/schema.MetricValue'
|
$ref: '#/definitions/schema.MetricValue'
|
||||||
|
metricConfig:
|
||||||
|
items:
|
||||||
|
$ref: '#/definitions/schema.MetricConfig'
|
||||||
|
type: array
|
||||||
name:
|
name:
|
||||||
type: string
|
type: string
|
||||||
nodes:
|
nodes:
|
||||||
@@ -620,6 +586,12 @@ definitions:
|
|||||||
type: number
|
type: number
|
||||||
caution:
|
caution:
|
||||||
type: number
|
type: number
|
||||||
|
energy:
|
||||||
|
type: string
|
||||||
|
footprint:
|
||||||
|
type: string
|
||||||
|
lowerIsBetter:
|
||||||
|
type: boolean
|
||||||
name:
|
name:
|
||||||
type: string
|
type: string
|
||||||
normal:
|
normal:
|
||||||
@@ -633,14 +605,14 @@ definitions:
|
|||||||
description: Defines a tag using name and type.
|
description: Defines a tag using name and type.
|
||||||
properties:
|
properties:
|
||||||
id:
|
id:
|
||||||
description: The unique DB identifier of a tag
|
|
||||||
type: integer
|
type: integer
|
||||||
name:
|
name:
|
||||||
description: Tag Name
|
|
||||||
example: Testjob
|
example: Testjob
|
||||||
type: string
|
type: string
|
||||||
|
scope:
|
||||||
|
example: global
|
||||||
|
type: string
|
||||||
type:
|
type:
|
||||||
description: Tag Type
|
|
||||||
example: Debug
|
example: Debug
|
||||||
type: string
|
type: string
|
||||||
type: object
|
type: object
|
||||||
@@ -929,7 +901,7 @@ paths:
|
|||||||
"200":
|
"200":
|
||||||
description: Success message
|
description: Success message
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.DeleteJobApiResponse'
|
$ref: '#/definitions/api.DefaultJobApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@@ -976,7 +948,7 @@ paths:
|
|||||||
"200":
|
"200":
|
||||||
description: Success message
|
description: Success message
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.DeleteJobApiResponse'
|
$ref: '#/definitions/api.DefaultJobApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@@ -1023,7 +995,7 @@ paths:
|
|||||||
"200":
|
"200":
|
||||||
description: Success message
|
description: Success message
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.DeleteJobApiResponse'
|
$ref: '#/definitions/api.DefaultJobApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@@ -1121,7 +1093,7 @@ paths:
|
|||||||
"201":
|
"201":
|
||||||
description: Job added successfully
|
description: Job added successfully
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.StartJobApiResponse'
|
$ref: '#/definitions/api.DefaultJobApiResponse'
|
||||||
"400":
|
"400":
|
||||||
description: Bad Request
|
description: Bad Request
|
||||||
schema:
|
schema:
|
||||||
@@ -1184,64 +1156,7 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
$ref: '#/definitions/api.ErrorResponse'
|
||||||
"422":
|
"422":
|
||||||
description: 'Unprocessable Entity: finding job failed: sql: no rows in
|
description: 'Unprocessable Entity: job has already been stopped'
|
||||||
result set'
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
|
||||||
"500":
|
|
||||||
description: Internal Server Error
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
|
||||||
security:
|
|
||||||
- ApiKeyAuth: []
|
|
||||||
summary: Marks job as completed and triggers archiving
|
|
||||||
tags:
|
|
||||||
- Job add and modify
|
|
||||||
/jobs/stop_job/{id}:
|
|
||||||
post:
|
|
||||||
consumes:
|
|
||||||
- application/json
|
|
||||||
description: |-
|
|
||||||
Job to stop is specified by database ID. Only stopTime and final state are required in request body.
|
|
||||||
Returns full job resource information according to 'JobMeta' scheme.
|
|
||||||
parameters:
|
|
||||||
- description: Database ID of Job
|
|
||||||
in: path
|
|
||||||
name: id
|
|
||||||
required: true
|
|
||||||
type: integer
|
|
||||||
- description: stopTime and final state in request body
|
|
||||||
in: body
|
|
||||||
name: request
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/api.StopJobApiRequest'
|
|
||||||
produces:
|
|
||||||
- application/json
|
|
||||||
responses:
|
|
||||||
"200":
|
|
||||||
description: Job resource
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/schema.JobMeta'
|
|
||||||
"400":
|
|
||||||
description: Bad Request
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
|
||||||
"401":
|
|
||||||
description: Unauthorized
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
|
||||||
"403":
|
|
||||||
description: Forbidden
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
|
||||||
"404":
|
|
||||||
description: Resource not found
|
|
||||||
schema:
|
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
|
||||||
"422":
|
|
||||||
description: 'Unprocessable Entity: finding job failed: sql: no rows in
|
|
||||||
result set'
|
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/definitions/api.ErrorResponse'
|
$ref: '#/definitions/api.ErrorResponse'
|
||||||
"500":
|
"500":
|
||||||
@@ -1259,6 +1174,7 @@ paths:
|
|||||||
- application/json
|
- application/json
|
||||||
description: |-
|
description: |-
|
||||||
Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
||||||
|
Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
|
||||||
If tagged job is already finished: Tag will be written directly to respective archive files.
|
If tagged job is already finished: Tag will be written directly to respective archive files.
|
||||||
parameters:
|
parameters:
|
||||||
- description: Job Database ID
|
- description: Job Database ID
|
||||||
@@ -1302,6 +1218,51 @@ paths:
|
|||||||
summary: Adds one or more tags to a job
|
summary: Adds one or more tags to a job
|
||||||
tags:
|
tags:
|
||||||
- Job add and modify
|
- Job add and modify
|
||||||
|
/notice/:
|
||||||
|
post:
|
||||||
|
consumes:
|
||||||
|
- multipart/form-data
|
||||||
|
description: |-
|
||||||
|
Modifies the content of notice.txt, shown as notice box on the homepage.
|
||||||
|
If more than one formValue is set then only the highest priority field is used.
|
||||||
|
Only accessible from IPs registered with apiAllowedIPs configuration option.
|
||||||
|
parameters:
|
||||||
|
- description: 'Priority 1: New content to display'
|
||||||
|
in: formData
|
||||||
|
name: new-content
|
||||||
|
type: string
|
||||||
|
produces:
|
||||||
|
- text/plain
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Success Response Message
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
"400":
|
||||||
|
description: Bad Request
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
"401":
|
||||||
|
description: Unauthorized
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
"403":
|
||||||
|
description: Forbidden
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
"422":
|
||||||
|
description: 'Unprocessable Entity: The user could not be updated'
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
"500":
|
||||||
|
description: Internal Server Error
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
security:
|
||||||
|
- ApiKeyAuth: []
|
||||||
|
summary: Updates or empties the notice box content
|
||||||
|
tags:
|
||||||
|
- User
|
||||||
/user/{id}:
|
/user/{id}:
|
||||||
post:
|
post:
|
||||||
consumes:
|
consumes:
|
||||||
|
|||||||
33
cmd/cc-backend/cli.go
Normal file
33
cmd/cc-backend/cli.go
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "flag"
|
||||||
|
|
||||||
|
var (
|
||||||
|
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
|
||||||
|
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||||
|
)
|
||||||
|
|
||||||
|
func cliInit() {
|
||||||
|
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
|
||||||
|
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||||
|
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
|
||||||
|
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||||
|
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||||
|
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||||
|
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||||
|
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||||
|
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
||||||
|
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||||
|
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||||
|
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||||
|
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
|
||||||
|
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
||||||
|
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||||
|
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||||
|
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
||||||
|
flag.Parse()
|
||||||
|
}
|
||||||
85
cmd/cc-backend/init.go
Normal file
85
cmd/cc-backend/init.go
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
)
|
||||||
|
|
||||||
|
const envString = `
|
||||||
|
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||||
|
# You can generate your own keypair using the gen-keypair tool
|
||||||
|
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||||
|
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||||
|
|
||||||
|
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||||
|
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||||
|
`
|
||||||
|
|
||||||
|
const configString = `
|
||||||
|
{
|
||||||
|
"addr": "127.0.0.1:8080",
|
||||||
|
"archive": {
|
||||||
|
"kind": "file",
|
||||||
|
"path": "./var/job-archive"
|
||||||
|
},
|
||||||
|
"jwts": {
|
||||||
|
"max-age": "2000h"
|
||||||
|
},
|
||||||
|
"clusters": [
|
||||||
|
{
|
||||||
|
"name": "name",
|
||||||
|
"metricDataRepository": {
|
||||||
|
"kind": "cc-metric-store",
|
||||||
|
"url": "http://localhost:8082",
|
||||||
|
"token": ""
|
||||||
|
},
|
||||||
|
"filterRanges": {
|
||||||
|
"numNodes": {
|
||||||
|
"from": 1,
|
||||||
|
"to": 64
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
"from": 0,
|
||||||
|
"to": 86400
|
||||||
|
},
|
||||||
|
"startTime": {
|
||||||
|
"from": "2023-01-01T00:00:00Z",
|
||||||
|
"to": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
`
|
||||||
|
|
||||||
|
func initEnv() {
|
||||||
|
if util.CheckFileExists("var") {
|
||||||
|
fmt.Print("Directory ./var already exists. Exiting!\n")
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
|
||||||
|
log.Fatalf("Writing config.json failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
|
||||||
|
log.Fatalf("Writing .env failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.Mkdir("var", 0o777); err != nil {
|
||||||
|
log.Fatalf("Mkdir var failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
err := repository.MigrateDB("sqlite3", "./var/job.db")
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Initialize job.db failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -5,158 +5,48 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"crypto/tls"
|
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
|
||||||
"flag"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"net"
|
|
||||||
"net/http"
|
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"runtime"
|
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/99designs/gqlgen/graphql/handler"
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
"github.com/99designs/gqlgen/graphql/playground"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
"github.com/ClusterCockpit/cc-backend/internal/taskManager"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/runtimeEnv"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
"github.com/ClusterCockpit/cc-backend/web"
|
|
||||||
"github.com/go-co-op/gocron"
|
|
||||||
"github.com/google/gops/agent"
|
"github.com/google/gops/agent"
|
||||||
"github.com/gorilla/handlers"
|
|
||||||
"github.com/gorilla/mux"
|
|
||||||
httpSwagger "github.com/swaggo/http-swagger"
|
|
||||||
|
|
||||||
_ "github.com/go-sql-driver/mysql"
|
_ "github.com/go-sql-driver/mysql"
|
||||||
_ "github.com/mattn/go-sqlite3"
|
_ "github.com/mattn/go-sqlite3"
|
||||||
)
|
)
|
||||||
|
|
||||||
const logoString = `
|
const logoString = `
|
||||||
____ _ _ ____ _ _ _
|
_____ _ _ ____ _ _ _
|
||||||
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
|
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
|
||||||
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
|
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
|
||||||
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
|
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
|
||||||
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
\_____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
||||||
|_|
|
|_|
|
||||||
`
|
`
|
||||||
|
|
||||||
const envString = `
|
|
||||||
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
|
||||||
# You can generate your own keypair using the gen-keypair tool
|
|
||||||
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
|
||||||
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
|
||||||
|
|
||||||
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
|
||||||
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
|
||||||
`
|
|
||||||
|
|
||||||
const configString = `
|
|
||||||
{
|
|
||||||
"addr": "127.0.0.1:8080",
|
|
||||||
"archive": {
|
|
||||||
"kind": "file",
|
|
||||||
"path": "./var/job-archive"
|
|
||||||
},
|
|
||||||
"jwts": {
|
|
||||||
"max-age": "2000h"
|
|
||||||
},
|
|
||||||
"clusters": [
|
|
||||||
{
|
|
||||||
"name": "name",
|
|
||||||
"metricDataRepository": {
|
|
||||||
"kind": "cc-metric-store",
|
|
||||||
"url": "http://localhost:8082",
|
|
||||||
"token": ""
|
|
||||||
},
|
|
||||||
"filterRanges": {
|
|
||||||
"numNodes": {
|
|
||||||
"from": 1,
|
|
||||||
"to": 64
|
|
||||||
},
|
|
||||||
"duration": {
|
|
||||||
"from": 0,
|
|
||||||
"to": 86400
|
|
||||||
},
|
|
||||||
"startTime": {
|
|
||||||
"from": "2023-01-01T00:00:00Z",
|
|
||||||
"to": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
`
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
date string
|
date string
|
||||||
commit string
|
commit string
|
||||||
version string
|
version string
|
||||||
)
|
)
|
||||||
|
|
||||||
func initEnv() {
|
|
||||||
if util.CheckFileExists("var") {
|
|
||||||
fmt.Print("Directory ./var already exists. Exiting!\n")
|
|
||||||
os.Exit(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
|
|
||||||
log.Fatalf("Writing config.json failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
|
|
||||||
log.Fatalf("Writing .env failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.Mkdir("var", 0o777); err != nil {
|
|
||||||
log.Fatalf("Mkdir var failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
err := repository.MigrateDB("sqlite3", "./var/job.db")
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Initialize job.db failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
|
cliInit()
|
||||||
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
|
||||||
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
|
|
||||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
|
||||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
|
|
||||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
|
||||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
|
||||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
|
||||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
|
||||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
|
||||||
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
|
||||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
|
||||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
|
||||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
|
||||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
|
|
||||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
|
||||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
|
||||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
|
||||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if flagVersion {
|
if flagVersion {
|
||||||
fmt.Print(logoString)
|
fmt.Print(logoString)
|
||||||
@@ -171,14 +61,6 @@ func main() {
|
|||||||
// Apply config flags for pkg/log
|
// Apply config flags for pkg/log
|
||||||
log.Init(flagLogLevel, flagLogDateTime)
|
log.Init(flagLogLevel, flagLogDateTime)
|
||||||
|
|
||||||
if flagInit {
|
|
||||||
initEnv()
|
|
||||||
fmt.Print("Succesfully setup environment!\n")
|
|
||||||
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
|
|
||||||
fmt.Print("Add your job-archive at ./var/job-archive.\n")
|
|
||||||
os.Exit(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
// See https://github.com/google/gops (Runtime overhead is almost zero)
|
// See https://github.com/google/gops (Runtime overhead is almost zero)
|
||||||
if flagGops {
|
if flagGops {
|
||||||
if err := agent.Listen(agent.Options{}); err != nil {
|
if err := agent.Listen(agent.Options{}); err != nil {
|
||||||
@@ -227,18 +109,18 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
||||||
db := repository.GetConnection()
|
|
||||||
|
|
||||||
var authentication *auth.Authentication
|
if flagInit {
|
||||||
|
initEnv()
|
||||||
|
fmt.Print("Successfully setup environment!\n")
|
||||||
|
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
|
||||||
|
fmt.Print("Add your job-archive at ./var/job-archive.\n")
|
||||||
|
os.Exit(0)
|
||||||
|
}
|
||||||
|
|
||||||
if !config.Keys.DisableAuthentication {
|
if !config.Keys.DisableAuthentication {
|
||||||
var err error
|
|
||||||
if authentication, err = auth.Init(); err != nil {
|
|
||||||
log.Fatalf("auth initialization failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
|
auth.Init()
|
||||||
authentication.SessionMaxAge = d
|
|
||||||
}
|
|
||||||
|
|
||||||
if flagNewUser != "" {
|
if flagNewUser != "" {
|
||||||
parts := strings.SplitN(flagNewUser, ":", 3)
|
parts := strings.SplitN(flagNewUser, ":", 3)
|
||||||
@@ -260,12 +142,14 @@ func main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
authHandle := auth.GetAuthInstance()
|
||||||
|
|
||||||
if flagSyncLDAP {
|
if flagSyncLDAP {
|
||||||
if authentication.LdapAuth == nil {
|
if authHandle.LdapAuth == nil {
|
||||||
log.Fatal("cannot sync: LDAP authentication is not configured")
|
log.Fatal("cannot sync: LDAP authentication is not configured")
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := authentication.LdapAuth.Sync(); err != nil {
|
if err := authHandle.LdapAuth.Sync(); err != nil {
|
||||||
log.Fatalf("LDAP sync failed: %v", err)
|
log.Fatalf("LDAP sync failed: %v", err)
|
||||||
}
|
}
|
||||||
log.Info("LDAP sync successfull")
|
log.Info("LDAP sync successfull")
|
||||||
@@ -282,7 +166,7 @@ func main() {
|
|||||||
log.Warnf("user '%s' does not have the API role", user.Username)
|
log.Warnf("user '%s' does not have the API role", user.Username)
|
||||||
}
|
}
|
||||||
|
|
||||||
jwt, err := authentication.JwtAuth.ProvideJWT(user)
|
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
|
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
|
||||||
}
|
}
|
||||||
@@ -298,7 +182,7 @@ func main() {
|
|||||||
log.Fatalf("failed to initialize archive: %s", err.Error())
|
log.Fatalf("failed to initialize archive: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
if err := metricdata.Init(); err != nil {
|
||||||
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
|
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -318,228 +202,16 @@ func main() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup the http.Handler/Router used by the server
|
archiver.Start(repository.GetJobRepository())
|
||||||
jobRepo := repository.GetJobRepository()
|
taskManager.Start()
|
||||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
serverInit()
|
||||||
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
|
||||||
if os.Getenv("DEBUG") != "1" {
|
|
||||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
|
||||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
|
||||||
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
|
||||||
switch e := err.(type) {
|
|
||||||
case string:
|
|
||||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
|
||||||
case error:
|
|
||||||
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
|
||||||
}
|
|
||||||
|
|
||||||
return errors.New("MAIN > Internal server error (panic)")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
api := &api.RestApi{
|
|
||||||
JobRepository: jobRepo,
|
|
||||||
Resolver: resolver,
|
|
||||||
MachineStateDir: config.Keys.MachineStateDir,
|
|
||||||
Authentication: authentication,
|
|
||||||
}
|
|
||||||
|
|
||||||
r := mux.NewRouter()
|
|
||||||
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
|
|
||||||
|
|
||||||
info := map[string]interface{}{}
|
|
||||||
info["hasOpenIDConnect"] = false
|
|
||||||
|
|
||||||
if config.Keys.OpenIDConfig != nil {
|
|
||||||
openIDConnect := auth.NewOIDC(authentication)
|
|
||||||
openIDConnect.RegisterEndpoints(r)
|
|
||||||
info["hasOpenIDConnect"] = true
|
|
||||||
}
|
|
||||||
|
|
||||||
r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
|
||||||
log.Debugf("##%v##", info)
|
|
||||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
|
||||||
}).Methods(http.MethodGet)
|
|
||||||
r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
|
||||||
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
|
||||||
})
|
|
||||||
r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
|
||||||
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
|
||||||
})
|
|
||||||
|
|
||||||
secured := r.PathPrefix("/").Subrouter()
|
|
||||||
|
|
||||||
if !config.Keys.DisableAuthentication {
|
|
||||||
r.Handle("/login", authentication.Login(
|
|
||||||
// On success:
|
|
||||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
|
||||||
|
|
||||||
// On failure:
|
|
||||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
|
||||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
|
||||||
rw.WriteHeader(http.StatusUnauthorized)
|
|
||||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
|
||||||
Title: "Login failed - ClusterCockpit",
|
|
||||||
MsgType: "alert-warning",
|
|
||||||
Message: err.Error(),
|
|
||||||
Build: buildInfo,
|
|
||||||
Infos: info,
|
|
||||||
})
|
|
||||||
})).Methods(http.MethodPost)
|
|
||||||
|
|
||||||
r.Handle("/jwt-login", authentication.Login(
|
|
||||||
// On success:
|
|
||||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
|
||||||
|
|
||||||
// On failure:
|
|
||||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
|
||||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
|
||||||
rw.WriteHeader(http.StatusUnauthorized)
|
|
||||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
|
||||||
Title: "Login failed - ClusterCockpit",
|
|
||||||
MsgType: "alert-warning",
|
|
||||||
Message: err.Error(),
|
|
||||||
Build: buildInfo,
|
|
||||||
Infos: info,
|
|
||||||
})
|
|
||||||
}))
|
|
||||||
|
|
||||||
r.Handle("/logout", authentication.Logout(
|
|
||||||
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
|
||||||
rw.WriteHeader(http.StatusOK)
|
|
||||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
|
||||||
Title: "Bye - ClusterCockpit",
|
|
||||||
MsgType: "alert-info",
|
|
||||||
Message: "Logout successful",
|
|
||||||
Build: buildInfo,
|
|
||||||
Infos: info,
|
|
||||||
})
|
|
||||||
}))).Methods(http.MethodPost)
|
|
||||||
|
|
||||||
secured.Use(func(next http.Handler) http.Handler {
|
|
||||||
return authentication.Auth(
|
|
||||||
// On success;
|
|
||||||
next,
|
|
||||||
|
|
||||||
// On failure:
|
|
||||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
|
||||||
rw.WriteHeader(http.StatusUnauthorized)
|
|
||||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
|
||||||
Title: "Authentication failed - ClusterCockpit",
|
|
||||||
MsgType: "alert-danger",
|
|
||||||
Message: err.Error(),
|
|
||||||
Build: buildInfo,
|
|
||||||
Infos: info,
|
|
||||||
})
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
if flagDev {
|
|
||||||
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
|
||||||
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
|
||||||
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
|
||||||
}
|
|
||||||
secured.Handle("/query", graphQLEndpoint)
|
|
||||||
|
|
||||||
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
|
||||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
|
||||||
})
|
|
||||||
|
|
||||||
// Mount all /monitoring/... and /api/... routes.
|
|
||||||
routerConfig.SetupRoutes(secured, buildInfo)
|
|
||||||
api.MountRoutes(secured)
|
|
||||||
|
|
||||||
if config.Keys.EmbedStaticFiles {
|
|
||||||
if i, err := os.Stat("./var/img"); err == nil {
|
|
||||||
if i.IsDir() {
|
|
||||||
log.Info("Use local directory for static images")
|
|
||||||
r.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
r.PathPrefix("/").Handler(web.ServeFiles())
|
|
||||||
} else {
|
|
||||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
|
||||||
}
|
|
||||||
|
|
||||||
r.Use(handlers.CompressHandler)
|
|
||||||
r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
|
||||||
r.Use(handlers.CORS(
|
|
||||||
handlers.AllowCredentials(),
|
|
||||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
|
||||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
|
||||||
handlers.AllowedOrigins([]string{"*"})))
|
|
||||||
handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
|
|
||||||
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
|
||||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
|
||||||
params.Request.Method, params.URL.RequestURI(),
|
|
||||||
params.StatusCode, float32(params.Size)/1024,
|
|
||||||
time.Since(params.TimeStamp).Milliseconds())
|
|
||||||
} else {
|
|
||||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
|
||||||
params.Request.Method, params.URL.RequestURI(),
|
|
||||||
params.StatusCode, float32(params.Size)/1024,
|
|
||||||
time.Since(params.TimeStamp).Milliseconds())
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
server := http.Server{
|
|
||||||
ReadTimeout: 10 * time.Second,
|
|
||||||
WriteTimeout: 10 * time.Second,
|
|
||||||
Handler: handler,
|
|
||||||
Addr: config.Keys.Addr,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start http or https server
|
|
||||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("starting http listener failed: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
|
||||||
go func() {
|
|
||||||
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
|
||||||
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("loading X509 keypair failed: %v", err)
|
|
||||||
}
|
|
||||||
listener = tls.NewListener(listener, &tls.Config{
|
|
||||||
Certificates: []tls.Certificate{cert},
|
|
||||||
CipherSuites: []uint16{
|
|
||||||
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
|
||||||
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
|
||||||
},
|
|
||||||
MinVersion: tls.VersionTLS12,
|
|
||||||
PreferServerCipherSuites: true,
|
|
||||||
})
|
|
||||||
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
|
|
||||||
} else {
|
|
||||||
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
|
||||||
// be established first, then the user can be changed, and after that,
|
|
||||||
// the actual http server can be started.
|
|
||||||
if err = runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
|
||||||
log.Fatalf("error while preparing server start: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
serverStart()
|
||||||
log.Fatalf("starting server failed: %v", err)
|
|
||||||
}
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
@@ -550,117 +222,15 @@ func main() {
|
|||||||
<-sigs
|
<-sigs
|
||||||
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
|
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
|
||||||
|
|
||||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
serverShutdown()
|
||||||
server.Shutdown(context.Background())
|
|
||||||
|
|
||||||
// Then, wait for any async archivings still pending...
|
taskManager.Shutdown()
|
||||||
api.JobRepository.WaitForArchiving()
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
s := gocron.NewScheduler(time.Local)
|
|
||||||
|
|
||||||
if config.Keys.StopJobsExceedingWalltime > 0 {
|
|
||||||
log.Info("Register undead jobs service")
|
|
||||||
|
|
||||||
s.Every(1).Day().At("3:00").Do(func() {
|
|
||||||
err = jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
|
||||||
}
|
|
||||||
runtime.GC()
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
var cfg struct {
|
|
||||||
Retention schema.Retention `json:"retention"`
|
|
||||||
Compression int `json:"compression"`
|
|
||||||
}
|
|
||||||
|
|
||||||
cfg.Retention.IncludeDB = true
|
|
||||||
|
|
||||||
if err = json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
|
|
||||||
log.Warn("Error while unmarshaling raw config json")
|
|
||||||
}
|
|
||||||
|
|
||||||
switch cfg.Retention.Policy {
|
|
||||||
case "delete":
|
|
||||||
log.Info("Register retention delete service")
|
|
||||||
|
|
||||||
s.Every(1).Day().At("4:00").Do(func() {
|
|
||||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
|
||||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
|
||||||
}
|
|
||||||
archive.GetHandle().CleanUp(jobs)
|
|
||||||
|
|
||||||
if cfg.Retention.IncludeDB {
|
|
||||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
|
|
||||||
} else {
|
|
||||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
|
||||||
}
|
|
||||||
if err = jobRepo.Optimize(); err != nil {
|
|
||||||
log.Errorf("Error occured in db optimization: %s", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
case "move":
|
|
||||||
log.Info("Register retention move service")
|
|
||||||
|
|
||||||
s.Every(1).Day().At("4:00").Do(func() {
|
|
||||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
|
||||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
|
||||||
}
|
|
||||||
archive.GetHandle().Move(jobs, cfg.Retention.Location)
|
|
||||||
|
|
||||||
if cfg.Retention.IncludeDB {
|
|
||||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("Error while deleting retention jobs from db: %v", err)
|
|
||||||
} else {
|
|
||||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
|
||||||
}
|
|
||||||
if err = jobRepo.Optimize(); err != nil {
|
|
||||||
log.Errorf("Error occured in db optimization: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
if cfg.Compression > 0 {
|
|
||||||
log.Info("Register compression service")
|
|
||||||
|
|
||||||
s.Every(1).Day().At("5:00").Do(func() {
|
|
||||||
var jobs []*schema.Job
|
|
||||||
|
|
||||||
ar := archive.GetHandle()
|
|
||||||
startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
|
|
||||||
lastTime := ar.CompressLast(startTime)
|
|
||||||
if startTime == lastTime {
|
|
||||||
log.Info("Compression Service - Complete archive run")
|
|
||||||
jobs, err = jobRepo.FindJobsBetween(0, startTime)
|
|
||||||
|
|
||||||
} else {
|
|
||||||
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("Error while looking for compression jobs: %v", err)
|
|
||||||
}
|
|
||||||
ar.Compress(jobs)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
s.StartAsync()
|
|
||||||
|
|
||||||
if os.Getenv("GOGC") == "" {
|
if os.Getenv("GOGC") == "" {
|
||||||
debug.SetGCPercent(25)
|
debug.SetGCPercent(25)
|
||||||
}
|
}
|
||||||
runtimeEnv.SystemdNotifiy(true, "running")
|
runtimeEnv.SystemdNotifiy(true, "running")
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
log.Print("Gracefull shutdown completed!")
|
log.Print("Graceful shutdown completed!")
|
||||||
}
|
}
|
||||||
|
|||||||
318
cmd/cc-backend/server.go
Normal file
318
cmd/cc-backend/server.go
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/99designs/gqlgen/graphql/handler"
|
||||||
|
"github.com/99designs/gqlgen/graphql/playground"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/web"
|
||||||
|
"github.com/gorilla/handlers"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
httpSwagger "github.com/swaggo/http-swagger"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
router *mux.Router
|
||||||
|
server *http.Server
|
||||||
|
apiHandle *api.RestApi
|
||||||
|
)
|
||||||
|
|
||||||
|
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
json.NewEncoder(rw).Encode(map[string]string{
|
||||||
|
"status": http.StatusText(http.StatusUnauthorized),
|
||||||
|
"error": err.Error(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func serverInit() {
|
||||||
|
// Setup the http.Handler/Router used by the server
|
||||||
|
graph.Init()
|
||||||
|
resolver := graph.GetResolverInstance()
|
||||||
|
graphQLEndpoint := handler.NewDefaultServer(
|
||||||
|
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||||
|
|
||||||
|
if os.Getenv("DEBUG") != "1" {
|
||||||
|
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||||
|
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||||
|
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||||
|
switch e := err.(type) {
|
||||||
|
case string:
|
||||||
|
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||||
|
case error:
|
||||||
|
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("MAIN > Internal server error (panic)")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
authHandle := auth.GetAuthInstance()
|
||||||
|
|
||||||
|
apiHandle = api.New()
|
||||||
|
|
||||||
|
router = mux.NewRouter()
|
||||||
|
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
|
||||||
|
|
||||||
|
info := map[string]interface{}{}
|
||||||
|
info["hasOpenIDConnect"] = false
|
||||||
|
|
||||||
|
if config.Keys.OpenIDConfig != nil {
|
||||||
|
openIDConnect := auth.NewOIDC(authHandle)
|
||||||
|
openIDConnect.RegisterEndpoints(router)
|
||||||
|
info["hasOpenIDConnect"] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
log.Debugf("##%v##", info)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
||||||
|
}).Methods(http.MethodGet)
|
||||||
|
router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||||
|
})
|
||||||
|
router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||||
|
})
|
||||||
|
|
||||||
|
secured := router.PathPrefix("/").Subrouter()
|
||||||
|
securedapi := router.PathPrefix("/api").Subrouter()
|
||||||
|
userapi := router.PathPrefix("/userapi").Subrouter()
|
||||||
|
configapi := router.PathPrefix("/config").Subrouter()
|
||||||
|
frontendapi := router.PathPrefix("/frontend").Subrouter()
|
||||||
|
|
||||||
|
if !config.Keys.DisableAuthentication {
|
||||||
|
router.Handle("/login", authHandle.Login(
|
||||||
|
// On success: Handled within Login()
|
||||||
|
// On failure:
|
||||||
|
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||||
|
Title: "Login failed - ClusterCockpit",
|
||||||
|
MsgType: "alert-warning",
|
||||||
|
Message: err.Error(),
|
||||||
|
Build: buildInfo,
|
||||||
|
Infos: info,
|
||||||
|
})
|
||||||
|
})).Methods(http.MethodPost)
|
||||||
|
|
||||||
|
router.Handle("/jwt-login", authHandle.Login(
|
||||||
|
// On success: Handled within Login()
|
||||||
|
// On failure:
|
||||||
|
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||||
|
Title: "Login failed - ClusterCockpit",
|
||||||
|
MsgType: "alert-warning",
|
||||||
|
Message: err.Error(),
|
||||||
|
Build: buildInfo,
|
||||||
|
Infos: info,
|
||||||
|
})
|
||||||
|
}))
|
||||||
|
|
||||||
|
router.Handle("/logout", authHandle.Logout(
|
||||||
|
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||||
|
Title: "Bye - ClusterCockpit",
|
||||||
|
MsgType: "alert-info",
|
||||||
|
Message: "Logout successful",
|
||||||
|
Build: buildInfo,
|
||||||
|
Infos: info,
|
||||||
|
})
|
||||||
|
}))).Methods(http.MethodPost)
|
||||||
|
|
||||||
|
secured.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.Auth(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
|
||||||
|
// On failure:
|
||||||
|
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||||
|
Title: "Authentication failed - ClusterCockpit",
|
||||||
|
MsgType: "alert-danger",
|
||||||
|
Message: err.Error(),
|
||||||
|
Build: buildInfo,
|
||||||
|
Infos: info,
|
||||||
|
Redirect: r.RequestURI,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
securedapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthApi(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
|
||||||
|
userapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthUserApi(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
|
||||||
|
configapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthConfigApi(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
|
||||||
|
frontendapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthFrontendApi(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagDev {
|
||||||
|
router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||||
|
router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||||
|
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||||
|
}
|
||||||
|
secured.Handle("/query", graphQLEndpoint)
|
||||||
|
|
||||||
|
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||||
|
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Mount all /monitoring/... and /api/... routes.
|
||||||
|
routerConfig.SetupRoutes(secured, buildInfo)
|
||||||
|
apiHandle.MountApiRoutes(securedapi)
|
||||||
|
apiHandle.MountUserApiRoutes(userapi)
|
||||||
|
apiHandle.MountConfigApiRoutes(configapi)
|
||||||
|
apiHandle.MountFrontendApiRoutes(frontendapi)
|
||||||
|
|
||||||
|
if config.Keys.EmbedStaticFiles {
|
||||||
|
if i, err := os.Stat("./var/img"); err == nil {
|
||||||
|
if i.IsDir() {
|
||||||
|
log.Info("Use local directory for static images")
|
||||||
|
router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
router.PathPrefix("/").Handler(web.ServeFiles())
|
||||||
|
} else {
|
||||||
|
router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||||
|
}
|
||||||
|
|
||||||
|
router.Use(handlers.CompressHandler)
|
||||||
|
router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||||
|
router.Use(handlers.CORS(
|
||||||
|
handlers.AllowCredentials(),
|
||||||
|
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||||
|
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||||
|
handlers.AllowedOrigins([]string{"*"})))
|
||||||
|
}
|
||||||
|
|
||||||
|
func serverStart() {
|
||||||
|
handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||||
|
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||||
|
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||||
|
params.Request.Method, params.URL.RequestURI(),
|
||||||
|
params.StatusCode, float32(params.Size)/1024,
|
||||||
|
time.Since(params.TimeStamp).Milliseconds())
|
||||||
|
} else {
|
||||||
|
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||||
|
params.Request.Method, params.URL.RequestURI(),
|
||||||
|
params.StatusCode, float32(params.Size)/1024,
|
||||||
|
time.Since(params.TimeStamp).Milliseconds())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
server = &http.Server{
|
||||||
|
ReadTimeout: 20 * time.Second,
|
||||||
|
WriteTimeout: 20 * time.Second,
|
||||||
|
Handler: handler,
|
||||||
|
Addr: config.Keys.Addr,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start http or https server
|
||||||
|
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("starting http listener failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
||||||
|
go func() {
|
||||||
|
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
||||||
|
cert, err := tls.LoadX509KeyPair(
|
||||||
|
config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("loading X509 keypair failed: %v", err)
|
||||||
|
}
|
||||||
|
listener = tls.NewListener(listener, &tls.Config{
|
||||||
|
Certificates: []tls.Certificate{cert},
|
||||||
|
CipherSuites: []uint16{
|
||||||
|
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||||
|
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||||
|
},
|
||||||
|
MinVersion: tls.VersionTLS12,
|
||||||
|
PreferServerCipherSuites: true,
|
||||||
|
})
|
||||||
|
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
|
||||||
|
}
|
||||||
|
//
|
||||||
|
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||||
|
// be established first, then the user can be changed, and after that,
|
||||||
|
// the actual http server can be started.
|
||||||
|
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||||
|
log.Fatalf("error while preparing server start: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||||
|
log.Fatalf("starting server failed: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func serverShutdown() {
|
||||||
|
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||||
|
server.Shutdown(context.Background())
|
||||||
|
|
||||||
|
// Then, wait for any async archivings still pending...
|
||||||
|
archiver.WaitForArchiving()
|
||||||
|
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
{
|
{
|
||||||
"addr": "127.0.0.1:8080",
|
"addr": "127.0.0.1:8080",
|
||||||
|
"short-running-jobs-duration": 300,
|
||||||
"archive": {
|
"archive": {
|
||||||
"kind": "file",
|
"kind": "file",
|
||||||
"path": "./var/job-archive"
|
"path": "./var/job-archive"
|
||||||
@@ -7,6 +8,16 @@
|
|||||||
"jwts": {
|
"jwts": {
|
||||||
"max-age": "2000h"
|
"max-age": "2000h"
|
||||||
},
|
},
|
||||||
|
"enable-resampling": {
|
||||||
|
"trigger": 30,
|
||||||
|
"resolutions": [
|
||||||
|
600,
|
||||||
|
300,
|
||||||
|
120,
|
||||||
|
60
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"emission-constant": 317,
|
||||||
"clusters": [
|
"clusters": [
|
||||||
{
|
{
|
||||||
"name": "fritz",
|
"name": "fritz",
|
||||||
|
|||||||
69
configs/config-mariadb.json
Normal file
69
configs/config-mariadb.json
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
{
|
||||||
|
"addr": "127.0.0.1:8080",
|
||||||
|
"short-running-jobs-duration": 300,
|
||||||
|
"archive": {
|
||||||
|
"kind": "file",
|
||||||
|
"path": "./var/job-archive"
|
||||||
|
},
|
||||||
|
"jwts": {
|
||||||
|
"max-age": "2000h"
|
||||||
|
},
|
||||||
|
"db-driver": "mysql",
|
||||||
|
"db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit",
|
||||||
|
"enable-resampling": {
|
||||||
|
"trigger": 30,
|
||||||
|
"resolutions": [
|
||||||
|
600,
|
||||||
|
300,
|
||||||
|
120,
|
||||||
|
60
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"emission-constant": 317,
|
||||||
|
"clusters": [
|
||||||
|
{
|
||||||
|
"name": "fritz",
|
||||||
|
"metricDataRepository": {
|
||||||
|
"kind": "cc-metric-store",
|
||||||
|
"url": "http://localhost:8082",
|
||||||
|
"token": ""
|
||||||
|
},
|
||||||
|
"filterRanges": {
|
||||||
|
"numNodes": {
|
||||||
|
"from": 1,
|
||||||
|
"to": 64
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
"from": 0,
|
||||||
|
"to": 86400
|
||||||
|
},
|
||||||
|
"startTime": {
|
||||||
|
"from": "2022-01-01T00:00:00Z",
|
||||||
|
"to": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "alex",
|
||||||
|
"metricDataRepository": {
|
||||||
|
"kind": "cc-metric-store",
|
||||||
|
"url": "http://localhost:8082",
|
||||||
|
"token": ""
|
||||||
|
},
|
||||||
|
"filterRanges": {
|
||||||
|
"numNodes": {
|
||||||
|
"from": 1,
|
||||||
|
"to": 64
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
"from": 0,
|
||||||
|
"to": 86400
|
||||||
|
},
|
||||||
|
"startTime": {
|
||||||
|
"from": "2022-01-01T00:00:00Z",
|
||||||
|
"to": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -117,10 +117,12 @@ foreach my $ln (split("\n", $topo)) {
|
|||||||
|
|
||||||
my $node;
|
my $node;
|
||||||
my @sockets;
|
my @sockets;
|
||||||
|
my @nodeCores;
|
||||||
foreach my $socket ( @{$DOMAINS{socket}} ) {
|
foreach my $socket ( @{$DOMAINS{socket}} ) {
|
||||||
push @sockets, "[".join(",", @{$socket})."]";
|
push @sockets, "[".join(",", @{$socket})."]";
|
||||||
$node .= join(",", @{$socket})
|
push @nodeCores, join(",", @{$socket});
|
||||||
}
|
}
|
||||||
|
$node = join(",", @nodeCores);
|
||||||
$INFO{sockets} = join(",\n", @sockets);
|
$INFO{sockets} = join(",\n", @sockets);
|
||||||
|
|
||||||
my @memDomains;
|
my @memDomains;
|
||||||
@@ -212,9 +214,27 @@ print <<"END";
|
|||||||
"socketsPerNode": $INFO{socketsPerNode},
|
"socketsPerNode": $INFO{socketsPerNode},
|
||||||
"coresPerSocket": $INFO{coresPerSocket},
|
"coresPerSocket": $INFO{coresPerSocket},
|
||||||
"threadsPerCore": $INFO{threadsPerCore},
|
"threadsPerCore": $INFO{threadsPerCore},
|
||||||
"flopRateScalar": $flopsScalar,
|
"flopRateScalar": {
|
||||||
"flopRateSimd": $flopsSimd,
|
"unit": {
|
||||||
"memoryBandwidth": $memBw,
|
"base": "F/s",
|
||||||
|
"prefix": "G"
|
||||||
|
},
|
||||||
|
"value": $flopsScalar
|
||||||
|
},
|
||||||
|
"flopRateSimd": {
|
||||||
|
"unit": {
|
||||||
|
"base": "F/s",
|
||||||
|
"prefix": "G"
|
||||||
|
},
|
||||||
|
"value": $flopsSimd
|
||||||
|
},
|
||||||
|
"memoryBandwidth": {
|
||||||
|
"unit": {
|
||||||
|
"base": "B/s",
|
||||||
|
"prefix": "G"
|
||||||
|
},
|
||||||
|
"value": $memBw
|
||||||
|
},
|
||||||
"nodes": "<FILL IN NODE RANGES>",
|
"nodes": "<FILL IN NODE RANGES>",
|
||||||
"topology": {
|
"topology": {
|
||||||
"node": [$node],
|
"node": [$node],
|
||||||
|
|||||||
106
go.mod
106
go.mod
@@ -1,91 +1,89 @@
|
|||||||
module github.com/ClusterCockpit/cc-backend
|
module github.com/ClusterCockpit/cc-backend
|
||||||
|
|
||||||
go 1.18
|
go 1.23.5
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/99designs/gqlgen v0.17.45
|
github.com/99designs/gqlgen v0.17.63
|
||||||
github.com/ClusterCockpit/cc-units v0.4.0
|
github.com/ClusterCockpit/cc-units v0.4.0
|
||||||
github.com/Masterminds/squirrel v1.5.3
|
github.com/Masterminds/squirrel v1.5.4
|
||||||
github.com/coreos/go-oidc/v3 v3.9.0
|
github.com/coreos/go-oidc/v3 v3.11.0
|
||||||
github.com/go-co-op/gocron v1.25.0
|
github.com/go-co-op/gocron/v2 v2.9.0
|
||||||
github.com/go-ldap/ldap/v3 v3.4.4
|
github.com/go-ldap/ldap/v3 v3.4.8
|
||||||
github.com/go-sql-driver/mysql v1.7.0
|
github.com/go-sql-driver/mysql v1.8.1
|
||||||
github.com/golang-jwt/jwt/v5 v5.2.1
|
github.com/golang-jwt/jwt/v5 v5.2.1
|
||||||
github.com/golang-migrate/migrate/v4 v4.15.2
|
github.com/golang-migrate/migrate/v4 v4.17.1
|
||||||
github.com/google/gops v0.3.27
|
github.com/google/gops v0.3.28
|
||||||
github.com/gorilla/handlers v1.5.1
|
github.com/gorilla/handlers v1.5.2
|
||||||
github.com/gorilla/mux v1.8.0
|
github.com/gorilla/mux v1.8.1
|
||||||
github.com/gorilla/sessions v1.2.1
|
github.com/gorilla/sessions v1.4.0
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.12.2
|
github.com/influxdata/influxdb-client-go/v2 v2.13.0
|
||||||
github.com/jmoiron/sqlx v1.3.5
|
github.com/jmoiron/sqlx v1.4.0
|
||||||
github.com/mattn/go-sqlite3 v1.14.16
|
github.com/mattn/go-sqlite3 v1.14.22
|
||||||
github.com/prometheus/client_golang v1.14.0
|
github.com/prometheus/client_golang v1.19.1
|
||||||
github.com/prometheus/common v0.40.0
|
github.com/prometheus/common v0.55.0
|
||||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||||
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
|
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
|
||||||
github.com/swaggo/http-swagger v1.3.3
|
github.com/swaggo/http-swagger v1.3.4
|
||||||
github.com/swaggo/swag v1.16.3
|
github.com/swaggo/swag v1.16.4
|
||||||
github.com/vektah/gqlparser/v2 v2.5.11
|
github.com/vektah/gqlparser/v2 v2.5.22
|
||||||
golang.org/x/crypto v0.21.0
|
golang.org/x/crypto v0.32.0
|
||||||
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
|
golang.org/x/exp v0.0.0-20240707233637-46b078467d37
|
||||||
golang.org/x/oauth2 v0.13.0
|
golang.org/x/oauth2 v0.21.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
filippo.io/edwards25519 v1.1.0 // indirect
|
||||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||||
github.com/agnivade/levenshtein v1.1.1 // indirect
|
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
github.com/containerd/containerd v1.6.26 // indirect
|
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.4 // indirect
|
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||||
github.com/deepmap/oapi-codegen v1.12.4 // indirect
|
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
|
||||||
github.com/felixge/httpsnoop v1.0.3 // indirect
|
github.com/go-jose/go-jose/v4 v4.0.3 // indirect
|
||||||
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
|
|
||||||
github.com/go-jose/go-jose/v3 v3.0.3 // indirect
|
|
||||||
github.com/go-openapi/jsonpointer v0.21.0 // indirect
|
github.com/go-openapi/jsonpointer v0.21.0 // indirect
|
||||||
github.com/go-openapi/jsonreference v0.21.0 // indirect
|
github.com/go-openapi/jsonreference v0.21.0 // indirect
|
||||||
github.com/go-openapi/spec v0.21.0 // indirect
|
github.com/go-openapi/spec v0.21.0 // indirect
|
||||||
github.com/go-openapi/swag v0.23.0 // indirect
|
github.com/go-openapi/swag v0.23.0 // indirect
|
||||||
github.com/golang/protobuf v1.5.3 // indirect
|
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
github.com/gorilla/securecookie v1.1.1 // indirect
|
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||||
github.com/gorilla/websocket v1.5.0 // indirect
|
github.com/gorilla/websocket v1.5.3 // indirect
|
||||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
||||||
|
github.com/jonboulle/clockwork v0.4.0 // indirect
|
||||||
github.com/josharian/intern v1.0.0 // indirect
|
github.com/josharian/intern v1.0.0 // indirect
|
||||||
github.com/jpillora/backoff v1.0.0 // indirect
|
github.com/jpillora/backoff v1.0.0 // indirect
|
||||||
github.com/json-iterator/go v1.1.12 // indirect
|
github.com/json-iterator/go v1.1.12 // indirect
|
||||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||||
github.com/mailru/easyjson v0.7.7 // indirect
|
github.com/mailru/easyjson v0.9.0 // indirect
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
|
||||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
||||||
|
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||||
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
|
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
|
||||||
github.com/pkg/errors v0.9.1 // indirect
|
github.com/prometheus/client_model v0.6.1 // indirect
|
||||||
github.com/prometheus/client_model v0.3.0 // indirect
|
github.com/prometheus/procfs v0.15.1 // indirect
|
||||||
github.com/prometheus/procfs v0.9.0 // indirect
|
|
||||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||||
github.com/sosodev/duration v1.2.0 // indirect
|
github.com/sosodev/duration v1.3.1 // indirect
|
||||||
github.com/swaggo/files v1.0.0 // indirect
|
github.com/swaggo/files v1.0.1 // indirect
|
||||||
github.com/urfave/cli/v2 v2.27.1 // indirect
|
github.com/urfave/cli/v2 v2.27.5 // indirect
|
||||||
github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
|
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
|
||||||
go.uber.org/atomic v1.10.0 // indirect
|
go.uber.org/atomic v1.11.0 // indirect
|
||||||
golang.org/x/mod v0.16.0 // indirect
|
golang.org/x/mod v0.22.0 // indirect
|
||||||
golang.org/x/net v0.22.0 // indirect
|
golang.org/x/net v0.34.0 // indirect
|
||||||
golang.org/x/sys v0.18.0 // indirect
|
golang.org/x/sync v0.10.0 // indirect
|
||||||
golang.org/x/text v0.14.0 // indirect
|
golang.org/x/sys v0.29.0 // indirect
|
||||||
golang.org/x/tools v0.19.0 // indirect
|
golang.org/x/text v0.21.0 // indirect
|
||||||
google.golang.org/appengine v1.6.8 // indirect
|
golang.org/x/tools v0.29.0 // indirect
|
||||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect
|
google.golang.org/protobuf v1.36.1 // indirect
|
||||||
google.golang.org/protobuf v1.33.0 // indirect
|
|
||||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
sigs.k8s.io/yaml v1.4.0 // indirect
|
sigs.k8s.io/yaml v1.4.0 // indirect
|
||||||
|
|||||||
61
gqlgen.yml
61
gqlgen.yml
@@ -61,23 +61,50 @@ models:
|
|||||||
fields:
|
fields:
|
||||||
partitions:
|
partitions:
|
||||||
resolver: true
|
resolver: true
|
||||||
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
NullableFloat:
|
||||||
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
||||||
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
MetricScope:
|
||||||
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
||||||
|
MetricValue:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
||||||
|
JobStatistics:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
||||||
|
GlobalMetricListItem:
|
||||||
|
{
|
||||||
|
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
|
||||||
|
}
|
||||||
|
ClusterSupport:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
|
||||||
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
|
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
|
||||||
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
Resource:
|
||||||
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
||||||
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
JobState:
|
||||||
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
||||||
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
TimeRange:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
||||||
|
IntRange:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
||||||
|
JobMetric:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
||||||
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
|
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
|
||||||
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
|
MetricStatistics:
|
||||||
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
{
|
||||||
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
|
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
|
||||||
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
}
|
||||||
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
MetricConfig:
|
||||||
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
||||||
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
SubClusterConfig:
|
||||||
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
{
|
||||||
|
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
|
||||||
|
}
|
||||||
|
Accelerator:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
||||||
|
Topology:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
||||||
|
FilterRanges:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
||||||
|
SubCluster:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
||||||
|
StatsSeries:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
||||||
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
|
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=ClusterCockpit Web Server (Go edition)
|
Description=ClusterCockpit Web Server
|
||||||
Documentation=https://github.com/ClusterCockpit/cc-backend
|
Documentation=https://github.com/ClusterCockpit/cc-backend
|
||||||
Wants=network-online.target
|
Wants=network-online.target
|
||||||
After=network-online.target
|
After=network-online.target
|
||||||
|
|||||||
@@ -14,13 +14,16 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
@@ -117,7 +120,7 @@ func setup(t *testing.T) *api.RestApi {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,23 +147,20 @@ func setup(t *testing.T) *api.RestApi {
|
|||||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||||
|
|
||||||
repository.Connect("sqlite3", dbfilepath)
|
repository.Connect("sqlite3", dbfilepath)
|
||||||
db := repository.GetConnection()
|
|
||||||
|
|
||||||
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
if err := metricdata.Init(); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
jobRepo := repository.GetJobRepository()
|
archiver.Start(repository.GetJobRepository())
|
||||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
auth.Init()
|
||||||
|
graph.Init()
|
||||||
|
|
||||||
return &api.RestApi{
|
return api.New()
|
||||||
JobRepository: resolver.Repo,
|
|
||||||
Resolver: resolver,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func cleanup() {
|
func cleanup() {
|
||||||
@@ -175,7 +175,6 @@ func cleanup() {
|
|||||||
func TestRestApi(t *testing.T) {
|
func TestRestApi(t *testing.T) {
|
||||||
restapi := setup(t)
|
restapi := setup(t)
|
||||||
t.Cleanup(cleanup)
|
t.Cleanup(cleanup)
|
||||||
|
|
||||||
testData := schema.JobData{
|
testData := schema.JobData{
|
||||||
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
||||||
schema.MetricScopeNode: {
|
schema.MetricScopeNode: {
|
||||||
@@ -192,12 +191,18 @@ func TestRestApi(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||||
return testData, nil
|
return testData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
r := mux.NewRouter()
|
r := mux.NewRouter()
|
||||||
restapi.MountRoutes(r)
|
r.PathPrefix("/api").Subrouter()
|
||||||
|
r.StrictSlash(true)
|
||||||
|
restapi.MountApiRoutes(r)
|
||||||
|
|
||||||
|
var TestJobId int64 = 123
|
||||||
|
var TestClusterName string = "testcluster"
|
||||||
|
var TestStartTime int64 = 123456789
|
||||||
|
|
||||||
const startJobBody string = `{
|
const startJobBody string = `{
|
||||||
"jobId": 123,
|
"jobId": 123,
|
||||||
@@ -213,7 +218,7 @@ func TestRestApi(t *testing.T) {
|
|||||||
"exclusive": 1,
|
"exclusive": 1,
|
||||||
"monitoringStatus": 1,
|
"monitoringStatus": 1,
|
||||||
"smt": 1,
|
"smt": 1,
|
||||||
"tags": [{ "type": "testTagType", "name": "testTagName" }],
|
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
|
||||||
"resources": [
|
"resources": [
|
||||||
{
|
{
|
||||||
"hostname": "host123",
|
"hostname": "host123",
|
||||||
@@ -224,28 +229,33 @@ func TestRestApi(t *testing.T) {
|
|||||||
"startTime": 123456789
|
"startTime": 123456789
|
||||||
}`
|
}`
|
||||||
|
|
||||||
var dbid int64
|
const contextUserKey repository.ContextKey = "user"
|
||||||
|
contextUserValue := &schema.User{
|
||||||
|
Username: "testuser",
|
||||||
|
Projects: make([]string, 0),
|
||||||
|
Roles: []string{"user"},
|
||||||
|
AuthType: 0,
|
||||||
|
AuthSource: 2,
|
||||||
|
}
|
||||||
|
|
||||||
if ok := t.Run("StartJob", func(t *testing.T) {
|
if ok := t.Run("StartJob", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusCreated {
|
if response.StatusCode != http.StatusCreated {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
}
|
}
|
||||||
|
resolver := graph.GetResolverInstance()
|
||||||
var res api.StartJobApiResponse
|
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||||
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
|
job.Tags, err = resolver.Job().Tags(ctx, job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -269,11 +279,9 @@ func TestRestApi(t *testing.T) {
|
|||||||
t.Fatalf("unexpected job properties: %#v", job)
|
t.Fatalf("unexpected job properties: %#v", job)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
|
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
|
||||||
t.Fatalf("unexpected tags: %#v", job.Tags)
|
t.Fatalf("unexpected tags: %#v", job.Tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
dbid = res.DBID
|
|
||||||
}); !ok {
|
}); !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -289,17 +297,19 @@ func TestRestApi(t *testing.T) {
|
|||||||
|
|
||||||
var stoppedJob *schema.Job
|
var stoppedJob *schema.Job
|
||||||
if ok := t.Run("StopJob", func(t *testing.T) {
|
if ok := t.Run("StopJob", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusOK {
|
if response.StatusCode != http.StatusOK {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
restapi.JobRepository.WaitForArchiving()
|
archiver.WaitForArchiving()
|
||||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
|
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -327,7 +337,7 @@ func TestRestApi(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
t.Run("CheckArchive", func(t *testing.T) {
|
t.Run("CheckArchive", func(t *testing.T) {
|
||||||
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
|
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -341,10 +351,12 @@ func TestRestApi(t *testing.T) {
|
|||||||
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
|
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
|
||||||
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
|
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusUnprocessableEntity {
|
if response.StatusCode != http.StatusUnprocessableEntity {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
@@ -371,10 +383,12 @@ func TestRestApi(t *testing.T) {
|
|||||||
}`
|
}`
|
||||||
|
|
||||||
ok := t.Run("StartJobFailed", func(t *testing.T) {
|
ok := t.Run("StartJobFailed", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusCreated {
|
if response.StatusCode != http.StatusCreated {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
@@ -384,6 +398,8 @@ func TestRestApi(t *testing.T) {
|
|||||||
t.Fatal("subtest failed")
|
t.Fatal("subtest failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
|
||||||
const stopJobBodyFailed string = `{
|
const stopJobBodyFailed string = `{
|
||||||
"jobId": 12345,
|
"jobId": 12345,
|
||||||
"cluster": "testcluster",
|
"cluster": "testcluster",
|
||||||
@@ -393,16 +409,18 @@ func TestRestApi(t *testing.T) {
|
|||||||
}`
|
}`
|
||||||
|
|
||||||
ok = t.Run("StopJobFailed", func(t *testing.T) {
|
ok = t.Run("StopJobFailed", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusOK {
|
if response.StatusCode != http.StatusOK {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
restapi.JobRepository.WaitForArchiving()
|
archiver.WaitForArchiving()
|
||||||
jobid, cluster := int64(12345), "testcluster"
|
jobid, cluster := int64(12345), "testcluster"
|
||||||
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -208,7 +208,7 @@ const docTemplate = `{
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DeleteJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -278,7 +278,7 @@ const docTemplate = `{
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DeleteJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -348,7 +348,7 @@ const docTemplate = `{
|
|||||||
"200": {
|
"200": {
|
||||||
"description": "Success message",
|
"description": "Success message",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.DeleteJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -493,7 +493,7 @@ const docTemplate = `{
|
|||||||
"201": {
|
"201": {
|
||||||
"description": "Job added successfully",
|
"description": "Job added successfully",
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.StartJobApiResponse"
|
"$ref": "#/definitions/api.DefaultJobApiResponse"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"400": {
|
"400": {
|
||||||
@@ -587,89 +587,7 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"422": {
|
"422": {
|
||||||
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
|
"description": "Unprocessable Entity: job has already been stopped",
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"500": {
|
|
||||||
"description": "Internal Server Error",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"/jobs/stop_job/{id}": {
|
|
||||||
"post": {
|
|
||||||
"security": [
|
|
||||||
{
|
|
||||||
"ApiKeyAuth": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"description": "Job to stop is specified by database ID. Only stopTime and final state are required in request body.\nReturns full job resource information according to 'JobMeta' scheme.",
|
|
||||||
"consumes": [
|
|
||||||
"application/json"
|
|
||||||
],
|
|
||||||
"produces": [
|
|
||||||
"application/json"
|
|
||||||
],
|
|
||||||
"tags": [
|
|
||||||
"Job add and modify"
|
|
||||||
],
|
|
||||||
"summary": "Marks job as completed and triggers archiving",
|
|
||||||
"parameters": [
|
|
||||||
{
|
|
||||||
"type": "integer",
|
|
||||||
"description": "Database ID of Job",
|
|
||||||
"name": "id",
|
|
||||||
"in": "path",
|
|
||||||
"required": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"description": "stopTime and final state in request body",
|
|
||||||
"name": "request",
|
|
||||||
"in": "body",
|
|
||||||
"required": true,
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.StopJobApiRequest"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"responses": {
|
|
||||||
"200": {
|
|
||||||
"description": "Job resource",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/schema.JobMeta"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"400": {
|
|
||||||
"description": "Bad Request",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"401": {
|
|
||||||
"description": "Unauthorized",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"403": {
|
|
||||||
"description": "Forbidden",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"404": {
|
|
||||||
"description": "Resource not found",
|
|
||||||
"schema": {
|
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"422": {
|
|
||||||
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
|
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/definitions/api.ErrorResponse"
|
"$ref": "#/definitions/api.ErrorResponse"
|
||||||
}
|
}
|
||||||
@@ -690,7 +608,7 @@ const docTemplate = `{
|
|||||||
"ApiKeyAuth": []
|
"ApiKeyAuth": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
|
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nTag Scope for frontend visibility will default to \"global\" if none entered, other options: \"admin\" or specific username.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
|
||||||
"consumes": [
|
"consumes": [
|
||||||
"application/json"
|
"application/json"
|
||||||
],
|
],
|
||||||
@@ -915,6 +833,72 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/notice/": {
|
||||||
|
"post": {
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"ApiKeyAuth": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Modifies the content of notice.txt, shown as notice box on the homepage.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
|
||||||
|
"consumes": [
|
||||||
|
"multipart/form-data"
|
||||||
|
],
|
||||||
|
"produces": [
|
||||||
|
"text/plain"
|
||||||
|
],
|
||||||
|
"tags": [
|
||||||
|
"User"
|
||||||
|
],
|
||||||
|
"summary": "Updates or empties the notice box content",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"type": "string",
|
||||||
|
"description": "Priority 1: New content to display",
|
||||||
|
"name": "new-content",
|
||||||
|
"in": "formData"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "Success Response Message",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Bad Request",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Unauthorized",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"403": {
|
||||||
|
"description": "Forbidden",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"422": {
|
||||||
|
"description": "Unprocessable Entity: The user could not be updated",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"500": {
|
||||||
|
"description": "Internal Server Error",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/user/{id}": {
|
"/user/{id}": {
|
||||||
"post": {
|
"post": {
|
||||||
"security": [
|
"security": [
|
||||||
@@ -1283,6 +1267,11 @@ const docTemplate = `{
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "Testjob"
|
"example": "Testjob"
|
||||||
},
|
},
|
||||||
|
"scope": {
|
||||||
|
"description": "Tag Scope for Frontend Display",
|
||||||
|
"type": "string",
|
||||||
|
"example": "global"
|
||||||
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"description": "Tag Type",
|
"description": "Tag Type",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@@ -1290,6 +1279,14 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"api.DefaultJobApiResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"msg": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"api.DeleteJobApiRequest": {
|
"api.DeleteJobApiRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
@@ -1313,14 +1310,6 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"api.DeleteJobApiResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"msg": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"api.EditMetaRequest": {
|
"api.EditMetaRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -1407,15 +1396,6 @@ const docTemplate = `{
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"api.StartJobApiResponse": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"id": {
|
|
||||||
"description": "Database ID of new job",
|
|
||||||
"type": "integer"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"api.StopJobApiRequest": {
|
"api.StopJobApiRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"required": [
|
"required": [
|
||||||
@@ -1424,17 +1404,14 @@ const docTemplate = `{
|
|||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"cluster": {
|
"cluster": {
|
||||||
"description": "Cluster of job",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "fritz"
|
"example": "fritz"
|
||||||
},
|
},
|
||||||
"jobId": {
|
"jobId": {
|
||||||
"description": "Cluster Job ID of job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"jobState": {
|
"jobState": {
|
||||||
"description": "Final job state",
|
|
||||||
"allOf": [
|
"allOf": [
|
||||||
{
|
{
|
||||||
"$ref": "#/definitions/schema.JobState"
|
"$ref": "#/definitions/schema.JobState"
|
||||||
@@ -1443,12 +1420,10 @@ const docTemplate = `{
|
|||||||
"example": "completed"
|
"example": "completed"
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"description": "Start Time of job as epoch",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 1649723812
|
"example": 1649723812
|
||||||
},
|
},
|
||||||
"stopTime": {
|
"stopTime": {
|
||||||
"description": "Stop Time of job as epoch",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 1649763839
|
"example": 1649763839
|
||||||
}
|
}
|
||||||
@@ -1493,12 +1468,10 @@ const docTemplate = `{
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"arrayJobId": {
|
"arrayJobId": {
|
||||||
"description": "The unique identifier of an array job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"cluster": {
|
"cluster": {
|
||||||
"description": "The unique identifier of a cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "fritz"
|
"example": "fritz"
|
||||||
},
|
},
|
||||||
@@ -1506,33 +1479,39 @@ const docTemplate = `{
|
|||||||
"$ref": "#/definitions/schema.JobLinkResultList"
|
"$ref": "#/definitions/schema.JobLinkResultList"
|
||||||
},
|
},
|
||||||
"duration": {
|
"duration": {
|
||||||
"description": "Duration of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 43200
|
"example": 43200
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"energyFootprint": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"exclusive": {
|
"exclusive": {
|
||||||
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 2,
|
"maximum": 2,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
"flopsAnyAvg": {
|
"footprint": {
|
||||||
"description": "FlopsAnyAvg as Float64",
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"id": {
|
"id": {
|
||||||
"description": "The unique identifier of a job in the database",
|
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
"jobId": {
|
"jobId": {
|
||||||
"description": "The unique identifier of a job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"jobState": {
|
"jobState": {
|
||||||
"description": "Final state of job",
|
|
||||||
"enum": [
|
"enum": [
|
||||||
"completed",
|
"completed",
|
||||||
"failed",
|
"failed",
|
||||||
@@ -1548,95 +1527,69 @@ const docTemplate = `{
|
|||||||
],
|
],
|
||||||
"example": "completed"
|
"example": "completed"
|
||||||
},
|
},
|
||||||
"loadAvg": {
|
|
||||||
"description": "LoadAvg as Float64",
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"memBwAvg": {
|
|
||||||
"description": "MemBwAvg as Float64",
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"memUsedMax": {
|
|
||||||
"description": "MemUsedMax as Float64",
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"metaData": {
|
"metaData": {
|
||||||
"description": "Additional information about the job",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"monitoringStatus": {
|
"monitoringStatus": {
|
||||||
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 3,
|
"maximum": 3,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
"numAcc": {
|
"numAcc": {
|
||||||
"description": "Number of accelerators used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"numHwthreads": {
|
"numHwthreads": {
|
||||||
"description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 20
|
"example": 20
|
||||||
},
|
},
|
||||||
"numNodes": {
|
"numNodes": {
|
||||||
"description": "Number of nodes used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"partition": {
|
"partition": {
|
||||||
"description": "The Slurm partition to which the job was submitted",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"project": {
|
"project": {
|
||||||
"description": "The unique identifier of a project",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd200"
|
"example": "abcd200"
|
||||||
},
|
},
|
||||||
"resources": {
|
"resources": {
|
||||||
"description": "Resources used by job",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Resource"
|
"$ref": "#/definitions/schema.Resource"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"smt": {
|
"smt": {
|
||||||
"description": "SMT threads used by job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 4
|
"example": 4
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"description": "Start time as 'time.Time' data type",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"subCluster": {
|
"subCluster": {
|
||||||
"description": "The unique identifier of a sub cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"tags": {
|
"tags": {
|
||||||
"description": "List of tags",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Tag"
|
"$ref": "#/definitions/schema.Tag"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"user": {
|
"user": {
|
||||||
"description": "The unique identifier of a user",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd100h"
|
"example": "abcd100h"
|
||||||
},
|
},
|
||||||
"walltime": {
|
"walltime": {
|
||||||
"description": "Requested walltime of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 86400
|
"example": 86400
|
||||||
@@ -1673,12 +1626,10 @@ const docTemplate = `{
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"arrayJobId": {
|
"arrayJobId": {
|
||||||
"description": "The unique identifier of an array job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"cluster": {
|
"cluster": {
|
||||||
"description": "The unique identifier of a cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "fritz"
|
"example": "fritz"
|
||||||
},
|
},
|
||||||
@@ -1686,29 +1637,39 @@ const docTemplate = `{
|
|||||||
"$ref": "#/definitions/schema.JobLinkResultList"
|
"$ref": "#/definitions/schema.JobLinkResultList"
|
||||||
},
|
},
|
||||||
"duration": {
|
"duration": {
|
||||||
"description": "Duration of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 43200
|
"example": 43200
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
"energyFootprint": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"exclusive": {
|
"exclusive": {
|
||||||
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 2,
|
"maximum": 2,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"id": {
|
"id": {
|
||||||
"description": "The unique identifier of a job in the database",
|
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
"jobId": {
|
"jobId": {
|
||||||
"description": "The unique identifier of a job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 123000
|
"example": 123000
|
||||||
},
|
},
|
||||||
"jobState": {
|
"jobState": {
|
||||||
"description": "Final state of job",
|
|
||||||
"enum": [
|
"enum": [
|
||||||
"completed",
|
"completed",
|
||||||
"failed",
|
"failed",
|
||||||
@@ -1725,91 +1686,76 @@ const docTemplate = `{
|
|||||||
"example": "completed"
|
"example": "completed"
|
||||||
},
|
},
|
||||||
"metaData": {
|
"metaData": {
|
||||||
"description": "Additional information about the job",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"monitoringStatus": {
|
"monitoringStatus": {
|
||||||
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"maximum": 3,
|
"maximum": 3,
|
||||||
"minimum": 0,
|
"minimum": 0,
|
||||||
"example": 1
|
"example": 1
|
||||||
},
|
},
|
||||||
"numAcc": {
|
"numAcc": {
|
||||||
"description": "Number of accelerators used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"numHwthreads": {
|
"numHwthreads": {
|
||||||
"description": "NumCores int32 ` + "`" + `json:\"numCores\" db:\"num_cores\" example:\"20\" minimum:\"1\"` + "`" + ` // Number of HWThreads used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 20
|
"example": 20
|
||||||
},
|
},
|
||||||
"numNodes": {
|
"numNodes": {
|
||||||
"description": "Number of nodes used (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 2
|
"example": 2
|
||||||
},
|
},
|
||||||
"partition": {
|
"partition": {
|
||||||
"description": "The Slurm partition to which the job was submitted",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"project": {
|
"project": {
|
||||||
"description": "The unique identifier of a project",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd200"
|
"example": "abcd200"
|
||||||
},
|
},
|
||||||
"resources": {
|
"resources": {
|
||||||
"description": "Resources used by job",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Resource"
|
"$ref": "#/definitions/schema.Resource"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"smt": {
|
"smt": {
|
||||||
"description": "SMT threads used by job",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"example": 4
|
"example": 4
|
||||||
},
|
},
|
||||||
"startTime": {
|
"startTime": {
|
||||||
"description": "Start epoch time stamp in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 1649723812
|
"example": 1649723812
|
||||||
},
|
},
|
||||||
"statistics": {
|
"statistics": {
|
||||||
"description": "Metric statistics of job",
|
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"additionalProperties": {
|
"additionalProperties": {
|
||||||
"$ref": "#/definitions/schema.JobStatistics"
|
"$ref": "#/definitions/schema.JobStatistics"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"subCluster": {
|
"subCluster": {
|
||||||
"description": "The unique identifier of a sub cluster",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "main"
|
"example": "main"
|
||||||
},
|
},
|
||||||
"tags": {
|
"tags": {
|
||||||
"description": "List of tags",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"$ref": "#/definitions/schema.Tag"
|
"$ref": "#/definitions/schema.Tag"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"user": {
|
"user": {
|
||||||
"description": "The unique identifier of a user",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "abcd100h"
|
"example": "abcd100h"
|
||||||
},
|
},
|
||||||
"walltime": {
|
"walltime": {
|
||||||
"description": "Requested walltime of job in seconds (Min \u003e 0)",
|
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"minimum": 1,
|
"minimum": 1,
|
||||||
"example": 86400
|
"example": 86400
|
||||||
@@ -1898,6 +1844,15 @@ const docTemplate = `{
|
|||||||
"caution": {
|
"caution": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"lowerIsBetter": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@@ -1975,22 +1930,18 @@ const docTemplate = `{
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"accelerators": {
|
"accelerators": {
|
||||||
"description": "List of of accelerator device ids",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"configuration": {
|
"configuration": {
|
||||||
"description": "The configuration options of the node",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"hostname": {
|
"hostname": {
|
||||||
"description": "Name of the host (= node)",
|
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"hwthreads": {
|
"hwthreads": {
|
||||||
"description": "List of OS processor ids",
|
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
@@ -2033,6 +1984,12 @@ const docTemplate = `{
|
|||||||
"type": "number"
|
"type": "number"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"median": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "number"
|
||||||
|
}
|
||||||
|
},
|
||||||
"min": {
|
"min": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
@@ -2056,15 +2013,33 @@ const docTemplate = `{
|
|||||||
"coresPerSocket": {
|
"coresPerSocket": {
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
|
"energyFootprint": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"flopRateScalar": {
|
"flopRateScalar": {
|
||||||
"$ref": "#/definitions/schema.MetricValue"
|
"$ref": "#/definitions/schema.MetricValue"
|
||||||
},
|
},
|
||||||
"flopRateSimd": {
|
"flopRateSimd": {
|
||||||
"$ref": "#/definitions/schema.MetricValue"
|
"$ref": "#/definitions/schema.MetricValue"
|
||||||
},
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"memoryBandwidth": {
|
"memoryBandwidth": {
|
||||||
"$ref": "#/definitions/schema.MetricValue"
|
"$ref": "#/definitions/schema.MetricValue"
|
||||||
},
|
},
|
||||||
|
"metricConfig": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/definitions/schema.MetricConfig"
|
||||||
|
}
|
||||||
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@@ -2094,6 +2069,15 @@ const docTemplate = `{
|
|||||||
"caution": {
|
"caution": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
|
"energy": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"footprint": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"lowerIsBetter": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
@@ -2113,16 +2097,17 @@ const docTemplate = `{
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"id": {
|
"id": {
|
||||||
"description": "The unique DB identifier of a tag",
|
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
"name": {
|
"name": {
|
||||||
"description": "Tag Name",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "Testjob"
|
"example": "Testjob"
|
||||||
},
|
},
|
||||||
|
"scope": {
|
||||||
|
"type": "string",
|
||||||
|
"example": "global"
|
||||||
|
},
|
||||||
"type": {
|
"type": {
|
||||||
"description": "Tag Type",
|
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"example": "Debug"
|
"example": "Debug"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,12 +19,13 @@ import (
|
|||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
@@ -53,19 +54,24 @@ import (
|
|||||||
|
|
||||||
type RestApi struct {
|
type RestApi struct {
|
||||||
JobRepository *repository.JobRepository
|
JobRepository *repository.JobRepository
|
||||||
Resolver *graph.Resolver
|
|
||||||
Authentication *auth.Authentication
|
Authentication *auth.Authentication
|
||||||
MachineStateDir string
|
MachineStateDir string
|
||||||
RepositoryMutex sync.Mutex
|
RepositoryMutex sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api *RestApi) MountRoutes(r *mux.Router) {
|
func New() *RestApi {
|
||||||
r = r.PathPrefix("/api").Subrouter()
|
return &RestApi{
|
||||||
|
JobRepository: repository.GetJobRepository(),
|
||||||
|
MachineStateDir: config.Keys.MachineStateDir,
|
||||||
|
Authentication: auth.GetAuthInstance(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) MountApiRoutes(r *mux.Router) {
|
||||||
r.StrictSlash(true)
|
r.StrictSlash(true)
|
||||||
|
|
||||||
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
||||||
r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut)
|
r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut)
|
||||||
r.HandleFunc("/jobs/stop_job/{id}", api.stopJobById).Methods(http.MethodPost, http.MethodPut)
|
|
||||||
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
|
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
|
||||||
|
|
||||||
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||||
@@ -84,31 +90,41 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
|
|||||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
|
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
|
||||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
|
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
|
||||||
|
r.StrictSlash(true)
|
||||||
|
|
||||||
|
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||||
|
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
|
||||||
|
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
|
||||||
|
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
|
||||||
|
r.StrictSlash(true)
|
||||||
|
|
||||||
if api.Authentication != nil {
|
if api.Authentication != nil {
|
||||||
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
|
|
||||||
r.HandleFunc("/roles/", api.getRoles).Methods(http.MethodGet)
|
r.HandleFunc("/roles/", api.getRoles).Methods(http.MethodGet)
|
||||||
r.HandleFunc("/users/", api.createUser).Methods(http.MethodPost, http.MethodPut)
|
r.HandleFunc("/users/", api.createUser).Methods(http.MethodPost, http.MethodPut)
|
||||||
r.HandleFunc("/users/", api.getUsers).Methods(http.MethodGet)
|
r.HandleFunc("/users/", api.getUsers).Methods(http.MethodGet)
|
||||||
r.HandleFunc("/users/", api.deleteUser).Methods(http.MethodDelete)
|
r.HandleFunc("/users/", api.deleteUser).Methods(http.MethodDelete)
|
||||||
r.HandleFunc("/user/{id}", api.updateUser).Methods(http.MethodPost)
|
r.HandleFunc("/user/{id}", api.updateUser).Methods(http.MethodPost)
|
||||||
|
r.HandleFunc("/notice/", api.editNotice).Methods(http.MethodPost)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) {
|
||||||
|
r.StrictSlash(true)
|
||||||
|
|
||||||
|
if api.Authentication != nil {
|
||||||
|
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
|
||||||
r.HandleFunc("/configuration/", api.updateConfiguration).Methods(http.MethodPost)
|
r.HandleFunc("/configuration/", api.updateConfiguration).Methods(http.MethodPost)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// StartJobApiResponse model
|
// DefaultApiResponse model
|
||||||
type StartJobApiResponse struct {
|
type DefaultJobApiResponse struct {
|
||||||
// Database ID of new job
|
|
||||||
DBID int64 `json:"id"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeleteJobApiResponse model
|
|
||||||
type DeleteJobApiResponse struct {
|
|
||||||
Message string `json:"msg"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateUserApiResponse model
|
|
||||||
type UpdateUserApiResponse struct {
|
|
||||||
Message string `json:"msg"`
|
Message string `json:"msg"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -152,6 +168,7 @@ type ApiTag struct {
|
|||||||
// Tag Type
|
// Tag Type
|
||||||
Type string `json:"type" example:"Debug"`
|
Type string `json:"type" example:"Debug"`
|
||||||
Name string `json:"name" example:"Testjob"` // Tag Name
|
Name string `json:"name" example:"Testjob"` // Tag Name
|
||||||
|
Scope string `json:"scope" example:"global"` // Tag Scope for Frontend Display
|
||||||
}
|
}
|
||||||
|
|
||||||
// ApiMeta model
|
// ApiMeta model
|
||||||
@@ -311,17 +328,10 @@ func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/ [get]
|
// @router /jobs/ [get]
|
||||||
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
withMetadata := false
|
withMetadata := false
|
||||||
filter := &model.JobFilter{}
|
filter := &model.JobFilter{}
|
||||||
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
|
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
|
||||||
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
|
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
|
||||||
|
|
||||||
for key, vals := range r.URL.Query() {
|
for key, vals := range r.URL.Query() {
|
||||||
switch key {
|
switch key {
|
||||||
@@ -400,7 +410,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
|||||||
StartTime: job.StartTime.Unix(),
|
StartTime: job.StartTime.Unix(),
|
||||||
}
|
}
|
||||||
|
|
||||||
res.Tags, err = api.JobRepository.GetTags(&job.ID)
|
res.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(err, http.StatusInternalServerError, rw)
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
return
|
return
|
||||||
@@ -434,7 +444,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// getJobById godoc
|
// getCompleteJobById godoc
|
||||||
// @summary Get job meta and optional all metric data
|
// @summary Get job meta and optional all metric data
|
||||||
// @tags Job query
|
// @tags Job query
|
||||||
// @description Job to get is specified by database ID
|
// @description Job to get is specified by database ID
|
||||||
@@ -452,14 +462,6 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/{id} [get]
|
// @router /jobs/{id} [get]
|
||||||
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
|
|
||||||
handleError(fmt.Errorf("missing role: %v",
|
|
||||||
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch job from db
|
// Fetch job from db
|
||||||
id, ok := mux.Vars(r)["id"]
|
id, ok := mux.Vars(r)["id"]
|
||||||
var job *schema.Job
|
var job *schema.Job
|
||||||
@@ -471,17 +473,17 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
job, err = api.JobRepository.FindById(id)
|
job, err = api.JobRepository.FindById(r.Context(), id) // Get Job from Repo by ID
|
||||||
} else {
|
} else {
|
||||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
handleError(fmt.Errorf("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Tags, err = api.JobRepository.GetTags(&job.ID)
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(err, http.StatusInternalServerError, rw)
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
return
|
return
|
||||||
@@ -503,10 +505,17 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
|||||||
|
|
||||||
var data schema.JobData
|
var data schema.JobData
|
||||||
|
|
||||||
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
resolution := 0
|
||||||
|
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
resolution = max(resolution, mc.Timestep)
|
||||||
|
}
|
||||||
|
|
||||||
if r.URL.Query().Get("all-metrics") == "true" {
|
if r.URL.Query().Get("all-metrics") == "true" {
|
||||||
data, err = metricdata.LoadData(job, nil, scopes, r.Context())
|
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading job data")
|
log.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -546,14 +555,6 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/{id} [post]
|
// @router /jobs/{id} [post]
|
||||||
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
|
|
||||||
handleError(fmt.Errorf("missing role: %v",
|
|
||||||
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch job from db
|
// Fetch job from db
|
||||||
id, ok := mux.Vars(r)["id"]
|
id, ok := mux.Vars(r)["id"]
|
||||||
var job *schema.Job
|
var job *schema.Job
|
||||||
@@ -565,17 +566,17 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
job, err = api.JobRepository.FindById(id)
|
job, err = api.JobRepository.FindById(r.Context(), id)
|
||||||
} else {
|
} else {
|
||||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Tags, err = api.JobRepository.GetTags(&job.ID)
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(err, http.StatusInternalServerError, rw)
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
return
|
return
|
||||||
@@ -601,9 +602,16 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
|||||||
scopes = []schema.MetricScope{"node"}
|
scopes = []schema.MetricScope{"node"}
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := metricdata.LoadData(job, metrics, scopes, r.Context())
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
resolution := 0
|
||||||
|
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
resolution = max(resolution, mc.Timestep)
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading job data")
|
log.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -651,19 +659,13 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/edit_meta/{id} [post]
|
// @router /jobs/edit_meta/{id} [post]
|
||||||
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
job, err := api.JobRepository.FindById(iid)
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
return
|
return
|
||||||
@@ -689,6 +691,7 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @summary Adds one or more tags to a job
|
// @summary Adds one or more tags to a job
|
||||||
// @tags Job add and modify
|
// @tags Job add and modify
|
||||||
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
||||||
|
// @description Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
|
||||||
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
|
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
|
||||||
// @accept json
|
// @accept json
|
||||||
// @produce json
|
// @produce json
|
||||||
@@ -702,26 +705,19 @@ func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/tag_job/{id} [post]
|
// @router /jobs/tag_job/{id} [post]
|
||||||
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
job, err := api.JobRepository.FindById(iid)
|
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
job.Tags, err = api.JobRepository.GetTags(&job.ID)
|
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
@@ -734,7 +730,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, tag := range req {
|
for _, tag := range req {
|
||||||
tagId, err := api.JobRepository.AddTagOrCreate(job.ID, tag.Type, tag.Name)
|
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), job.ID, tag.Type, tag.Name, tag.Scope)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
return
|
return
|
||||||
@@ -744,6 +740,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
|||||||
ID: tagId,
|
ID: tagId,
|
||||||
Type: tag.Type,
|
Type: tag.Type,
|
||||||
Name: tag.Name,
|
Name: tag.Name,
|
||||||
|
Scope: tag.Scope,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -760,7 +757,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @accept json
|
// @accept json
|
||||||
// @produce json
|
// @produce json
|
||||||
// @param request body schema.JobMeta true "Job to add"
|
// @param request body schema.JobMeta true "Job to add"
|
||||||
// @success 201 {object} api.StartJobApiResponse "Job added successfully"
|
// @success 201 {object} api.DefaultJobApiResponse "Job added successfully"
|
||||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
@@ -769,22 +766,14 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/start_job/ [post]
|
// @router /jobs/start_job/ [post]
|
||||||
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
req := schema.JobMeta{BaseJob: schema.JobDefaults}
|
req := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||||
if err := decode(r.Body, &req); err != nil {
|
if err := decode(r.Body, &req); err != nil {
|
||||||
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if req.State == "" {
|
|
||||||
req.State = schema.JobStateRunning
|
req.State = schema.JobStateRunning
|
||||||
}
|
|
||||||
if err := importer.SanityChecks(&req.BaseJob); err != nil {
|
if err := importer.SanityChecks(&req.BaseJob); err != nil {
|
||||||
handleError(err, http.StatusBadRequest, rw)
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
return
|
return
|
||||||
@@ -818,7 +807,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
|||||||
unlockOnce.Do(api.RepositoryMutex.Unlock)
|
unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||||
|
|
||||||
for _, tag := range req.Tags {
|
for _, tag := range req.Tags {
|
||||||
if _, err := api.JobRepository.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
|
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
|
||||||
return
|
return
|
||||||
@@ -828,68 +817,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
|||||||
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
rw.WriteHeader(http.StatusCreated)
|
rw.WriteHeader(http.StatusCreated)
|
||||||
json.NewEncoder(rw).Encode(StartJobApiResponse{
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
DBID: id,
|
Message: "success",
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// stopJobById godoc
|
|
||||||
// @summary Marks job as completed and triggers archiving
|
|
||||||
// @tags Job add and modify
|
|
||||||
// @description Job to stop is specified by database ID. Only stopTime and final state are required in request body.
|
|
||||||
// @description Returns full job resource information according to 'JobMeta' scheme.
|
|
||||||
// @accept json
|
|
||||||
// @produce json
|
|
||||||
// @param id path int true "Database ID of Job"
|
|
||||||
// @param request body api.StopJobApiRequest true "stopTime and final state in request body"
|
|
||||||
// @success 200 {object} schema.JobMeta "Job resource"
|
|
||||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
|
||||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
|
||||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
|
||||||
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
|
||||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
|
||||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
|
||||||
// @security ApiKeyAuth
|
|
||||||
// @router /jobs/stop_job/{id} [post]
|
|
||||||
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse request body: Only StopTime and State
|
|
||||||
req := StopJobApiRequest{}
|
|
||||||
if err := decode(r.Body, &req); err != nil {
|
|
||||||
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch job (that will be stopped) from db
|
|
||||||
id, ok := mux.Vars(r)["id"]
|
|
||||||
var job *schema.Job
|
|
||||||
var err error
|
|
||||||
if ok {
|
|
||||||
id, e := strconv.ParseInt(id, 10, 64)
|
|
||||||
if e != nil {
|
|
||||||
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
job, err = api.JobRepository.FindById(id)
|
|
||||||
} else {
|
|
||||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
api.checkAndHandleStopJob(rw, job, req)
|
|
||||||
}
|
|
||||||
|
|
||||||
// stopJobByRequest godoc
|
// stopJobByRequest godoc
|
||||||
// @summary Marks job as completed and triggers archiving
|
// @summary Marks job as completed and triggers archiving
|
||||||
// @tags Job add and modify
|
// @tags Job add and modify
|
||||||
@@ -902,18 +834,11 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: job has already been stopped"
|
||||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/stop_job/ [post]
|
// @router /jobs/stop_job/ [post]
|
||||||
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse request body
|
// Parse request body
|
||||||
req := StopJobApiRequest{}
|
req := StopJobApiRequest{}
|
||||||
if err := decode(r.Body, &req); err != nil {
|
if err := decode(r.Body, &req); err != nil {
|
||||||
@@ -929,8 +854,8 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// log.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
|
||||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
return
|
return
|
||||||
@@ -945,7 +870,7 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @description Job to remove is specified by database ID. This will not remove the job from the job archive.
|
// @description Job to remove is specified by database ID. This will not remove the job from the job archive.
|
||||||
// @produce json
|
// @produce json
|
||||||
// @param id path int true "Database ID of Job"
|
// @param id path int true "Database ID of Job"
|
||||||
// @success 200 {object} api.DeleteJobApiResponse "Success message"
|
// @success 200 {object} api.DefaultJobApiResponse "Success message"
|
||||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
@@ -955,11 +880,6 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/delete_job/{id} [delete]
|
// @router /jobs/delete_job/{id} [delete]
|
||||||
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fetch job (that will be stopped) from db
|
// Fetch job (that will be stopped) from db
|
||||||
id, ok := mux.Vars(r)["id"]
|
id, ok := mux.Vars(r)["id"]
|
||||||
var err error
|
var err error
|
||||||
@@ -981,7 +901,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
rw.WriteHeader(http.StatusOK)
|
rw.WriteHeader(http.StatusOK)
|
||||||
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
Message: fmt.Sprintf("Successfully deleted job %s", id),
|
Message: fmt.Sprintf("Successfully deleted job %s", id),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -993,7 +913,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @accept json
|
// @accept json
|
||||||
// @produce json
|
// @produce json
|
||||||
// @param request body api.DeleteJobApiRequest true "All fields required"
|
// @param request body api.DeleteJobApiRequest true "All fields required"
|
||||||
// @success 200 {object} api.DeleteJobApiResponse "Success message"
|
// @success 200 {object} api.DefaultJobApiResponse "Success message"
|
||||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
@@ -1003,12 +923,6 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/delete_job/ [delete]
|
// @router /jobs/delete_job/ [delete]
|
||||||
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
|
||||||
!user.HasRole(schema.RoleApi) {
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse request body
|
// Parse request body
|
||||||
req := DeleteJobApiRequest{}
|
req := DeleteJobApiRequest{}
|
||||||
if err := decode(r.Body, &req); err != nil {
|
if err := decode(r.Body, &req); err != nil {
|
||||||
@@ -1025,7 +939,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
|||||||
}
|
}
|
||||||
|
|
||||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
return
|
return
|
||||||
@@ -1039,7 +952,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
|||||||
|
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
rw.WriteHeader(http.StatusOK)
|
rw.WriteHeader(http.StatusOK)
|
||||||
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
|
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@@ -1050,7 +963,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
|||||||
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
|
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
|
||||||
// @produce json
|
// @produce json
|
||||||
// @param ts path int true "Unix epoch timestamp"
|
// @param ts path int true "Unix epoch timestamp"
|
||||||
// @success 200 {object} api.DeleteJobApiResponse "Success message"
|
// @success 200 {object} api.DefaultJobApiResponse "Success message"
|
||||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
@@ -1060,11 +973,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /jobs/delete_job_before/{ts} [delete]
|
// @router /jobs/delete_job_before/{ts} [delete]
|
||||||
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||||
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
|
|
||||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var cnt int
|
var cnt int
|
||||||
// Fetch job (that will be stopped) from db
|
// Fetch job (that will be stopped) from db
|
||||||
id, ok := mux.Vars(r)["ts"]
|
id, ok := mux.Vars(r)["ts"]
|
||||||
@@ -1088,20 +996,25 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
rw.WriteHeader(http.StatusOK)
|
rw.WriteHeader(http.StatusOK)
|
||||||
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
|
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||||
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
|
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
|
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
|
||||||
// Sanity checks
|
// Sanity checks
|
||||||
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
|
if job.State != schema.JobStateRunning {
|
||||||
handleError(errors.New("stopTime must be larger than startTime and only running jobs can be stopped"), http.StatusBadRequest, rw)
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : job has already been stopped (state is: %s)", job.JobID, job.ID, job.Cluster, job.State), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if job == nil || job.StartTime.Unix() > req.StopTime {
|
||||||
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if req.State != "" && !req.State.Valid() {
|
if req.State != "" && !req.State.Valid() {
|
||||||
handleError(fmt.Errorf("invalid job state: %#v", req.State), http.StatusBadRequest, rw)
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : invalid requested job state: %#v", job.JobID, job.ID, job.Cluster, req.State), http.StatusBadRequest, rw)
|
||||||
return
|
return
|
||||||
} else if req.State == "" {
|
} else if req.State == "" {
|
||||||
req.State = schema.JobStateCompleted
|
req.State = schema.JobStateCompleted
|
||||||
@@ -1111,11 +1024,11 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
|||||||
job.Duration = int32(req.StopTime - job.StartTime.Unix())
|
job.Duration = int32(req.StopTime - job.StartTime.Unix())
|
||||||
job.State = req.State
|
job.State = req.State
|
||||||
if err := api.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
if err := api.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||||
handleError(fmt.Errorf("marking job as stopped failed: %w", err), http.StatusInternalServerError, rw)
|
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime)
|
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||||
|
|
||||||
// Send a response (with status OK). This means that erros that happen from here on forward
|
// Send a response (with status OK). This means that erros that happen from here on forward
|
||||||
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
|
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
|
||||||
@@ -1130,7 +1043,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Trigger async archiving
|
// Trigger async archiving
|
||||||
api.JobRepository.TriggerArchiving(job)
|
archiver.TriggerArchiving(job)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||||
@@ -1158,7 +1071,8 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
|||||||
} `json:"error"`
|
} `json:"error"`
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := api.Resolver.Query().JobMetrics(r.Context(), id, metrics, scopes)
|
resolver := graph.GetResolverInstance()
|
||||||
|
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
json.NewEncoder(rw).Encode(Respone{
|
json.NewEncoder(rw).Encode(Respone{
|
||||||
Error: &struct {
|
Error: &struct {
|
||||||
@@ -1386,6 +1300,69 @@ func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// editNotice godoc
|
||||||
|
// @summary Updates or empties the notice box content
|
||||||
|
// @tags User
|
||||||
|
// @description Modifies the content of notice.txt, shown as notice box on the homepage.
|
||||||
|
// @description If more than one formValue is set then only the highest priority field is used.
|
||||||
|
// @description Only accessible from IPs registered with apiAllowedIPs configuration option.
|
||||||
|
// @accept mpfd
|
||||||
|
// @produce plain
|
||||||
|
// @param new-content formData string false "Priority 1: New content to display"
|
||||||
|
// @success 200 {string} string "Success Response Message"
|
||||||
|
// @failure 400 {string} string "Bad Request"
|
||||||
|
// @failure 401 {string} string "Unauthorized"
|
||||||
|
// @failure 403 {string} string "Forbidden"
|
||||||
|
// @failure 422 {string} string "Unprocessable Entity: The user could not be updated"
|
||||||
|
// @failure 500 {string} string "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /notice/ [post]
|
||||||
|
func (api *RestApi) editNotice(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
err := securedCheck(r)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
http.Error(rw, "Only admins are allowed to update the notice.txt file", http.StatusForbidden)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get Value
|
||||||
|
newContent := r.FormValue("new-content")
|
||||||
|
|
||||||
|
// Check FIle
|
||||||
|
noticeExists := util.CheckFileExists("./var/notice.txt")
|
||||||
|
if !noticeExists {
|
||||||
|
ntxt, err := os.Create("./var/notice.txt")
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Creating ./var/notice.txt failed: %s", err.Error())
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ntxt.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
if newContent != "" {
|
||||||
|
if err := os.WriteFile("./var/notice.txt", []byte(newContent), 0o666); err != nil {
|
||||||
|
log.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
rw.Write([]byte("Update Notice Content Success"))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if err := os.WriteFile("./var/notice.txt", []byte(""), 0o666); err != nil {
|
||||||
|
log.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
|
||||||
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
rw.Write([]byte("Empty Notice Content Success"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
|
func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
|
||||||
err := securedCheck(r)
|
err := securedCheck(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -1446,7 +1423,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
|
|||||||
rw.Header().Set("Content-Type", "text/plain")
|
rw.Header().Set("Content-Type", "text/plain")
|
||||||
key, value := r.FormValue("key"), r.FormValue("value")
|
key, value := r.FormValue("key"), r.FormValue("value")
|
||||||
|
|
||||||
fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
|
// fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
|
||||||
|
|
||||||
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, repository.GetUserFromContext(r.Context())); err != nil {
|
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, repository.GetUserFromContext(r.Context())); err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||||
|
|||||||
94
internal/archiver/archiveWorker.go
Normal file
94
internal/archiver/archiveWorker.go
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package archiver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
archivePending sync.WaitGroup
|
||||||
|
archiveChannel chan *schema.Job
|
||||||
|
jobRepo *repository.JobRepository
|
||||||
|
)
|
||||||
|
|
||||||
|
func Start(r *repository.JobRepository) {
|
||||||
|
archiveChannel = make(chan *schema.Job, 128)
|
||||||
|
jobRepo = r
|
||||||
|
|
||||||
|
go archivingWorker()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Archiving worker thread
|
||||||
|
func archivingWorker() {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case job, ok := <-archiveChannel:
|
||||||
|
if !ok {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
start := time.Now()
|
||||||
|
// not using meta data, called to load JobMeta into Cache?
|
||||||
|
// will fail if job meta not in repository
|
||||||
|
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||||
|
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||||
|
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||||
|
// TODO: Maybe use context with cancel/timeout here
|
||||||
|
jobMeta, err := ArchiveJob(job, context.Background())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||||
|
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt := sq.Update("job").Where("job.id = ?", job.ID)
|
||||||
|
|
||||||
|
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
|
||||||
|
log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
|
||||||
|
log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Update the jobs database entry one last time:
|
||||||
|
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
|
||||||
|
if err := jobRepo.Execute(stmt); err != nil {
|
||||||
|
log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||||
|
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||||
|
archivePending.Done()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trigger async archiving
|
||||||
|
func TriggerArchiving(job *schema.Job) {
|
||||||
|
if archiveChannel == nil {
|
||||||
|
log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
|
||||||
|
}
|
||||||
|
|
||||||
|
archivePending.Add(1)
|
||||||
|
archiveChannel <- job
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for background thread to finish pending archiving operations
|
||||||
|
func WaitForArchiving() {
|
||||||
|
// close channel and wait for worker to process remaining jobs
|
||||||
|
archivePending.Wait()
|
||||||
|
}
|
||||||
83
internal/archiver/archiver.go
Normal file
83
internal/archiver/archiver.go
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package archiver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Writes a running job to the job-archive
|
||||||
|
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||||
|
allMetrics := make([]string, 0)
|
||||||
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
allMetrics = append(allMetrics, mc.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||||
|
// FIXME: Add a config option for this
|
||||||
|
if job.NumNodes <= 8 {
|
||||||
|
// This will add the native scope if core scope is not available
|
||||||
|
scopes = append(scopes, schema.MetricScopeCore)
|
||||||
|
}
|
||||||
|
|
||||||
|
if job.NumAcc > 0 {
|
||||||
|
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||||
|
}
|
||||||
|
|
||||||
|
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Error wile loading job data for archiving")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jobMeta := &schema.JobMeta{
|
||||||
|
BaseJob: job.BaseJob,
|
||||||
|
StartTime: job.StartTime.Unix(),
|
||||||
|
Statistics: make(map[string]schema.JobStatistics),
|
||||||
|
}
|
||||||
|
|
||||||
|
for metric, data := range jobData {
|
||||||
|
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||||
|
nodeData, ok := data["node"]
|
||||||
|
if !ok {
|
||||||
|
// This should never happen ?
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, series := range nodeData.Series {
|
||||||
|
avg += series.Statistics.Avg
|
||||||
|
min = math.Min(min, series.Statistics.Min)
|
||||||
|
max = math.Max(max, series.Statistics.Max)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round AVG Result to 2 Digits
|
||||||
|
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||||
|
Unit: schema.Unit{
|
||||||
|
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||||
|
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||||
|
},
|
||||||
|
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
|
||||||
|
Min: min,
|
||||||
|
Max: max,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the file based archive is disabled,
|
||||||
|
// only return the JobMeta structure as the
|
||||||
|
// statistics in there are needed.
|
||||||
|
if config.Keys.DisableArchive {
|
||||||
|
return jobMeta, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
||||||
|
}
|
||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
@@ -26,6 +27,11 @@ type Authenticator interface {
|
|||||||
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
|
authInstance *Authentication
|
||||||
|
)
|
||||||
|
|
||||||
type Authentication struct {
|
type Authentication struct {
|
||||||
sessionStore *sessions.CookieStore
|
sessionStore *sessions.CookieStore
|
||||||
LdapAuth *LdapAuthenticator
|
LdapAuth *LdapAuthenticator
|
||||||
@@ -62,25 +68,28 @@ func (auth *Authentication) AuthViaSession(
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func Init() (*Authentication, error) {
|
func Init() {
|
||||||
auth := &Authentication{}
|
initOnce.Do(func() {
|
||||||
|
authInstance = &Authentication{}
|
||||||
|
|
||||||
sessKey := os.Getenv("SESSION_KEY")
|
sessKey := os.Getenv("SESSION_KEY")
|
||||||
if sessKey == "" {
|
if sessKey == "" {
|
||||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||||
bytes := make([]byte, 32)
|
bytes := make([]byte, 32)
|
||||||
if _, err := rand.Read(bytes); err != nil {
|
if _, err := rand.Read(bytes); err != nil {
|
||||||
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
|
log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||||
} else {
|
} else {
|
||||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while initializing authentication -> decoding session key failed")
|
log.Fatal("Error while initializing authentication -> decoding session key failed")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
|
||||||
|
authInstance.SessionMaxAge = d
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.Keys.LdapConfig != nil {
|
if config.Keys.LdapConfig != nil {
|
||||||
@@ -88,56 +97,82 @@ func Init() (*Authentication, error) {
|
|||||||
if err := ldapAuth.Init(); err != nil {
|
if err := ldapAuth.Init(); err != nil {
|
||||||
log.Warn("Error while initializing authentication -> ldapAuth init failed")
|
log.Warn("Error while initializing authentication -> ldapAuth init failed")
|
||||||
} else {
|
} else {
|
||||||
auth.LdapAuth = ldapAuth
|
authInstance.LdapAuth = ldapAuth
|
||||||
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
|
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Info("Missing LDAP configuration: No LDAP support!")
|
log.Info("Missing LDAP configuration: No LDAP support!")
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.Keys.JwtConfig != nil {
|
if config.Keys.JwtConfig != nil {
|
||||||
auth.JwtAuth = &JWTAuthenticator{}
|
authInstance.JwtAuth = &JWTAuthenticator{}
|
||||||
if err := auth.JwtAuth.Init(); err != nil {
|
if err := authInstance.JwtAuth.Init(); err != nil {
|
||||||
log.Error("Error while initializing authentication -> jwtAuth init failed")
|
log.Fatal("Error while initializing authentication -> jwtAuth init failed")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
jwtSessionAuth := &JWTSessionAuthenticator{}
|
jwtSessionAuth := &JWTSessionAuthenticator{}
|
||||||
if err := jwtSessionAuth.Init(); err != nil {
|
if err := jwtSessionAuth.Init(); err != nil {
|
||||||
log.Info("jwtSessionAuth init failed: No JWT login support!")
|
log.Info("jwtSessionAuth init failed: No JWT login support!")
|
||||||
} else {
|
} else {
|
||||||
auth.authenticators = append(auth.authenticators, jwtSessionAuth)
|
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
|
||||||
}
|
}
|
||||||
|
|
||||||
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
||||||
if err := jwtCookieSessionAuth.Init(); err != nil {
|
if err := jwtCookieSessionAuth.Init(); err != nil {
|
||||||
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
||||||
} else {
|
} else {
|
||||||
auth.authenticators = append(auth.authenticators, jwtCookieSessionAuth)
|
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Info("Missing JWT configuration: No JWT token support!")
|
log.Info("Missing JWT configuration: No JWT token support!")
|
||||||
}
|
}
|
||||||
|
|
||||||
auth.LocalAuth = &LocalAuthenticator{}
|
authInstance.LocalAuth = &LocalAuthenticator{}
|
||||||
if err := auth.LocalAuth.Init(); err != nil {
|
if err := authInstance.LocalAuth.Init(); err != nil {
|
||||||
log.Error("Error while initializing authentication -> localAuth init failed")
|
log.Fatal("Error while initializing authentication -> localAuth init failed")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
|
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
|
||||||
|
})
|
||||||
return auth, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func persistUser(user *schema.User) {
|
func GetAuthInstance() *Authentication {
|
||||||
|
if authInstance == nil {
|
||||||
|
log.Fatal("Authentication module not initialized!")
|
||||||
|
}
|
||||||
|
|
||||||
|
return authInstance
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleTokenUser(tokenUser *schema.User) {
|
||||||
r := repository.GetUserRepository()
|
r := repository.GetUserRepository()
|
||||||
_, err := r.GetUser(user.Username)
|
dbUser, err := r.GetUser(tokenUser.Username)
|
||||||
|
|
||||||
if err != nil && err != sql.ErrNoRows {
|
if err != nil && err != sql.ErrNoRows {
|
||||||
log.Errorf("Error while loading user '%s': %v", user.Username, err)
|
log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
|
||||||
} else if err == sql.ErrNoRows {
|
} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
|
||||||
if err := r.AddUser(user); err != nil {
|
if err := r.AddUser(tokenUser); err != nil {
|
||||||
log.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
|
log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
|
||||||
|
}
|
||||||
|
} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
|
||||||
|
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
|
||||||
|
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func handleOIDCUser(OIDCUser *schema.User) {
|
||||||
|
r := repository.GetUserRepository()
|
||||||
|
dbUser, err := r.GetUser(OIDCUser.Username)
|
||||||
|
|
||||||
|
if err != nil && err != sql.ErrNoRows {
|
||||||
|
log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
|
||||||
|
} else if err == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
|
||||||
|
if err := r.AddUser(OIDCUser); err != nil {
|
||||||
|
log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
|
||||||
|
}
|
||||||
|
} else if err == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
|
||||||
|
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
|
||||||
|
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -153,6 +188,10 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
|
|||||||
if auth.SessionMaxAge != 0 {
|
if auth.SessionMaxAge != 0 {
|
||||||
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
||||||
}
|
}
|
||||||
|
if config.Keys.HttpsCertFile == "" && config.Keys.HttpsKeyFile == "" {
|
||||||
|
session.Options.Secure = false
|
||||||
|
}
|
||||||
|
session.Options.SameSite = http.SameSiteStrictMode
|
||||||
session.Values["username"] = user.Username
|
session.Values["username"] = user.Username
|
||||||
session.Values["projects"] = user.Projects
|
session.Values["projects"] = user.Projects
|
||||||
session.Values["roles"] = user.Roles
|
session.Values["roles"] = user.Roles
|
||||||
@@ -166,7 +205,6 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (auth *Authentication) Login(
|
func (auth *Authentication) Login(
|
||||||
onsuccess http.Handler,
|
|
||||||
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
|
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
|
||||||
) http.Handler {
|
) http.Handler {
|
||||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
@@ -203,7 +241,13 @@ func (auth *Authentication) Login(
|
|||||||
|
|
||||||
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
|
||||||
|
if r.FormValue("redirect") != "" {
|
||||||
|
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -219,31 +263,141 @@ func (auth *Authentication) Auth(
|
|||||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Infof("authentication failed: %s", err.Error())
|
log.Infof("auth -> authentication failed: %s", err.Error())
|
||||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if user == nil {
|
if user == nil {
|
||||||
user, err = auth.AuthViaSession(rw, r)
|
user, err = auth.AuthViaSession(rw, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Infof("authentication failed: %s", err.Error())
|
log.Infof("auth -> authentication failed: %s", err.Error())
|
||||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if user != nil {
|
if user != nil {
|
||||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug("authentication failed")
|
log.Info("auth -> authentication failed")
|
||||||
onfailure(rw, r, errors.New("unauthorized (please login first)"))
|
onfailure(rw, r, errors.New("unauthorized (please login first)"))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthApi(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
log.Infof("auth api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user != nil {
|
||||||
|
switch {
|
||||||
|
case len(user.Roles) == 1:
|
||||||
|
if user.HasRole(schema.RoleApi) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case len(user.Roles) >= 2:
|
||||||
|
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
log.Info("auth api -> authentication failed: missing role")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Info("auth api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthUserApi(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
log.Infof("auth user api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user != nil {
|
||||||
|
switch {
|
||||||
|
case len(user.Roles) == 1:
|
||||||
|
if user.HasRole(schema.RoleApi) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case len(user.Roles) >= 2:
|
||||||
|
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
log.Info("auth user api -> authentication failed: missing role")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.Info("auth user api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthConfigApi(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.AuthViaSession(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
log.Infof("auth config api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user != nil && user.HasRole(schema.RoleAdmin) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Info("auth config api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthFrontendApi(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.AuthViaSession(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
log.Infof("auth frontend api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user != nil {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
log.Info("auth frontend api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
||||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
session, err := auth.sessionStore.Get(r, "session")
|
session, err := auth.sessionStore.Get(r, "session")
|
||||||
|
|||||||
@@ -198,8 +198,8 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
|||||||
AuthSource: schema.AuthViaToken,
|
AuthSource: schema.AuthViaToken,
|
||||||
}
|
}
|
||||||
|
|
||||||
if jc.SyncUserOnLogin {
|
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
|
||||||
persistUser(user)
|
handleTokenUser(user)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -138,8 +138,8 @@ func (ja *JWTSessionAuthenticator) Login(
|
|||||||
AuthSource: schema.AuthViaToken,
|
AuthSource: schema.AuthViaToken,
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.Keys.JwtConfig.SyncUserOnLogin {
|
if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
|
||||||
persistUser(user)
|
handleTokenUser(user)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
@@ -34,33 +33,6 @@ func (la *LdapAuthenticator) Init() error {
|
|||||||
|
|
||||||
lc := config.Keys.LdapConfig
|
lc := config.Keys.LdapConfig
|
||||||
|
|
||||||
if lc.SyncInterval != "" {
|
|
||||||
interval, err := time.ParseDuration(lc.SyncInterval)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("Could not parse duration for sync interval: %v",
|
|
||||||
lc.SyncInterval)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if interval == 0 {
|
|
||||||
log.Info("Sync interval is zero")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
ticker := time.NewTicker(interval)
|
|
||||||
for t := range ticker.C {
|
|
||||||
log.Printf("sync started at %s", t.Format(time.RFC3339))
|
|
||||||
if err := la.Sync(); err != nil {
|
|
||||||
log.Errorf("sync failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
log.Print("sync done")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
} else {
|
|
||||||
log.Info("LDAP configuration key sync_interval invalid")
|
|
||||||
}
|
|
||||||
|
|
||||||
if lc.UserAttr != "" {
|
if lc.UserAttr != "" {
|
||||||
la.UserAttr = lc.UserAttr
|
la.UserAttr = lc.UserAttr
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -168,8 +168,8 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
|
|||||||
AuthSource: schema.AuthViaOIDC,
|
AuthSource: schema.AuthViaOIDC,
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.Keys.OpenIDConfig.SyncUserOnLogin {
|
if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
|
||||||
persistUser(user)
|
handleOIDCUser(user)
|
||||||
}
|
}
|
||||||
|
|
||||||
oa.authentication.SaveSession(rw, r, user)
|
oa.authentication.SaveSession(rw, r, user)
|
||||||
|
|||||||
@@ -29,10 +29,9 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
|
|||||||
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||||
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
||||||
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||||
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
|
||||||
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||||
"job_view_showFootprint": true,
|
"job_view_showFootprint": true,
|
||||||
"job_list_usePaging": true,
|
"job_list_usePaging": false,
|
||||||
"plot_general_colorBackground": true,
|
"plot_general_colorBackground": true,
|
||||||
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
||||||
"plot_general_lineWidth": 3,
|
"plot_general_lineWidth": 3,
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -16,11 +16,23 @@ type Count struct {
|
|||||||
Count int `json:"count"`
|
Count int `json:"count"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type EnergyFootprintValue struct {
|
||||||
|
Hardware string `json:"hardware"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Value float64 `json:"value"`
|
||||||
|
}
|
||||||
|
|
||||||
type FloatRange struct {
|
type FloatRange struct {
|
||||||
From float64 `json:"from"`
|
From float64 `json:"from"`
|
||||||
To float64 `json:"to"`
|
To float64 `json:"to"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type FootprintValue struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Stat string `json:"stat"`
|
||||||
|
Value float64 `json:"value"`
|
||||||
|
}
|
||||||
|
|
||||||
type Footprints struct {
|
type Footprints struct {
|
||||||
TimeWeights *TimeWeights `json:"timeWeights"`
|
TimeWeights *TimeWeights `json:"timeWeights"`
|
||||||
Metrics []*MetricFootprints `json:"metrics"`
|
Metrics []*MetricFootprints `json:"metrics"`
|
||||||
@@ -46,16 +58,14 @@ type JobFilter struct {
|
|||||||
Cluster *StringInput `json:"cluster,omitempty"`
|
Cluster *StringInput `json:"cluster,omitempty"`
|
||||||
Partition *StringInput `json:"partition,omitempty"`
|
Partition *StringInput `json:"partition,omitempty"`
|
||||||
Duration *schema.IntRange `json:"duration,omitempty"`
|
Duration *schema.IntRange `json:"duration,omitempty"`
|
||||||
|
Energy *FloatRange `json:"energy,omitempty"`
|
||||||
MinRunningFor *int `json:"minRunningFor,omitempty"`
|
MinRunningFor *int `json:"minRunningFor,omitempty"`
|
||||||
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
|
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
|
||||||
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
|
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
|
||||||
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
|
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
|
||||||
StartTime *schema.TimeRange `json:"startTime,omitempty"`
|
StartTime *schema.TimeRange `json:"startTime,omitempty"`
|
||||||
State []schema.JobState `json:"state,omitempty"`
|
State []schema.JobState `json:"state,omitempty"`
|
||||||
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg,omitempty"`
|
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
|
||||||
MemBwAvg *FloatRange `json:"memBwAvg,omitempty"`
|
|
||||||
LoadAvg *FloatRange `json:"loadAvg,omitempty"`
|
|
||||||
MemUsedMax *FloatRange `json:"memUsedMax,omitempty"`
|
|
||||||
Exclusive *int `json:"exclusive,omitempty"`
|
Exclusive *int `json:"exclusive,omitempty"`
|
||||||
Node *StringInput `json:"node,omitempty"`
|
Node *StringInput `json:"node,omitempty"`
|
||||||
}
|
}
|
||||||
@@ -120,9 +130,15 @@ type MetricHistoPoint struct {
|
|||||||
type MetricHistoPoints struct {
|
type MetricHistoPoints struct {
|
||||||
Metric string `json:"metric"`
|
Metric string `json:"metric"`
|
||||||
Unit string `json:"unit"`
|
Unit string `json:"unit"`
|
||||||
|
Stat *string `json:"stat,omitempty"`
|
||||||
Data []*MetricHistoPoint `json:"data,omitempty"`
|
Data []*MetricHistoPoint `json:"data,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MetricStatItem struct {
|
||||||
|
MetricName string `json:"metricName"`
|
||||||
|
Range *FloatRange `json:"range"`
|
||||||
|
}
|
||||||
|
|
||||||
type Mutation struct {
|
type Mutation struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -132,8 +148,18 @@ type NodeMetrics struct {
|
|||||||
Metrics []*JobMetricWithName `json:"metrics"`
|
Metrics []*JobMetricWithName `json:"metrics"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodesResultList struct {
|
||||||
|
Items []*NodeMetrics `json:"items"`
|
||||||
|
Offset *int `json:"offset,omitempty"`
|
||||||
|
Limit *int `json:"limit,omitempty"`
|
||||||
|
Count *int `json:"count,omitempty"`
|
||||||
|
TotalNodes *int `json:"totalNodes,omitempty"`
|
||||||
|
HasNextPage *bool `json:"hasNextPage,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type OrderByInput struct {
|
type OrderByInput struct {
|
||||||
Field string `json:"field"`
|
Field string `json:"field"`
|
||||||
|
Type string `json:"type"`
|
||||||
Order SortDirectionEnum `json:"order"`
|
Order SortDirectionEnum `json:"order"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -155,6 +181,7 @@ type StringInput struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type TimeRangeOutput struct {
|
type TimeRangeOutput struct {
|
||||||
|
Range *string `json:"range,omitempty"`
|
||||||
From time.Time `json:"from"`
|
From time.Time `json:"from"`
|
||||||
To time.Time `json:"to"`
|
To time.Time `json:"to"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,15 +1,39 @@
|
|||||||
package graph
|
package graph
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
)
|
)
|
||||||
|
|
||||||
// This file will not be regenerated automatically.
|
// This file will not be regenerated automatically.
|
||||||
//
|
//
|
||||||
// It serves as dependency injection for your app, add any dependencies you require here.
|
// It serves as dependency injection for your app, add any dependencies you require here.
|
||||||
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
|
resolverInstance *Resolver
|
||||||
|
)
|
||||||
|
|
||||||
type Resolver struct {
|
type Resolver struct {
|
||||||
DB *sqlx.DB
|
DB *sqlx.DB
|
||||||
Repo *repository.JobRepository
|
Repo *repository.JobRepository
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Init() {
|
||||||
|
initOnce.Do(func() {
|
||||||
|
db := repository.GetConnection()
|
||||||
|
resolverInstance = &Resolver{
|
||||||
|
DB: db.DB, Repo: repository.GetJobRepository(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetResolverInstance() *Resolver {
|
||||||
|
if resolverInstance == nil {
|
||||||
|
log.Fatal("Authentication module not initialized!")
|
||||||
|
}
|
||||||
|
|
||||||
|
return resolverInstance
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,19 +2,22 @@ package graph
|
|||||||
|
|
||||||
// This file will be automatically regenerated based on the schema, any resolver implementations
|
// This file will be automatically regenerated based on the schema, any resolver implementations
|
||||||
// will be copied through when generating and any unknown code will be moved to the end.
|
// will be copied through when generating and any unknown code will be moved to the end.
|
||||||
// Code generated by github.com/99designs/gqlgen version v0.17.45
|
// Code generated by github.com/99designs/gqlgen version v0.17.57
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
@@ -28,15 +31,12 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
|||||||
|
|
||||||
// Tags is the resolver for the tags field.
|
// Tags is the resolver for the tags field.
|
||||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||||
return r.Repo.GetTags(&obj.ID)
|
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||||
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
|
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
|
||||||
if obj.State == schema.JobStateRunning {
|
// FIXME: Make the hardcoded duration configurable
|
||||||
obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
|
|
||||||
}
|
|
||||||
|
|
||||||
if obj.Exclusive != 1 && obj.Duration > 600 {
|
if obj.Exclusive != 1 && obj.Duration > 600 {
|
||||||
return r.Repo.FindConcurrentJobs(ctx, obj)
|
return r.Repo.FindConcurrentJobs(ctx, obj)
|
||||||
}
|
}
|
||||||
@@ -44,8 +44,72 @@ func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*mod
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Footprint is the resolver for the footprint field.
|
||||||
|
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
|
||||||
|
rawFootprint, err := r.Repo.FetchFootprint(obj)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while fetching job footprint data")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*model.FootprintValue{}
|
||||||
|
for name, value := range rawFootprint {
|
||||||
|
|
||||||
|
parts := strings.Split(name, "_")
|
||||||
|
statPart := parts[len(parts)-1]
|
||||||
|
nameParts := parts[:len(parts)-1]
|
||||||
|
|
||||||
|
res = append(res, &model.FootprintValue{
|
||||||
|
Name: strings.Join(nameParts, "_"),
|
||||||
|
Stat: statPart,
|
||||||
|
Value: value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnergyFootprint is the resolver for the energyFootprint field.
|
||||||
|
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
|
||||||
|
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while fetching job energy footprint data")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*model.EnergyFootprintValue{}
|
||||||
|
for name, value := range rawEnergyFootprint {
|
||||||
|
// Suboptimal: Nearly hardcoded metric name expectations
|
||||||
|
matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
|
||||||
|
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
|
||||||
|
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
|
||||||
|
matchCore := regexp.MustCompile(`core|Core|CORE`)
|
||||||
|
|
||||||
|
hwType := ""
|
||||||
|
switch test := name; { // NOtice ';' for var declaration
|
||||||
|
case matchCpu.MatchString(test):
|
||||||
|
hwType = "CPU"
|
||||||
|
case matchAcc.MatchString(test):
|
||||||
|
hwType = "Accelerator"
|
||||||
|
case matchMem.MatchString(test):
|
||||||
|
hwType = "Memory"
|
||||||
|
case matchCore.MatchString(test):
|
||||||
|
hwType = "Core"
|
||||||
|
default:
|
||||||
|
hwType = "Other"
|
||||||
|
}
|
||||||
|
|
||||||
|
res = append(res, &model.EnergyFootprintValue{
|
||||||
|
Hardware: hwType,
|
||||||
|
Metric: name,
|
||||||
|
Value: value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
// MetaData is the resolver for the metaData field.
|
// MetaData is the resolver for the metaData field.
|
||||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
|
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
|
||||||
return r.Repo.FetchMetadata(obj)
|
return r.Repo.FetchMetadata(obj)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -54,15 +118,20 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
|
|||||||
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
|
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Name is the resolver for the name field.
|
||||||
|
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: Name - name"))
|
||||||
|
}
|
||||||
|
|
||||||
// CreateTag is the resolver for the createTag field.
|
// CreateTag is the resolver for the createTag field.
|
||||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
|
||||||
id, err := r.Repo.CreateTag(typeArg, name)
|
id, err := r.Repo.CreateTag(typeArg, name, scope)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while creating tag")
|
log.Warn("Error while creating tag")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
|
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteTag is the resolver for the deleteTag field.
|
// DeleteTag is the resolver for the deleteTag field.
|
||||||
@@ -72,6 +141,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
|
|||||||
|
|
||||||
// AddTagsToJob is the resolver for the addTagsToJob field.
|
// AddTagsToJob is the resolver for the addTagsToJob field.
|
||||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||||
|
// Selectable Tags Pre-Filtered by Scope in Frontend: No backend check required
|
||||||
jid, err := strconv.ParseInt(job, 10, 64)
|
jid, err := strconv.ParseInt(job, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while adding tag to job")
|
log.Warn("Error while adding tag to job")
|
||||||
@@ -86,7 +156,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
|
if tags, err = r.Repo.AddTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
|
||||||
log.Warn("Error while adding tag")
|
log.Warn("Error while adding tag")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -97,6 +167,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
|
|
||||||
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
|
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
|
||||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||||
|
// Removable Tags Pre-Filtered by Scope in Frontend: No backend check required
|
||||||
jid, err := strconv.ParseInt(job, 10, 64)
|
jid, err := strconv.ParseInt(job, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while parsing job id")
|
log.Warn("Error while parsing job id")
|
||||||
@@ -111,7 +182,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
|
if tags, err = r.Repo.RemoveTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
|
||||||
log.Warn("Error while removing tag")
|
log.Warn("Error while removing tag")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -137,7 +208,12 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
|
|||||||
|
|
||||||
// Tags is the resolver for the tags field.
|
// Tags is the resolver for the tags field.
|
||||||
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||||
return r.Repo.GetTags(nil)
|
return r.Repo.GetTags(repository.GetUserFromContext(ctx), nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GlobalMetrics is the resolver for the globalMetrics field.
|
||||||
|
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
|
||||||
|
return archive.GlobalMetricList, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// User is the resolver for the user field.
|
// User is the resolver for the user field.
|
||||||
@@ -172,7 +248,7 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
job, err := r.Repo.FindById(numericId)
|
job, err := r.Repo.FindById(ctx, numericId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while finding job by id")
|
log.Warn("Error while finding job by id")
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -188,14 +264,24 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// JobMetrics is the resolver for the jobMetrics field.
|
// JobMetrics is the resolver for the jobMetrics field.
|
||||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
|
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
|
||||||
|
if resolution == nil { // Load from Config
|
||||||
|
if config.Keys.EnableResampling != nil {
|
||||||
|
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||||
|
resolution = &defaultRes
|
||||||
|
} else { // Set 0 (Loads configured metric timestep)
|
||||||
|
defaultRes := 0
|
||||||
|
resolution = &defaultRes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
job, err := r.Query().Job(ctx, id)
|
job, err := r.Query().Job(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while querying job for metrics")
|
log.Warn("Error while querying job for metrics")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
|
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading job data")
|
log.Warn("Error while loading job data")
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -217,6 +303,7 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
|
|||||||
|
|
||||||
// JobsFootprints is the resolver for the jobsFootprints field.
|
// JobsFootprints is the resolver for the jobsFootprints field.
|
||||||
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
||||||
|
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!
|
||||||
return r.jobsFootprints(ctx, filter, metrics)
|
return r.jobsFootprints(ctx, filter, metrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -268,10 +355,14 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
|
|||||||
}
|
}
|
||||||
|
|
||||||
// JobsStatistics is the resolver for the jobsStatistics field.
|
// JobsStatistics is the resolver for the jobsStatistics field.
|
||||||
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) ([]*model.JobsStatistics, error) {
|
||||||
var err error
|
var err error
|
||||||
var stats []*model.JobsStatistics
|
var stats []*model.JobsStatistics
|
||||||
|
|
||||||
|
// Top Level Defaults
|
||||||
|
var defaultDurationBins string = "1h"
|
||||||
|
var defaultMetricBins int = 10
|
||||||
|
|
||||||
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
||||||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
@@ -305,8 +396,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
}
|
}
|
||||||
|
|
||||||
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
|
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
|
||||||
|
|
||||||
|
if numDurationBins == nil {
|
||||||
|
numDurationBins = &defaultDurationBins
|
||||||
|
}
|
||||||
|
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
|
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0], numDurationBins)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -316,8 +412,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
}
|
}
|
||||||
|
|
||||||
if requireField(ctx, "histMetrics") {
|
if requireField(ctx, "histMetrics") {
|
||||||
|
|
||||||
|
if numMetricBins == nil {
|
||||||
|
numMetricBins = &defaultMetricBins
|
||||||
|
}
|
||||||
|
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0])
|
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0], numMetricBins)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -337,8 +438,8 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
|
|||||||
// NodeMetrics is the resolver for the nodeMetrics field.
|
// NodeMetrics is the resolver for the nodeMetrics field.
|
||||||
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
|
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
|
||||||
user := repository.GetUserFromContext(ctx)
|
user := repository.GetUserFromContext(ctx)
|
||||||
if user != nil && !user.HasRole(schema.RoleAdmin) {
|
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||||
return nil, errors.New("you need to be an administrator for this query")
|
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||||
}
|
}
|
||||||
|
|
||||||
if metrics == nil {
|
if metrics == nil {
|
||||||
@@ -347,9 +448,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading node data")
|
log.Warn("error while loading node data")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -359,7 +460,10 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
|||||||
Host: hostname,
|
Host: hostname,
|
||||||
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
||||||
}
|
}
|
||||||
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
|
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("error in nodeMetrics resolver: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
for metric, scopedMetrics := range metrics {
|
for metric, scopedMetrics := range metrics {
|
||||||
for _, scopedMetric := range scopedMetrics {
|
for _, scopedMetric := range scopedMetrics {
|
||||||
@@ -377,6 +481,68 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
|||||||
return nodeMetrics, nil
|
return nodeMetrics, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NodeMetricsList is the resolver for the nodeMetricsList field.
|
||||||
|
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
|
||||||
|
if resolution == nil { // Load from Config
|
||||||
|
if config.Keys.EnableResampling != nil {
|
||||||
|
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||||
|
resolution = &defaultRes
|
||||||
|
} else { // Set 0 (Loads configured metric timestep)
|
||||||
|
defaultRes := 0
|
||||||
|
resolution = &defaultRes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
user := repository.GetUserFromContext(ctx)
|
||||||
|
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||||
|
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||||
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
for _, mc := range archive.GetCluster(cluster).MetricConfig {
|
||||||
|
metrics = append(metrics, mc.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, *resolution, from, to, page, ctx)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("error while loading node data")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetricsList := make([]*model.NodeMetrics, 0, len(data))
|
||||||
|
for hostname, metrics := range data {
|
||||||
|
host := &model.NodeMetrics{
|
||||||
|
Host: hostname,
|
||||||
|
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
||||||
|
}
|
||||||
|
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("error in nodeMetrics resolver: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for metric, scopedMetrics := range metrics {
|
||||||
|
for scope, scopedMetric := range scopedMetrics {
|
||||||
|
host.Metrics = append(host.Metrics, &model.JobMetricWithName{
|
||||||
|
Name: metric,
|
||||||
|
Scope: scope,
|
||||||
|
Metric: scopedMetric,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetricsList = append(nodeMetricsList, host)
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetricsListResult := &model.NodesResultList{
|
||||||
|
Items: nodeMetricsList,
|
||||||
|
TotalNodes: &totalNodes,
|
||||||
|
HasNextPage: &hasNextPage,
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodeMetricsListResult, nil
|
||||||
|
}
|
||||||
|
|
||||||
// NumberOfNodes is the resolver for the numberOfNodes field.
|
// NumberOfNodes is the resolver for the numberOfNodes field.
|
||||||
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
|
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
|
||||||
nodeList, err := archive.ParseNodeList(obj.Nodes)
|
nodeList, err := archive.ParseNodeList(obj.Nodes)
|
||||||
@@ -392,6 +558,9 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
|
|||||||
// Job returns generated.JobResolver implementation.
|
// Job returns generated.JobResolver implementation.
|
||||||
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
||||||
|
|
||||||
|
// MetricValue returns generated.MetricValueResolver implementation.
|
||||||
|
func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricValueResolver{r} }
|
||||||
|
|
||||||
// Mutation returns generated.MutationResolver implementation.
|
// Mutation returns generated.MutationResolver implementation.
|
||||||
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
||||||
|
|
||||||
@@ -403,6 +572,7 @@ func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subCluste
|
|||||||
|
|
||||||
type clusterResolver struct{ *Resolver }
|
type clusterResolver struct{ *Resolver }
|
||||||
type jobResolver struct{ *Resolver }
|
type jobResolver struct{ *Resolver }
|
||||||
|
type metricValueResolver struct{ *Resolver }
|
||||||
type mutationResolver struct{ *Resolver }
|
type mutationResolver struct{ *Resolver }
|
||||||
type queryResolver struct{ *Resolver }
|
type queryResolver struct{ *Resolver }
|
||||||
type subClusterResolver struct{ *Resolver }
|
type subClusterResolver struct{ *Resolver }
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import (
|
|||||||
|
|
||||||
"github.com/99designs/gqlgen/graphql"
|
"github.com/99designs/gqlgen/graphql"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
|
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
@@ -24,8 +24,8 @@ func (r *queryResolver) rooflineHeatmap(
|
|||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
rows int, cols int,
|
rows int, cols int,
|
||||||
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
minX float64, minY float64, maxX float64, maxY float64,
|
||||||
|
) ([][]float64, error) {
|
||||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while querying jobs for roofline")
|
log.Error("Error while querying jobs for roofline")
|
||||||
@@ -47,7 +47,14 @@ func (r *queryResolver) rooflineHeatmap(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
// resolution := 0
|
||||||
|
|
||||||
|
// for _, mc := range metricConfigs {
|
||||||
|
// resolution = max(resolution, mc.Timestep)
|
||||||
|
// }
|
||||||
|
|
||||||
|
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
|
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -120,7 +127,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||||
log.Error("Error while loading averages for footprint")
|
log.Error("Error while loading averages for footprint")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,9 +8,9 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
@@ -42,8 +42,8 @@ func HandleImportFlag(flag string) error {
|
|||||||
}
|
}
|
||||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||||
dec.DisallowUnknownFields()
|
dec.DisallowUnknownFields()
|
||||||
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
|
job := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||||
if err = dec.Decode(&jobMeta); err != nil {
|
if err = dec.Decode(&job); err != nil {
|
||||||
log.Warn("Error while decoding raw json metadata for import")
|
log.Warn("Error while decoding raw json metadata for import")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -67,32 +67,60 @@ func HandleImportFlag(flag string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkJobData(&jobData)
|
job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||||
|
|
||||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
|
||||||
|
if err != nil {
|
||||||
// if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
|
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||||
// if err != nil {
|
return err
|
||||||
// log.Warn("Error while finding job in jobRepository")
|
|
||||||
// return err
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist")
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
job := schema.Job{
|
|
||||||
BaseJob: jobMeta.BaseJob,
|
|
||||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
|
||||||
StartTimeUnix: jobMeta.StartTime,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Other metrics...
|
job.Footprint = make(map[string]float64)
|
||||||
job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
|
|
||||||
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
|
for _, fp := range sc.Footprint {
|
||||||
job.MemUsedMax = loadJobStat(&jobMeta, "mem_used")
|
statType := "avg"
|
||||||
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
|
|
||||||
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||||
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
|
statType = sc.MetricConfig[i].Footprint
|
||||||
|
}
|
||||||
|
|
||||||
|
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||||
|
|
||||||
|
job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while marshaling job footprint")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
job.EnergyFootprint = make(map[string]float64)
|
||||||
|
var totalEnergy float64
|
||||||
|
var energy float64
|
||||||
|
|
||||||
|
for _, fp := range sc.EnergyFootprint {
|
||||||
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||||
|
// Note: For DB data, calculate and save as kWh
|
||||||
|
// Energy: Power (in Watts) * Time (in Seconds)
|
||||||
|
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||||
|
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||||
|
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
|
||||||
|
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.EnergyFootprint[fp] = energy
|
||||||
|
totalEnergy += energy
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Energy = (math.Round(totalEnergy*100) / 100)
|
||||||
|
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||||
|
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
job.RawResources, err = json.Marshal(job.Resources)
|
job.RawResources, err = json.Marshal(job.Resources)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -110,7 +138,7 @@ func HandleImportFlag(flag string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
|
if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
|
||||||
log.Error("Error while importing job")
|
log.Error("Error while importing job")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -122,8 +150,8 @@ func HandleImportFlag(flag string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, tag := range job.Tags {
|
for _, tag := range job.Tags {
|
||||||
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||||
log.Error("Error while adding or creating tag")
|
log.Error("Error while adding or creating tag on import")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ func setup(t *testing.T) *repository.JobRepository {
|
|||||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
|
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ package importer
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -16,6 +17,11 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
|
||||||
|
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
|
||||||
|
)
|
||||||
|
|
||||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||||
// repopulate them using the jobs found in `archive`.
|
// repopulate them using the jobs found in `archive`.
|
||||||
func InitDB() error {
|
func InitDB() error {
|
||||||
@@ -60,13 +66,58 @@ func InitDB() error {
|
|||||||
StartTimeUnix: jobMeta.StartTime,
|
StartTimeUnix: jobMeta.StartTime,
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Other metrics...
|
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||||
job.LoadAvg = loadJobStat(jobMeta, "cpu_load")
|
if err != nil {
|
||||||
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
|
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||||
job.MemUsedMax = loadJobStat(jobMeta, "mem_used")
|
return err
|
||||||
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
|
}
|
||||||
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
|
|
||||||
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
|
job.Footprint = make(map[string]float64)
|
||||||
|
|
||||||
|
for _, fp := range sc.Footprint {
|
||||||
|
statType := "avg"
|
||||||
|
|
||||||
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||||
|
statType = sc.MetricConfig[i].Footprint
|
||||||
|
}
|
||||||
|
|
||||||
|
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||||
|
|
||||||
|
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while marshaling job footprint")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
job.EnergyFootprint = make(map[string]float64)
|
||||||
|
var totalEnergy float64
|
||||||
|
var energy float64
|
||||||
|
|
||||||
|
for _, fp := range sc.EnergyFootprint {
|
||||||
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||||
|
// Note: For DB data, calculate and save as kWh
|
||||||
|
// Energy: Power (in Watts) * Time (in Seconds)
|
||||||
|
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||||
|
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||||
|
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
|
||||||
|
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.EnergyFootprint[fp] = energy
|
||||||
|
totalEnergy += energy
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Energy = (math.Round(totalEnergy*100) / 100)
|
||||||
|
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||||
|
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
job.RawResources, err = json.Marshal(job.Resources)
|
job.RawResources, err = json.Marshal(job.Resources)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -88,7 +139,8 @@ func InitDB() error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
id, err := r.TransactionAdd(t, job)
|
id, err := r.TransactionAddNamed(t,
|
||||||
|
repository.NamedJobInsert, job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("repository initDB(): %v", err)
|
log.Errorf("repository initDB(): %v", err)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
@@ -99,7 +151,9 @@ func InitDB() error {
|
|||||||
tagstr := tag.Name + ":" + tag.Type
|
tagstr := tag.Name + ":" + tag.Type
|
||||||
tagId, ok := tags[tagstr]
|
tagId, ok := tags[tagstr]
|
||||||
if !ok {
|
if !ok {
|
||||||
tagId, err = r.TransactionAddTag(t, tag)
|
tagId, err = r.TransactionAdd(t,
|
||||||
|
addTagQuery,
|
||||||
|
tag.Name, tag.Type)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error adding tag: %v", err)
|
log.Errorf("Error adding tag: %v", err)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
@@ -108,7 +162,9 @@ func InitDB() error {
|
|||||||
tags[tagstr] = tagId
|
tags[tagstr] = tagId
|
||||||
}
|
}
|
||||||
|
|
||||||
r.TransactionSetTag(t, id, tagId)
|
r.TransactionAdd(t,
|
||||||
|
setTagQuery,
|
||||||
|
id, tagId)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@@ -150,18 +206,6 @@ func SanityChecks(job *schema.BaseJob) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
|
||||||
if stats, ok := job.Statistics[metric]; ok {
|
|
||||||
if metric == "mem_used" {
|
|
||||||
return stats.Max
|
|
||||||
} else {
|
|
||||||
return stats.Avg
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0.0
|
|
||||||
}
|
|
||||||
|
|
||||||
func checkJobData(d *schema.JobData) error {
|
func checkJobData(d *schema.JobData) error {
|
||||||
for _, scopes := range *d {
|
for _, scopes := range *d {
|
||||||
// var newUnit schema.Unit
|
// var newUnit schema.Unit
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
},
|
},
|
||||||
"scope": "node",
|
"scope": "node",
|
||||||
"aggregation": "avg",
|
"aggregation": "avg",
|
||||||
|
"footprint": "avg",
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"peak": 72,
|
"peak": 72,
|
||||||
"normal": 72,
|
"normal": 72,
|
||||||
@@ -35,6 +36,7 @@
|
|||||||
},
|
},
|
||||||
"scope": "node",
|
"scope": "node",
|
||||||
"aggregation": "sum",
|
"aggregation": "sum",
|
||||||
|
"footprint": "max",
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"peak": 256,
|
"peak": 256,
|
||||||
"normal": 128,
|
"normal": 128,
|
||||||
@@ -49,6 +51,7 @@
|
|||||||
},
|
},
|
||||||
"scope": "hwthread",
|
"scope": "hwthread",
|
||||||
"aggregation": "sum",
|
"aggregation": "sum",
|
||||||
|
"footprint": "avg",
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"peak": 5600,
|
"peak": 5600,
|
||||||
"normal": 1000,
|
"normal": 1000,
|
||||||
@@ -91,6 +94,7 @@
|
|||||||
},
|
},
|
||||||
"scope": "socket",
|
"scope": "socket",
|
||||||
"aggregation": "sum",
|
"aggregation": "sum",
|
||||||
|
"footprint": "avg",
|
||||||
"timestep": 60,
|
"timestep": 60,
|
||||||
"peak": 350,
|
"peak": 350,
|
||||||
"normal": 100,
|
"normal": 100,
|
||||||
|
|||||||
310
internal/metricDataDispatcher/dataLoader.go
Normal file
310
internal/metricDataDispatcher/dataLoader.go
Normal file
@@ -0,0 +1,310 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package metricDataDispatcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/resampler"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||||
|
|
||||||
|
func cacheKey(
|
||||||
|
job *schema.Job,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
) string {
|
||||||
|
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
|
||||||
|
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
|
||||||
|
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
|
||||||
|
job.ID, job.State, metrics, scopes, resolution)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetches the metric data for a job.
|
||||||
|
func LoadData(job *schema.Job,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
ctx context.Context,
|
||||||
|
resolution int,
|
||||||
|
) (schema.JobData, error) {
|
||||||
|
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
|
||||||
|
var jd schema.JobData
|
||||||
|
var err error
|
||||||
|
|
||||||
|
if job.State == schema.JobStateRunning ||
|
||||||
|
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
||||||
|
config.Keys.DisableArchive {
|
||||||
|
|
||||||
|
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if scopes == nil {
|
||||||
|
scopes = append(scopes, schema.MetricScopeNode)
|
||||||
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
cluster := archive.GetCluster(job.Cluster)
|
||||||
|
for _, mc := range cluster.MetricConfig {
|
||||||
|
metrics = append(metrics, mc.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
|
||||||
|
if err != nil {
|
||||||
|
if len(jd) != 0 {
|
||||||
|
log.Warnf("partial error: %s", err.Error())
|
||||||
|
// return err, 0, 0 // Reactivating will block archiving on one partial error
|
||||||
|
} else {
|
||||||
|
log.Error("Error while loading job data from metric repository")
|
||||||
|
return err, 0, 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size = jd.Size()
|
||||||
|
} else {
|
||||||
|
var jd_temp schema.JobData
|
||||||
|
jd_temp, err = archive.GetHandle().LoadJobData(job)
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Error while loading job data from archive")
|
||||||
|
return err, 0, 0
|
||||||
|
}
|
||||||
|
|
||||||
|
//Deep copy the cached archive hashmap
|
||||||
|
jd = metricdata.DeepCopy(jd_temp)
|
||||||
|
|
||||||
|
//Resampling for archived data.
|
||||||
|
//Pass the resolution from frontend here.
|
||||||
|
for _, v := range jd {
|
||||||
|
for _, v_ := range v {
|
||||||
|
timestep := 0
|
||||||
|
for i := 0; i < len(v_.Series); i += 1 {
|
||||||
|
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
|
||||||
|
if err != nil {
|
||||||
|
return err, 0, 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
v_.Timestep = timestep
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid sending unrequested data to the client:
|
||||||
|
if metrics != nil || scopes != nil {
|
||||||
|
if metrics == nil {
|
||||||
|
metrics = make([]string, 0, len(jd))
|
||||||
|
for k := range jd {
|
||||||
|
metrics = append(metrics, k)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res := schema.JobData{}
|
||||||
|
for _, metric := range metrics {
|
||||||
|
if perscope, ok := jd[metric]; ok {
|
||||||
|
if len(perscope) > 1 {
|
||||||
|
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
||||||
|
for _, scope := range scopes {
|
||||||
|
if jm, ok := perscope[scope]; ok {
|
||||||
|
subset[scope] = jm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(subset) > 0 {
|
||||||
|
perscope = subset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
res[metric] = perscope
|
||||||
|
}
|
||||||
|
}
|
||||||
|
jd = res
|
||||||
|
}
|
||||||
|
size = jd.Size()
|
||||||
|
}
|
||||||
|
|
||||||
|
ttl = 5 * time.Hour
|
||||||
|
if job.State == schema.JobStateRunning {
|
||||||
|
ttl = 2 * time.Minute
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Review: Is this really necessary or correct.
|
||||||
|
// Note: Lines 147-170 formerly known as prepareJobData(jobData, scopes)
|
||||||
|
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
|
||||||
|
// to be available at the scope 'node'. If a job has a lot of nodes,
|
||||||
|
// statisticsSeries should be available so that a min/median/max Graph can be
|
||||||
|
// used instead of a lot of single lines.
|
||||||
|
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
|
||||||
|
// Existing (archived) StatsSeries can be 'min/mean/max'!
|
||||||
|
const maxSeriesSize int = 15
|
||||||
|
for _, scopes := range jd {
|
||||||
|
for _, jm := range scopes {
|
||||||
|
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
jm.AddStatisticsSeries()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeScopeRequested := false
|
||||||
|
for _, scope := range scopes {
|
||||||
|
if scope == schema.MetricScopeNode {
|
||||||
|
nodeScopeRequested = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if nodeScopeRequested {
|
||||||
|
jd.AddNodeScope("flops_any")
|
||||||
|
jd.AddNodeScope("mem_bw")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round Resulting Stat Values
|
||||||
|
jd.RoundMetricStats()
|
||||||
|
|
||||||
|
return jd, ttl, size
|
||||||
|
})
|
||||||
|
|
||||||
|
if err, ok := data.(error); ok {
|
||||||
|
log.Error("Error in returned dataset")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return data.(schema.JobData), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||||
|
func LoadAverages(
|
||||||
|
job *schema.Job,
|
||||||
|
metrics []string,
|
||||||
|
data [][]schema.Float,
|
||||||
|
ctx context.Context,
|
||||||
|
) error {
|
||||||
|
if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
|
||||||
|
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
|
||||||
|
}
|
||||||
|
|
||||||
|
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, m := range metrics {
|
||||||
|
nodes, ok := stats[m]
|
||||||
|
if !ok {
|
||||||
|
data[i] = append(data[i], schema.NaN)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
sum := 0.0
|
||||||
|
for _, node := range nodes {
|
||||||
|
sum += node.Avg
|
||||||
|
}
|
||||||
|
data[i] = append(data[i], schema.Float(sum))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used for the classic node/system view. Returns a map of nodes to a map of metrics.
|
||||||
|
func LoadNodeData(
|
||||||
|
cluster string,
|
||||||
|
metrics, nodes []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
from, to time.Time,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||||
|
repo, err := metricdata.GetMetricDataRepo(cluster)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||||
|
metrics = append(metrics, m.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||||
|
if err != nil {
|
||||||
|
if len(data) != 0 {
|
||||||
|
log.Warnf("partial error: %s", err.Error())
|
||||||
|
} else {
|
||||||
|
log.Error("Error while loading node data from metric repository")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if data == nil {
|
||||||
|
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page *model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]schema.JobData, int, bool, error) {
|
||||||
|
repo, err := metricdata.GetMetricDataRepo(cluster)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, false, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||||
|
metrics = append(metrics, m.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data, totalNodes, hasNextPage, err := repo.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, resolution, from, to, page, ctx)
|
||||||
|
if err != nil {
|
||||||
|
if len(data) != 0 {
|
||||||
|
log.Warnf("partial error: %s", err.Error())
|
||||||
|
} else {
|
||||||
|
log.Error("Error while loading node data from metric repository")
|
||||||
|
return nil, totalNodes, hasNextPage, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
|
||||||
|
const maxSeriesSize int = 8
|
||||||
|
for _, jd := range data {
|
||||||
|
for _, scopes := range jd {
|
||||||
|
for _, jm := range scopes {
|
||||||
|
if jm.StatisticsSeries != nil || len(jm.Series) < maxSeriesSize {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
jm.AddStatisticsSeries()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if data == nil {
|
||||||
|
return nil, totalNodes, hasNextPage, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, totalNodes, hasNextPage, nil
|
||||||
|
}
|
||||||
@@ -11,10 +11,12 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@@ -55,6 +57,7 @@ type ApiQuery struct {
|
|||||||
SubType *string `json:"subtype,omitempty"`
|
SubType *string `json:"subtype,omitempty"`
|
||||||
Metric string `json:"metric"`
|
Metric string `json:"metric"`
|
||||||
Hostname string `json:"host"`
|
Hostname string `json:"host"`
|
||||||
|
Resolution int `json:"resolution"`
|
||||||
TypeIds []string `json:"type-ids,omitempty"`
|
TypeIds []string `json:"type-ids,omitempty"`
|
||||||
SubTypeIds []string `json:"subtype-ids,omitempty"`
|
SubTypeIds []string `json:"subtype-ids,omitempty"`
|
||||||
Aggregate bool `json:"aggreg"`
|
Aggregate bool `json:"aggreg"`
|
||||||
@@ -70,6 +73,7 @@ type ApiMetricData struct {
|
|||||||
Data []schema.Float `json:"data"`
|
Data []schema.Float `json:"data"`
|
||||||
From int64 `json:"from"`
|
From int64 `json:"from"`
|
||||||
To int64 `json:"to"`
|
To int64 `json:"to"`
|
||||||
|
Resolution int `json:"resolution"`
|
||||||
Avg schema.Float `json:"avg"`
|
Avg schema.Float `json:"avg"`
|
||||||
Min schema.Float `json:"min"`
|
Min schema.Float `json:"min"`
|
||||||
Max schema.Float `json:"max"`
|
Max schema.Float `json:"max"`
|
||||||
@@ -129,7 +133,7 @@ func (ccms *CCMetricStore) doRequest(
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while building request body")
|
log.Warn("Error while building request body")
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -138,6 +142,13 @@ func (ccms *CCMetricStore) doRequest(
|
|||||||
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// versioning the cc-metric-store query API.
|
||||||
|
// v2 = data with resampling
|
||||||
|
// v1 = data without resampling
|
||||||
|
q := req.URL.Query()
|
||||||
|
q.Add("version", "v2")
|
||||||
|
req.URL.RawQuery = q.Encode()
|
||||||
|
|
||||||
res, err := ccms.client.Do(req)
|
res, err := ccms.client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while performing request")
|
log.Error("Error while performing request")
|
||||||
@@ -162,8 +173,9 @@ func (ccms *CCMetricStore) LoadData(
|
|||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
|
resolution int,
|
||||||
) (schema.JobData, error) {
|
) (schema.JobData, error) {
|
||||||
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
|
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while building queries")
|
log.Warn("Error while building queries")
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -195,11 +207,16 @@ func (ccms *CCMetricStore) LoadData(
|
|||||||
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
|
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
res := mc.Timestep
|
||||||
|
if len(row) > 0 {
|
||||||
|
res = row[0].Resolution
|
||||||
|
}
|
||||||
|
|
||||||
jobMetric, ok := jobData[metric][scope]
|
jobMetric, ok := jobData[metric][scope]
|
||||||
if !ok {
|
if !ok {
|
||||||
jobMetric = &schema.JobMetric{
|
jobMetric = &schema.JobMetric{
|
||||||
Unit: mc.Unit,
|
Unit: mc.Unit,
|
||||||
Timestep: mc.Timestep,
|
Timestep: res,
|
||||||
Series: make([]schema.Series, 0),
|
Series: make([]schema.Series, 0),
|
||||||
}
|
}
|
||||||
jobData[metric][scope] = jobMetric
|
jobData[metric][scope] = jobMetric
|
||||||
@@ -219,8 +236,7 @@ func (ccms *CCMetricStore) LoadData(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||||
// TODO: use schema.Float instead of float64?
|
// "schema.Float()" because regular float64 can not be JSONed when NaN.
|
||||||
// This is done because regular float64 can not be JSONed when NaN.
|
|
||||||
res.Avg = schema.Float(0)
|
res.Avg = schema.Float(0)
|
||||||
res.Min = schema.Float(0)
|
res.Min = schema.Float(0)
|
||||||
res.Max = schema.Float(0)
|
res.Max = schema.Float(0)
|
||||||
@@ -251,7 +267,6 @@ func (ccms *CCMetricStore) LoadData(
|
|||||||
/* Returns list for "partial errors" */
|
/* Returns list for "partial errors" */
|
||||||
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||||
}
|
}
|
||||||
|
|
||||||
return jobData, nil
|
return jobData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -267,6 +282,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
job *schema.Job,
|
job *schema.Job,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
) ([]ApiQuery, []schema.MetricScope, error) {
|
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
|
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
|
||||||
assignedScope := []schema.MetricScope{}
|
assignedScope := []schema.MetricScope{}
|
||||||
@@ -323,6 +339,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &acceleratorString,
|
Type: &acceleratorString,
|
||||||
TypeIds: host.Accelerators,
|
TypeIds: host.Accelerators,
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
|
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
|
||||||
continue
|
continue
|
||||||
@@ -340,6 +357,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &acceleratorString,
|
Type: &acceleratorString,
|
||||||
TypeIds: host.Accelerators,
|
TypeIds: host.Accelerators,
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -353,6 +371,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
TypeIds: intToStringSlice(hwthreads),
|
TypeIds: intToStringSlice(hwthreads),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -368,6 +387,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
TypeIds: intToStringSlice(topology.Core[core]),
|
TypeIds: intToStringSlice(topology.Core[core]),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
}
|
}
|
||||||
@@ -384,6 +404,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
TypeIds: intToStringSlice(topology.Socket[socket]),
|
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
}
|
}
|
||||||
@@ -398,6 +419,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &hwthreadString,
|
Type: &hwthreadString,
|
||||||
TypeIds: intToStringSlice(hwthreads),
|
TypeIds: intToStringSlice(hwthreads),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -412,11 +434,29 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &coreString,
|
Type: &coreString,
|
||||||
TypeIds: intToStringSlice(cores),
|
TypeIds: intToStringSlice(cores),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Core -> Socket
|
||||||
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
|
||||||
|
sockets, _ := topology.GetSocketsFromCores(hwthreads)
|
||||||
|
for _, socket := range sockets {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: host.Hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &coreString,
|
||||||
|
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Core -> Node
|
// Core -> Node
|
||||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
||||||
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
||||||
@@ -426,6 +466,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &coreString,
|
Type: &coreString,
|
||||||
TypeIds: intToStringSlice(cores),
|
TypeIds: intToStringSlice(cores),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -440,6 +481,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &memoryDomainString,
|
Type: &memoryDomainString,
|
||||||
TypeIds: intToStringSlice(sockets),
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -454,6 +496,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &memoryDomainString,
|
Type: &memoryDomainString,
|
||||||
TypeIds: intToStringSlice(sockets),
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -468,6 +511,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: false,
|
Aggregate: false,
|
||||||
Type: &socketString,
|
Type: &socketString,
|
||||||
TypeIds: intToStringSlice(sockets),
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -482,6 +526,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
Aggregate: true,
|
Aggregate: true,
|
||||||
Type: &socketString,
|
Type: &socketString,
|
||||||
TypeIds: intToStringSlice(sockets),
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -492,6 +537,7 @@ func (ccms *CCMetricStore) buildQueries(
|
|||||||
queries = append(queries, ApiQuery{
|
queries = append(queries, ApiQuery{
|
||||||
Metric: remoteName,
|
Metric: remoteName,
|
||||||
Hostname: host.Hostname,
|
Hostname: host.Hostname,
|
||||||
|
Resolution: resolution,
|
||||||
})
|
})
|
||||||
assignedScope = append(assignedScope, scope)
|
assignedScope = append(assignedScope, scope)
|
||||||
continue
|
continue
|
||||||
@@ -510,7 +556,15 @@ func (ccms *CCMetricStore) LoadStats(
|
|||||||
metrics []string,
|
metrics []string,
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
) (map[string]map[string]schema.MetricStatistics, error) {
|
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) // #166 Add scope shere for analysis view accelerator normalization?
|
|
||||||
|
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
// resolution := 9000
|
||||||
|
|
||||||
|
// for _, mc := range metricConfigs {
|
||||||
|
// resolution = min(resolution, mc.Timestep)
|
||||||
|
// }
|
||||||
|
|
||||||
|
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while building query")
|
log.Warn("Error while building query")
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -590,6 +644,7 @@ func (ccms *CCMetricStore) LoadNodeData(
|
|||||||
req.Queries = append(req.Queries, ApiQuery{
|
req.Queries = append(req.Queries, ApiQuery{
|
||||||
Hostname: node,
|
Hostname: node,
|
||||||
Metric: ccms.toRemoteName(metric),
|
Metric: ccms.toRemoteName(metric),
|
||||||
|
Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -597,7 +652,7 @@ func (ccms *CCMetricStore) LoadNodeData(
|
|||||||
|
|
||||||
resBody, err := ccms.doRequest(ctx, &req)
|
resBody, err := ccms.doRequest(ctx, &req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while performing request")
|
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -655,6 +710,462 @@ func (ccms *CCMetricStore) LoadNodeData(
|
|||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ccms *CCMetricStore) LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page *model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]schema.JobData, int, bool, error) {
|
||||||
|
|
||||||
|
// 0) Init additional vars
|
||||||
|
var totalNodes int = 0
|
||||||
|
var hasNextPage bool = false
|
||||||
|
|
||||||
|
// 1) Get list of all nodes
|
||||||
|
var nodes []string
|
||||||
|
if subCluster != "" {
|
||||||
|
scNodes := archive.NodeLists[cluster][subCluster]
|
||||||
|
nodes = scNodes.PrintList()
|
||||||
|
} else {
|
||||||
|
subClusterNodeLists := archive.NodeLists[cluster]
|
||||||
|
for _, nodeList := range subClusterNodeLists {
|
||||||
|
nodes = append(nodes, nodeList.PrintList()...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2) Filter nodes
|
||||||
|
if nodeFilter != "" {
|
||||||
|
filteredNodes := []string{}
|
||||||
|
for _, node := range nodes {
|
||||||
|
if strings.Contains(node, nodeFilter) {
|
||||||
|
filteredNodes = append(filteredNodes, node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nodes = filteredNodes
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
|
||||||
|
totalNodes = len(nodes)
|
||||||
|
sort.Strings(nodes)
|
||||||
|
|
||||||
|
// 3) Apply paging
|
||||||
|
if len(nodes) > page.ItemsPerPage {
|
||||||
|
start := (page.Page - 1) * page.ItemsPerPage
|
||||||
|
end := start + page.ItemsPerPage
|
||||||
|
if end > len(nodes) {
|
||||||
|
end = len(nodes)
|
||||||
|
hasNextPage = false
|
||||||
|
} else {
|
||||||
|
hasNextPage = true
|
||||||
|
}
|
||||||
|
nodes = nodes[start:end]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria
|
||||||
|
|
||||||
|
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while building queries")
|
||||||
|
return nil, totalNodes, hasNextPage, err
|
||||||
|
}
|
||||||
|
|
||||||
|
req := ApiQueryRequest{
|
||||||
|
Cluster: cluster,
|
||||||
|
Queries: queries,
|
||||||
|
From: from.Unix(),
|
||||||
|
To: to.Unix(),
|
||||||
|
WithStats: true,
|
||||||
|
WithData: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
resBody, err := ccms.doRequest(ctx, &req)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
|
||||||
|
return nil, totalNodes, hasNextPage, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var errors []string
|
||||||
|
data := make(map[string]schema.JobData)
|
||||||
|
for i, row := range resBody.Results {
|
||||||
|
var query ApiQuery
|
||||||
|
if resBody.Queries != nil {
|
||||||
|
query = resBody.Queries[i]
|
||||||
|
} else {
|
||||||
|
query = req.Queries[i]
|
||||||
|
}
|
||||||
|
// qdata := res[0]
|
||||||
|
metric := ccms.toLocalName(query.Metric)
|
||||||
|
scope := assignedScope[i]
|
||||||
|
mc := archive.GetMetricConfig(cluster, metric)
|
||||||
|
|
||||||
|
res := mc.Timestep
|
||||||
|
if len(row) > 0 {
|
||||||
|
res = row[0].Resolution
|
||||||
|
}
|
||||||
|
|
||||||
|
// Init Nested Map Data Structures If Not Found
|
||||||
|
hostData, ok := data[query.Hostname]
|
||||||
|
if !ok {
|
||||||
|
hostData = make(schema.JobData)
|
||||||
|
data[query.Hostname] = hostData
|
||||||
|
}
|
||||||
|
|
||||||
|
metricData, ok := hostData[metric]
|
||||||
|
if !ok {
|
||||||
|
metricData = make(map[schema.MetricScope]*schema.JobMetric)
|
||||||
|
data[query.Hostname][metric] = metricData
|
||||||
|
}
|
||||||
|
|
||||||
|
scopeData, ok := metricData[scope]
|
||||||
|
if !ok {
|
||||||
|
scopeData = &schema.JobMetric{
|
||||||
|
Unit: mc.Unit,
|
||||||
|
Timestep: res,
|
||||||
|
Series: make([]schema.Series, 0),
|
||||||
|
}
|
||||||
|
data[query.Hostname][metric][scope] = scopeData
|
||||||
|
}
|
||||||
|
|
||||||
|
for ndx, res := range row {
|
||||||
|
if res.Error != nil {
|
||||||
|
/* Build list for "partial errors", if any */
|
||||||
|
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
id := (*string)(nil)
|
||||||
|
if query.Type != nil {
|
||||||
|
id = new(string)
|
||||||
|
*id = query.TypeIds[ndx]
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||||
|
// "schema.Float()" because regular float64 can not be JSONed when NaN.
|
||||||
|
res.Avg = schema.Float(0)
|
||||||
|
res.Min = schema.Float(0)
|
||||||
|
res.Max = schema.Float(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
scopeData.Series = append(scopeData.Series, schema.Series{
|
||||||
|
Hostname: query.Hostname,
|
||||||
|
Id: id,
|
||||||
|
Statistics: schema.MetricStatistics{
|
||||||
|
Avg: float64(res.Avg),
|
||||||
|
Min: float64(res.Min),
|
||||||
|
Max: float64(res.Max),
|
||||||
|
},
|
||||||
|
Data: res.Data,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errors) != 0 {
|
||||||
|
/* Returns list of "partial errors" */
|
||||||
|
return data, totalNodes, hasNextPage, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, totalNodes, hasNextPage, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ccms *CCMetricStore) buildNodeQueries(
|
||||||
|
cluster string,
|
||||||
|
subCluster string,
|
||||||
|
nodes []string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||||
|
|
||||||
|
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
||||||
|
assignedScope := []schema.MetricScope{}
|
||||||
|
|
||||||
|
// Get Topol before loop if subCluster given
|
||||||
|
var subClusterTopol *schema.SubCluster
|
||||||
|
var scterr error
|
||||||
|
if subCluster != "" {
|
||||||
|
subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)
|
||||||
|
if scterr != nil {
|
||||||
|
// TODO: Log
|
||||||
|
return nil, nil, scterr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, metric := range metrics {
|
||||||
|
remoteName := ccms.toRemoteName(metric)
|
||||||
|
mc := archive.GetMetricConfig(cluster, metric)
|
||||||
|
if mc == nil {
|
||||||
|
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
|
||||||
|
log.Infof("metric '%s' is not specified for cluster '%s'", metric, cluster)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Avoid duplicates...
|
||||||
|
handledScopes := make([]schema.MetricScope, 0, 3)
|
||||||
|
|
||||||
|
scopesLoop:
|
||||||
|
for _, requestedScope := range scopes {
|
||||||
|
nativeScope := mc.Scope
|
||||||
|
|
||||||
|
scope := nativeScope.Max(requestedScope)
|
||||||
|
for _, s := range handledScopes {
|
||||||
|
if scope == s {
|
||||||
|
continue scopesLoop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
handledScopes = append(handledScopes, scope)
|
||||||
|
|
||||||
|
for _, hostname := range nodes {
|
||||||
|
|
||||||
|
// If no subCluster given, get it by node
|
||||||
|
if subCluster == "" {
|
||||||
|
subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if scnerr != nil {
|
||||||
|
return nil, nil, scnerr
|
||||||
|
}
|
||||||
|
subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)
|
||||||
|
if scterr != nil {
|
||||||
|
return nil, nil, scterr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable
|
||||||
|
// Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable
|
||||||
|
topology := subClusterTopol.Topology
|
||||||
|
acceleratorIds := topology.GetAcceleratorIDs()
|
||||||
|
|
||||||
|
// Moved check here if metric matches hardware specs
|
||||||
|
if nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 {
|
||||||
|
continue scopesLoop
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
|
||||||
|
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
|
||||||
|
if scope != schema.MetricScopeAccelerator {
|
||||||
|
// Skip all other catched cases
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &acceleratorString,
|
||||||
|
TypeIds: acceleratorIds,
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Accelerator -> Node
|
||||||
|
if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode {
|
||||||
|
if len(acceleratorIds) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &acceleratorString,
|
||||||
|
TypeIds: acceleratorIds,
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> HWThead
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Node),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> Core
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
|
||||||
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
|
for _, core := range cores {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Core[core]),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> Socket
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
|
||||||
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
|
for _, socket := range sockets {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// HWThread -> Node
|
||||||
|
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &hwthreadString,
|
||||||
|
TypeIds: intToStringSlice(topology.Node),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Core -> Core
|
||||||
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
|
||||||
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &coreString,
|
||||||
|
TypeIds: intToStringSlice(cores),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Core -> Socket
|
||||||
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
|
||||||
|
sockets, _ := topology.GetSocketsFromCores(topology.Node)
|
||||||
|
for _, socket := range sockets {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &coreString,
|
||||||
|
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Core -> Node
|
||||||
|
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
||||||
|
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &coreString,
|
||||||
|
TypeIds: intToStringSlice(cores),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// MemoryDomain -> MemoryDomain
|
||||||
|
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
|
||||||
|
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &memoryDomainString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// MemoryDoman -> Node
|
||||||
|
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
|
||||||
|
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &memoryDomainString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Socket -> Socket
|
||||||
|
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
|
||||||
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: false,
|
||||||
|
Type: &socketString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Socket -> Node
|
||||||
|
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
||||||
|
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Aggregate: true,
|
||||||
|
Type: &socketString,
|
||||||
|
TypeIds: intToStringSlice(sockets),
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Node -> Node
|
||||||
|
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
||||||
|
queries = append(queries, ApiQuery{
|
||||||
|
Metric: remoteName,
|
||||||
|
Hostname: hostname,
|
||||||
|
Resolution: resolution,
|
||||||
|
})
|
||||||
|
assignedScope = append(assignedScope, scope)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return queries, assignedScope, nil
|
||||||
|
}
|
||||||
|
|
||||||
func intToStringSlice(is []int) []string {
|
func intToStringSlice(is []int) []string {
|
||||||
ss := make([]string, len(is))
|
ss := make([]string, len(is))
|
||||||
for i, x := range is {
|
for i, x := range is {
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@@ -60,7 +61,8 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
|||||||
job *schema.Job,
|
job *schema.Job,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
ctx context.Context) (schema.JobData, error) {
|
ctx context.Context,
|
||||||
|
resolution int) (schema.JobData, error) {
|
||||||
|
|
||||||
measurementsConds := make([]string, 0, len(metrics))
|
measurementsConds := make([]string, 0, len(metrics))
|
||||||
for _, m := range metrics {
|
for _, m := range metrics {
|
||||||
@@ -311,3 +313,21 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
|
|||||||
|
|
||||||
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page *model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]schema.JobData, int, bool, error) {
|
||||||
|
|
||||||
|
var totalNodes int = 0
|
||||||
|
var hasNextPage bool = false
|
||||||
|
// TODO : Implement to be used in NodeList-View
|
||||||
|
log.Infof("LoadNodeListData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodeFilter %v, scopes %v", cluster, metrics, nodeFilter, scopes)
|
||||||
|
|
||||||
|
return nil, totalNodes, hasNextPage, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
||||||
|
}
|
||||||
|
|||||||
@@ -8,13 +8,11 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -24,21 +22,21 @@ type MetricDataRepository interface {
|
|||||||
Init(rawConfig json.RawMessage) error
|
Init(rawConfig json.RawMessage) error
|
||||||
|
|
||||||
// Return the JobData for the given job, only with the requested metrics.
|
// Return the JobData for the given job, only with the requested metrics.
|
||||||
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
|
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
|
||||||
|
|
||||||
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
|
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
|
||||||
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
||||||
|
|
||||||
// Return a map of hosts to a map of metrics at the requested scopes for that node.
|
// Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
|
||||||
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
|
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
|
||||||
|
|
||||||
|
// Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
|
||||||
|
LoadNodeListData(cluster, subCluster, nodeFilter string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
||||||
|
|
||||||
var useArchive bool
|
func Init() error {
|
||||||
|
|
||||||
func Init(disableArchive bool) error {
|
|
||||||
useArchive = !disableArchive
|
|
||||||
for _, cluster := range config.Keys.Clusters {
|
for _, cluster := range config.Keys.Clusters {
|
||||||
if cluster.MetricDataRepository != nil {
|
if cluster.MetricDataRepository != nil {
|
||||||
var kind struct {
|
var kind struct {
|
||||||
@@ -73,284 +71,13 @@ func Init(disableArchive bool) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
|
||||||
|
|
||||||
// Fetches the metric data for a job.
|
|
||||||
func LoadData(job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
ctx context.Context,
|
|
||||||
) (schema.JobData, error) {
|
|
||||||
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
|
|
||||||
var jd schema.JobData
|
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
if job.State == schema.JobStateRunning ||
|
|
||||||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
|
||||||
!useArchive {
|
|
||||||
|
|
||||||
repo, ok := metricDataRepos[job.Cluster]
|
|
||||||
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
if scopes == nil {
|
|
||||||
scopes = append(scopes, schema.MetricScopeNode)
|
|
||||||
}
|
|
||||||
|
|
||||||
if metrics == nil {
|
|
||||||
cluster := archive.GetCluster(job.Cluster)
|
|
||||||
for _, mc := range cluster.MetricConfig {
|
|
||||||
metrics = append(metrics, mc.Name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
jd, err = repo.LoadData(job, metrics, scopes, ctx)
|
|
||||||
if err != nil {
|
|
||||||
if len(jd) != 0 {
|
|
||||||
log.Warnf("partial error: %s", err.Error())
|
|
||||||
// return err, 0, 0 // Reactivating will block archiving on one partial error
|
|
||||||
} else {
|
|
||||||
log.Error("Error while loading job data from metric repository")
|
|
||||||
return err, 0, 0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
size = jd.Size()
|
|
||||||
} else {
|
|
||||||
jd, err = archive.GetHandle().LoadJobData(job)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while loading job data from archive")
|
|
||||||
return err, 0, 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// Avoid sending unrequested data to the client:
|
|
||||||
if metrics != nil || scopes != nil {
|
|
||||||
if metrics == nil {
|
|
||||||
metrics = make([]string, 0, len(jd))
|
|
||||||
for k := range jd {
|
|
||||||
metrics = append(metrics, k)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res := schema.JobData{}
|
|
||||||
for _, metric := range metrics {
|
|
||||||
if perscope, ok := jd[metric]; ok {
|
|
||||||
if len(perscope) > 1 {
|
|
||||||
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
|
||||||
for _, scope := range scopes {
|
|
||||||
if jm, ok := perscope[scope]; ok {
|
|
||||||
subset[scope] = jm
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(subset) > 0 {
|
|
||||||
perscope = subset
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
res[metric] = perscope
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jd = res
|
|
||||||
}
|
|
||||||
size = jd.Size()
|
|
||||||
}
|
|
||||||
|
|
||||||
ttl = 5 * time.Hour
|
|
||||||
if job.State == schema.JobStateRunning {
|
|
||||||
ttl = 2 * time.Minute
|
|
||||||
}
|
|
||||||
|
|
||||||
prepareJobData(job, jd, scopes)
|
|
||||||
|
|
||||||
return jd, ttl, size
|
|
||||||
})
|
|
||||||
|
|
||||||
if err, ok := data.(error); ok {
|
|
||||||
log.Error("Error in returned dataset")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return data.(schema.JobData), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
|
||||||
func LoadAverages(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
data [][]schema.Float,
|
|
||||||
ctx context.Context,
|
|
||||||
) error {
|
|
||||||
if job.State != schema.JobStateRunning && useArchive {
|
|
||||||
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
|
|
||||||
}
|
|
||||||
|
|
||||||
repo, ok := metricDataRepos[job.Cluster]
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
|
|
||||||
}
|
|
||||||
|
|
||||||
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for i, m := range metrics {
|
|
||||||
nodes, ok := stats[m]
|
|
||||||
if !ok {
|
|
||||||
data[i] = append(data[i], schema.NaN)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
sum := 0.0
|
|
||||||
for _, node := range nodes {
|
|
||||||
sum += node.Avg
|
|
||||||
}
|
|
||||||
data[i] = append(data[i], schema.Float(sum))
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used for the node/system view. Returns a map of nodes to a map of metrics.
|
|
||||||
func LoadNodeData(
|
|
||||||
cluster string,
|
|
||||||
metrics, nodes []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
from, to time.Time,
|
|
||||||
ctx context.Context,
|
|
||||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
|
||||||
repo, ok := metricDataRepos[cluster]
|
repo, ok := metricDataRepos[cluster]
|
||||||
|
|
||||||
if !ok {
|
if !ok {
|
||||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||||
}
|
}
|
||||||
|
|
||||||
if metrics == nil {
|
return repo, err
|
||||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
|
||||||
metrics = append(metrics, m.Name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
|
||||||
if err != nil {
|
|
||||||
if len(data) != 0 {
|
|
||||||
log.Warnf("partial error: %s", err.Error())
|
|
||||||
} else {
|
|
||||||
log.Error("Error while loading node data from metric repository")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if data == nil {
|
|
||||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
|
||||||
}
|
|
||||||
|
|
||||||
return data, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func cacheKey(
|
|
||||||
job *schema.Job,
|
|
||||||
metrics []string,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
) string {
|
|
||||||
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
|
|
||||||
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
|
|
||||||
return fmt.Sprintf("%d(%s):[%v],[%v]",
|
|
||||||
job.ID, job.State, metrics, scopes)
|
|
||||||
}
|
|
||||||
|
|
||||||
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
|
|
||||||
// to be available at the scope 'node'. If a job has a lot of nodes,
|
|
||||||
// statisticsSeries should be available so that a min/mean/max Graph can be
|
|
||||||
// used instead of a lot of single lines.
|
|
||||||
func prepareJobData(
|
|
||||||
job *schema.Job,
|
|
||||||
jobData schema.JobData,
|
|
||||||
scopes []schema.MetricScope,
|
|
||||||
) {
|
|
||||||
const maxSeriesSize int = 15
|
|
||||||
for _, scopes := range jobData {
|
|
||||||
for _, jm := range scopes {
|
|
||||||
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
jm.AddStatisticsSeries()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
nodeScopeRequested := false
|
|
||||||
for _, scope := range scopes {
|
|
||||||
if scope == schema.MetricScopeNode {
|
|
||||||
nodeScopeRequested = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if nodeScopeRequested {
|
|
||||||
jobData.AddNodeScope("flops_any")
|
|
||||||
jobData.AddNodeScope("mem_bw")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Writes a running job to the job-archive
|
|
||||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
|
||||||
allMetrics := make([]string, 0)
|
|
||||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
|
||||||
for _, mc := range metricConfigs {
|
|
||||||
allMetrics = append(allMetrics, mc.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Talk about this! What resolutions to store data at...
|
|
||||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
|
||||||
if job.NumNodes <= 8 {
|
|
||||||
scopes = append(scopes, schema.MetricScopeCore)
|
|
||||||
}
|
|
||||||
|
|
||||||
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error wile loading job data for archiving")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
jobMeta := &schema.JobMeta{
|
|
||||||
BaseJob: job.BaseJob,
|
|
||||||
StartTime: job.StartTime.Unix(),
|
|
||||||
Statistics: make(map[string]schema.JobStatistics),
|
|
||||||
}
|
|
||||||
|
|
||||||
for metric, data := range jobData {
|
|
||||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
|
||||||
nodeData, ok := data["node"]
|
|
||||||
if !ok {
|
|
||||||
// TODO/FIXME: Calc average for non-node metrics as well!
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, series := range nodeData.Series {
|
|
||||||
avg += series.Statistics.Avg
|
|
||||||
min = math.Min(min, series.Statistics.Min)
|
|
||||||
max = math.Max(max, series.Statistics.Max)
|
|
||||||
}
|
|
||||||
|
|
||||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
|
||||||
Unit: schema.Unit{
|
|
||||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
|
||||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
|
||||||
},
|
|
||||||
Avg: avg / float64(job.NumNodes),
|
|
||||||
Min: min,
|
|
||||||
Max: max,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the file based archive is disabled,
|
|
||||||
// only return the JobMeta structure as the
|
|
||||||
// statistics in there are needed.
|
|
||||||
if !useArchive {
|
|
||||||
return jobMeta, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ import (
|
|||||||
"text/template"
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@@ -166,10 +167,10 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
|
|||||||
var rt http.RoundTripper = nil
|
var rt http.RoundTripper = nil
|
||||||
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
|
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
|
||||||
prom_pw := promcfg.Secret(prom_pw)
|
prom_pw := promcfg.Secret(prom_pw)
|
||||||
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
|
rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
|
||||||
} else {
|
} else {
|
||||||
if config.Username != "" {
|
if config.Username != "" {
|
||||||
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
|
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// init client
|
// init client
|
||||||
@@ -204,8 +205,8 @@ func (pdb *PrometheusDataRepository) FormatQuery(
|
|||||||
metric string,
|
metric string,
|
||||||
scope schema.MetricScope,
|
scope schema.MetricScope,
|
||||||
nodes []string,
|
nodes []string,
|
||||||
cluster string) (string, error) {
|
cluster string,
|
||||||
|
) (string, error) {
|
||||||
args := PromQLArgs{}
|
args := PromQLArgs{}
|
||||||
if len(nodes) > 0 {
|
if len(nodes) > 0 {
|
||||||
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
|
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
|
||||||
@@ -233,12 +234,13 @@ func (pdb *PrometheusDataRepository) RowToSeries(
|
|||||||
from time.Time,
|
from time.Time,
|
||||||
step int64,
|
step int64,
|
||||||
steps int64,
|
steps int64,
|
||||||
row *promm.SampleStream) schema.Series {
|
row *promm.SampleStream,
|
||||||
|
) schema.Series {
|
||||||
ts := from.Unix()
|
ts := from.Unix()
|
||||||
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
|
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
|
||||||
// init array of expected length with NaN
|
// init array of expected length with NaN
|
||||||
values := make([]schema.Float, steps+1)
|
values := make([]schema.Float, steps+1)
|
||||||
for i, _ := range values {
|
for i := range values {
|
||||||
values[i] = schema.NaN
|
values[i] = schema.NaN
|
||||||
}
|
}
|
||||||
// copy recorded values from prom sample pair
|
// copy recorded values from prom sample pair
|
||||||
@@ -263,8 +265,9 @@ func (pdb *PrometheusDataRepository) LoadData(
|
|||||||
job *schema.Job,
|
job *schema.Job,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
ctx context.Context) (schema.JobData, error) {
|
ctx context.Context,
|
||||||
|
resolution int,
|
||||||
|
) (schema.JobData, error) {
|
||||||
// TODO respect requested scope
|
// TODO respect requested scope
|
||||||
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
|
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
|
||||||
scopes = append(scopes, schema.MetricScopeNode)
|
scopes = append(scopes, schema.MetricScopeNode)
|
||||||
@@ -306,7 +309,6 @@ func (pdb *PrometheusDataRepository) LoadData(
|
|||||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||||
}
|
}
|
||||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
|
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
|
||||||
return nil, errors.New("Prometheus query error")
|
return nil, errors.New("Prometheus query error")
|
||||||
@@ -351,12 +353,12 @@ func (pdb *PrometheusDataRepository) LoadData(
|
|||||||
func (pdb *PrometheusDataRepository) LoadStats(
|
func (pdb *PrometheusDataRepository) LoadStats(
|
||||||
job *schema.Job,
|
job *schema.Job,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
ctx context.Context,
|
||||||
|
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||||
// map of metrics of nodes of stats
|
// map of metrics of nodes of stats
|
||||||
stats := map[string]map[string]schema.MetricStatistics{}
|
stats := map[string]map[string]schema.MetricStatistics{}
|
||||||
|
|
||||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading job for stats")
|
log.Warn("Error while loading job for stats")
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -376,7 +378,8 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
|||||||
metrics, nodes []string,
|
metrics, nodes []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
from, to time.Time,
|
from, to time.Time,
|
||||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
ctx context.Context,
|
||||||
|
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||||
t0 := time.Now()
|
t0 := time.Now()
|
||||||
// Map of hosts of metrics of value slices
|
// Map of hosts of metrics of value slices
|
||||||
data := make(map[string]map[string][]*schema.JobMetric)
|
data := make(map[string]map[string][]*schema.JobMetric)
|
||||||
@@ -411,7 +414,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
|||||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||||
}
|
}
|
||||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
|
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
|
||||||
return nil, errors.New("Prometheus query error")
|
return nil, errors.New("Prometheus query error")
|
||||||
@@ -445,3 +447,21 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
|||||||
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
|
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
|
||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (pdb *PrometheusDataRepository) LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page *model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]schema.JobData, int, bool, error) {
|
||||||
|
|
||||||
|
var totalNodes int = 0
|
||||||
|
var hasNextPage bool = false
|
||||||
|
// TODO : Implement to be used in NodeList-View
|
||||||
|
log.Infof("LoadNodeListData unimplemented for PrometheusDataRepository, Args: cluster %s, metrics %v, nodeFilter %v, scopes %v", cluster, metrics, nodeFilter, scopes)
|
||||||
|
|
||||||
|
return nil, totalNodes, hasNextPage, errors.New("METRICDATA/INFLUXV2 > unimplemented for PrometheusDataRepository")
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,10 +9,11 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||||
panic("TODO")
|
panic("TODO")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -27,9 +28,10 @@ func (tmdr *TestMetricDataRepository) LoadData(
|
|||||||
job *schema.Job,
|
job *schema.Job,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
scopes []schema.MetricScope,
|
scopes []schema.MetricScope,
|
||||||
ctx context.Context) (schema.JobData, error) {
|
ctx context.Context,
|
||||||
|
resolution int) (schema.JobData, error) {
|
||||||
|
|
||||||
return TestLoadDataCallback(job, metrics, scopes, ctx)
|
return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tmdr *TestMetricDataRepository) LoadStats(
|
func (tmdr *TestMetricDataRepository) LoadStats(
|
||||||
@@ -48,3 +50,62 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
|
|||||||
|
|
||||||
panic("TODO")
|
panic("TODO")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (tmdr *TestMetricDataRepository) LoadNodeListData(
|
||||||
|
cluster, subCluster, nodeFilter string,
|
||||||
|
metrics []string,
|
||||||
|
scopes []schema.MetricScope,
|
||||||
|
resolution int,
|
||||||
|
from, to time.Time,
|
||||||
|
page *model.PageRequest,
|
||||||
|
ctx context.Context,
|
||||||
|
) (map[string]schema.JobData, int, bool, error) {
|
||||||
|
|
||||||
|
panic("TODO")
|
||||||
|
}
|
||||||
|
|
||||||
|
func DeepCopy(jd_temp schema.JobData) schema.JobData {
|
||||||
|
var jd schema.JobData
|
||||||
|
|
||||||
|
jd = make(schema.JobData, len(jd_temp))
|
||||||
|
for k, v := range jd_temp {
|
||||||
|
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
|
||||||
|
for k_, v_ := range v {
|
||||||
|
jd[k][k_] = new(schema.JobMetric)
|
||||||
|
jd[k][k_].Series = make([]schema.Series, len(v_.Series))
|
||||||
|
for i := 0; i < len(v_.Series); i += 1 {
|
||||||
|
jd[k][k_].Series[i].Data = make([]schema.Float, len(v_.Series[i].Data))
|
||||||
|
copy(jd[k][k_].Series[i].Data, v_.Series[i].Data)
|
||||||
|
jd[k][k_].Series[i].Hostname = v_.Series[i].Hostname
|
||||||
|
jd[k][k_].Series[i].Id = v_.Series[i].Id
|
||||||
|
jd[k][k_].Series[i].Statistics.Avg = v_.Series[i].Statistics.Avg
|
||||||
|
jd[k][k_].Series[i].Statistics.Min = v_.Series[i].Statistics.Min
|
||||||
|
jd[k][k_].Series[i].Statistics.Max = v_.Series[i].Statistics.Max
|
||||||
|
}
|
||||||
|
jd[k][k_].Timestep = v_.Timestep
|
||||||
|
jd[k][k_].Unit.Base = v_.Unit.Base
|
||||||
|
jd[k][k_].Unit.Prefix = v_.Unit.Prefix
|
||||||
|
if v_.StatisticsSeries != nil {
|
||||||
|
// Init Slices
|
||||||
|
jd[k][k_].StatisticsSeries = new(schema.StatsSeries)
|
||||||
|
jd[k][k_].StatisticsSeries.Max = make([]schema.Float, len(v_.StatisticsSeries.Max))
|
||||||
|
jd[k][k_].StatisticsSeries.Min = make([]schema.Float, len(v_.StatisticsSeries.Min))
|
||||||
|
jd[k][k_].StatisticsSeries.Median = make([]schema.Float, len(v_.StatisticsSeries.Median))
|
||||||
|
jd[k][k_].StatisticsSeries.Mean = make([]schema.Float, len(v_.StatisticsSeries.Mean))
|
||||||
|
// Copy Data
|
||||||
|
copy(jd[k][k_].StatisticsSeries.Max, v_.StatisticsSeries.Max)
|
||||||
|
copy(jd[k][k_].StatisticsSeries.Min, v_.StatisticsSeries.Min)
|
||||||
|
copy(jd[k][k_].StatisticsSeries.Median, v_.StatisticsSeries.Median)
|
||||||
|
copy(jd[k][k_].StatisticsSeries.Mean, v_.StatisticsSeries.Mean)
|
||||||
|
// Handle Percentiles
|
||||||
|
for k__, v__ := range v_.StatisticsSeries.Percentiles {
|
||||||
|
jd[k][k_].StatisticsSeries.Percentiles[k__] = make([]schema.Float, len(v__))
|
||||||
|
copy(jd[k][k_].StatisticsSeries.Percentiles[k__], v__)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
jd[k][k_].StatisticsSeries = v_.StatisticsSeries
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return jd
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,17 +5,16 @@
|
|||||||
package repository
|
package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||||
@@ -33,9 +32,7 @@ type JobRepository struct {
|
|||||||
DB *sqlx.DB
|
DB *sqlx.DB
|
||||||
stmtCache *sq.StmtCache
|
stmtCache *sq.StmtCache
|
||||||
cache *lrucache.Cache
|
cache *lrucache.Cache
|
||||||
archiveChannel chan *schema.Job
|
|
||||||
driver string
|
driver string
|
||||||
archivePending sync.WaitGroup
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetJobRepository() *JobRepository {
|
func GetJobRepository() *JobRepository {
|
||||||
@@ -48,45 +45,46 @@ func GetJobRepository() *JobRepository {
|
|||||||
|
|
||||||
stmtCache: sq.NewStmtCache(db.DB),
|
stmtCache: sq.NewStmtCache(db.DB),
|
||||||
cache: lrucache.New(1024 * 1024),
|
cache: lrucache.New(1024 * 1024),
|
||||||
archiveChannel: make(chan *schema.Job, 128),
|
|
||||||
}
|
}
|
||||||
// start archiving worker
|
|
||||||
go jobRepoInstance.archivingWorker()
|
|
||||||
})
|
})
|
||||||
return jobRepoInstance
|
return jobRepoInstance
|
||||||
}
|
}
|
||||||
|
|
||||||
var jobColumns []string = []string{
|
var jobColumns []string = []string{
|
||||||
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
|
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
|
||||||
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
|
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
|
||||||
"job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data",
|
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
|
||||||
}
|
}
|
||||||
|
|
||||||
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
||||||
job := &schema.Job{}
|
job := &schema.Job{}
|
||||||
|
|
||||||
if err := row.Scan(
|
if err := row.Scan(
|
||||||
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
|
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
|
||||||
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
||||||
&job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil {
|
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
|
||||||
log.Warnf("Error while scanning rows (Job): %v", err)
|
log.Warnf("Error while scanning rows (Job): %v", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
||||||
log.Warn("Error while unmarhsaling raw resources json")
|
log.Warn("Error while unmarshaling raw resources json")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
job.RawResources = nil
|
||||||
|
|
||||||
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
|
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
|
||||||
// return nil, err
|
log.Warnf("Error while unmarshaling raw footprint json: %v", err)
|
||||||
// }
|
return nil, err
|
||||||
|
}
|
||||||
|
job.RawFootprint = nil
|
||||||
|
|
||||||
job.StartTime = time.Unix(job.StartTimeUnix, 0)
|
job.StartTime = time.Unix(job.StartTimeUnix, 0)
|
||||||
if job.Duration == 0 && job.State == schema.JobStateRunning {
|
// Always ensure accurate duration for running jobs
|
||||||
|
if job.State == schema.JobStateRunning {
|
||||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
||||||
}
|
}
|
||||||
|
|
||||||
job.RawResources = nil
|
|
||||||
return job, nil
|
return job, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -205,7 +203,10 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
|
if _, err = sq.Update("job").
|
||||||
|
Set("meta_data", job.RawMetaData).
|
||||||
|
Where("job.id = ?", job.ID).
|
||||||
|
RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
|
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -214,222 +215,54 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
|||||||
return archive.UpdateMetadata(job, job.MetaData)
|
return archive.UpdateMetadata(job, job.MetaData)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find executes a SQL query to find a specific batch job.
|
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
|
||||||
// The job is queried using the batch job id, the cluster name,
|
|
||||||
// and the start time of the job in UNIX epoch time seconds.
|
|
||||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
|
||||||
// To check if no job was found test err == sql.ErrNoRows
|
|
||||||
func (r *JobRepository) Find(
|
|
||||||
jobId *int64,
|
|
||||||
cluster *string,
|
|
||||||
startTime *int64,
|
|
||||||
) (*schema.Job, error) {
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
q := sq.Select(jobColumns...).From("job").
|
|
||||||
Where("job.job_id = ?", *jobId)
|
|
||||||
|
|
||||||
if cluster != nil {
|
if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
|
||||||
q = q.Where("job.cluster = ?", *cluster)
|
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
|
||||||
}
|
log.Warn("Error while scanning for job footprint")
|
||||||
if startTime != nil {
|
|
||||||
q = q.Where("job.start_time = ?", *startTime)
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Debugf("Timer Find %s", time.Since(start))
|
|
||||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find executes a SQL query to find a specific batch job.
|
|
||||||
// The job is queried using the batch job id, the cluster name,
|
|
||||||
// and the start time of the job in UNIX epoch time seconds.
|
|
||||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
|
||||||
// To check if no job was found test err == sql.ErrNoRows
|
|
||||||
func (r *JobRepository) FindAll(
|
|
||||||
jobId *int64,
|
|
||||||
cluster *string,
|
|
||||||
startTime *int64,
|
|
||||||
) ([]*schema.Job, error) {
|
|
||||||
start := time.Now()
|
|
||||||
q := sq.Select(jobColumns...).From("job").
|
|
||||||
Where("job.job_id = ?", *jobId)
|
|
||||||
|
|
||||||
if cluster != nil {
|
|
||||||
q = q.Where("job.cluster = ?", *cluster)
|
|
||||||
}
|
|
||||||
if startTime != nil {
|
|
||||||
q = q.Where("job.start_time = ?", *startTime)
|
|
||||||
}
|
|
||||||
|
|
||||||
rows, err := q.RunWith(r.stmtCache).Query()
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while running query")
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
jobs := make([]*schema.Job, 0, 10)
|
if len(job.RawFootprint) == 0 {
|
||||||
for rows.Next() {
|
|
||||||
job, err := scanJob(rows)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while scanning rows")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
jobs = append(jobs, job)
|
|
||||||
}
|
|
||||||
log.Debugf("Timer FindAll %s", time.Since(start))
|
|
||||||
return jobs, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// FindById executes a SQL query to find a specific batch job.
|
|
||||||
// The job is queried using the database id.
|
|
||||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
|
||||||
// To check if no job was found test err == sql.ErrNoRows
|
|
||||||
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
|
|
||||||
q := sq.Select(jobColumns...).
|
|
||||||
From("job").Where("job.id = ?", jobId)
|
|
||||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *JobRepository) FindConcurrentJobs(
|
|
||||||
ctx context.Context,
|
|
||||||
job *schema.Job,
|
|
||||||
) (*model.JobLinkResultList, error) {
|
|
||||||
if job == nil {
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
|
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
|
||||||
if qerr != nil {
|
log.Warn("Error while unmarshaling raw footprint json")
|
||||||
return nil, qerr
|
|
||||||
}
|
|
||||||
|
|
||||||
query = query.Where("cluster = ?", job.Cluster)
|
|
||||||
var startTime int64
|
|
||||||
var stopTime int64
|
|
||||||
|
|
||||||
startTime = job.StartTimeUnix
|
|
||||||
hostname := job.Resources[0].Hostname
|
|
||||||
|
|
||||||
if job.State == schema.JobStateRunning {
|
|
||||||
stopTime = time.Now().Unix()
|
|
||||||
} else {
|
|
||||||
stopTime = startTime + int64(job.Duration)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add 200s overlap for jobs start time at the end
|
|
||||||
startTimeTail := startTime + 10
|
|
||||||
stopTimeTail := stopTime - 200
|
|
||||||
startTimeFront := startTime + 200
|
|
||||||
|
|
||||||
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
|
|
||||||
"running", startTimeTail, stopTimeTail, startTime)
|
|
||||||
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
|
||||||
|
|
||||||
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
|
|
||||||
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
|
|
||||||
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
|
||||||
|
|
||||||
rows, err := query.RunWith(r.stmtCache).Query()
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("Error while running query: %v", err)
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
items := make([]*model.JobLink, 0, 10)
|
log.Debugf("Timer FetchFootprint %s", time.Since(start))
|
||||||
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
|
return job.Footprint, nil
|
||||||
|
}
|
||||||
|
|
||||||
for rows.Next() {
|
func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
|
||||||
var id, jobId, startTime sql.NullInt64
|
start := time.Now()
|
||||||
|
cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
|
||||||
|
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||||
|
job.EnergyFootprint = cached.(map[string]float64)
|
||||||
|
return job.EnergyFootprint, nil
|
||||||
|
}
|
||||||
|
|
||||||
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
|
if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
|
||||||
log.Warn("Error while scanning rows")
|
RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
|
||||||
|
log.Warn("Error while scanning for job energy_footprint")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if id.Valid {
|
if len(job.RawEnergyFootprint) == 0 {
|
||||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
return nil, nil
|
||||||
items = append(items,
|
|
||||||
&model.JobLink{
|
|
||||||
ID: fmt.Sprint(id.Int64),
|
|
||||||
JobID: int(jobId.Int64),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rows, err = queryRunning.RunWith(r.stmtCache).Query()
|
if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
|
||||||
if err != nil {
|
log.Warn("Error while unmarshaling raw energy footprint json")
|
||||||
log.Errorf("Error while running query: %v", err)
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
for rows.Next() {
|
r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
|
||||||
var id, jobId, startTime sql.NullInt64
|
log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
|
||||||
|
return job.EnergyFootprint, nil
|
||||||
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
|
|
||||||
log.Warn("Error while scanning rows")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if id.Valid {
|
|
||||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
|
||||||
items = append(items,
|
|
||||||
&model.JobLink{
|
|
||||||
ID: fmt.Sprint(id.Int64),
|
|
||||||
JobID: int(jobId.Int64),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cnt := len(items)
|
|
||||||
|
|
||||||
return &model.JobLinkResultList{
|
|
||||||
ListQuery: &queryString,
|
|
||||||
Items: items,
|
|
||||||
Count: &cnt,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start inserts a new job in the table, returning the unique job ID.
|
|
||||||
// Statistics are not transfered!
|
|
||||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
|
||||||
job.RawResources, err = json.Marshal(job.Resources)
|
|
||||||
if err != nil {
|
|
||||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
|
||||||
if err != nil {
|
|
||||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
res, err := r.DB.NamedExec(`INSERT INTO job (
|
|
||||||
job_id, user, project, cluster, subcluster, `+"`partition`"+`, array_job_id, num_nodes, num_hwthreads, num_acc,
|
|
||||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data
|
|
||||||
) VALUES (
|
|
||||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
|
||||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data
|
|
||||||
);`, job)
|
|
||||||
if err != nil {
|
|
||||||
return -1, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return res.LastInsertId()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stop updates the job with the database id jobId using the provided arguments.
|
|
||||||
func (r *JobRepository) Stop(
|
|
||||||
jobId int64,
|
|
||||||
duration int32,
|
|
||||||
state schema.JobState,
|
|
||||||
monitoringStatus int32,
|
|
||||||
) (err error) {
|
|
||||||
stmt := sq.Update("job").
|
|
||||||
Set("job_state", state).
|
|
||||||
Set("duration", duration).
|
|
||||||
Set("monitoring_status", monitoringStatus).
|
|
||||||
Where("job.id = ?", jobId)
|
|
||||||
|
|
||||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||||
@@ -461,119 +294,22 @@ func (r *JobRepository) DeleteJobById(id int64) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
|
||||||
stmt := sq.Update("job").
|
|
||||||
Set("monitoring_status", monitoringStatus).
|
|
||||||
Where("job.id = ?", job)
|
|
||||||
|
|
||||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Stop updates the job with the database id jobId using the provided arguments.
|
|
||||||
func (r *JobRepository) MarkArchived(
|
|
||||||
jobId int64,
|
|
||||||
monitoringStatus int32,
|
|
||||||
metricStats map[string]schema.JobStatistics,
|
|
||||||
) error {
|
|
||||||
stmt := sq.Update("job").
|
|
||||||
Set("monitoring_status", monitoringStatus).
|
|
||||||
Where("job.id = ?", jobId)
|
|
||||||
|
|
||||||
for metric, stats := range metricStats {
|
|
||||||
switch metric {
|
|
||||||
case "flops_any":
|
|
||||||
stmt = stmt.Set("flops_any_avg", stats.Avg)
|
|
||||||
case "mem_used":
|
|
||||||
stmt = stmt.Set("mem_used_max", stats.Max)
|
|
||||||
case "mem_bw":
|
|
||||||
stmt = stmt.Set("mem_bw_avg", stats.Avg)
|
|
||||||
case "load":
|
|
||||||
stmt = stmt.Set("load_avg", stats.Avg)
|
|
||||||
case "cpu_load":
|
|
||||||
stmt = stmt.Set("load_avg", stats.Avg)
|
|
||||||
case "net_bw":
|
|
||||||
stmt = stmt.Set("net_bw_avg", stats.Avg)
|
|
||||||
case "file_bw":
|
|
||||||
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
|
||||||
default:
|
|
||||||
log.Debugf("MarkArchived() Metric '%v' unknown", metric)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
|
||||||
log.Warn("Error while marking job as archived")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Archiving worker thread
|
|
||||||
func (r *JobRepository) archivingWorker() {
|
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case job, ok := <-r.archiveChannel:
|
|
||||||
if !ok {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
start := time.Now()
|
|
||||||
// not using meta data, called to load JobMeta into Cache?
|
|
||||||
// will fail if job meta not in repository
|
|
||||||
if _, err := r.FetchMetadata(job); err != nil {
|
|
||||||
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
|
||||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
|
||||||
// TODO: Maybe use context with cancel/timeout here
|
|
||||||
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
|
||||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update the jobs database entry one last time:
|
|
||||||
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
|
|
||||||
log.Errorf("archiving job (dbid: %d) failed at marking archived step: %s", job.ID, err.Error())
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
|
||||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
|
||||||
r.archivePending.Done()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Trigger async archiving
|
|
||||||
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
|
|
||||||
r.archivePending.Add(1)
|
|
||||||
r.archiveChannel <- job
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for background thread to finish pending archiving operations
|
|
||||||
func (r *JobRepository) WaitForArchiving() {
|
|
||||||
// close channel and wait for worker to process remaining jobs
|
|
||||||
r.archivePending.Wait()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
|
func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
|
||||||
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
|
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
|
||||||
return searchterm, "", "", ""
|
return searchterm, "", "", ""
|
||||||
} else { // Has to have letters and logged-in user for other guesses
|
} else { // Has to have letters and logged-in user for other guesses
|
||||||
if user != nil {
|
if user != nil {
|
||||||
// Find username in jobs (match)
|
// Find username by username in job table (match)
|
||||||
uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false)
|
uresult, _ := r.FindColumnValue(user, searchterm, "job", "hpc_user", "hpc_user", false)
|
||||||
if uresult != "" {
|
if uresult != "" {
|
||||||
return "", uresult, "", ""
|
return "", uresult, "", ""
|
||||||
}
|
}
|
||||||
// Find username by name (like)
|
// Find username by real name in hpc_user table (like)
|
||||||
nresult, _ := r.FindColumnValue(user, searchterm, "user", "username", "name", true)
|
nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true)
|
||||||
if nresult != "" {
|
if nresult != "" {
|
||||||
return "", nresult, "", ""
|
return "", nresult, "", ""
|
||||||
}
|
}
|
||||||
// Find projectId in jobs (match)
|
// Find projectId by projectId in job table (match)
|
||||||
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
|
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
|
||||||
if presult != "" {
|
if presult != "" {
|
||||||
return "", "", presult, ""
|
return "", "", presult, ""
|
||||||
@@ -655,7 +391,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
|
|||||||
start := time.Now()
|
start := time.Now()
|
||||||
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
|
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
|
||||||
parts := []string{}
|
parts := []string{}
|
||||||
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
|
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
|
||||||
return nil, 0, 1000
|
return nil, 0, 1000
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -712,6 +448,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
|
|||||||
return subclusters, nil
|
return subclusters, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Set duration to requested walltime?
|
||||||
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
res, err := sq.Update("job").
|
res, err := sq.Update("job").
|
||||||
@@ -740,6 +477,46 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
|
||||||
|
query := sq.Select(jobColumns...).From("job").
|
||||||
|
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
|
||||||
|
Where("job.job_state = 'running'").
|
||||||
|
Where("job.duration > 600")
|
||||||
|
|
||||||
|
rows, err := query.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Error while running query")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs := make([]*schema.Job, 0, 50)
|
||||||
|
for rows.Next() {
|
||||||
|
job, err := scanJob(rows)
|
||||||
|
if err != nil {
|
||||||
|
rows.Close()
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
jobs = append(jobs, job)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof("Return job count %d", len(jobs))
|
||||||
|
return jobs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) UpdateDuration() error {
|
||||||
|
stmnt := sq.Update("job").
|
||||||
|
Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
|
||||||
|
Where("job_state = 'running'")
|
||||||
|
|
||||||
|
_, err := stmnt.RunWith(r.stmtCache).Exec()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
|
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
|
||||||
var query sq.SelectBuilder
|
var query sq.SelectBuilder
|
||||||
|
|
||||||
@@ -778,27 +555,112 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
|
|||||||
return jobs, nil
|
return jobs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
const NamedJobInsert string = `INSERT INTO job (
|
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
||||||
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
|
stmt := sq.Update("job").
|
||||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
|
Set("monitoring_status", monitoringStatus).
|
||||||
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
|
Where("job.id = ?", job)
|
||||||
) VALUES (
|
|
||||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
|
||||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
|
|
||||||
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
|
|
||||||
);`
|
|
||||||
|
|
||||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
return
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while NamedJobInsert")
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
id, err := res.LastInsertId()
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while getting last insert ID")
|
|
||||||
return 0, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return id, nil
|
func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
|
||||||
|
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) MarkArchived(
|
||||||
|
stmt sq.UpdateBuilder,
|
||||||
|
monitoringStatus int32,
|
||||||
|
) sq.UpdateBuilder {
|
||||||
|
return stmt.Set("monitoring_status", monitoringStatus)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) UpdateEnergy(
|
||||||
|
stmt sq.UpdateBuilder,
|
||||||
|
jobMeta *schema.JobMeta,
|
||||||
|
) (sq.UpdateBuilder, error) {
|
||||||
|
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||||
|
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||||
|
return stmt, err
|
||||||
|
}
|
||||||
|
energyFootprint := make(map[string]float64)
|
||||||
|
var totalEnergy float64
|
||||||
|
var energy float64
|
||||||
|
|
||||||
|
for _, fp := range sc.EnergyFootprint {
|
||||||
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||||
|
// Note: For DB data, calculate and save as kWh
|
||||||
|
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
|
||||||
|
// FIXME: Needs sum as stats type
|
||||||
|
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||||
|
// Energy: Power (in Watts) * Time (in Seconds)
|
||||||
|
// Unit: (( W * s ) / 3600) / 1000 = kWh ; Rounded to 2 nearest digits: (Energy * 100) / 100
|
||||||
|
// Here: All-Node Metric Average * Number of Nodes * Job Runtime
|
||||||
|
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
|
||||||
|
metricNodeSum := LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes) * float64(jobMeta.Duration)
|
||||||
|
energy = math.Round(((metricNodeSum/3600)/1000)*100) / 100
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
energyFootprint[fp] = energy
|
||||||
|
totalEnergy += energy
|
||||||
|
}
|
||||||
|
|
||||||
|
var rawFootprint []byte
|
||||||
|
if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
|
||||||
|
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||||
|
return stmt, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) UpdateFootprint(
|
||||||
|
stmt sq.UpdateBuilder,
|
||||||
|
jobMeta *schema.JobMeta,
|
||||||
|
) (sq.UpdateBuilder, error) {
|
||||||
|
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||||
|
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||||
|
return stmt, err
|
||||||
|
}
|
||||||
|
footprint := make(map[string]float64)
|
||||||
|
|
||||||
|
for _, fp := range sc.Footprint {
|
||||||
|
var statType string
|
||||||
|
for _, gm := range archive.GlobalMetricList {
|
||||||
|
if gm.Name == fp {
|
||||||
|
statType = gm.Footprint
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if statType != "avg" && statType != "min" && statType != "max" {
|
||||||
|
log.Warnf("unknown statType for footprint update: %s", statType)
|
||||||
|
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
|
||||||
|
}
|
||||||
|
|
||||||
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||||
|
statType = sc.MetricConfig[i].Footprint
|
||||||
|
}
|
||||||
|
|
||||||
|
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||||
|
footprint[name] = LoadJobStat(jobMeta, fp, statType)
|
||||||
|
}
|
||||||
|
|
||||||
|
var rawFootprint []byte
|
||||||
|
if rawFootprint, err = json.Marshal(footprint); err != nil {
|
||||||
|
log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||||
|
return stmt, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return stmt.Set("footprint", string(rawFootprint)), nil
|
||||||
}
|
}
|
||||||
|
|||||||
75
internal/repository/jobCreate.go
Normal file
75
internal/repository/jobCreate.go
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package repository
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
)
|
||||||
|
|
||||||
|
const NamedJobInsert string = `INSERT INTO job (
|
||||||
|
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||||
|
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||||
|
) VALUES (
|
||||||
|
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||||
|
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||||
|
);`
|
||||||
|
|
||||||
|
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
|
||||||
|
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while NamedJobInsert")
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
id, err := res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while getting last insert ID")
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return id, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start inserts a new job in the table, returning the unique job ID.
|
||||||
|
// Statistics are not transfered!
|
||||||
|
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||||
|
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||||
|
if err != nil {
|
||||||
|
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawResources, err = json.Marshal(job.Resources)
|
||||||
|
if err != nil {
|
||||||
|
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||||
|
if err != nil {
|
||||||
|
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return r.InsertJob(job)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop updates the job with the database id jobId using the provided arguments.
|
||||||
|
func (r *JobRepository) Stop(
|
||||||
|
jobId int64,
|
||||||
|
duration int32,
|
||||||
|
state schema.JobState,
|
||||||
|
monitoringStatus int32,
|
||||||
|
) (err error) {
|
||||||
|
stmt := sq.Update("job").
|
||||||
|
Set("job_state", state).
|
||||||
|
Set("duration", duration).
|
||||||
|
Set("monitoring_status", monitoringStatus).
|
||||||
|
Where("job.id = ?", jobId)
|
||||||
|
|
||||||
|
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||||
|
return
|
||||||
|
}
|
||||||
261
internal/repository/jobFind.go
Normal file
261
internal/repository/jobFind.go
Normal file
@@ -0,0 +1,261 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package repository
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Find executes a SQL query to find a specific batch job.
|
||||||
|
// The job is queried using the batch job id, the cluster name,
|
||||||
|
// and the start time of the job in UNIX epoch time seconds.
|
||||||
|
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||||
|
// To check if no job was found test err == sql.ErrNoRows
|
||||||
|
func (r *JobRepository) Find(
|
||||||
|
jobId *int64,
|
||||||
|
cluster *string,
|
||||||
|
startTime *int64,
|
||||||
|
) (*schema.Job, error) {
|
||||||
|
start := time.Now()
|
||||||
|
q := sq.Select(jobColumns...).From("job").
|
||||||
|
Where("job.job_id = ?", *jobId)
|
||||||
|
|
||||||
|
if cluster != nil {
|
||||||
|
q = q.Where("job.cluster = ?", *cluster)
|
||||||
|
}
|
||||||
|
if startTime != nil {
|
||||||
|
q = q.Where("job.start_time = ?", *startTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
|
||||||
|
|
||||||
|
log.Debugf("Timer Find %s", time.Since(start))
|
||||||
|
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find executes a SQL query to find a specific batch job.
|
||||||
|
// The job is queried using the batch job id, the cluster name,
|
||||||
|
// and the start time of the job in UNIX epoch time seconds.
|
||||||
|
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||||
|
// To check if no job was found test err == sql.ErrNoRows
|
||||||
|
func (r *JobRepository) FindAll(
|
||||||
|
jobId *int64,
|
||||||
|
cluster *string,
|
||||||
|
startTime *int64,
|
||||||
|
) ([]*schema.Job, error) {
|
||||||
|
start := time.Now()
|
||||||
|
q := sq.Select(jobColumns...).From("job").
|
||||||
|
Where("job.job_id = ?", *jobId)
|
||||||
|
|
||||||
|
if cluster != nil {
|
||||||
|
q = q.Where("job.cluster = ?", *cluster)
|
||||||
|
}
|
||||||
|
if startTime != nil {
|
||||||
|
q = q.Where("job.start_time = ?", *startTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := q.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Error while running query")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs := make([]*schema.Job, 0, 10)
|
||||||
|
for rows.Next() {
|
||||||
|
job, err := scanJob(rows)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
jobs = append(jobs, job)
|
||||||
|
}
|
||||||
|
log.Debugf("Timer FindAll %s", time.Since(start))
|
||||||
|
return jobs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindById executes a SQL query to find a specific batch job.
|
||||||
|
// The job is queried using the database id.
|
||||||
|
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||||
|
// To check if no job was found test err == sql.ErrNoRows
|
||||||
|
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
|
||||||
|
q := sq.Select(jobColumns...).
|
||||||
|
From("job").Where("job.id = ?", jobId)
|
||||||
|
|
||||||
|
q, qerr := SecurityCheck(ctx, q)
|
||||||
|
if qerr != nil {
|
||||||
|
return nil, qerr
|
||||||
|
}
|
||||||
|
|
||||||
|
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindByIdWithUser executes a SQL query to find a specific batch job.
|
||||||
|
// The job is queried using the database id. The user is passed directly,
|
||||||
|
// instead as part of the context.
|
||||||
|
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||||
|
// To check if no job was found test err == sql.ErrNoRows
|
||||||
|
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
|
||||||
|
q := sq.Select(jobColumns...).
|
||||||
|
From("job").Where("job.id = ?", jobId)
|
||||||
|
|
||||||
|
q, qerr := SecurityCheckWithUser(user, q)
|
||||||
|
if qerr != nil {
|
||||||
|
return nil, qerr
|
||||||
|
}
|
||||||
|
|
||||||
|
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindByIdDirect executes a SQL query to find a specific batch job.
|
||||||
|
// The job is queried using the database id.
|
||||||
|
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||||
|
// To check if no job was found test err == sql.ErrNoRows
|
||||||
|
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
|
||||||
|
q := sq.Select(jobColumns...).
|
||||||
|
From("job").Where("job.id = ?", jobId)
|
||||||
|
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindByJobId executes a SQL query to find a specific batch job.
|
||||||
|
// The job is queried using the slurm id and the clustername.
|
||||||
|
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||||
|
// To check if no job was found test err == sql.ErrNoRows
|
||||||
|
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
|
||||||
|
q := sq.Select(jobColumns...).
|
||||||
|
From("job").
|
||||||
|
Where("job.job_id = ?", jobId).
|
||||||
|
Where("job.cluster = ?", cluster).
|
||||||
|
Where("job.start_time = ?", startTime)
|
||||||
|
|
||||||
|
q, qerr := SecurityCheck(ctx, q)
|
||||||
|
if qerr != nil {
|
||||||
|
return nil, qerr
|
||||||
|
}
|
||||||
|
|
||||||
|
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsJobOwner executes a SQL query to find a specific batch job.
|
||||||
|
// The job is queried using the slurm id,a username and the cluster.
|
||||||
|
// It returns a bool.
|
||||||
|
// If job was found, user is owner: test err != sql.ErrNoRows
|
||||||
|
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
|
||||||
|
q := sq.Select("id").
|
||||||
|
From("job").
|
||||||
|
Where("job.job_id = ?", jobId).
|
||||||
|
Where("job.hpc_user = ?", user).
|
||||||
|
Where("job.cluster = ?", cluster).
|
||||||
|
Where("job.start_time = ?", startTime)
|
||||||
|
|
||||||
|
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||||
|
return err != sql.ErrNoRows
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindConcurrentJobs returns links to all jobs that ran concurrently with
// the given job: on the same cluster, overlapping in time (with a small
// tolerance window), and sharing the job's first hostname.
//
// It returns a JobLinkResultList containing the matched job links, their
// count, and an accumulated URL query string referencing them, plus an
// error variable. A nil job yields (nil, nil).
func (r *JobRepository) FindConcurrentJobs(
	ctx context.Context,
	job *schema.Job,
) (*model.JobLinkResultList, error) {
	if job == nil {
		return nil, nil
	}

	// Restrict to jobs the context's user may see.
	query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
	if qerr != nil {
		return nil, qerr
	}

	query = query.Where("cluster = ?", job.Cluster)
	var startTime int64
	var stopTime int64

	startTime = job.StartTimeUnix
	// NOTE(review): assumes job.Resources is non-empty; an empty slice
	// would panic here — confirm callers guarantee at least one resource.
	hostname := job.Resources[0].Hostname

	// Running jobs have no recorded duration yet; use "now" as stop time.
	if job.State == schema.JobStateRunning {
		stopTime = time.Now().Unix()
	} else {
		stopTime = startTime + int64(job.Duration)
	}

	// Add 200s overlap for jobs start time at the end
	startTimeTail := startTime + 10
	stopTimeTail := stopTime - 200
	startTimeFront := startTime + 200

	// NOTE: the first Where carries only the placeholder and the second
	// carries its argument ("running") along with its own; squirrel
	// concatenates predicates and arguments in order, so the placeholders
	// still line up in the final SQL. Do not reorder these calls.
	queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
		"running", startTimeTail, stopTimeTail, startTime)
	queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))

	// Same placeholder/argument split as above for the finished-jobs query.
	query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
		"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
	query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))

	// First pass: finished jobs overlapping the window.
	rows, err := query.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Errorf("Error while running query: %v", err)
		return nil, err
	}

	items := make([]*model.JobLink, 0, 10)
	// queryString accumulates URL query parameters linking to the matches.
	queryString := fmt.Sprintf("cluster=%s", job.Cluster)

	// NOTE(review): rows is not Close()d on the early-return paths below
	// and rows.Err() is never checked — consider a follow-up fix.
	for rows.Next() {
		var id, jobId, startTime sql.NullInt64

		if err = rows.Scan(&id, &jobId, &startTime); err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}

		if id.Valid {
			queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
			items = append(items,
				&model.JobLink{
					ID:    fmt.Sprint(id.Int64),
					JobID: int(jobId.Int64),
				})
		}
	}

	// Second pass: still-running jobs overlapping the window.
	rows, err = queryRunning.RunWith(r.stmtCache).Query()
	if err != nil {
		log.Errorf("Error while running query: %v", err)
		return nil, err
	}

	for rows.Next() {
		var id, jobId, startTime sql.NullInt64

		if err := rows.Scan(&id, &jobId, &startTime); err != nil {
			log.Warn("Error while scanning rows")
			return nil, err
		}

		if id.Valid {
			queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
			items = append(items,
				&model.JobLink{
					ID:    fmt.Sprint(id.Int64),
					JobID: int(jobId.Int64),
				})
		}
	}

	cnt := len(items)

	return &model.JobLinkResultList{
		ListQuery: &queryString,
		Items:     items,
		Count:     &cnt,
	}, nil
}
|
||||||
@@ -22,8 +22,8 @@ func (r *JobRepository) QueryJobs(
|
|||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filters []*model.JobFilter,
|
filters []*model.JobFilter,
|
||||||
page *model.PageRequest,
|
page *model.PageRequest,
|
||||||
order *model.OrderByInput) ([]*schema.Job, error) {
|
order *model.OrderByInput,
|
||||||
|
) ([]*schema.Job, error) {
|
||||||
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
|
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
|
||||||
if qerr != nil {
|
if qerr != nil {
|
||||||
return nil, qerr
|
return nil, qerr
|
||||||
@@ -31,14 +31,28 @@ func (r *JobRepository) QueryJobs(
|
|||||||
|
|
||||||
if order != nil {
|
if order != nil {
|
||||||
field := toSnakeCase(order.Field)
|
field := toSnakeCase(order.Field)
|
||||||
|
if order.Type == "col" {
|
||||||
|
// "col": Fixed column name query
|
||||||
switch order.Order {
|
switch order.Order {
|
||||||
case model.SortDirectionEnumAsc:
|
case model.SortDirectionEnumAsc:
|
||||||
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
|
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
|
||||||
case model.SortDirectionEnumDesc:
|
case model.SortDirectionEnumDesc:
|
||||||
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
||||||
default:
|
default:
|
||||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
|
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// "foot": Order by footprint JSON field values
|
||||||
|
// Verify and Search Only in Valid Jsons
|
||||||
|
query = query.Where("JSON_VALID(meta_data)")
|
||||||
|
switch order.Order {
|
||||||
|
case model.SortDirectionEnumAsc:
|
||||||
|
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
|
||||||
|
case model.SortDirectionEnumDesc:
|
||||||
|
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
|
||||||
|
default:
|
||||||
|
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,9 +87,10 @@ func (r *JobRepository) QueryJobs(
|
|||||||
|
|
||||||
func (r *JobRepository) CountJobs(
|
func (r *JobRepository) CountJobs(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filters []*model.JobFilter) (int, error) {
|
filters []*model.JobFilter,
|
||||||
|
) (int, error) {
|
||||||
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
|
// DISTICT count for tags filters, does not affect other queries
|
||||||
|
query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
|
||||||
if qerr != nil {
|
if qerr != nil {
|
||||||
return 0, qerr
|
return 0, qerr
|
||||||
}
|
}
|
||||||
@@ -92,35 +107,43 @@ func (r *JobRepository) CountJobs(
|
|||||||
return count, nil
|
return count, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||||
user := GetUserFromContext(ctx)
|
|
||||||
if user == nil {
|
if user == nil {
|
||||||
var qnil sq.SelectBuilder
|
var qnil sq.SelectBuilder
|
||||||
return qnil, fmt.Errorf("user context is nil")
|
return qnil, fmt.Errorf("user context is nil")
|
||||||
} else if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleApi}) { // Admin & Co. : All jobs
|
}
|
||||||
|
|
||||||
|
switch {
|
||||||
|
case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
|
||||||
return query, nil
|
return query, nil
|
||||||
} else if user.HasRole(schema.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs
|
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
|
||||||
|
return query, nil
|
||||||
|
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
|
||||||
if len(user.Projects) != 0 {
|
if len(user.Projects) != 0 {
|
||||||
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
|
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
|
||||||
} else {
|
} else {
|
||||||
log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
|
log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
|
||||||
return query.Where("job.user = ?", user.Username), nil
|
return query.Where("job.hpc_user = ?", user.Username), nil
|
||||||
}
|
}
|
||||||
} else if user.HasRole(schema.RoleUser) { // User : Only personal jobs
|
case user.HasRole(schema.RoleUser): // User : Only personal jobs
|
||||||
return query.Where("job.user = ?", user.Username), nil
|
return query.Where("job.hpc_user = ?", user.Username), nil
|
||||||
} else {
|
default: // No known Role, return error
|
||||||
// Shortterm compatibility: Return User-Query if no roles:
|
var qnil sq.SelectBuilder
|
||||||
return query.Where("job.user = ?", user.Username), nil
|
return qnil, fmt.Errorf("user has no or unknown roles")
|
||||||
// // On the longterm: Return Error instead of fallback:
|
|
||||||
// var qnil sq.SelectBuilder
|
|
||||||
// return qnil, fmt.Errorf("user '%s' with unknown roles [%#v]", user.Username, user.Roles)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||||
|
user := GetUserFromContext(ctx)
|
||||||
|
|
||||||
|
return SecurityCheckWithUser(user, query)
|
||||||
|
}
|
||||||
|
|
||||||
// Build a sq.SelectBuilder out of a schema.JobFilter.
|
// Build a sq.SelectBuilder out of a schema.JobFilter.
|
||||||
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
||||||
if filter.Tags != nil {
|
if filter.Tags != nil {
|
||||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
|
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
|
||||||
|
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
|
||||||
}
|
}
|
||||||
if filter.JobID != nil {
|
if filter.JobID != nil {
|
||||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||||
@@ -129,7 +152,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
|||||||
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
|
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
|
||||||
}
|
}
|
||||||
if filter.User != nil {
|
if filter.User != nil {
|
||||||
query = buildStringCondition("job.user", filter.User, query)
|
query = buildStringCondition("job.hpc_user", filter.User, query)
|
||||||
}
|
}
|
||||||
if filter.Project != nil {
|
if filter.Project != nil {
|
||||||
query = buildStringCondition("job.project", filter.Project, query)
|
query = buildStringCondition("job.project", filter.Project, query)
|
||||||
@@ -141,19 +164,21 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
|||||||
query = buildStringCondition("job.cluster", filter.Cluster, query)
|
query = buildStringCondition("job.cluster", filter.Cluster, query)
|
||||||
}
|
}
|
||||||
if filter.Partition != nil {
|
if filter.Partition != nil {
|
||||||
query = buildStringCondition("job.partition", filter.Partition, query)
|
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
|
||||||
}
|
}
|
||||||
if filter.StartTime != nil {
|
if filter.StartTime != nil {
|
||||||
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
||||||
}
|
}
|
||||||
if filter.Duration != nil {
|
if filter.Duration != nil {
|
||||||
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
|
query = buildIntCondition("job.duration", filter.Duration, query)
|
||||||
query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
|
|
||||||
}
|
}
|
||||||
if filter.MinRunningFor != nil {
|
if filter.MinRunningFor != nil {
|
||||||
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
|
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
|
||||||
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
|
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
|
||||||
}
|
}
|
||||||
|
if filter.Exclusive != nil {
|
||||||
|
query = query.Where("job.exclusive = ?", *filter.Exclusive)
|
||||||
|
}
|
||||||
if filter.State != nil {
|
if filter.State != nil {
|
||||||
states := make([]string, len(filter.State))
|
states := make([]string, len(filter.State))
|
||||||
for i, val := range filter.State {
|
for i, val := range filter.State {
|
||||||
@@ -174,17 +199,13 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
|||||||
if filter.Node != nil {
|
if filter.Node != nil {
|
||||||
query = buildStringCondition("job.resources", filter.Node, query)
|
query = buildStringCondition("job.resources", filter.Node, query)
|
||||||
}
|
}
|
||||||
if filter.FlopsAnyAvg != nil {
|
if filter.Energy != nil {
|
||||||
query = buildFloatCondition("job.flops_any_avg", filter.FlopsAnyAvg, query)
|
query = buildFloatCondition("job.energy", filter.Energy, query)
|
||||||
}
|
}
|
||||||
if filter.MemBwAvg != nil {
|
if filter.MetricStats != nil {
|
||||||
query = buildFloatCondition("job.mem_bw_avg", filter.MemBwAvg, query)
|
for _, ms := range filter.MetricStats {
|
||||||
|
query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
|
||||||
}
|
}
|
||||||
if filter.LoadAvg != nil {
|
|
||||||
query = buildFloatCondition("job.load_avg", filter.LoadAvg, query)
|
|
||||||
}
|
|
||||||
if filter.MemUsedMax != nil {
|
|
||||||
query = buildFloatCondition("job.mem_used_max", filter.MemUsedMax, query)
|
|
||||||
}
|
}
|
||||||
return query
|
return query
|
||||||
}
|
}
|
||||||
@@ -193,6 +214,10 @@ func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuild
|
|||||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||||
|
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||||
|
}
|
||||||
|
|
||||||
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
|
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||||
if cond.From != nil && cond.To != nil {
|
if cond.From != nil && cond.To != nil {
|
||||||
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
|
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
|
||||||
@@ -200,13 +225,32 @@ func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBui
|
|||||||
return query.Where("? <= "+field, cond.From.Unix())
|
return query.Where("? <= "+field, cond.From.Unix())
|
||||||
} else if cond.To != nil {
|
} else if cond.To != nil {
|
||||||
return query.Where(field+" <= ?", cond.To.Unix())
|
return query.Where(field+" <= ?", cond.To.Unix())
|
||||||
|
} else if cond.Range != "" {
|
||||||
|
now := time.Now().Unix()
|
||||||
|
var then int64
|
||||||
|
switch cond.Range {
|
||||||
|
case "last6h":
|
||||||
|
then = now - (60 * 60 * 6)
|
||||||
|
case "last24h":
|
||||||
|
then = now - (60 * 60 * 24)
|
||||||
|
case "last7d":
|
||||||
|
then = now - (60 * 60 * 24 * 7)
|
||||||
|
case "last30d":
|
||||||
|
then = now - (60 * 60 * 24 * 30)
|
||||||
|
default:
|
||||||
|
log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
|
||||||
|
return query
|
||||||
|
}
|
||||||
|
return query.Where(field+" BETWEEN ? AND ?", then, now)
|
||||||
} else {
|
} else {
|
||||||
return query
|
return query
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
// Verify and Search Only in Valid Jsons
|
||||||
|
query = query.Where("JSON_VALID(footprint)")
|
||||||
|
return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||||
@@ -227,9 +271,7 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
|
|||||||
}
|
}
|
||||||
if cond.In != nil {
|
if cond.In != nil {
|
||||||
queryElements := make([]string, len(cond.In))
|
queryElements := make([]string, len(cond.In))
|
||||||
for i, val := range cond.In {
|
copy(queryElements, cond.In)
|
||||||
queryElements[i] = val
|
|
||||||
}
|
|
||||||
return query.Where(sq.Or{sq.Eq{field: queryElements}})
|
return query.Where(sq.Or{sq.Eq{field: queryElements}})
|
||||||
}
|
}
|
||||||
return query
|
return query
|
||||||
@@ -257,8 +299,10 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.
|
|||||||
return query
|
return query
|
||||||
}
|
}
|
||||||
|
|
||||||
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
var (
|
||||||
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||||
|
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||||
|
)
|
||||||
|
|
||||||
func toSnakeCase(str string) string {
|
func toSnakeCase(str string) string {
|
||||||
for _, c := range str {
|
for _, c := range str {
|
||||||
@@ -5,9 +5,11 @@
|
|||||||
package repository
|
package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
_ "github.com/mattn/go-sqlite3"
|
_ "github.com/mattn/go-sqlite3"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -30,7 +32,7 @@ func TestFind(t *testing.T) {
|
|||||||
func TestFindById(t *testing.T) {
|
func TestFindById(t *testing.T) {
|
||||||
r := setup(t)
|
r := setup(t)
|
||||||
|
|
||||||
job, err := r.FindById(5)
|
job, err := r.FindById(getContext(t), 5)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -45,7 +47,19 @@ func TestFindById(t *testing.T) {
|
|||||||
func TestGetTags(t *testing.T) {
|
func TestGetTags(t *testing.T) {
|
||||||
r := setup(t)
|
r := setup(t)
|
||||||
|
|
||||||
tags, counts, err := r.CountTags(nil)
|
const contextUserKey ContextKey = "user"
|
||||||
|
contextUserValue := &schema.User{
|
||||||
|
Username: "testuser",
|
||||||
|
Projects: make([]string, 0),
|
||||||
|
Roles: []string{"user"},
|
||||||
|
AuthType: 0,
|
||||||
|
AuthSource: 2,
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
// Test Tag has Scope "global"
|
||||||
|
tags, counts, err := r.CountTags(GetUserFromContext(ctx))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ import (
|
|||||||
"github.com/golang-migrate/migrate/v4/source/iofs"
|
"github.com/golang-migrate/migrate/v4/source/iofs"
|
||||||
)
|
)
|
||||||
|
|
||||||
const Version uint = 7
|
const Version uint = 8
|
||||||
|
|
||||||
//go:embed migrations/*
|
//go:embed migrations/*
|
||||||
var migrationFiles embed.FS
|
var migrationFiles embed.FS
|
||||||
@@ -114,6 +114,14 @@ func MigrateDB(backend string, db string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
v, dirty, err := m.Version()
|
||||||
|
|
||||||
|
log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
|
||||||
|
|
||||||
|
if dirty {
|
||||||
|
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
|
||||||
|
}
|
||||||
|
|
||||||
if err := m.Up(); err != nil {
|
if err := m.Up(); err != nil {
|
||||||
if err == migrate.ErrNoChange {
|
if err == migrate.ErrNoChange {
|
||||||
log.Info("DB already up to date!")
|
log.Info("DB already up to date!")
|
||||||
|
|||||||
@@ -0,0 +1,83 @@
|
|||||||
|
ALTER TABLE job DROP energy;
|
||||||
|
ALTER TABLE job DROP energy_footprint;
|
||||||
|
ALTER TABLE job ADD COLUMN flops_any_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN mem_bw_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN mem_used_max;
|
||||||
|
ALTER TABLE job ADD COLUMN load_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN net_bw_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN net_data_vol_total;
|
||||||
|
ALTER TABLE job ADD COLUMN file_bw_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN file_data_vol_total;
|
||||||
|
|
||||||
|
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
|
||||||
|
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
|
||||||
|
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
|
||||||
|
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
|
||||||
|
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
|
||||||
|
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
|
||||||
|
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
|
||||||
|
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
|
||||||
|
|
||||||
|
ALTER TABLE job DROP footprint;
|
||||||
|
-- Do not use reserved keywords anymore
|
||||||
|
RENAME TABLE hpc_user TO `user`;
|
||||||
|
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
|
||||||
|
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_subcluster;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_numnodes;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_duration_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_numnodes_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_numacc_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_energy_starttime;
|
||||||
123
internal/repository/migrations/mysql/08_add-footprint.up.sql
Normal file
123
internal/repository/migrations/mysql/08_add-footprint.up.sql
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
DROP INDEX IF EXISTS job_stats ON job;
|
||||||
|
DROP INDEX IF EXISTS job_by_user ON job;
|
||||||
|
DROP INDEX IF EXISTS job_by_starttime ON job;
|
||||||
|
DROP INDEX IF EXISTS job_by_job_id ON job;
|
||||||
|
DROP INDEX IF EXISTS job_list ON job;
|
||||||
|
DROP INDEX IF EXISTS job_list_user ON job;
|
||||||
|
DROP INDEX IF EXISTS job_list_users ON job;
|
||||||
|
DROP INDEX IF EXISTS job_list_users_start ON job;
|
||||||
|
|
||||||
|
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
|
||||||
|
ALTER TABLE job ADD COLUMN energy_footprint JSON;
|
||||||
|
|
||||||
|
ALTER TABLE job ADD COLUMN footprint JSON;
|
||||||
|
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
|
||||||
|
|
||||||
|
-- Do not use reserved keywords anymore
|
||||||
|
RENAME TABLE `user` TO hpc_user;
|
||||||
|
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
|
||||||
|
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
|
||||||
|
|
||||||
|
ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
|
||||||
|
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
|
||||||
|
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
|
||||||
|
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
|
||||||
|
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
|
||||||
|
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
|
||||||
|
|
||||||
|
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
|
||||||
|
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
|
||||||
|
|
||||||
|
ALTER TABLE job DROP flops_any_avg;
|
||||||
|
ALTER TABLE job DROP mem_bw_avg;
|
||||||
|
ALTER TABLE job DROP mem_used_max;
|
||||||
|
ALTER TABLE job DROP load_avg;
|
||||||
|
ALTER TABLE job DROP net_bw_avg;
|
||||||
|
ALTER TABLE job DROP net_data_vol_total;
|
||||||
|
ALTER TABLE job DROP file_bw_avg;
|
||||||
|
ALTER TABLE job DROP file_data_vol_total;
|
||||||
|
|
||||||
|
-- Indices for: Single filters, combined filters, sorting, sorting with filters
|
||||||
|
-- Cluster Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||||
|
-- Cluster Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||||
|
|
||||||
|
-- Cluster+Partition Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
|
||||||
|
-- Cluster+Partition Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||||
|
|
||||||
|
-- Cluster+Partition+Jobstate Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
|
||||||
|
-- Cluster+Partition+Jobstate Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
|
||||||
|
|
||||||
|
-- Cluster+JobState Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||||
|
-- Cluster+JobState Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||||
|
|
||||||
|
-- User Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
|
||||||
|
-- User Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||||
|
|
||||||
|
-- Project Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||||
|
-- Project Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||||
|
|
||||||
|
-- JobState Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||||
|
-- JobState Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||||
|
|
||||||
|
-- ArrayJob Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||||
|
|
||||||
|
-- Sorting without active filters
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||||
|
|
||||||
|
-- Single filters with default starttime sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||||
|
|
||||||
|
-- Optimize DB index usage
|
||||||
103
internal/repository/migrations/sqlite3/08_add-footprint.down.sql
Normal file
103
internal/repository/migrations/sqlite3/08_add-footprint.down.sql
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
ALTER TABLE job DROP energy;
|
||||||
|
ALTER TABLE job DROP energy_footprint;
|
||||||
|
ALTER TABLE job ADD COLUMN flops_any_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN mem_bw_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN mem_used_max;
|
||||||
|
ALTER TABLE job ADD COLUMN load_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN net_bw_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN net_data_vol_total;
|
||||||
|
ALTER TABLE job ADD COLUMN file_bw_avg;
|
||||||
|
ALTER TABLE job ADD COLUMN file_data_vol_total;
|
||||||
|
|
||||||
|
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
|
||||||
|
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
|
||||||
|
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
|
||||||
|
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
|
||||||
|
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
|
||||||
|
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
|
||||||
|
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
|
||||||
|
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
|
||||||
|
|
||||||
|
ALTER TABLE job DROP footprint;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_subcluster;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_numacc;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_energy;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_numacc;
|
||||||
|
DROP INDEX IF EXISTS jobs_user_energy;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_numacc;
|
||||||
|
DROP INDEX IF EXISTS jobs_project_energy;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_user;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_project;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_jobstate_numacc;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_duration;
|
||||||
|
DROP INDEX IF EXISTS jobs_numnodes;
|
||||||
|
DROP INDEX IF EXISTS jobs_numhwthreads;
|
||||||
|
DROP INDEX IF EXISTS jobs_numacc;
|
||||||
|
DROP INDEX IF EXISTS jobs_energy;
|
||||||
|
|
||||||
|
DROP INDEX IF EXISTS jobs_duration_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_numnodes_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_numhwthreads_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_numacc_starttime;
|
||||||
|
DROP INDEX IF EXISTS jobs_energy_starttime;
|
||||||
142
internal/repository/migrations/sqlite3/08_add-footprint.up.sql
Normal file
142
internal/repository/migrations/sqlite3/08_add-footprint.up.sql
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
DROP INDEX IF EXISTS job_stats;
|
||||||
|
DROP INDEX IF EXISTS job_by_user;
|
||||||
|
DROP INDEX IF EXISTS job_by_starttime;
|
||||||
|
DROP INDEX IF EXISTS job_by_job_id;
|
||||||
|
DROP INDEX IF EXISTS job_list;
|
||||||
|
DROP INDEX IF EXISTS job_list_user;
|
||||||
|
DROP INDEX IF EXISTS job_list_users;
|
||||||
|
DROP INDEX IF EXISTS job_list_users_start;
|
||||||
|
|
||||||
|
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
|
||||||
|
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
|
||||||
|
|
||||||
|
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
|
||||||
|
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
|
||||||
|
|
||||||
|
-- Do not use reserved keywords anymore
|
||||||
|
ALTER TABLE "user" RENAME TO hpc_user;
|
||||||
|
ALTER TABLE job RENAME COLUMN "user" TO hpc_user;
|
||||||
|
ALTER TABLE job RENAME COLUMN "partition" TO cluster_partition;
|
||||||
|
|
||||||
|
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
|
||||||
|
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
|
||||||
|
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
|
||||||
|
|
||||||
|
ALTER TABLE job DROP flops_any_avg;
|
||||||
|
ALTER TABLE job DROP mem_bw_avg;
|
||||||
|
ALTER TABLE job DROP mem_used_max;
|
||||||
|
ALTER TABLE job DROP load_avg;
|
||||||
|
ALTER TABLE job DROP net_bw_avg;
|
||||||
|
ALTER TABLE job DROP net_data_vol_total;
|
||||||
|
ALTER TABLE job DROP file_bw_avg;
|
||||||
|
ALTER TABLE job DROP file_data_vol_total;
|
||||||
|
|
||||||
|
-- Indices for: Single filters, combined filters, sorting, sorting with filters
|
||||||
|
-- Cluster Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||||
|
-- Cluster Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
|
||||||
|
|
||||||
|
-- Cluster+Partition Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
|
||||||
|
-- Cluster+Partition Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
|
||||||
|
|
||||||
|
-- Cluster+Partition+Jobstate Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
|
||||||
|
-- Cluster+Partition+Jobstate Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
|
||||||
|
|
||||||
|
-- Cluster+JobState Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||||
|
-- Cluster+JobState Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
|
||||||
|
|
||||||
|
-- User Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
|
||||||
|
-- User Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
|
||||||
|
|
||||||
|
-- Project Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||||
|
-- Project Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
|
||||||
|
|
||||||
|
-- JobState Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||||
|
-- JobState Filter Sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
|
||||||
|
|
||||||
|
-- ArrayJob Filter
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||||
|
|
||||||
|
-- Sorting without active filters
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
|
||||||
|
|
||||||
|
-- Single filters with default starttime sorting
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||||
|
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||||
|
|
||||||
|
-- Optimize DB index usage
|
||||||
|
PRAGMA optimize;
|
||||||
@@ -55,7 +55,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
|
|||||||
|
|
||||||
b.RunParallel(func(pb *testing.PB) {
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
for pb.Next() {
|
for pb.Next() {
|
||||||
_, err := db.FindById(jobId)
|
_, err := db.FindById(getContext(b), jobId)
|
||||||
noErr(b, err)
|
noErr(b, err)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -111,7 +111,7 @@ func BenchmarkDB_QueryJobs(b *testing.B) {
|
|||||||
user := "mppi133h"
|
user := "mppi133h"
|
||||||
filter.User = &model.StringInput{Eq: &user}
|
filter.User = &model.StringInput{Eq: &user}
|
||||||
page := &model.PageRequest{ItemsPerPage: 50, Page: 1}
|
page := &model.PageRequest{ItemsPerPage: 50, Page: 1}
|
||||||
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
|
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
|
||||||
|
|
||||||
b.Run("QueryJobs", func(b *testing.B) {
|
b.Run("QueryJobs", func(b *testing.B) {
|
||||||
db := setup(b)
|
db := setup(b)
|
||||||
|
|||||||
@@ -8,12 +8,11 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
@@ -22,7 +21,7 @@ import (
|
|||||||
|
|
||||||
// GraphQL validation should make sure that no unkown values can be specified.
|
// GraphQL validation should make sure that no unkown values can be specified.
|
||||||
var groupBy2column = map[model.Aggregate]string{
|
var groupBy2column = map[model.Aggregate]string{
|
||||||
model.AggregateUser: "job.user",
|
model.AggregateUser: "job.hpc_user",
|
||||||
model.AggregateProject: "job.project",
|
model.AggregateProject: "job.project",
|
||||||
model.AggregateCluster: "job.cluster",
|
model.AggregateCluster: "job.cluster",
|
||||||
}
|
}
|
||||||
@@ -41,8 +40,8 @@ var sortBy2column = map[model.SortByAggregate]string{
|
|||||||
func (r *JobRepository) buildCountQuery(
|
func (r *JobRepository) buildCountQuery(
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
kind string,
|
kind string,
|
||||||
col string) sq.SelectBuilder {
|
col string,
|
||||||
|
) sq.SelectBuilder {
|
||||||
var query sq.SelectBuilder
|
var query sq.SelectBuilder
|
||||||
|
|
||||||
if col != "" {
|
if col != "" {
|
||||||
@@ -69,16 +68,16 @@ func (r *JobRepository) buildCountQuery(
|
|||||||
|
|
||||||
func (r *JobRepository) buildStatsQuery(
|
func (r *JobRepository) buildStatsQuery(
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
col string) sq.SelectBuilder {
|
col string,
|
||||||
|
) sq.SelectBuilder {
|
||||||
var query sq.SelectBuilder
|
var query sq.SelectBuilder
|
||||||
castType := r.getCastType()
|
castType := r.getCastType()
|
||||||
|
|
||||||
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
||||||
|
|
||||||
if col != "" {
|
if col != "" {
|
||||||
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||||
query = sq.Select(col, "COUNT(job.id) as totalJobs",
|
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
|
||||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
|
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
|
||||||
@@ -86,10 +85,9 @@ func (r *JobRepository) buildStatsQuery(
|
|||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
|
||||||
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
|
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
|
||||||
).From("job").GroupBy(col)
|
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||||
query = sq.Select("COUNT(job.id)",
|
query = sq.Select("COUNT(job.id)",
|
||||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
|
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
|
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
|
||||||
@@ -108,15 +106,15 @@ func (r *JobRepository) buildStatsQuery(
|
|||||||
return query
|
return query
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *JobRepository) getUserName(ctx context.Context, id string) string {
|
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
|
||||||
user := GetUserFromContext(ctx)
|
// user := GetUserFromContext(ctx)
|
||||||
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
|
// name, _ := r.FindColumnValue(user, id, "hpc_user", "name", "username", false)
|
||||||
if name != "" {
|
// if name != "" {
|
||||||
return name
|
// return name
|
||||||
} else {
|
// } else {
|
||||||
return "-"
|
// return "-"
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
func (r *JobRepository) getCastType() string {
|
func (r *JobRepository) getCastType() string {
|
||||||
var castType string
|
var castType string
|
||||||
@@ -138,8 +136,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
page *model.PageRequest,
|
page *model.PageRequest,
|
||||||
sortBy *model.SortByAggregate,
|
sortBy *model.SortByAggregate,
|
||||||
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
groupBy *model.Aggregate,
|
||||||
|
) ([]*model.JobsStatistics, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
col := groupBy2column[*groupBy]
|
col := groupBy2column[*groupBy]
|
||||||
query := r.buildStatsQuery(filter, col)
|
query := r.buildStatsQuery(filter, col)
|
||||||
@@ -168,14 +166,20 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
|
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var id sql.NullString
|
var id sql.NullString
|
||||||
|
var name sql.NullString
|
||||||
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
||||||
if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
||||||
log.Warn("Error while scanning rows")
|
log.Warn("Error while scanning rows")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if id.Valid {
|
if id.Valid {
|
||||||
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
|
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
|
||||||
|
var personName string
|
||||||
|
|
||||||
|
if name.Valid {
|
||||||
|
personName = name.String
|
||||||
|
}
|
||||||
|
|
||||||
if jobs.Valid {
|
if jobs.Valid {
|
||||||
totalJobs = int(jobs.Int64)
|
totalJobs = int(jobs.Int64)
|
||||||
@@ -205,12 +209,12 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
totalAccHours = int(accHours.Int64)
|
totalAccHours = int(accHours.Int64)
|
||||||
}
|
}
|
||||||
|
|
||||||
if col == "job.user" {
|
if col == "job.hpc_user" {
|
||||||
name := r.getUserName(ctx, id.String)
|
// name := r.getUserName(ctx, id.String)
|
||||||
stats = append(stats,
|
stats = append(stats,
|
||||||
&model.JobsStatistics{
|
&model.JobsStatistics{
|
||||||
ID: id.String,
|
ID: id.String,
|
||||||
Name: name,
|
Name: personName,
|
||||||
TotalJobs: totalJobs,
|
TotalJobs: totalJobs,
|
||||||
TotalWalltime: totalWalltime,
|
TotalWalltime: totalWalltime,
|
||||||
TotalNodes: totalNodes,
|
TotalNodes: totalNodes,
|
||||||
@@ -218,7 +222,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
TotalCores: totalCores,
|
TotalCores: totalCores,
|
||||||
TotalCoreHours: totalCoreHours,
|
TotalCoreHours: totalCoreHours,
|
||||||
TotalAccs: totalAccs,
|
TotalAccs: totalAccs,
|
||||||
TotalAccHours: totalAccHours})
|
TotalAccHours: totalAccHours,
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
stats = append(stats,
|
stats = append(stats,
|
||||||
&model.JobsStatistics{
|
&model.JobsStatistics{
|
||||||
@@ -230,7 +235,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
TotalCores: totalCores,
|
TotalCores: totalCores,
|
||||||
TotalCoreHours: totalCoreHours,
|
TotalCoreHours: totalCoreHours,
|
||||||
TotalAccs: totalAccs,
|
TotalAccs: totalAccs,
|
||||||
TotalAccHours: totalAccHours})
|
TotalAccHours: totalAccHours,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -241,8 +247,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
|||||||
|
|
||||||
func (r *JobRepository) JobsStats(
|
func (r *JobRepository) JobsStats(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filter []*model.JobFilter) ([]*model.JobsStatistics, error) {
|
filter []*model.JobFilter,
|
||||||
|
) ([]*model.JobsStatistics, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
query := r.buildStatsQuery(filter, "")
|
query := r.buildStatsQuery(filter, "")
|
||||||
query, err := SecurityCheck(ctx, query)
|
query, err := SecurityCheck(ctx, query)
|
||||||
@@ -277,18 +283,36 @@ func (r *JobRepository) JobsStats(
|
|||||||
TotalWalltime: int(walltime.Int64),
|
TotalWalltime: int(walltime.Int64),
|
||||||
TotalNodeHours: totalNodeHours,
|
TotalNodeHours: totalNodeHours,
|
||||||
TotalCoreHours: totalCoreHours,
|
TotalCoreHours: totalCoreHours,
|
||||||
TotalAccHours: totalAccHours})
|
TotalAccHours: totalAccHours,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debugf("Timer JobStats %s", time.Since(start))
|
log.Debugf("Timer JobStats %s", time.Since(start))
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
|
||||||
|
if stats, ok := job.Statistics[metric]; ok {
|
||||||
|
switch statType {
|
||||||
|
case "avg":
|
||||||
|
return stats.Avg
|
||||||
|
case "max":
|
||||||
|
return stats.Max
|
||||||
|
case "min":
|
||||||
|
return stats.Min
|
||||||
|
default:
|
||||||
|
log.Errorf("Unknown stat type %s", statType)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
|
||||||
func (r *JobRepository) JobCountGrouped(
|
func (r *JobRepository) JobCountGrouped(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
groupBy *model.Aggregate,
|
||||||
|
) ([]*model.JobsStatistics, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
col := groupBy2column[*groupBy]
|
col := groupBy2column[*groupBy]
|
||||||
query := r.buildCountQuery(filter, "", col)
|
query := r.buildCountQuery(filter, "", col)
|
||||||
@@ -315,7 +339,8 @@ func (r *JobRepository) JobCountGrouped(
|
|||||||
stats = append(stats,
|
stats = append(stats,
|
||||||
&model.JobsStatistics{
|
&model.JobsStatistics{
|
||||||
ID: id.String,
|
ID: id.String,
|
||||||
TotalJobs: int(cnt.Int64)})
|
TotalJobs: int(cnt.Int64),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -328,8 +353,8 @@ func (r *JobRepository) AddJobCountGrouped(
|
|||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
groupBy *model.Aggregate,
|
groupBy *model.Aggregate,
|
||||||
stats []*model.JobsStatistics,
|
stats []*model.JobsStatistics,
|
||||||
kind string) ([]*model.JobsStatistics, error) {
|
kind string,
|
||||||
|
) ([]*model.JobsStatistics, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
col := groupBy2column[*groupBy]
|
col := groupBy2column[*groupBy]
|
||||||
query := r.buildCountQuery(filter, kind, col)
|
query := r.buildCountQuery(filter, kind, col)
|
||||||
@@ -376,8 +401,8 @@ func (r *JobRepository) AddJobCount(
|
|||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
stats []*model.JobsStatistics,
|
stats []*model.JobsStatistics,
|
||||||
kind string) ([]*model.JobsStatistics, error) {
|
kind string,
|
||||||
|
) ([]*model.JobsStatistics, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
query := r.buildCountQuery(filter, kind, "")
|
query := r.buildCountQuery(filter, kind, "")
|
||||||
query, err := SecurityCheck(ctx, query)
|
query, err := SecurityCheck(ctx, query)
|
||||||
@@ -420,15 +445,41 @@ func (r *JobRepository) AddJobCount(
|
|||||||
func (r *JobRepository) AddHistograms(
|
func (r *JobRepository) AddHistograms(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
|
stat *model.JobsStatistics,
|
||||||
|
durationBins *string,
|
||||||
|
) (*model.JobsStatistics, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
|
var targetBinCount int
|
||||||
|
var targetBinSize int
|
||||||
|
switch {
|
||||||
|
case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
|
||||||
|
targetBinCount = 60
|
||||||
|
targetBinSize = 60
|
||||||
|
case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
|
||||||
|
targetBinCount = 72
|
||||||
|
targetBinSize = 600
|
||||||
|
case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
|
||||||
|
targetBinCount = 48
|
||||||
|
targetBinSize = 3600
|
||||||
|
case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
|
||||||
|
targetBinCount = 12
|
||||||
|
targetBinSize = 21600
|
||||||
|
case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
|
||||||
|
targetBinCount = 14
|
||||||
|
targetBinSize = 43200
|
||||||
|
default: // 24h
|
||||||
|
targetBinCount = 24
|
||||||
|
targetBinSize = 3600
|
||||||
|
}
|
||||||
|
|
||||||
castType := r.getCastType()
|
castType := r.getCastType()
|
||||||
var err error
|
var err error
|
||||||
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
|
||||||
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter)
|
value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType)
|
||||||
|
stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading job statistics histogram: running jobs")
|
log.Warn("Error while loading job statistics histogram: job duration")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -459,14 +510,16 @@ func (r *JobRepository) AddMetricHistograms(
|
|||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
|
stat *model.JobsStatistics,
|
||||||
|
targetBinCount *int,
|
||||||
|
) (*model.JobsStatistics, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
|
// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
|
||||||
for _, f := range filter {
|
for _, f := range filter {
|
||||||
if f.State != nil {
|
if f.State != nil {
|
||||||
if len(f.State) == 1 && f.State[0] == "running" {
|
if len(f.State) == 1 && f.State[0] == "running" {
|
||||||
stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter)
|
stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter, targetBinCount)
|
||||||
log.Debugf("Timer AddMetricHistograms %s", time.Since(start))
|
log.Debugf("Timer AddMetricHistograms %s", time.Since(start))
|
||||||
return stat, nil
|
return stat, nil
|
||||||
}
|
}
|
||||||
@@ -475,7 +528,7 @@ func (r *JobRepository) AddMetricHistograms(
|
|||||||
|
|
||||||
// All other cases: Query and make bins in sqlite directly
|
// All other cases: Query and make bins in sqlite directly
|
||||||
for _, m := range metrics {
|
for _, m := range metrics {
|
||||||
metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter)
|
metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter, targetBinCount)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("Error while loading job metric statistics histogram: %s", m)
|
log.Warnf("Error while loading job metric statistics histogram: %s", m)
|
||||||
continue
|
continue
|
||||||
@@ -491,8 +544,8 @@ func (r *JobRepository) AddMetricHistograms(
|
|||||||
func (r *JobRepository) jobsStatisticsHistogram(
|
func (r *JobRepository) jobsStatisticsHistogram(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
value string,
|
value string,
|
||||||
filters []*model.JobFilter) ([]*model.HistoPoint, error) {
|
filters []*model.JobFilter,
|
||||||
|
) ([]*model.HistoPoint, error) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
query, qerr := SecurityCheck(ctx,
|
query, qerr := SecurityCheck(ctx,
|
||||||
sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
||||||
@@ -512,6 +565,7 @@ func (r *JobRepository) jobsStatisticsHistogram(
|
|||||||
}
|
}
|
||||||
|
|
||||||
points := make([]*model.HistoPoint, 0)
|
points := make([]*model.HistoPoint, 0)
|
||||||
|
// is it possible to introduce zero values here? requires info about bincount
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
point := model.HistoPoint{}
|
point := model.HistoPoint{}
|
||||||
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
||||||
@@ -525,39 +579,79 @@ func (r *JobRepository) jobsStatisticsHistogram(
|
|||||||
return points, nil
|
return points, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||||
|
ctx context.Context,
|
||||||
|
value string,
|
||||||
|
filters []*model.JobFilter,
|
||||||
|
binSizeSeconds int,
|
||||||
|
targetBinCount *int,
|
||||||
|
) ([]*model.HistoPoint, error) {
|
||||||
|
start := time.Now()
|
||||||
|
query, qerr := SecurityCheck(ctx,
|
||||||
|
sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
||||||
|
|
||||||
|
if qerr != nil {
|
||||||
|
return nil, qerr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setup Array
|
||||||
|
points := make([]*model.HistoPoint, 0)
|
||||||
|
for i := 1; i <= *targetBinCount; i++ {
|
||||||
|
point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
|
||||||
|
points = append(points, &point)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range filters {
|
||||||
|
query = BuildWhereClause(f, query)
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
|
||||||
|
if err != nil {
|
||||||
|
log.Error("Error while running query")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill Array at matching $Value
|
||||||
|
for rows.Next() {
|
||||||
|
point := model.HistoPoint{}
|
||||||
|
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, e := range points {
|
||||||
|
if e.Value == (point.Value * binSizeSeconds) {
|
||||||
|
// Note:
|
||||||
|
// Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
|
||||||
|
// causes frontend to loop into highest targetBinCount, due to zoom condition instantly being fullfilled (cause unknown)
|
||||||
|
e.Count = point.Count
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
|
||||||
|
return points, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (r *JobRepository) jobsMetricStatisticsHistogram(
|
func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
metric string,
|
metric string,
|
||||||
filters []*model.JobFilter) (*model.MetricHistoPoints, error) {
|
filters []*model.JobFilter,
|
||||||
|
bins *int,
|
||||||
var dbMetric string
|
) (*model.MetricHistoPoints, error) {
|
||||||
switch metric {
|
|
||||||
case "cpu_load":
|
|
||||||
dbMetric = "load_avg"
|
|
||||||
case "flops_any":
|
|
||||||
dbMetric = "flops_any_avg"
|
|
||||||
case "mem_bw":
|
|
||||||
dbMetric = "mem_bw_avg"
|
|
||||||
case "mem_used":
|
|
||||||
dbMetric = "mem_used_max"
|
|
||||||
case "net_bw":
|
|
||||||
dbMetric = "net_bw_avg"
|
|
||||||
case "file_bw":
|
|
||||||
dbMetric = "file_bw_avg"
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("%s not implemented", metric)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get specific Peak or largest Peak
|
// Get specific Peak or largest Peak
|
||||||
var metricConfig *schema.MetricConfig
|
var metricConfig *schema.MetricConfig
|
||||||
var peak float64 = 0.0
|
var peak float64
|
||||||
var unit string = ""
|
var unit string
|
||||||
|
var footprintStat string
|
||||||
|
|
||||||
for _, f := range filters {
|
for _, f := range filters {
|
||||||
if f.Cluster != nil {
|
if f.Cluster != nil {
|
||||||
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
||||||
peak = metricConfig.Peak
|
peak = metricConfig.Peak
|
||||||
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
|
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
|
||||||
|
footprintStat = metricConfig.Footprint
|
||||||
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
|
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -572,23 +666,29 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
|||||||
if unit == "" {
|
if unit == "" {
|
||||||
unit = m.Unit.Prefix + m.Unit.Base
|
unit = m.Unit.Prefix + m.Unit.Base
|
||||||
}
|
}
|
||||||
|
if footprintStat == "" {
|
||||||
|
footprintStat = m.Footprint
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// log.Debugf("Metric %s: DB %s, Peak %f, Unit %s", metric, dbMetric, peak, unit)
|
// log.Debugf("Metric %s, Peak %f, Unit %s, Aggregation %s", metric, peak, unit, aggreg)
|
||||||
// Make bins, see https://jereze.com/code/sql-histogram/
|
// Make bins, see https://jereze.com/code/sql-histogram/
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
jm := fmt.Sprintf(`json_extract(footprint, "$.%s")`, (metric + "_" + footprintStat))
|
||||||
|
|
||||||
crossJoinQuery := sq.Select(
|
crossJoinQuery := sq.Select(
|
||||||
fmt.Sprintf(`max(%s) as max`, dbMetric),
|
fmt.Sprintf(`max(%s) as max`, jm),
|
||||||
fmt.Sprintf(`min(%s) as min`, dbMetric),
|
fmt.Sprintf(`min(%s) as min`, jm),
|
||||||
).From("job").Where(
|
).From("job").Where(
|
||||||
fmt.Sprintf(`%s is not null`, dbMetric),
|
"JSON_VALID(footprint)",
|
||||||
).Where(
|
).Where(
|
||||||
fmt.Sprintf(`%s <= %f`, dbMetric, peak),
|
fmt.Sprintf(`%s is not null`, jm),
|
||||||
|
).Where(
|
||||||
|
fmt.Sprintf(`%s <= %f`, jm, peak),
|
||||||
)
|
)
|
||||||
|
|
||||||
crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery)
|
crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery)
|
||||||
@@ -606,17 +706,18 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
|||||||
return nil, sqlerr
|
return nil, sqlerr
|
||||||
}
|
}
|
||||||
|
|
||||||
bins := 10
|
binQuery := fmt.Sprintf(`CAST( (case when %s = value.max
|
||||||
binQuery := fmt.Sprintf(`CAST( (case when job.%s = value.max then value.max*0.999999999 else job.%s end - value.min) / (value.max - value.min) * %d as INTEGER )`, dbMetric, dbMetric, bins)
|
then value.max*0.999999999 else %s end - value.min) / (value.max -
|
||||||
|
value.min) * %v as INTEGER )`, jm, jm, *bins)
|
||||||
|
|
||||||
mainQuery := sq.Select(
|
mainQuery := sq.Select(
|
||||||
fmt.Sprintf(`%s + 1 as bin`, binQuery),
|
fmt.Sprintf(`%s + 1 as bin`, binQuery),
|
||||||
fmt.Sprintf(`count(job.%s) as count`, dbMetric),
|
fmt.Sprintf(`count(%s) as count`, jm),
|
||||||
fmt.Sprintf(`CAST(((value.max / %d) * (%s )) as INTEGER ) as min`, bins, binQuery),
|
fmt.Sprintf(`CAST(((value.max / %d) * (%v )) as INTEGER ) as min`, *bins, binQuery),
|
||||||
fmt.Sprintf(`CAST(((value.max / %d) * (%s + 1 )) as INTEGER ) as max`, bins, binQuery),
|
fmt.Sprintf(`CAST(((value.max / %d) * (%v + 1 )) as INTEGER ) as max`, *bins, binQuery),
|
||||||
).From("job").CrossJoin(
|
).From("job").CrossJoin(
|
||||||
fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), crossJoinQueryArgs...,
|
fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), crossJoinQueryArgs...,
|
||||||
).Where(fmt.Sprintf(`job.%s is not null and job.%s <= %f`, dbMetric, dbMetric, peak))
|
).Where(fmt.Sprintf(`%s is not null and %s <= %f`, jm, jm, peak))
|
||||||
|
|
||||||
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
|
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
|
||||||
|
|
||||||
@@ -637,18 +738,39 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Setup Array
|
||||||
points := make([]*model.MetricHistoPoint, 0)
|
points := make([]*model.MetricHistoPoint, 0)
|
||||||
for rows.Next() {
|
for i := 1; i <= *bins; i++ {
|
||||||
point := model.MetricHistoPoint{}
|
binMax := ((int(peak) / *bins) * i)
|
||||||
if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil {
|
binMin := ((int(peak) / *bins) * (i - 1))
|
||||||
log.Warnf("Error while scanning rows for %s", metric)
|
point := model.MetricHistoPoint{Bin: &i, Count: 0, Min: &binMin, Max: &binMax}
|
||||||
return nil, err // Totally bricks cc-backend if returned and if all metrics requested?
|
|
||||||
}
|
|
||||||
|
|
||||||
points = append(points, &point)
|
points = append(points, &point)
|
||||||
}
|
}
|
||||||
|
|
||||||
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Data: points}
|
for rows.Next() {
|
||||||
|
point := model.MetricHistoPoint{}
|
||||||
|
if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil {
|
||||||
|
log.Warnf("Error while scanning rows for %s", jm)
|
||||||
|
return nil, err // Totally bricks cc-backend if returned and if all metrics requested?
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, e := range points {
|
||||||
|
if e.Bin != nil && point.Bin != nil {
|
||||||
|
if *e.Bin == *point.Bin {
|
||||||
|
e.Count = point.Count
|
||||||
|
if point.Min != nil {
|
||||||
|
e.Min = point.Min
|
||||||
|
}
|
||||||
|
if point.Max != nil {
|
||||||
|
e.Max = point.Max
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Stat: &footprintStat, Data: points}
|
||||||
|
|
||||||
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
|
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
|
||||||
return &result, nil
|
return &result, nil
|
||||||
@@ -657,7 +779,9 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
|||||||
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
metrics []string,
|
metrics []string,
|
||||||
filters []*model.JobFilter) []*model.MetricHistoPoints {
|
filters []*model.JobFilter,
|
||||||
|
bins *int,
|
||||||
|
) []*model.MetricHistoPoints {
|
||||||
|
|
||||||
// Get Jobs
|
// Get Jobs
|
||||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
||||||
@@ -681,7 +805,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||||
log.Errorf("Error while loading averages for histogram: %s", err)
|
log.Errorf("Error while loading averages for histogram: %s", err)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -692,15 +816,14 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
|||||||
for idx, metric := range metrics {
|
for idx, metric := range metrics {
|
||||||
// Get specific Peak or largest Peak
|
// Get specific Peak or largest Peak
|
||||||
var metricConfig *schema.MetricConfig
|
var metricConfig *schema.MetricConfig
|
||||||
var peak float64 = 0.0
|
var peak float64
|
||||||
var unit string = ""
|
var unit string
|
||||||
|
|
||||||
for _, f := range filters {
|
for _, f := range filters {
|
||||||
if f.Cluster != nil {
|
if f.Cluster != nil {
|
||||||
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
||||||
peak = metricConfig.Peak
|
peak = metricConfig.Peak
|
||||||
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
|
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
|
||||||
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -720,28 +843,24 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Make and fill bins
|
// Make and fill bins
|
||||||
bins := 10.0
|
peakBin := int(peak) / *bins
|
||||||
peakBin := peak / bins
|
|
||||||
|
|
||||||
points := make([]*model.MetricHistoPoint, 0)
|
points := make([]*model.MetricHistoPoint, 0)
|
||||||
for b := 0; b < 10; b++ {
|
for b := 0; b < *bins; b++ {
|
||||||
count := 0
|
count := 0
|
||||||
bindex := b + 1
|
bindex := b + 1
|
||||||
bmin := math.Round(peakBin * float64(b))
|
bmin := peakBin * b
|
||||||
bmax := math.Round(peakBin * (float64(b) + 1.0))
|
bmax := peakBin * (b + 1)
|
||||||
|
|
||||||
// Iterate AVG values for indexed metric and count for bins
|
// Iterate AVG values for indexed metric and count for bins
|
||||||
for _, val := range avgs[idx] {
|
for _, val := range avgs[idx] {
|
||||||
if float64(val) >= bmin && float64(val) < bmax {
|
if int(val) >= bmin && int(val) < bmax {
|
||||||
count += 1
|
count += 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bminint := int(bmin)
|
|
||||||
bmaxint := int(bmax)
|
|
||||||
|
|
||||||
// Append Bin to Metric Result Array
|
// Append Bin to Metric Result Array
|
||||||
point := model.MetricHistoPoint{Bin: &bindex, Count: count, Min: &bminint, Max: &bmaxint}
|
point := model.MetricHistoPoint{Bin: &bindex, Count: count, Min: &bmin, Max: &bmax}
|
||||||
points = append(points, &point)
|
points = append(points, &point)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
package repository
|
package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
@@ -14,7 +15,13 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Add the tag with id `tagId` to the job with the database id `jobId`.
|
// Add the tag with id `tagId` to the job with the database id `jobId`.
|
||||||
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
|
||||||
|
j, err := r.FindByIdWithUser(user, job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while finding job by id")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
||||||
|
|
||||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
@@ -23,49 +30,60 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
j, err := r.FindById(job)
|
tags, err := r.GetTags(user, &job)
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while finding job by id")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
tags, err := r.GetTags(&job)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while getting tags for job")
|
log.Warn("Error while getting tags for job")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return tags, archive.UpdateTags(j, tags)
|
archiveTags, err := r.getArchiveTags(&job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while getting tags for job")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tags, archive.UpdateTags(j, archiveTags)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Removes a tag from a job
|
// Removes a tag from a job
|
||||||
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
|
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
|
||||||
|
j, err := r.FindByIdWithUser(user, job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while finding job by id")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tag)
|
q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tag)
|
||||||
|
|
||||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
s, _, _ := q.ToSql()
|
s, _, _ := q.ToSql()
|
||||||
log.Errorf("Error adding tag with %s: %v", s, err)
|
log.Errorf("Error removing tag with %s: %v", s, err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
j, err := r.FindById(job)
|
tags, err := r.GetTags(user, &job)
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while finding job by id")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
tags, err := r.GetTags(&job)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while getting tags for job")
|
log.Warn("Error while getting tags for job")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return tags, archive.UpdateTags(j, tags)
|
archiveTags, err := r.getArchiveTags(&job)
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("Error while getting tags for job")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return tags, archive.UpdateTags(j, archiveTags)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CreateTag creates a new tag with the specified type and name and returns its database id.
|
// CreateTag creates a new tag with the specified type and name and returns its database id.
|
||||||
func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64, err error) {
|
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||||
q := sq.Insert("tag").Columns("tag_type", "tag_name").Values(tagType, tagName)
|
// Default to "Global" scope if none defined
|
||||||
|
if tagScope == "" {
|
||||||
|
tagScope = "global"
|
||||||
|
}
|
||||||
|
|
||||||
|
q := sq.Insert("tag").Columns("tag_type", "tag_name", "tag_scope").Values(tagType, tagName, tagScope)
|
||||||
|
|
||||||
res, err := q.RunWith(r.stmtCache).Exec()
|
res, err := q.RunWith(r.stmtCache).Exec()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -78,8 +96,9 @@ func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64,
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
|
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
|
||||||
|
// Fetch all Tags in DB for Display in Frontend Tag-View
|
||||||
tags = make([]schema.Tag, 0, 100)
|
tags = make([]schema.Tag, 0, 100)
|
||||||
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name FROM tag")
|
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
@@ -89,22 +108,42 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
|||||||
if err = xrows.StructScan(&t); err != nil {
|
if err = xrows.StructScan(&t); err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||||
|
readable, err := r.checkScopeAuth(user, "read", t.Scope)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil, err
|
||||||
|
}
|
||||||
|
if readable {
|
||||||
tags = append(tags, t)
|
tags = append(tags, t)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
q := sq.Select("t.tag_name, count(jt.tag_id)").
|
// Query and Count Jobs with attached Tags
|
||||||
|
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
|
||||||
From("tag t").
|
From("tag t").
|
||||||
LeftJoin("jobtag jt ON t.id = jt.tag_id").
|
LeftJoin("jobtag jt ON t.id = jt.tag_id").
|
||||||
GroupBy("t.tag_name")
|
GroupBy("t.tag_name")
|
||||||
|
|
||||||
|
// Handle Scope Filtering
|
||||||
|
scopeList := "\"global\""
|
||||||
|
if user != nil {
|
||||||
|
scopeList += ",\"" + user.Username + "\""
|
||||||
|
}
|
||||||
|
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||||
|
scopeList += ",\"admin\""
|
||||||
|
}
|
||||||
|
q = q.Where("t.tag_scope IN (" + scopeList + ")")
|
||||||
|
|
||||||
|
// Handle Job Ownership
|
||||||
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
|
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
|
||||||
log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
|
// log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
|
||||||
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
|
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
|
||||||
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
|
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
|
||||||
// Build ("project1", "project2", ...) list of variable length directly in SQL string
|
// Build ("project1", "project2", ...) list of variable length directly in SQL string
|
||||||
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
|
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
|
||||||
} else if user != nil { // USER OR NO ROLE (Compatibility): Only count own jobs
|
} else if user != nil { // USER OR NO ROLE (Compatibility): Only count own jobs
|
||||||
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ?)", user.Username)
|
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ?)", user.Username)
|
||||||
}
|
}
|
||||||
|
|
||||||
rows, err := q.RunWith(r.stmtCache).Query()
|
rows, err := q.RunWith(r.stmtCache).Query()
|
||||||
@@ -115,29 +154,44 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
|||||||
counts = make(map[string]int)
|
counts = make(map[string]int)
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
var tagName string
|
var tagName string
|
||||||
|
var tagId int
|
||||||
var count int
|
var count int
|
||||||
if err = rows.Scan(&tagName, &count); err != nil {
|
if err = rows.Scan(&tagName, &tagId, &count); err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
counts[tagName] = count
|
// Use tagId as second Map-Key component to differentiate tags with identical names
|
||||||
|
counts[fmt.Sprint(tagName, tagId)] = count
|
||||||
}
|
}
|
||||||
err = rows.Err()
|
err = rows.Err()
|
||||||
|
|
||||||
return
|
return tags, counts, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
|
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
|
||||||
// If such a tag does not yet exist, it is created.
|
// If such a tag does not yet exist, it is created.
|
||||||
func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName string) (tagId int64, err error) {
|
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||||
tagId, exists := r.TagId(tagType, tagName)
|
// Default to "Global" scope if none defined
|
||||||
|
if tagScope == "" {
|
||||||
|
tagScope = "global"
|
||||||
|
}
|
||||||
|
|
||||||
|
writable, err := r.checkScopeAuth(user, "write", tagScope)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
if !writable {
|
||||||
|
return 0, fmt.Errorf("cannot write tag scope with current authorization")
|
||||||
|
}
|
||||||
|
|
||||||
|
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||||
if !exists {
|
if !exists {
|
||||||
tagId, err = r.CreateTag(tagType, tagName)
|
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := r.AddTag(jobId, tagId); err != nil {
|
if _, err := r.AddTag(user, jobId, tagId); err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -145,19 +199,19 @@ func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName stri
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TagId returns the database id of the tag with the specified type and name.
|
// TagId returns the database id of the tag with the specified type and name.
|
||||||
func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) {
|
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
||||||
exists = true
|
exists = true
|
||||||
if err := sq.Select("id").From("tag").
|
if err := sq.Select("id").From("tag").
|
||||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).
|
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
|
||||||
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
|
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
|
||||||
exists = false
|
exists = false
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetTags returns a list of all tags if job is nil or of the tags that the job with that database ID has.
|
// GetTags returns a list of all scoped tags if job is nil or of the tags that the job with that database ID has.
|
||||||
func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, error) {
|
||||||
q := sq.Select("id", "tag_type", "tag_name").From("tag")
|
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||||
if job != nil {
|
if job != nil {
|
||||||
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||||
}
|
}
|
||||||
@@ -172,7 +226,41 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
|||||||
tags := make([]*schema.Tag, 0)
|
tags := make([]*schema.Tag, 0)
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
tag := &schema.Tag{}
|
tag := &schema.Tag{}
|
||||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
|
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||||
|
log.Warn("Error while scanning rows")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||||
|
readable, err := r.checkScopeAuth(user, "read", tag.Scope)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if readable {
|
||||||
|
tags = append(tags, tag)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tags, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
||||||
|
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||||
|
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||||
|
if job != nil {
|
||||||
|
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||||
|
}
|
||||||
|
|
||||||
|
rows, err := q.RunWith(r.stmtCache).Query()
|
||||||
|
if err != nil {
|
||||||
|
s, _, _ := q.ToSql()
|
||||||
|
log.Errorf("Error get tags with %s: %v", s, err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tags := make([]*schema.Tag, 0)
|
||||||
|
for rows.Next() {
|
||||||
|
tag := &schema.Tag{}
|
||||||
|
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||||
log.Warn("Error while scanning rows")
|
log.Warn("Error while scanning rows")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -181,3 +269,59 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
|||||||
|
|
||||||
return tags, nil
|
return tags, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
|
||||||
|
// Import has no scope ctx, only import from metafile to DB (No recursive archive update required), only returns err
|
||||||
|
|
||||||
|
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||||
|
if !exists {
|
||||||
|
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
|
||||||
|
|
||||||
|
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||||
|
s, _, _ := q.ToSql()
|
||||||
|
log.Errorf("Error adding tag on import with %s: %v", s, err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
|
||||||
|
if user != nil {
|
||||||
|
switch {
|
||||||
|
case operation == "write" && scope == "admin":
|
||||||
|
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
case operation == "write" && scope == "global":
|
||||||
|
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
case operation == "write" && scope == user.Username:
|
||||||
|
return true, nil
|
||||||
|
case operation == "read" && scope == "admin":
|
||||||
|
return user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}), nil
|
||||||
|
case operation == "read" && scope == "global":
|
||||||
|
return true, nil
|
||||||
|
case operation == "read" && scope == user.Username:
|
||||||
|
return true, nil
|
||||||
|
default:
|
||||||
|
if operation == "read" || operation == "write" {
|
||||||
|
// No acceptable scope: deny tag
|
||||||
|
return false, nil
|
||||||
|
} else {
|
||||||
|
return false, fmt.Errorf("error while checking tag operation auth: unknown operation (%s)", operation)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return false, fmt.Errorf("error while checking tag operation auth: no user in context")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
BIN
internal/repository/testdata/job.db-shm
vendored
BIN
internal/repository/testdata/job.db-shm
vendored
Binary file not shown.
0
internal/repository/testdata/job.db-wal
vendored
0
internal/repository/testdata/job.db-wal
vendored
@@ -6,7 +6,6 @@ package repository
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -18,20 +17,12 @@ type Transaction struct {
|
|||||||
func (r *JobRepository) TransactionInit() (*Transaction, error) {
|
func (r *JobRepository) TransactionInit() (*Transaction, error) {
|
||||||
var err error
|
var err error
|
||||||
t := new(Transaction)
|
t := new(Transaction)
|
||||||
// Inserts are bundled into transactions because in sqlite,
|
|
||||||
// that speeds up inserts A LOT.
|
|
||||||
t.tx, err = r.DB.Beginx()
|
t.tx, err = r.DB.Beginx()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while bundling transactions")
|
log.Warn("Error while bundling transactions")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
t.stmt, err = t.tx.PrepareNamed(NamedJobInsert)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while preparing namedJobInsert")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return t, nil
|
return t, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -50,7 +41,6 @@ func (r *JobRepository) TransactionCommit(t *Transaction) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
t.stmt = t.tx.NamedStmt(t.stmt)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,14 +49,17 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
|
|||||||
log.Warn("Error while committing SQL transactions")
|
log.Warn("Error while committing SQL transactions")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, error) {
|
func (r *JobRepository) TransactionAddNamed(
|
||||||
res, err := t.stmt.Exec(job)
|
t *Transaction,
|
||||||
|
query string,
|
||||||
|
args ...interface{},
|
||||||
|
) (int64, error) {
|
||||||
|
res, err := t.tx.NamedExec(query, args)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("repository initDB(): %v", err)
|
log.Errorf("Named Exec failed: %v", err)
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -79,26 +72,19 @@ func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, e
|
|||||||
return id, nil
|
return id, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *JobRepository) TransactionAddTag(t *Transaction, tag *schema.Tag) (int64, error) {
|
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
|
||||||
res, err := t.tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
|
|
||||||
|
res, err := t.tx.Exec(query, args...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
|
log.Errorf("TransactionAdd(), Exec() Error: %v", err)
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
tagId, err := res.LastInsertId()
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while getting last insert ID")
|
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return tagId, nil
|
id, err := res.LastInsertId()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
|
||||||
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *JobRepository) TransactionSetTag(t *Transaction, jobId int64, tagId int64) error {
|
return id, nil
|
||||||
if _, err := t.tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, jobId, tagId); err != nil {
|
|
||||||
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", jobId, tagId)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,8 +46,8 @@ func GetUserRepository() *UserRepository {
|
|||||||
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
||||||
user := &schema.User{Username: username}
|
user := &schema.User{Username: username}
|
||||||
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
|
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
|
||||||
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
|
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("hpc_user").
|
||||||
Where("user.username = ?", username).RunWith(r.DB).
|
Where("hpc_user.username = ?", username).RunWith(r.DB).
|
||||||
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
|
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
|
||||||
log.Warnf("Error while querying user '%v' from database", username)
|
log.Warnf("Error while querying user '%v' from database", username)
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -72,9 +72,8 @@ func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *UserRepository) GetLdapUsernames() ([]string, error) {
|
func (r *UserRepository) GetLdapUsernames() ([]string, error) {
|
||||||
|
|
||||||
var users []string
|
var users []string
|
||||||
rows, err := r.DB.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
rows, err := r.DB.Query(`SELECT username FROM hpc_user WHERE hpc_user.ldap = 1`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while querying usernames")
|
log.Warn("Error while querying usernames")
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -122,7 +121,7 @@ func (r *UserRepository) AddUser(user *schema.User) error {
|
|||||||
vals = append(vals, int(user.AuthSource))
|
vals = append(vals, int(user.AuthSource))
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
|
if _, err := sq.Insert("hpc_user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
|
||||||
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
|
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -131,9 +130,29 @@ func (r *UserRepository) AddUser(user *schema.User) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *UserRepository) DelUser(username string) error {
|
func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
|
||||||
|
// user contains updated info, apply to dbuser
|
||||||
|
// TODO: Discuss updatable fields
|
||||||
|
if dbUser.Name != user.Name {
|
||||||
|
if _, err := sq.Update("hpc_user").Set("name", user.Name).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||||
|
log.Errorf("error while updating name of user '%s'", user.Username)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_, err := r.DB.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
// Toggled until greenlit
|
||||||
|
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
|
||||||
|
// projects, _ := json.Marshal(user.Projects)
|
||||||
|
// if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||||
|
// return err
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *UserRepository) DelUser(username string) error {
|
||||||
|
_, err := r.DB.Exec(`DELETE FROM hpc_user WHERE hpc_user.username = ?`, username)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error while deleting user '%s' from DB", username)
|
log.Errorf("Error while deleting user '%s' from DB", username)
|
||||||
return err
|
return err
|
||||||
@@ -143,8 +162,7 @@ func (r *UserRepository) DelUser(username string) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||||
|
q := sq.Select("username", "name", "email", "roles", "projects").From("hpc_user")
|
||||||
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
|
|
||||||
if specialsOnly {
|
if specialsOnly {
|
||||||
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
|
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
|
||||||
}
|
}
|
||||||
@@ -186,8 +204,8 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
|||||||
func (r *UserRepository) AddRole(
|
func (r *UserRepository) AddRole(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
username string,
|
username string,
|
||||||
queryrole string) error {
|
queryrole string,
|
||||||
|
) error {
|
||||||
newRole := strings.ToLower(queryrole)
|
newRole := strings.ToLower(queryrole)
|
||||||
user, err := r.GetUser(username)
|
user, err := r.GetUser(username)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -198,15 +216,15 @@ func (r *UserRepository) AddRole(
|
|||||||
exists, valid := user.HasValidRole(newRole)
|
exists, valid := user.HasValidRole(newRole)
|
||||||
|
|
||||||
if !valid {
|
if !valid {
|
||||||
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
|
return fmt.Errorf("supplied role is no valid option : %v", newRole)
|
||||||
}
|
}
|
||||||
if exists {
|
if exists {
|
||||||
return fmt.Errorf("User %v already has role %v", username, newRole)
|
return fmt.Errorf("user %v already has role %v", username, newRole)
|
||||||
}
|
}
|
||||||
|
|
||||||
roles, _ := json.Marshal(append(user.Roles, newRole))
|
roles, _ := json.Marshal(append(user.Roles, newRole))
|
||||||
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
if _, err := sq.Update("hpc_user").Set("roles", roles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||||
log.Errorf("Error while adding new role for user '%s'", user.Username)
|
log.Errorf("error while adding new role for user '%s'", user.Username)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@@ -223,14 +241,14 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
|||||||
exists, valid := user.HasValidRole(oldRole)
|
exists, valid := user.HasValidRole(oldRole)
|
||||||
|
|
||||||
if !valid {
|
if !valid {
|
||||||
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
|
return fmt.Errorf("supplied role is no valid option : %v", oldRole)
|
||||||
}
|
}
|
||||||
if !exists {
|
if !exists {
|
||||||
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
|
return fmt.Errorf("role already deleted for user '%v': %v", username, oldRole)
|
||||||
}
|
}
|
||||||
|
|
||||||
if oldRole == schema.GetRoleString(schema.RoleManager) && len(user.Projects) != 0 {
|
if oldRole == schema.GetRoleString(schema.RoleManager) && len(user.Projects) != 0 {
|
||||||
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
return fmt.Errorf("cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||||
}
|
}
|
||||||
|
|
||||||
var newroles []string
|
var newroles []string
|
||||||
@@ -240,8 +258,8 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var mroles, _ = json.Marshal(newroles)
|
mroles, _ := json.Marshal(newroles)
|
||||||
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
if _, err := sq.Update("hpc_user").Set("roles", mroles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||||
log.Errorf("Error while removing role for user '%s'", user.Username)
|
log.Errorf("Error while removing role for user '%s'", user.Username)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -251,15 +269,15 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
|||||||
func (r *UserRepository) AddProject(
|
func (r *UserRepository) AddProject(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
username string,
|
username string,
|
||||||
project string) error {
|
project string,
|
||||||
|
) error {
|
||||||
user, err := r.GetUser(username)
|
user, err := r.GetUser(username)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if !user.HasRole(schema.RoleManager) {
|
if !user.HasRole(schema.RoleManager) {
|
||||||
return fmt.Errorf("user '%s' is not a manager!", username)
|
return fmt.Errorf("user '%s' is not a manager", username)
|
||||||
}
|
}
|
||||||
|
|
||||||
if user.HasProject(project) {
|
if user.HasProject(project) {
|
||||||
@@ -267,7 +285,7 @@ func (r *UserRepository) AddProject(
|
|||||||
}
|
}
|
||||||
|
|
||||||
projects, _ := json.Marshal(append(user.Projects, project))
|
projects, _ := json.Marshal(append(user.Projects, project))
|
||||||
if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -281,11 +299,11 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !user.HasRole(schema.RoleManager) {
|
if !user.HasRole(schema.RoleManager) {
|
||||||
return fmt.Errorf("user '%#v' is not a manager!", username)
|
return fmt.Errorf("user '%#v' is not a manager", username)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !user.HasProject(project) {
|
if !user.HasProject(project) {
|
||||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
|
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match", username, project)
|
||||||
}
|
}
|
||||||
|
|
||||||
var exists bool
|
var exists bool
|
||||||
@@ -298,14 +316,14 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if exists == true {
|
if exists {
|
||||||
var result interface{}
|
var result interface{}
|
||||||
if len(newprojects) == 0 {
|
if len(newprojects) == 0 {
|
||||||
result = "[]"
|
result = "[]"
|
||||||
} else {
|
} else {
|
||||||
result, _ = json.Marshal(newprojects)
|
result, _ = json.Marshal(newprojects)
|
||||||
}
|
}
|
||||||
if _, err := sq.Update("user").Set("projects", result).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
if _, err := sq.Update("hpc_user").Set("projects", result).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@@ -321,9 +339,10 @@ const ContextUserKey ContextKey = "user"
|
|||||||
func GetUserFromContext(ctx context.Context) *schema.User {
|
func GetUserFromContext(ctx context.Context) *schema.User {
|
||||||
x := ctx.Value(ContextUserKey)
|
x := ctx.Value(ContextUserKey)
|
||||||
if x == nil {
|
if x == nil {
|
||||||
|
log.Warnf("no user retrieved from context")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
// log.Infof("user retrieved from context: %v", x.(*schema.User))
|
||||||
return x.(*schema.User)
|
return x.(*schema.User)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -336,7 +355,7 @@ func (r *UserRepository) FetchUserInCtx(ctx context.Context, username string) (*
|
|||||||
|
|
||||||
user := &model.User{Username: username}
|
user := &model.User{Username: username}
|
||||||
var name, email sql.NullString
|
var name, email sql.NullString
|
||||||
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
|
if err := sq.Select("name", "email").From("hpc_user").Where("hpc_user.username = ?", username).
|
||||||
RunWith(r.DB).QueryRow().Scan(&name, &email); err != nil {
|
RunWith(r.DB).QueryRow().Scan(&name, &email); err != nil {
|
||||||
if err == sql.ErrNoRows {
|
if err == sql.ErrNoRows {
|
||||||
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
|
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
@@ -34,32 +35,38 @@ type Route struct {
|
|||||||
|
|
||||||
var routes []Route = []Route{
|
var routes []Route = []Route{
|
||||||
{"/", "home.tmpl", "ClusterCockpit", false, setupHomeRoute},
|
{"/", "home.tmpl", "ClusterCockpit", false, setupHomeRoute},
|
||||||
{"/config", "config.tmpl", "Settings", false, func(i InfoType, r *http.Request) InfoType { return i }},
|
{"/config", "config.tmpl", "Settings", false, setupConfigRoute},
|
||||||
{"/monitoring/jobs/", "monitoring/jobs.tmpl", "Jobs - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { return i }},
|
{"/monitoring/jobs/", "monitoring/jobs.tmpl", "Jobs - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { return i }},
|
||||||
{"/monitoring/job/{id:[0-9]+}", "monitoring/job.tmpl", "Job <ID> - ClusterCockpit", false, setupJobRoute},
|
{"/monitoring/job/{id:[0-9]+}", "monitoring/job.tmpl", "Job <ID> - ClusterCockpit", false, setupJobRoute},
|
||||||
{"/monitoring/users/", "monitoring/list.tmpl", "Users - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { i["listType"] = "USER"; return i }},
|
{"/monitoring/users/", "monitoring/list.tmpl", "Users - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { i["listType"] = "USER"; return i }},
|
||||||
{"/monitoring/projects/", "monitoring/list.tmpl", "Projects - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { i["listType"] = "PROJECT"; return i }},
|
{"/monitoring/projects/", "monitoring/list.tmpl", "Projects - ClusterCockpit", true, func(i InfoType, r *http.Request) InfoType { i["listType"] = "PROJECT"; return i }},
|
||||||
{"/monitoring/tags/", "monitoring/taglist.tmpl", "Tags - ClusterCockpit", false, setupTaglistRoute},
|
{"/monitoring/tags/", "monitoring/taglist.tmpl", "Tags - ClusterCockpit", false, setupTaglistRoute},
|
||||||
{"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute},
|
{"/monitoring/user/{id}", "monitoring/user.tmpl", "User <ID> - ClusterCockpit", true, setupUserRoute},
|
||||||
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> - ClusterCockpit", false, setupClusterRoute},
|
{"/monitoring/systems/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> Node Overview - ClusterCockpit", false, setupClusterOverviewRoute},
|
||||||
|
{"/monitoring/systems/list/{cluster}", "monitoring/systems.tmpl", "Cluster <ID> Node List - ClusterCockpit", false, setupClusterListRoute},
|
||||||
|
{"/monitoring/systems/list/{cluster}/{subcluster}", "monitoring/systems.tmpl", "Cluster <ID> <SID> Node List - ClusterCockpit", false, setupClusterListRoute},
|
||||||
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
|
{"/monitoring/node/{cluster}/{hostname}", "monitoring/node.tmpl", "Node <ID> - ClusterCockpit", false, setupNodeRoute},
|
||||||
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
|
{"/monitoring/analysis/{cluster}", "monitoring/analysis.tmpl", "Analysis - ClusterCockpit", true, setupAnalysisRoute},
|
||||||
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterRoute},
|
{"/monitoring/status/{cluster}", "monitoring/status.tmpl", "Status of <ID> - ClusterCockpit", false, setupClusterStatusRoute},
|
||||||
}
|
}
|
||||||
|
|
||||||
func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
||||||
jobRepo := repository.GetJobRepository()
|
jobRepo := repository.GetJobRepository()
|
||||||
groupBy := model.AggregateCluster
|
groupBy := model.AggregateCluster
|
||||||
|
|
||||||
|
// startJobCount := time.Now()
|
||||||
stats, err := jobRepo.JobCountGrouped(r.Context(), nil, &groupBy)
|
stats, err := jobRepo.JobCountGrouped(r.Context(), nil, &groupBy)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("failed to count jobs: %s", err.Error())
|
log.Warnf("failed to count jobs: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
// log.Infof("Timer HOME ROUTE startJobCount: %s", time.Since(startJobCount))
|
||||||
|
|
||||||
|
// startRunningJobCount := time.Now()
|
||||||
stats, err = jobRepo.AddJobCountGrouped(r.Context(), nil, &groupBy, stats, "running")
|
stats, err = jobRepo.AddJobCountGrouped(r.Context(), nil, &groupBy, stats, "running")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("failed to count running jobs: %s", err.Error())
|
log.Warnf("failed to count running jobs: %s", err.Error())
|
||||||
}
|
}
|
||||||
|
// log.Infof("Timer HOME ROUTE startRunningJobCount: %s", time.Since(startRunningJobCount))
|
||||||
|
|
||||||
i["clusters"] = stats
|
i["clusters"] = stats
|
||||||
|
|
||||||
@@ -75,8 +82,22 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
|
|||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func setupConfigRoute(i InfoType, r *http.Request) InfoType {
|
||||||
|
if util.CheckFileExists("./var/notice.txt") {
|
||||||
|
msg, err := os.ReadFile("./var/notice.txt")
|
||||||
|
if err == nil {
|
||||||
|
i["ncontent"] = string(msg)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
func setupJobRoute(i InfoType, r *http.Request) InfoType {
|
func setupJobRoute(i InfoType, r *http.Request) InfoType {
|
||||||
i["id"] = mux.Vars(r)["id"]
|
i["id"] = mux.Vars(r)["id"]
|
||||||
|
if config.Keys.EmissionConstant != 0 {
|
||||||
|
i["emission"] = config.Keys.EmissionConstant
|
||||||
|
}
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -92,7 +113,7 @@ func setupUserRoute(i InfoType, r *http.Request) InfoType {
|
|||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
func setupClusterRoute(i InfoType, r *http.Request) InfoType {
|
func setupClusterStatusRoute(i InfoType, r *http.Request) InfoType {
|
||||||
vars := mux.Vars(r)
|
vars := mux.Vars(r)
|
||||||
i["id"] = vars["cluster"]
|
i["id"] = vars["cluster"]
|
||||||
i["cluster"] = vars["cluster"]
|
i["cluster"] = vars["cluster"]
|
||||||
@@ -104,6 +125,36 @@ func setupClusterRoute(i InfoType, r *http.Request) InfoType {
|
|||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func setupClusterOverviewRoute(i InfoType, r *http.Request) InfoType {
|
||||||
|
vars := mux.Vars(r)
|
||||||
|
i["id"] = vars["cluster"]
|
||||||
|
i["cluster"] = vars["cluster"]
|
||||||
|
i["displayType"] = "OVERVIEW"
|
||||||
|
|
||||||
|
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
|
||||||
|
if from != "" || to != "" {
|
||||||
|
i["from"] = from
|
||||||
|
i["to"] = to
|
||||||
|
}
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
|
func setupClusterListRoute(i InfoType, r *http.Request) InfoType {
|
||||||
|
vars := mux.Vars(r)
|
||||||
|
i["id"] = vars["cluster"]
|
||||||
|
i["cluster"] = vars["cluster"]
|
||||||
|
i["sid"] = vars["subcluster"]
|
||||||
|
i["subCluster"] = vars["subcluster"]
|
||||||
|
i["displayType"] = "LIST"
|
||||||
|
|
||||||
|
from, to := r.URL.Query().Get("from"), r.URL.Query().Get("to")
|
||||||
|
if from != "" || to != "" {
|
||||||
|
i["from"] = from
|
||||||
|
i["to"] = to
|
||||||
|
}
|
||||||
|
return i
|
||||||
|
}
|
||||||
|
|
||||||
func setupNodeRoute(i InfoType, r *http.Request) InfoType {
|
func setupNodeRoute(i InfoType, r *http.Request) InfoType {
|
||||||
vars := mux.Vars(r)
|
vars := mux.Vars(r)
|
||||||
i["cluster"] = vars["cluster"]
|
i["cluster"] = vars["cluster"]
|
||||||
@@ -124,28 +175,46 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
|
|||||||
|
|
||||||
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
|
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
|
||||||
jobRepo := repository.GetJobRepository()
|
jobRepo := repository.GetJobRepository()
|
||||||
user := repository.GetUserFromContext(r.Context())
|
tags, counts, err := jobRepo.CountTags(repository.GetUserFromContext(r.Context()))
|
||||||
|
|
||||||
tags, counts, err := jobRepo.CountTags(user)
|
|
||||||
tagMap := make(map[string][]map[string]interface{})
|
tagMap := make(map[string][]map[string]interface{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("GetTags failed: %s", err.Error())
|
log.Warnf("GetTags failed: %s", err.Error())
|
||||||
i["tagmap"] = tagMap
|
i["tagmap"] = tagMap
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
// Reduces displayed tags for unauth'd users
|
||||||
|
userAuthlevel := repository.GetUserFromContext(r.Context()).GetAuthLevel()
|
||||||
|
// Uses tag.ID as second Map-Key component to differentiate tags with identical names
|
||||||
|
if userAuthlevel >= 4 { // Support+ : Show tags for all scopes, regardless of count
|
||||||
for _, tag := range tags {
|
for _, tag := range tags {
|
||||||
tagItem := map[string]interface{}{
|
tagItem := map[string]interface{}{
|
||||||
"id": tag.ID,
|
"id": tag.ID,
|
||||||
"name": tag.Name,
|
"name": tag.Name,
|
||||||
"count": counts[tag.Name],
|
"scope": tag.Scope,
|
||||||
|
"count": counts[fmt.Sprint(tag.Name, tag.ID)],
|
||||||
}
|
}
|
||||||
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||||
}
|
}
|
||||||
|
} else if userAuthlevel < 4 && userAuthlevel >= 2 { // User+ : Show global and admin scope only if at least 1 tag used, private scope regardless of count
|
||||||
|
for _, tag := range tags {
|
||||||
|
tagCount := counts[fmt.Sprint(tag.Name, tag.ID)]
|
||||||
|
if ((tag.Scope == "global" || tag.Scope == "admin") && tagCount >= 1) || (tag.Scope != "global" && tag.Scope != "admin") {
|
||||||
|
tagItem := map[string]interface{}{
|
||||||
|
"id": tag.ID,
|
||||||
|
"name": tag.Name,
|
||||||
|
"scope": tag.Scope,
|
||||||
|
"count": tagCount,
|
||||||
|
}
|
||||||
|
tagMap[tag.Type] = append(tagMap[tag.Type], tagItem)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // auth < 2 return nothing for this route
|
||||||
|
|
||||||
i["tagmap"] = tagMap
|
i["tagmap"] = tagMap
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Lots of redundant code. Needs refactoring
|
||||||
func buildFilterPresets(query url.Values) map[string]interface{} {
|
func buildFilterPresets(query url.Values) map[string]interface{} {
|
||||||
filterPresets := map[string]interface{}{}
|
filterPresets := map[string]interface{}{}
|
||||||
|
|
||||||
@@ -208,6 +277,16 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if query.Get("numHWThreads") != "" {
|
||||||
|
parts := strings.Split(query.Get("numHWThreads"), "-")
|
||||||
|
if len(parts) == 2 {
|
||||||
|
a, e1 := strconv.Atoi(parts[0])
|
||||||
|
b, e2 := strconv.Atoi(parts[1])
|
||||||
|
if e1 == nil && e2 == nil {
|
||||||
|
filterPresets["numHWThreads"] = map[string]int{"from": a, "to": b}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
if query.Get("numAccelerators") != "" {
|
if query.Get("numAccelerators") != "" {
|
||||||
parts := strings.Split(query.Get("numAccelerators"), "-")
|
parts := strings.Split(query.Get("numAccelerators"), "-")
|
||||||
if len(parts) == 2 {
|
if len(parts) == 2 {
|
||||||
@@ -234,7 +313,7 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
|||||||
}
|
}
|
||||||
if query.Get("startTime") != "" {
|
if query.Get("startTime") != "" {
|
||||||
parts := strings.Split(query.Get("startTime"), "-")
|
parts := strings.Split(query.Get("startTime"), "-")
|
||||||
if len(parts) == 2 {
|
if len(parts) == 2 { // Time in seconds, from - to
|
||||||
a, e1 := strconv.ParseInt(parts[0], 10, 64)
|
a, e1 := strconv.ParseInt(parts[0], 10, 64)
|
||||||
b, e2 := strconv.ParseInt(parts[1], 10, 64)
|
b, e2 := strconv.ParseInt(parts[1], 10, 64)
|
||||||
if e1 == nil && e2 == nil {
|
if e1 == nil && e2 == nil {
|
||||||
@@ -243,9 +322,41 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
|
|||||||
"to": time.Unix(b, 0).Format(time.RFC3339),
|
"to": time.Unix(b, 0).Format(time.RFC3339),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else { // named range
|
||||||
|
filterPresets["startTime"] = map[string]string{
|
||||||
|
"range": query.Get("startTime"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
if query.Get("energy") != "" {
|
||||||
|
parts := strings.Split(query.Get("energy"), "-")
|
||||||
|
if len(parts) == 2 {
|
||||||
|
a, e1 := strconv.Atoi(parts[0])
|
||||||
|
b, e2 := strconv.Atoi(parts[1])
|
||||||
|
if e1 == nil && e2 == nil {
|
||||||
|
filterPresets["energy"] = map[string]int{"from": a, "to": b}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(query["stat"]) != 0 {
|
||||||
|
statList := make([]map[string]interface{}, 0)
|
||||||
|
for _, statEntry := range query["stat"] {
|
||||||
|
parts := strings.Split(statEntry, "-")
|
||||||
|
if len(parts) == 3 { // Metric Footprint Stat Field, from - to
|
||||||
|
a, e1 := strconv.ParseInt(parts[1], 10, 64)
|
||||||
|
b, e2 := strconv.ParseInt(parts[2], 10, 64)
|
||||||
|
if e1 == nil && e2 == nil {
|
||||||
|
statEntry := map[string]interface{}{
|
||||||
|
"field": parts[0],
|
||||||
|
"from": a,
|
||||||
|
"to": b,
|
||||||
|
}
|
||||||
|
statList = append(statList, statEntry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
filterPresets["stats"] = statList
|
||||||
|
}
|
||||||
return filterPresets
|
return filterPresets
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -264,10 +375,14 @@ func SetupRoutes(router *mux.Router, buildInfo web.Build) {
|
|||||||
infos := route.Setup(map[string]interface{}{}, r)
|
infos := route.Setup(map[string]interface{}{}, r)
|
||||||
if id, ok := infos["id"]; ok {
|
if id, ok := infos["id"]; ok {
|
||||||
title = strings.Replace(route.Title, "<ID>", id.(string), 1)
|
title = strings.Replace(route.Title, "<ID>", id.(string), 1)
|
||||||
|
if sid, ok := infos["sid"]; ok { // 2nd ID element
|
||||||
|
title = strings.Replace(title, "<SID>", sid.(string), 1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get User -> What if NIL?
|
// Get User -> What if NIL?
|
||||||
user := repository.GetUserFromContext(r.Context())
|
user := repository.GetUserFromContext(r.Context())
|
||||||
|
|
||||||
// Get Roles
|
// Get Roles
|
||||||
availableRoles, _ := schema.GetValidRolesMap(user)
|
availableRoles, _ := schema.GetValidRolesMap(user)
|
||||||
|
|
||||||
@@ -277,6 +392,7 @@ func SetupRoutes(router *mux.Router, buildInfo web.Build) {
|
|||||||
Roles: availableRoles,
|
Roles: availableRoles,
|
||||||
Build: buildInfo,
|
Build: buildInfo,
|
||||||
Config: conf,
|
Config: conf,
|
||||||
|
Resampling: config.Keys.EnableResampling,
|
||||||
Infos: infos,
|
Infos: infos,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
41
internal/taskManager/compressionService.go
Normal file
41
internal/taskManager/compressionService.go
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterCompressionService(compressOlderThan int) {
|
||||||
|
log.Info("Register compression service")
|
||||||
|
|
||||||
|
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(05, 0, 0))),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
var jobs []*schema.Job
|
||||||
|
var err error
|
||||||
|
|
||||||
|
ar := archive.GetHandle()
|
||||||
|
startTime := time.Now().Unix() - int64(compressOlderThan*24*3600)
|
||||||
|
lastTime := ar.CompressLast(startTime)
|
||||||
|
if startTime == lastTime {
|
||||||
|
log.Info("Compression Service - Complete archive run")
|
||||||
|
jobs, err = jobRepo.FindJobsBetween(0, startTime)
|
||||||
|
|
||||||
|
} else {
|
||||||
|
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while looking for compression jobs: %v", err)
|
||||||
|
}
|
||||||
|
ar.Compress(jobs)
|
||||||
|
}))
|
||||||
|
}
|
||||||
36
internal/taskManager/ldapSyncService.go
Normal file
36
internal/taskManager/ldapSyncService.go
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterLdapSyncService(ds string) {
|
||||||
|
interval, err := parseDuration(ds)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Could not parse duration for sync interval: %v",
|
||||||
|
ds)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
auth := auth.GetAuthInstance()
|
||||||
|
|
||||||
|
log.Info("Register LDAP sync service")
|
||||||
|
s.NewJob(gocron.DurationJob(interval),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
t := time.Now()
|
||||||
|
log.Printf("ldap sync started at %s", t.Format(time.RFC3339))
|
||||||
|
if err := auth.LdapAuth.Sync(); err != nil {
|
||||||
|
log.Errorf("ldap sync failed: %s", err.Error())
|
||||||
|
}
|
||||||
|
log.Print("ldap sync done")
|
||||||
|
}))
|
||||||
|
}
|
||||||
67
internal/taskManager/retentionService.go
Normal file
67
internal/taskManager/retentionService.go
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterRetentionDeleteService(age int, includeDB bool) {
|
||||||
|
log.Info("Register retention delete service")
|
||||||
|
|
||||||
|
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
startTime := time.Now().Unix() - int64(age*24*3600)
|
||||||
|
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||||
|
}
|
||||||
|
archive.GetHandle().CleanUp(jobs)
|
||||||
|
|
||||||
|
if includeDB {
|
||||||
|
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
|
||||||
|
} else {
|
||||||
|
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||||
|
}
|
||||||
|
if err = jobRepo.Optimize(); err != nil {
|
||||||
|
log.Errorf("Error occured in db optimization: %s", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
||||||
|
func RegisterRetentionMoveService(age int, includeDB bool, location string) {
|
||||||
|
log.Info("Register retention move service")
|
||||||
|
|
||||||
|
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(04, 0, 0))),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
startTime := time.Now().Unix() - int64(age*24*3600)
|
||||||
|
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||||
|
}
|
||||||
|
archive.GetHandle().Move(jobs, location)
|
||||||
|
|
||||||
|
if includeDB {
|
||||||
|
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("Error while deleting retention jobs from db: %v", err)
|
||||||
|
} else {
|
||||||
|
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||||
|
}
|
||||||
|
if err = jobRepo.Optimize(); err != nil {
|
||||||
|
log.Errorf("Error occured in db optimization: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
}
|
||||||
27
internal/taskManager/stopJobsExceedTime.go
Normal file
27
internal/taskManager/stopJobsExceedTime.go
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"runtime"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterStopJobsExceedTime() {
|
||||||
|
log.Info("Register undead jobs service")
|
||||||
|
|
||||||
|
s.NewJob(gocron.DailyJob(1, gocron.NewAtTimes(gocron.NewAtTime(03, 0, 0))),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
||||||
|
}
|
||||||
|
runtime.GC()
|
||||||
|
}))
|
||||||
|
}
|
||||||
90
internal/taskManager/taskManager.go
Normal file
90
internal/taskManager/taskManager.go
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
s gocron.Scheduler
|
||||||
|
jobRepo *repository.JobRepository
|
||||||
|
)
|
||||||
|
|
||||||
|
func parseDuration(s string) (time.Duration, error) {
|
||||||
|
interval, err := time.ParseDuration(s)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf("Could not parse duration for sync interval: %v",
|
||||||
|
s)
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if interval == 0 {
|
||||||
|
log.Info("TaskManager: Sync interval is zero")
|
||||||
|
}
|
||||||
|
|
||||||
|
return interval, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func Start() {
|
||||||
|
var err error
|
||||||
|
jobRepo = repository.GetJobRepository()
|
||||||
|
s, err = gocron.NewScheduler()
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error while creating gocron scheduler: %s", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Keys.StopJobsExceedingWalltime > 0 {
|
||||||
|
RegisterStopJobsExceedTime()
|
||||||
|
}
|
||||||
|
|
||||||
|
var cfg struct {
|
||||||
|
Retention schema.Retention `json:"retention"`
|
||||||
|
Compression int `json:"compression"`
|
||||||
|
}
|
||||||
|
cfg.Retention.IncludeDB = true
|
||||||
|
|
||||||
|
if err := json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
|
||||||
|
log.Warn("Error while unmarshaling raw config json")
|
||||||
|
}
|
||||||
|
|
||||||
|
switch cfg.Retention.Policy {
|
||||||
|
case "delete":
|
||||||
|
RegisterRetentionDeleteService(
|
||||||
|
cfg.Retention.Age,
|
||||||
|
cfg.Retention.IncludeDB)
|
||||||
|
case "move":
|
||||||
|
RegisterRetentionMoveService(
|
||||||
|
cfg.Retention.Age,
|
||||||
|
cfg.Retention.IncludeDB,
|
||||||
|
cfg.Retention.Location)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.Compression > 0 {
|
||||||
|
RegisterCompressionService(cfg.Compression)
|
||||||
|
}
|
||||||
|
|
||||||
|
lc := config.Keys.LdapConfig
|
||||||
|
|
||||||
|
if lc != nil && lc.SyncInterval != "" {
|
||||||
|
RegisterLdapSyncService(lc.SyncInterval)
|
||||||
|
}
|
||||||
|
|
||||||
|
RegisterFootprintWorker()
|
||||||
|
RegisterUpdateDurationWorker()
|
||||||
|
|
||||||
|
s.Start()
|
||||||
|
}
|
||||||
|
|
||||||
|
func Shutdown() {
|
||||||
|
s.Shutdown()
|
||||||
|
}
|
||||||
33
internal/taskManager/updateDurationService.go
Normal file
33
internal/taskManager/updateDurationService.go
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterUpdateDurationWorker() {
|
||||||
|
var frequency string
|
||||||
|
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.DurationWorker != "" {
|
||||||
|
frequency = config.Keys.CronFrequency.DurationWorker
|
||||||
|
} else {
|
||||||
|
frequency = "5m"
|
||||||
|
}
|
||||||
|
d, _ := time.ParseDuration(frequency)
|
||||||
|
log.Infof("Register Duration Update service with %s interval", frequency)
|
||||||
|
|
||||||
|
s.NewJob(gocron.DurationJob(d),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
start := time.Now()
|
||||||
|
log.Printf("Update duration started at %s", start.Format(time.RFC3339))
|
||||||
|
jobRepo.UpdateDuration()
|
||||||
|
log.Printf("Update duration is done and took %s", time.Since(start))
|
||||||
|
}))
|
||||||
|
}
|
||||||
146
internal/taskManager/updateFootprintService.go
Normal file
146
internal/taskManager/updateFootprintService.go
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package taskManager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"math"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
"github.com/go-co-op/gocron/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
func RegisterFootprintWorker() {
|
||||||
|
var frequency string
|
||||||
|
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.FootprintWorker != "" {
|
||||||
|
frequency = config.Keys.CronFrequency.FootprintWorker
|
||||||
|
} else {
|
||||||
|
frequency = "10m"
|
||||||
|
}
|
||||||
|
d, _ := time.ParseDuration(frequency)
|
||||||
|
log.Infof("Register Footprint Update service with %s interval", frequency)
|
||||||
|
|
||||||
|
s.NewJob(gocron.DurationJob(d),
|
||||||
|
gocron.NewTask(
|
||||||
|
func() {
|
||||||
|
s := time.Now()
|
||||||
|
c := 0
|
||||||
|
ce := 0
|
||||||
|
cl := 0
|
||||||
|
log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
|
||||||
|
|
||||||
|
for _, cluster := range archive.Clusters {
|
||||||
|
s_cluster := time.Now()
|
||||||
|
jobs, err := jobRepo.FindRunningJobs(cluster.Name)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// NOTE: Additional Subcluster Loop Could Allow For Limited List Of Footprint-Metrics Only.
|
||||||
|
// - Chunk-Size Would Then Be 'SubCluster' (Running Jobs, Transactions) as Lists Can Change Within SCs
|
||||||
|
// - Would Require Review of 'updateFootprint' Usage (Logic Could Possibly Be Included Here Completely)
|
||||||
|
allMetrics := make([]string, 0)
|
||||||
|
metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
allMetrics = append(allMetrics, mc.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
repo, err := metricdata.GetMetricDataRepo(cluster.Name)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("no metric data repository configured for '%s'", cluster.Name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
pendingStatements := []sq.UpdateBuilder{}
|
||||||
|
|
||||||
|
for _, job := range jobs {
|
||||||
|
log.Debugf("Prepare job %d", job.JobID)
|
||||||
|
cl++
|
||||||
|
|
||||||
|
s_job := time.Now()
|
||||||
|
|
||||||
|
jobStats, err := repo.LoadStats(job, allMetrics, context.Background())
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("error wile loading job data stats for footprint update: %v", err)
|
||||||
|
ce++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
jobMeta := &schema.JobMeta{
|
||||||
|
BaseJob: job.BaseJob,
|
||||||
|
StartTime: job.StartTime.Unix(),
|
||||||
|
Statistics: make(map[string]schema.JobStatistics),
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, metric := range allMetrics {
|
||||||
|
avg, min, max := 0.0, 0.0, 0.0
|
||||||
|
data, ok := jobStats[metric] // JobStats[Metric1:[Hostname1:[Stats], Hostname2:[Stats], ...], Metric2[...] ...]
|
||||||
|
if ok {
|
||||||
|
for _, res := range job.Resources {
|
||||||
|
hostStats, ok := data[res.Hostname]
|
||||||
|
if ok {
|
||||||
|
avg += hostStats.Avg
|
||||||
|
min = math.Min(min, hostStats.Min)
|
||||||
|
max = math.Max(max, hostStats.Max)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add values rounded to 2 digits: repo.LoadStats may return unrounded
|
||||||
|
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||||
|
Unit: schema.Unit{
|
||||||
|
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||||
|
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||||
|
},
|
||||||
|
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
|
||||||
|
Min: (math.Round(min*100) / 100),
|
||||||
|
Max: (math.Round(max*100) / 100),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build Statement per Job, Add to Pending Array
|
||||||
|
stmt := sq.Update("job")
|
||||||
|
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
|
||||||
|
ce++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
stmt = stmt.Where("job.id = ?", job.ID)
|
||||||
|
|
||||||
|
pendingStatements = append(pendingStatements, stmt)
|
||||||
|
log.Debugf("Job %d took %s", job.JobID, time.Since(s_job))
|
||||||
|
}
|
||||||
|
|
||||||
|
t, err := jobRepo.TransactionInit()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed TransactionInit %v", err)
|
||||||
|
log.Errorf("skipped %d transactions for cluster %s", len(pendingStatements), cluster.Name)
|
||||||
|
ce += len(pendingStatements)
|
||||||
|
} else {
|
||||||
|
for _, ps := range pendingStatements {
|
||||||
|
query, args, err := ps.ToSql()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("failed in ToSQL conversion: %v", err)
|
||||||
|
ce++
|
||||||
|
} else {
|
||||||
|
// args...: Footprint-JSON, Energyfootprint-JSON, TotalEnergy, JobID
|
||||||
|
jobRepo.TransactionAdd(t, query, args...)
|
||||||
|
c++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
jobRepo.TransactionEnd(t)
|
||||||
|
}
|
||||||
|
log.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
|
||||||
|
}
|
||||||
|
log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
|
||||||
|
}))
|
||||||
|
}
|
||||||
@@ -4,7 +4,13 @@
|
|||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
package util
|
package util
|
||||||
|
|
||||||
import "golang.org/x/exp/constraints"
|
import (
|
||||||
|
"golang.org/x/exp/constraints"
|
||||||
|
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
)
|
||||||
|
|
||||||
func Min[T constraints.Ordered](a, b T) T {
|
func Min[T constraints.Ordered](a, b T) T {
|
||||||
if a < b {
|
if a < b {
|
||||||
@@ -19,3 +25,36 @@ func Max[T constraints.Ordered](a, b T) T {
|
|||||||
}
|
}
|
||||||
return b
|
return b
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func sortedCopy(input []float64) []float64 {
|
||||||
|
sorted := make([]float64, len(input))
|
||||||
|
copy(sorted, input)
|
||||||
|
sort.Float64s(sorted)
|
||||||
|
return sorted
|
||||||
|
}
|
||||||
|
|
||||||
|
func Mean(input []float64) (float64, error) {
|
||||||
|
if len(input) == 0 {
|
||||||
|
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
|
||||||
|
}
|
||||||
|
sum := 0.0
|
||||||
|
for _, n := range input {
|
||||||
|
sum += n
|
||||||
|
}
|
||||||
|
return sum / float64(len(input)), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func Median(input []float64) (median float64, err error) {
|
||||||
|
c := sortedCopy(input)
|
||||||
|
// Even numbers: add the two middle numbers, divide by two (use mean function)
|
||||||
|
// Odd numbers: Use the middle number
|
||||||
|
l := len(c)
|
||||||
|
if l == 0 {
|
||||||
|
return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
|
||||||
|
} else if l%2 == 0 {
|
||||||
|
median, _ = Mean(c[l/2-1 : l/2+1])
|
||||||
|
} else {
|
||||||
|
median = c[l/2]
|
||||||
|
}
|
||||||
|
return median, nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -7,13 +7,14 @@ package archive
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
const Version uint64 = 1
|
const Version uint64 = 2
|
||||||
|
|
||||||
type ArchiveBackend interface {
|
type ArchiveBackend interface {
|
||||||
Init(rawConfig json.RawMessage) (uint64, error)
|
Init(rawConfig json.RawMessage) (uint64, error)
|
||||||
@@ -53,21 +54,25 @@ type JobContainer struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||||
ar ArchiveBackend
|
ar ArchiveBackend
|
||||||
useArchive bool
|
useArchive bool
|
||||||
)
|
)
|
||||||
|
|
||||||
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
||||||
|
var err error
|
||||||
|
|
||||||
|
initOnce.Do(func() {
|
||||||
useArchive = !disableArchive
|
useArchive = !disableArchive
|
||||||
|
|
||||||
var cfg struct {
|
var cfg struct {
|
||||||
Kind string `json:"kind"`
|
Kind string `json:"kind"`
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
|
if err = json.Unmarshal(rawConfig, &cfg); err != nil {
|
||||||
log.Warn("Error while unmarshaling raw config json")
|
log.Warn("Error while unmarshaling raw config json")
|
||||||
return err
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
switch cfg.Kind {
|
switch cfg.Kind {
|
||||||
@@ -76,17 +81,21 @@ func Init(rawConfig json.RawMessage, disableArchive bool) error {
|
|||||||
// case "s3":
|
// case "s3":
|
||||||
// ar = &S3Archive{}
|
// ar = &S3Archive{}
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
|
err = fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
|
||||||
}
|
}
|
||||||
|
|
||||||
version, err := ar.Init(rawConfig)
|
var version uint64
|
||||||
|
version, err = ar.Init(rawConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while initializing archiveBackend")
|
log.Error("Error while initializing archiveBackend")
|
||||||
return err
|
return
|
||||||
}
|
}
|
||||||
log.Infof("Load archive version %d", version)
|
log.Infof("Load archive version %d", version)
|
||||||
|
|
||||||
return initClusterConfig()
|
err = initClusterConfig()
|
||||||
|
})
|
||||||
|
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetHandle() ArchiveBackend {
|
func GetHandle() ArchiveBackend {
|
||||||
@@ -164,6 +173,7 @@ func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
|
|||||||
jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
|
jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
|
||||||
Name: tag.Name,
|
Name: tag.Name,
|
||||||
Type: tag.Type,
|
Type: tag.Type,
|
||||||
|
Scope: tag.Scope,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,13 +12,16 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
var Clusters []*schema.Cluster
|
var (
|
||||||
var nodeLists map[string]map[string]NodeList
|
Clusters []*schema.Cluster
|
||||||
|
GlobalMetricList []*schema.GlobalMetricListItem
|
||||||
|
NodeLists map[string]map[string]NodeList
|
||||||
|
)
|
||||||
|
|
||||||
func initClusterConfig() error {
|
func initClusterConfig() error {
|
||||||
|
|
||||||
Clusters = []*schema.Cluster{}
|
Clusters = []*schema.Cluster{}
|
||||||
nodeLists = map[string]map[string]NodeList{}
|
NodeLists = map[string]map[string]NodeList{}
|
||||||
|
metricLookup := make(map[string]schema.GlobalMetricListItem)
|
||||||
|
|
||||||
for _, c := range ar.GetClusters() {
|
for _, c := range ar.GetClusters() {
|
||||||
|
|
||||||
@@ -49,11 +52,64 @@ func initClusterConfig() error {
|
|||||||
if !mc.Scope.Valid() {
|
if !mc.Scope.Valid() {
|
||||||
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
|
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ml, ok := metricLookup[mc.Name]
|
||||||
|
if !ok {
|
||||||
|
metricLookup[mc.Name] = schema.GlobalMetricListItem{
|
||||||
|
Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
|
||||||
|
}
|
||||||
|
ml = metricLookup[mc.Name]
|
||||||
|
}
|
||||||
|
availability := schema.ClusterSupport{Cluster: cluster.Name}
|
||||||
|
scLookup := make(map[string]*schema.SubClusterConfig)
|
||||||
|
|
||||||
|
for _, scc := range mc.SubClusters {
|
||||||
|
scLookup[scc.Name] = scc
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, sc := range cluster.SubClusters {
|
||||||
|
newMetric := mc
|
||||||
|
newMetric.SubClusters = nil
|
||||||
|
|
||||||
|
if cfg, ok := scLookup[sc.Name]; ok {
|
||||||
|
if !cfg.Remove {
|
||||||
|
availability.SubClusters = append(availability.SubClusters, sc.Name)
|
||||||
|
newMetric.Peak = cfg.Peak
|
||||||
|
newMetric.Normal = cfg.Normal
|
||||||
|
newMetric.Caution = cfg.Caution
|
||||||
|
newMetric.Alert = cfg.Alert
|
||||||
|
newMetric.Footprint = cfg.Footprint
|
||||||
|
newMetric.Energy = cfg.Energy
|
||||||
|
newMetric.LowerIsBetter = cfg.LowerIsBetter
|
||||||
|
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
|
||||||
|
|
||||||
|
if newMetric.Footprint != "" {
|
||||||
|
sc.Footprint = append(sc.Footprint, newMetric.Name)
|
||||||
|
ml.Footprint = newMetric.Footprint
|
||||||
|
}
|
||||||
|
if newMetric.Energy != "" {
|
||||||
|
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
availability.SubClusters = append(availability.SubClusters, sc.Name)
|
||||||
|
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
|
||||||
|
|
||||||
|
if newMetric.Footprint != "" {
|
||||||
|
sc.Footprint = append(sc.Footprint, newMetric.Name)
|
||||||
|
}
|
||||||
|
if newMetric.Energy != "" {
|
||||||
|
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ml.Availability = append(metricLookup[mc.Name].Availability, availability)
|
||||||
|
metricLookup[mc.Name] = ml
|
||||||
}
|
}
|
||||||
|
|
||||||
Clusters = append(Clusters, cluster)
|
Clusters = append(Clusters, cluster)
|
||||||
|
|
||||||
nodeLists[cluster.Name] = make(map[string]NodeList)
|
NodeLists[cluster.Name] = make(map[string]NodeList)
|
||||||
for _, sc := range cluster.SubClusters {
|
for _, sc := range cluster.SubClusters {
|
||||||
if sc.Nodes == "*" {
|
if sc.Nodes == "*" {
|
||||||
continue
|
continue
|
||||||
@@ -63,15 +119,18 @@ func initClusterConfig() error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
|
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > in %s/cluster.json: %w", cluster.Name, err)
|
||||||
}
|
}
|
||||||
nodeLists[cluster.Name][sc.Name] = nl
|
NodeLists[cluster.Name][sc.Name] = nl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, ml := range metricLookup {
|
||||||
|
GlobalMetricList = append(GlobalMetricList, &ml)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetCluster(cluster string) *schema.Cluster {
|
func GetCluster(cluster string) *schema.Cluster {
|
||||||
|
|
||||||
for _, c := range Clusters {
|
for _, c := range Clusters {
|
||||||
if c.Name == cluster {
|
if c.Name == cluster {
|
||||||
return c
|
return c
|
||||||
@@ -90,11 +149,10 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("Subcluster '%v' not found for cluster '%v', or cluster '%v' not configured!", subcluster, cluster, cluster)
|
return nil, fmt.Errorf("subcluster '%v' not found for cluster '%v', or cluster '%v' not configured", subcluster, cluster, cluster)
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
||||||
|
|
||||||
for _, c := range Clusters {
|
for _, c := range Clusters {
|
||||||
if c.Name == cluster {
|
if c.Name == cluster {
|
||||||
for _, m := range c.MetricConfig {
|
for _, m := range c.MetricConfig {
|
||||||
@@ -110,7 +168,6 @@ func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
|
|||||||
// AssignSubCluster sets the `job.subcluster` property of the job based
|
// AssignSubCluster sets the `job.subcluster` property of the job based
|
||||||
// on its cluster and resources.
|
// on its cluster and resources.
|
||||||
func AssignSubCluster(job *schema.BaseJob) error {
|
func AssignSubCluster(job *schema.BaseJob) error {
|
||||||
|
|
||||||
cluster := GetCluster(job.Cluster)
|
cluster := GetCluster(job.Cluster)
|
||||||
if cluster == nil {
|
if cluster == nil {
|
||||||
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
|
return fmt.Errorf("ARCHIVE/CLUSTERCONFIG > unkown cluster: %v", job.Cluster)
|
||||||
@@ -130,7 +187,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
host0 := job.Resources[0].Hostname
|
host0 := job.Resources[0].Hostname
|
||||||
for sc, nl := range nodeLists[job.Cluster] {
|
for sc, nl := range NodeLists[job.Cluster] {
|
||||||
if nl != nil && nl.Contains(host0) {
|
if nl != nil && nl.Contains(host0) {
|
||||||
job.SubCluster = sc
|
job.SubCluster = sc
|
||||||
return nil
|
return nil
|
||||||
@@ -146,8 +203,7 @@ func AssignSubCluster(job *schema.BaseJob) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
||||||
|
for sc, nl := range NodeLists[cluster] {
|
||||||
for sc, nl := range nodeLists[cluster] {
|
|
||||||
if nl != nil && nl.Contains(hostname) {
|
if nl != nil && nl.Contains(hostname) {
|
||||||
return sc, nil
|
return sc, nil
|
||||||
}
|
}
|
||||||
@@ -164,3 +220,13 @@ func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
|||||||
|
|
||||||
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
|
return "", fmt.Errorf("ARCHIVE/CLUSTERCONFIG > no subcluster found for cluster %v and host %v", cluster, hostname)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func MetricIndex(mc []schema.MetricConfig, name string) (int, error) {
|
||||||
|
for i, m := range mc {
|
||||||
|
if m.Name == name {
|
||||||
|
return i, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, fmt.Errorf("unknown metric name %s", name)
|
||||||
|
}
|
||||||
|
|||||||
39
pkg/archive/clusterConfig_test.go
Normal file
39
pkg/archive/clusterConfig_test.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
package archive_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestClusterConfig(t *testing.T) {
|
||||||
|
if err := archive.Init(json.RawMessage("{\"kind\": \"file\",\"path\": \"testdata/archive\"}"), false); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sc, err := archive.GetSubCluster("fritz", "spr1tb")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// spew.Dump(sc.MetricConfig)
|
||||||
|
if len(sc.Footprint) != 3 {
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
if len(sc.MetricConfig) != 15 {
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, metric := range sc.MetricConfig {
|
||||||
|
if metric.LowerIsBetter && metric.Name != "mem_used" {
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// spew.Dump(archive.GlobalMetricList)
|
||||||
|
// t.Fail()
|
||||||
|
}
|
||||||
@@ -30,6 +30,7 @@ func TestInitNoJson(t *testing.T) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestInitNotExists(t *testing.T) {
|
func TestInitNotExists(t *testing.T) {
|
||||||
var fsa FsArchive
|
var fsa FsArchive
|
||||||
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/job-archive\"}"))
|
_, err := fsa.Init(json.RawMessage("{\"path\":\"testdata/job-archive\"}"))
|
||||||
@@ -47,10 +48,10 @@ func TestInit(t *testing.T) {
|
|||||||
if fsa.path != "testdata/archive" {
|
if fsa.path != "testdata/archive" {
|
||||||
t.Fail()
|
t.Fail()
|
||||||
}
|
}
|
||||||
if version != 1 {
|
if version != 2 {
|
||||||
t.Fail()
|
t.Fail()
|
||||||
}
|
}
|
||||||
if len(fsa.clusters) != 1 || fsa.clusters[0] != "emmy" {
|
if len(fsa.clusters) != 3 || fsa.clusters[1] != "emmy" {
|
||||||
t.Fail()
|
t.Fail()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -133,7 +134,6 @@ func TestLoadJobData(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkLoadJobData(b *testing.B) {
|
func BenchmarkLoadJobData(b *testing.B) {
|
||||||
|
|
||||||
tmpdir := b.TempDir()
|
tmpdir := b.TempDir()
|
||||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||||
util.CopyDir("./testdata/archive/", jobarchive)
|
util.CopyDir("./testdata/archive/", jobarchive)
|
||||||
@@ -157,7 +157,6 @@ func BenchmarkLoadJobData(b *testing.B) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkLoadJobDataCompressed(b *testing.B) {
|
func BenchmarkLoadJobDataCompressed(b *testing.B) {
|
||||||
|
|
||||||
tmpdir := b.TempDir()
|
tmpdir := b.TempDir()
|
||||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||||
util.CopyDir("./testdata/archive/", jobarchive)
|
util.CopyDir("./testdata/archive/", jobarchive)
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
|
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
|
||||||
|
|||||||
2772
pkg/archive/testdata/archive/alex/cluster.json
vendored
Normal file
2772
pkg/archive/testdata/archive/alex/cluster.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2246
pkg/archive/testdata/archive/fritz/cluster.json
vendored
Normal file
2246
pkg/archive/testdata/archive/fritz/cluster.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2
pkg/archive/testdata/archive/version.txt
vendored
2
pkg/archive/testdata/archive/version.txt
vendored
@@ -1 +1 @@
|
|||||||
1
|
2
|
||||||
|
|||||||
123
pkg/resampler/resampler.go
Normal file
123
pkg/resampler/resampler.go
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
package resampler
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func SimpleResampler(data []schema.Float, old_frequency int64, new_frequency int64) ([]schema.Float, int64, error) {
|
||||||
|
if old_frequency == 0 || new_frequency == 0 || new_frequency <= old_frequency {
|
||||||
|
return data, old_frequency, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if new_frequency%old_frequency != 0 {
|
||||||
|
return nil, 0, errors.New("new sampling frequency should be multiple of the old frequency")
|
||||||
|
}
|
||||||
|
|
||||||
|
var step int = int(new_frequency / old_frequency)
|
||||||
|
var new_data_length = len(data) / step
|
||||||
|
|
||||||
|
if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
|
||||||
|
return data, old_frequency, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
new_data := make([]schema.Float, new_data_length)
|
||||||
|
|
||||||
|
for i := 0; i < new_data_length; i++ {
|
||||||
|
new_data[i] = data[i*step]
|
||||||
|
}
|
||||||
|
|
||||||
|
return new_data, new_frequency, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inspired by one of the algorithms from https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
|
||||||
|
// Adapted from https://github.com/haoel/downsampling/blob/master/core/lttb.go
|
||||||
|
func LargestTriangleThreeBucket(data []schema.Float, old_frequency int, new_frequency int) ([]schema.Float, int, error) {
|
||||||
|
|
||||||
|
if old_frequency == 0 || new_frequency == 0 || new_frequency <= old_frequency {
|
||||||
|
return data, old_frequency, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if new_frequency%old_frequency != 0 {
|
||||||
|
return nil, 0, errors.New(fmt.Sprintf("new sampling frequency : %d should be multiple of the old frequency : %d", new_frequency, old_frequency))
|
||||||
|
}
|
||||||
|
|
||||||
|
var step int = int(new_frequency / old_frequency)
|
||||||
|
var new_data_length = len(data) / step
|
||||||
|
|
||||||
|
if new_data_length == 0 || len(data) < 100 || new_data_length >= len(data) {
|
||||||
|
return data, old_frequency, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
new_data := make([]schema.Float, 0, new_data_length)
|
||||||
|
|
||||||
|
// Bucket size. Leave room for start and end data points
|
||||||
|
bucketSize := float64(len(data)-2) / float64(new_data_length-2)
|
||||||
|
|
||||||
|
new_data = append(new_data, data[0]) // Always add the first point
|
||||||
|
|
||||||
|
// We have 3 pointers represent for
|
||||||
|
// > bucketLow - the current bucket's beginning location
|
||||||
|
// > bucketMiddle - the current bucket's ending location,
|
||||||
|
// also the beginning location of next bucket
|
||||||
|
// > bucketHight - the next bucket's ending location.
|
||||||
|
bucketLow := 1
|
||||||
|
bucketMiddle := int(math.Floor(bucketSize)) + 1
|
||||||
|
|
||||||
|
var prevMaxAreaPoint int
|
||||||
|
|
||||||
|
for i := 0; i < new_data_length-2; i++ {
|
||||||
|
|
||||||
|
bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1
|
||||||
|
if bucketHigh >= len(data)-1 {
|
||||||
|
bucketHigh = len(data) - 2
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate point average for next bucket (containing c)
|
||||||
|
avgPointX, avgPointY := calculateAverageDataPoint(data[bucketMiddle:bucketHigh+1], int64(bucketMiddle))
|
||||||
|
|
||||||
|
// Get the range for current bucket
|
||||||
|
currBucketStart := bucketLow
|
||||||
|
currBucketEnd := bucketMiddle
|
||||||
|
|
||||||
|
// Point a
|
||||||
|
pointX := prevMaxAreaPoint
|
||||||
|
pointY := data[prevMaxAreaPoint]
|
||||||
|
|
||||||
|
maxArea := -1.0
|
||||||
|
|
||||||
|
var maxAreaPoint int
|
||||||
|
flag_ := 0
|
||||||
|
for ; currBucketStart < currBucketEnd; currBucketStart++ {
|
||||||
|
|
||||||
|
area := calculateTriangleArea(schema.Float(pointX), pointY, avgPointX, avgPointY, schema.Float(currBucketStart), data[currBucketStart])
|
||||||
|
if area > maxArea {
|
||||||
|
maxArea = area
|
||||||
|
maxAreaPoint = currBucketStart
|
||||||
|
}
|
||||||
|
if math.IsNaN(float64(avgPointY)) {
|
||||||
|
flag_ = 1
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if flag_ == 1 {
|
||||||
|
new_data = append(new_data, schema.NaN) // Pick this point from the bucket
|
||||||
|
|
||||||
|
} else {
|
||||||
|
new_data = append(new_data, data[maxAreaPoint]) // Pick this point from the bucket
|
||||||
|
}
|
||||||
|
prevMaxAreaPoint = maxAreaPoint // This MaxArea point is the next's prevMAxAreaPoint
|
||||||
|
|
||||||
|
//move to the next window
|
||||||
|
bucketLow = bucketMiddle
|
||||||
|
bucketMiddle = bucketHigh
|
||||||
|
}
|
||||||
|
|
||||||
|
new_data = append(new_data, data[len(data)-1]) // Always add last
|
||||||
|
|
||||||
|
return new_data, new_frequency, nil
|
||||||
|
}
|
||||||
35
pkg/resampler/util.go
Normal file
35
pkg/resampler/util.go
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
package resampler
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
func calculateTriangleArea(paX, paY, pbX, pbY, pcX, pcY schema.Float) float64 {
|
||||||
|
area := ((paX-pcX)*(pbY-paY) - (paX-pbX)*(pcY-paY)) * 0.5
|
||||||
|
return math.Abs(float64(area))
|
||||||
|
}
|
||||||
|
|
||||||
|
func calculateAverageDataPoint(points []schema.Float, xStart int64) (avgX schema.Float, avgY schema.Float) {
|
||||||
|
flag := 0
|
||||||
|
for _, point := range points {
|
||||||
|
avgX += schema.Float(xStart)
|
||||||
|
avgY += point
|
||||||
|
xStart++
|
||||||
|
if math.IsNaN(float64(point)) {
|
||||||
|
flag = 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
l := schema.Float(len(points))
|
||||||
|
|
||||||
|
avgX /= l
|
||||||
|
avgY /= l
|
||||||
|
|
||||||
|
if flag == 1 {
|
||||||
|
return avgX, schema.NaN
|
||||||
|
} else {
|
||||||
|
return avgX, avgY
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -33,35 +33,44 @@ type SubCluster struct {
|
|||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Nodes string `json:"nodes"`
|
Nodes string `json:"nodes"`
|
||||||
ProcessorType string `json:"processorType"`
|
ProcessorType string `json:"processorType"`
|
||||||
SocketsPerNode int `json:"socketsPerNode"`
|
Topology Topology `json:"topology"`
|
||||||
CoresPerSocket int `json:"coresPerSocket"`
|
|
||||||
ThreadsPerCore int `json:"threadsPerCore"`
|
|
||||||
FlopRateScalar MetricValue `json:"flopRateScalar"`
|
FlopRateScalar MetricValue `json:"flopRateScalar"`
|
||||||
FlopRateSimd MetricValue `json:"flopRateSimd"`
|
FlopRateSimd MetricValue `json:"flopRateSimd"`
|
||||||
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
|
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
|
||||||
Topology Topology `json:"topology"`
|
MetricConfig []MetricConfig `json:"metricConfig,omitempty"`
|
||||||
|
Footprint []string `json:"footprint,omitempty"`
|
||||||
|
EnergyFootprint []string `json:"energyFootprint,omitempty"`
|
||||||
|
SocketsPerNode int `json:"socketsPerNode"`
|
||||||
|
CoresPerSocket int `json:"coresPerSocket"`
|
||||||
|
ThreadsPerCore int `json:"threadsPerCore"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type SubClusterConfig struct {
|
type SubClusterConfig struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
Footprint string `json:"footprint,omitempty"`
|
||||||
|
Energy string `json:"energy"`
|
||||||
Peak float64 `json:"peak"`
|
Peak float64 `json:"peak"`
|
||||||
Normal float64 `json:"normal"`
|
Normal float64 `json:"normal"`
|
||||||
Caution float64 `json:"caution"`
|
Caution float64 `json:"caution"`
|
||||||
Alert float64 `json:"alert"`
|
Alert float64 `json:"alert"`
|
||||||
Remove bool `json:"remove"`
|
Remove bool `json:"remove"`
|
||||||
|
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricConfig struct {
|
type MetricConfig struct {
|
||||||
Name string `json:"name"`
|
|
||||||
Unit Unit `json:"unit"`
|
Unit Unit `json:"unit"`
|
||||||
|
Energy string `json:"energy"`
|
||||||
|
Name string `json:"name"`
|
||||||
Scope MetricScope `json:"scope"`
|
Scope MetricScope `json:"scope"`
|
||||||
Aggregation string `json:"aggregation"`
|
Aggregation string `json:"aggregation"`
|
||||||
Timestep int `json:"timestep"`
|
Footprint string `json:"footprint,omitempty"`
|
||||||
|
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
||||||
Peak float64 `json:"peak"`
|
Peak float64 `json:"peak"`
|
||||||
Normal float64 `json:"normal"`
|
|
||||||
Caution float64 `json:"caution"`
|
Caution float64 `json:"caution"`
|
||||||
Alert float64 `json:"alert"`
|
Alert float64 `json:"alert"`
|
||||||
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
|
Timestep int `json:"timestep"`
|
||||||
|
Normal float64 `json:"normal"`
|
||||||
|
LowerIsBetter bool `json:"lowerIsBetter"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Cluster struct {
|
type Cluster struct {
|
||||||
@@ -70,14 +79,27 @@ type Cluster struct {
|
|||||||
SubClusters []*SubCluster `json:"subClusters"`
|
SubClusters []*SubCluster `json:"subClusters"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ClusterSupport struct {
|
||||||
|
Cluster string `json:"cluster"`
|
||||||
|
SubClusters []string `json:"subclusters"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type GlobalMetricListItem struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Unit Unit `json:"unit"`
|
||||||
|
Scope MetricScope `json:"scope"`
|
||||||
|
Footprint string `json:"footprint,omitempty"`
|
||||||
|
Availability []ClusterSupport `json:"availability"`
|
||||||
|
}
|
||||||
|
|
||||||
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
|
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
|
||||||
// hwthread is in that socket, add it to the list. If no hwthreads other than
|
// hwthread is in that socket, add it to the list. If no hwthreads other than
|
||||||
// those in the argument list are assigned to one of the sockets in the first
|
// those in the argument list are assigned to one of the sockets in the first
|
||||||
// return value, return true as the second value. TODO: Optimize this, there
|
// return value, return true as the second value. TODO: Optimize this, there
|
||||||
// must be a more efficient way/algorithm.
|
// must be a more efficient way/algorithm.
|
||||||
func (topo *Topology) GetSocketsFromHWThreads(
|
func (topo *Topology) GetSocketsFromHWThreads(
|
||||||
hwthreads []int) (sockets []int, exclusive bool) {
|
hwthreads []int,
|
||||||
|
) (sockets []int, exclusive bool) {
|
||||||
socketsMap := map[int]int{}
|
socketsMap := map[int]int{}
|
||||||
for _, hwthread := range hwthreads {
|
for _, hwthread := range hwthreads {
|
||||||
for socket, hwthreadsInSocket := range topo.Socket {
|
for socket, hwthreadsInSocket := range topo.Socket {
|
||||||
@@ -100,14 +122,46 @@ func (topo *Topology) GetSocketsFromHWThreads(
|
|||||||
return sockets, exclusive
|
return sockets, exclusive
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return a list of socket IDs given a list of core IDs. Even if just one
|
||||||
|
// core is in that socket, add it to the list. If no cores other than
|
||||||
|
// those in the argument list are assigned to one of the sockets in the first
|
||||||
|
// return value, return true as the second value. TODO: Optimize this, there
|
||||||
|
// must be a more efficient way/algorithm.
|
||||||
|
func (topo *Topology) GetSocketsFromCores (
|
||||||
|
cores []int,
|
||||||
|
) (sockets []int, exclusive bool) {
|
||||||
|
socketsMap := map[int]int{}
|
||||||
|
for _, core := range cores {
|
||||||
|
for _, hwthreadInCore := range topo.Core[core] {
|
||||||
|
for socket, hwthreadsInSocket := range topo.Socket {
|
||||||
|
for _, hwthreadInSocket := range hwthreadsInSocket {
|
||||||
|
if hwthreadInCore == hwthreadInSocket {
|
||||||
|
socketsMap[socket] += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
exclusive = true
|
||||||
|
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
|
||||||
|
sockets = make([]int, 0, len(socketsMap))
|
||||||
|
for socket, count := range socketsMap {
|
||||||
|
sockets = append(sockets, socket)
|
||||||
|
exclusive = exclusive && count == hwthreadsPerSocket
|
||||||
|
}
|
||||||
|
|
||||||
|
return sockets, exclusive
|
||||||
|
}
|
||||||
|
|
||||||
// Return a list of core IDs given a list of hwthread IDs. Even if just one
|
// Return a list of core IDs given a list of hwthread IDs. Even if just one
|
||||||
// hwthread is in that core, add it to the list. If no hwthreads other than
|
// hwthread is in that core, add it to the list. If no hwthreads other than
|
||||||
// those in the argument list are assigned to one of the cores in the first
|
// those in the argument list are assigned to one of the cores in the first
|
||||||
// return value, return true as the second value. TODO: Optimize this, there
|
// return value, return true as the second value. TODO: Optimize this, there
|
||||||
// must be a more efficient way/algorithm.
|
// must be a more efficient way/algorithm.
|
||||||
func (topo *Topology) GetCoresFromHWThreads(
|
func (topo *Topology) GetCoresFromHWThreads(
|
||||||
hwthreads []int) (cores []int, exclusive bool) {
|
hwthreads []int,
|
||||||
|
) (cores []int, exclusive bool) {
|
||||||
coresMap := map[int]int{}
|
coresMap := map[int]int{}
|
||||||
for _, hwthread := range hwthreads {
|
for _, hwthread := range hwthreads {
|
||||||
for core, hwthreadsInCore := range topo.Core {
|
for core, hwthreadsInCore := range topo.Core {
|
||||||
@@ -136,8 +190,8 @@ func (topo *Topology) GetCoresFromHWThreads(
|
|||||||
// memory domains in the first return value, return true as the second value.
|
// memory domains in the first return value, return true as the second value.
|
||||||
// TODO: Optimize this, there must be a more efficient way/algorithm.
|
// TODO: Optimize this, there must be a more efficient way/algorithm.
|
||||||
func (topo *Topology) GetMemoryDomainsFromHWThreads(
|
func (topo *Topology) GetMemoryDomainsFromHWThreads(
|
||||||
hwthreads []int) (memDoms []int, exclusive bool) {
|
hwthreads []int,
|
||||||
|
) (memDoms []int, exclusive bool) {
|
||||||
memDomsMap := map[int]int{}
|
memDomsMap := map[int]int{}
|
||||||
for _, hwthread := range hwthreads {
|
for _, hwthread := range hwthreads {
|
||||||
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
|
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
|
||||||
@@ -172,7 +226,17 @@ func (topo *Topology) GetAcceleratorID(id int) (string, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
|
// Return list of hardware (string) accelerator IDs
|
||||||
|
func (topo *Topology) GetAcceleratorIDs() []string {
|
||||||
|
accels := make([]string, 0)
|
||||||
|
for _, accel := range topo.Accelerators {
|
||||||
|
accels = append(accels, accel.ID)
|
||||||
|
}
|
||||||
|
return accels
|
||||||
|
}
|
||||||
|
|
||||||
|
// Outdated? Or: Return indices of accelerators in parent array?
|
||||||
|
func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) {
|
||||||
accels := make([]int, 0)
|
accels := make([]int, 0)
|
||||||
for _, accel := range topo.Accelerators {
|
for _, accel := range topo.Accelerators {
|
||||||
id, err := strconv.Atoi(accel.ID)
|
id, err := strconv.Atoi(accel.ID)
|
||||||
|
|||||||
@@ -26,6 +26,7 @@ type LdapConfig struct {
|
|||||||
type OpenIDConfig struct {
|
type OpenIDConfig struct {
|
||||||
Provider string `json:"provider"`
|
Provider string `json:"provider"`
|
||||||
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||||
|
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type JWTAuthConfig struct {
|
type JWTAuthConfig struct {
|
||||||
@@ -45,6 +46,9 @@ type JWTAuthConfig struct {
|
|||||||
|
|
||||||
// Should an non-existent user be added to the DB based on the information in the token
|
// Should an non-existent user be added to the DB based on the information in the token
|
||||||
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||||
|
|
||||||
|
// Should an existent user be updated in the DB based on the information in the token
|
||||||
|
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type IntRange struct {
|
type IntRange struct {
|
||||||
@@ -55,6 +59,7 @@ type IntRange struct {
|
|||||||
type TimeRange struct {
|
type TimeRange struct {
|
||||||
From *time.Time `json:"from"`
|
From *time.Time `json:"from"`
|
||||||
To *time.Time `json:"to"`
|
To *time.Time `json:"to"`
|
||||||
|
Range string `json:"range,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type FilterRanges struct {
|
type FilterRanges struct {
|
||||||
@@ -76,6 +81,20 @@ type Retention struct {
|
|||||||
IncludeDB bool `json:"includeDB"`
|
IncludeDB bool `json:"includeDB"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ResampleConfig struct {
|
||||||
|
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
|
||||||
|
Resolutions []int `json:"resolutions"`
|
||||||
|
// Trigger next zoom level at less than this many visible datapoints
|
||||||
|
Trigger int `json:"trigger"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type CronFrequency struct {
|
||||||
|
// Duration Update Worker [Defaults to '5m']
|
||||||
|
DurationWorker string `json:"duration-worker"`
|
||||||
|
// Metric-Footprint Update Worker [Defaults to '10m']
|
||||||
|
FootprintWorker string `json:"footprint-worker"`
|
||||||
|
}
|
||||||
|
|
||||||
// Format of the configuration (file). See below for the defaults.
|
// Format of the configuration (file). See below for the defaults.
|
||||||
type ProgramConfig struct {
|
type ProgramConfig struct {
|
||||||
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||||
@@ -133,6 +152,9 @@ type ProgramConfig struct {
|
|||||||
// be provided! Most options here can be overwritten by the user.
|
// be provided! Most options here can be overwritten by the user.
|
||||||
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
UiDefaults map[string]interface{} `json:"ui-defaults"`
|
||||||
|
|
||||||
|
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
|
||||||
|
EnableResampling *ResampleConfig `json:"enable-resampling"`
|
||||||
|
|
||||||
// Where to store MachineState files
|
// Where to store MachineState files
|
||||||
MachineStateDir string `json:"machine-state-dir"`
|
MachineStateDir string `json:"machine-state-dir"`
|
||||||
|
|
||||||
@@ -142,6 +164,13 @@ type ProgramConfig struct {
|
|||||||
// Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views.
|
// Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views.
|
||||||
ShortRunningJobsDuration int `json:"short-running-jobs-duration"`
|
ShortRunningJobsDuration int `json:"short-running-jobs-duration"`
|
||||||
|
|
||||||
|
// Energy Mix CO2 Emission Constant [g/kWh]
|
||||||
|
// If entered, displays estimated CO2 emission for job based on jobs totalEnergy
|
||||||
|
EmissionConstant int `json:"emission-constant"`
|
||||||
|
|
||||||
|
// Frequency of cron job workers
|
||||||
|
CronFrequency *CronFrequency `json:"cron-frequency"`
|
||||||
|
|
||||||
// Array of Clusters
|
// Array of Clusters
|
||||||
Clusters []*ClusterConfig `json:"clusters"`
|
Clusters []*ClusterConfig `json:"clusters"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,30 +16,33 @@ import (
|
|||||||
// Common subset of Job and JobMeta. Use one of those, not this type directly.
|
// Common subset of Job and JobMeta. Use one of those, not this type directly.
|
||||||
|
|
||||||
type BaseJob struct {
|
type BaseJob struct {
|
||||||
// The unique identifier of a job
|
Cluster string `json:"cluster" db:"cluster" example:"fritz"`
|
||||||
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
SubCluster string `json:"subCluster" db:"subcluster" example:"main"`
|
||||||
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
|
Partition string `json:"partition,omitempty" db:"cluster_partition" example:"main"`
|
||||||
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
|
Project string `json:"project" db:"project" example:"abcd200"`
|
||||||
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
|
User string `json:"user" db:"hpc_user" example:"abcd100h"`
|
||||||
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
|
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"`
|
||||||
Partition string `json:"partition,omitempty" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
|
Tags []*Tag `json:"tags,omitempty"`
|
||||||
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` // The unique identifier of an array job
|
RawEnergyFootprint []byte `json:"-" db:"energy_footprint"`
|
||||||
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
|
RawFootprint []byte `json:"-" db:"footprint"`
|
||||||
// NumCores int32 `json:"numCores" db:"num_cores" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
|
RawMetaData []byte `json:"-" db:"meta_data"`
|
||||||
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
|
RawResources []byte `json:"-" db:"resources"`
|
||||||
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
|
Resources []*Resource `json:"resources"`
|
||||||
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
|
EnergyFootprint map[string]float64 `json:"energyFootprint"`
|
||||||
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
|
Footprint map[string]float64 `json:"footprint"`
|
||||||
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"` // SMT threads used by job
|
MetaData map[string]string `json:"metaData"`
|
||||||
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
|
|
||||||
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
|
|
||||||
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
|
|
||||||
Tags []*Tag `json:"tags,omitempty"` // List of tags
|
|
||||||
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
|
|
||||||
Resources []*Resource `json:"resources"` // Resources used by job
|
|
||||||
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
|
|
||||||
MetaData map[string]string `json:"metaData"` // Additional information about the job
|
|
||||||
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
|
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
|
||||||
|
Energy float64 `json:"energy" db:"energy"`
|
||||||
|
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
|
||||||
|
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
|
||||||
|
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
||||||
|
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"`
|
||||||
|
SMT int32 `json:"smt,omitempty" db:"smt" example:"4"`
|
||||||
|
MonitoringStatus int32 `json:"monitoringStatus,omitempty" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"`
|
||||||
|
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"`
|
||||||
|
NumAcc int32 `json:"numAcc,omitempty" db:"num_acc" example:"2" minimum:"1"`
|
||||||
|
NumHWThreads int32 `json:"numHwthreads,omitempty" db:"num_hwthreads" example:"20" minimum:"1"`
|
||||||
|
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Job struct type
|
// Job struct type
|
||||||
@@ -49,19 +52,10 @@ type BaseJob struct {
|
|||||||
// Job model
|
// Job model
|
||||||
// @Description Information of a HPC job.
|
// @Description Information of a HPC job.
|
||||||
type Job struct {
|
type Job struct {
|
||||||
// The unique identifier of a job in the database
|
StartTime time.Time `json:"startTime"`
|
||||||
ID int64 `json:"id" db:"id"`
|
|
||||||
BaseJob
|
BaseJob
|
||||||
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
|
ID int64 `json:"id" db:"id"`
|
||||||
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
|
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"`
|
||||||
MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64
|
|
||||||
FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64
|
|
||||||
MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64
|
|
||||||
LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64
|
|
||||||
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
|
|
||||||
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
|
|
||||||
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64
|
|
||||||
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"` // FileDataVolTotal as Float64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// JobMeta struct type
|
// JobMeta struct type
|
||||||
@@ -88,11 +82,10 @@ type JobLinkResultList struct {
|
|||||||
// JobMeta model
|
// JobMeta model
|
||||||
// @Description Meta data information of a HPC job.
|
// @Description Meta data information of a HPC job.
|
||||||
type JobMeta struct {
|
type JobMeta struct {
|
||||||
// The unique identifier of a job in the database
|
|
||||||
ID *int64 `json:"id,omitempty"`
|
ID *int64 `json:"id,omitempty"`
|
||||||
|
Statistics map[string]JobStatistics `json:"statistics"`
|
||||||
BaseJob
|
BaseJob
|
||||||
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0)
|
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"`
|
||||||
Statistics map[string]JobStatistics `json:"statistics"` // Metric statistics of job
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -124,18 +117,19 @@ type JobStatistics struct {
|
|||||||
// Tag model
|
// Tag model
|
||||||
// @Description Defines a tag using name and type.
|
// @Description Defines a tag using name and type.
|
||||||
type Tag struct {
|
type Tag struct {
|
||||||
ID int64 `json:"id" db:"id"` // The unique DB identifier of a tag
|
Type string `json:"type" db:"tag_type" example:"Debug"`
|
||||||
Type string `json:"type" db:"tag_type" example:"Debug"` // Tag Type
|
Name string `json:"name" db:"tag_name" example:"Testjob"`
|
||||||
Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name
|
Scope string `json:"scope" db:"tag_scope" example:"global"`
|
||||||
|
ID int64 `json:"id" db:"id"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Resource model
|
// Resource model
|
||||||
// @Description A resource used by a job
|
// @Description A resource used by a job
|
||||||
type Resource struct {
|
type Resource struct {
|
||||||
Hostname string `json:"hostname"` // Name of the host (= node)
|
Hostname string `json:"hostname"`
|
||||||
HWThreads []int `json:"hwthreads,omitempty"` // List of OS processor ids
|
Configuration string `json:"configuration,omitempty"`
|
||||||
Accelerators []string `json:"accelerators,omitempty"` // List of of accelerator device ids
|
HWThreads []int `json:"hwthreads,omitempty"`
|
||||||
Configuration string `json:"configuration,omitempty"` // The configuration options of the node
|
Accelerators []string `json:"accelerators,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobState string
|
type JobState string
|
||||||
|
|||||||
@@ -10,22 +10,24 @@ import (
|
|||||||
"math"
|
"math"
|
||||||
"sort"
|
"sort"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
type JobData map[string]map[MetricScope]*JobMetric
|
type JobData map[string]map[MetricScope]*JobMetric
|
||||||
|
|
||||||
type JobMetric struct {
|
type JobMetric struct {
|
||||||
Unit Unit `json:"unit"`
|
|
||||||
Timestep int `json:"timestep"`
|
|
||||||
Series []Series `json:"series"`
|
|
||||||
StatisticsSeries *StatsSeries `json:"statisticsSeries,omitempty"`
|
StatisticsSeries *StatsSeries `json:"statisticsSeries,omitempty"`
|
||||||
|
Unit Unit `json:"unit"`
|
||||||
|
Series []Series `json:"series"`
|
||||||
|
Timestep int `json:"timestep"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type Series struct {
|
type Series struct {
|
||||||
Hostname string `json:"hostname"`
|
|
||||||
Id *string `json:"id,omitempty"`
|
Id *string `json:"id,omitempty"`
|
||||||
Statistics MetricStatistics `json:"statistics"`
|
Hostname string `json:"hostname"`
|
||||||
Data []Float `json:"data"`
|
Data []Float `json:"data"`
|
||||||
|
Statistics MetricStatistics `json:"statistics"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricStatistics struct {
|
type MetricStatistics struct {
|
||||||
@@ -35,10 +37,11 @@ type MetricStatistics struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type StatsSeries struct {
|
type StatsSeries struct {
|
||||||
|
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
||||||
Mean []Float `json:"mean"`
|
Mean []Float `json:"mean"`
|
||||||
|
Median []Float `json:"median"`
|
||||||
Min []Float `json:"min"`
|
Min []Float `json:"min"`
|
||||||
Max []Float `json:"max"`
|
Max []Float `json:"max"`
|
||||||
Percentiles map[int][]Float `json:"percentiles,omitempty"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricScope string
|
type MetricScope string
|
||||||
@@ -121,6 +124,7 @@ func (jd *JobData) Size() int {
|
|||||||
if metric.StatisticsSeries != nil {
|
if metric.StatisticsSeries != nil {
|
||||||
n += len(metric.StatisticsSeries.Max)
|
n += len(metric.StatisticsSeries.Max)
|
||||||
n += len(metric.StatisticsSeries.Mean)
|
n += len(metric.StatisticsSeries.Mean)
|
||||||
|
n += len(metric.StatisticsSeries.Median)
|
||||||
n += len(metric.StatisticsSeries.Min)
|
n += len(metric.StatisticsSeries.Min)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -149,53 +153,74 @@ func (jm *JobMetric) AddStatisticsSeries() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
min, mean, max := make([]Float, n), make([]Float, n), make([]Float, n)
|
// mean := make([]Float, n)
|
||||||
|
min, median, max := make([]Float, n), make([]Float, n), make([]Float, n)
|
||||||
i := 0
|
i := 0
|
||||||
for ; i < m; i++ {
|
for ; i < m; i++ {
|
||||||
smin, ssum, smax := math.MaxFloat32, 0.0, -math.MaxFloat32
|
seriesCount := len(jm.Series)
|
||||||
|
// ssum := 0.0
|
||||||
|
smin, smed, smax := math.MaxFloat32, make([]float64, seriesCount), -math.MaxFloat32
|
||||||
notnan := 0
|
notnan := 0
|
||||||
for j := 0; j < len(jm.Series); j++ {
|
for j := 0; j < seriesCount; j++ {
|
||||||
x := float64(jm.Series[j].Data[i])
|
x := float64(jm.Series[j].Data[i])
|
||||||
if math.IsNaN(x) {
|
if math.IsNaN(x) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
notnan += 1
|
notnan += 1
|
||||||
ssum += x
|
// ssum += x
|
||||||
|
smed[j] = x
|
||||||
smin = math.Min(smin, x)
|
smin = math.Min(smin, x)
|
||||||
smax = math.Max(smax, x)
|
smax = math.Max(smax, x)
|
||||||
}
|
}
|
||||||
|
|
||||||
if notnan < 3 {
|
if notnan < 3 {
|
||||||
min[i] = NaN
|
min[i] = NaN
|
||||||
mean[i] = NaN
|
// mean[i] = NaN
|
||||||
|
median[i] = NaN
|
||||||
max[i] = NaN
|
max[i] = NaN
|
||||||
} else {
|
} else {
|
||||||
min[i] = Float(smin)
|
min[i] = Float(smin)
|
||||||
mean[i] = Float(ssum / float64(notnan))
|
// mean[i] = Float(ssum / float64(notnan))
|
||||||
max[i] = Float(smax)
|
max[i] = Float(smax)
|
||||||
|
|
||||||
|
medianRaw, err := util.Median(smed)
|
||||||
|
if err != nil {
|
||||||
|
median[i] = NaN
|
||||||
|
} else {
|
||||||
|
median[i] = Float(medianRaw)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for ; i < n; i++ {
|
for ; i < n; i++ {
|
||||||
min[i] = NaN
|
min[i] = NaN
|
||||||
mean[i] = NaN
|
// mean[i] = NaN
|
||||||
|
median[i] = NaN
|
||||||
max[i] = NaN
|
max[i] = NaN
|
||||||
}
|
}
|
||||||
|
|
||||||
if smooth {
|
if smooth {
|
||||||
for i := 2; i < len(mean)-2; i++ {
|
for i := 2; i < len(median)-2; i++ {
|
||||||
if min[i].IsNaN() {
|
if min[i].IsNaN() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
min[i] = (min[i-2] + min[i-1] + min[i] + min[i+1] + min[i+2]) / 5
|
min[i] = (min[i-2] + min[i-1] + min[i] + min[i+1] + min[i+2]) / 5
|
||||||
max[i] = (max[i-2] + max[i-1] + max[i] + max[i+1] + max[i+2]) / 5
|
max[i] = (max[i-2] + max[i-1] + max[i] + max[i+1] + max[i+2]) / 5
|
||||||
mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
|
// mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
|
||||||
|
// Reduce Median further
|
||||||
|
smoothRaw := []float64{float64(median[i-2]), float64(median[i-1]), float64(median[i]), float64(median[i+1]), float64(median[i+2])}
|
||||||
|
smoothMedian, err := util.Median(smoothRaw)
|
||||||
|
if err != nil {
|
||||||
|
median[i] = NaN
|
||||||
|
} else {
|
||||||
|
median[i] = Float(smoothMedian)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
jm.StatisticsSeries = &StatsSeries{Mean: mean, Min: min, Max: max}
|
jm.StatisticsSeries = &StatsSeries{Median: median, Min: min, Max: max} // Mean: mean
|
||||||
}
|
}
|
||||||
|
|
||||||
func (jd *JobData) AddNodeScope(metric string) bool {
|
func (jd *JobData) AddNodeScope(metric string) bool {
|
||||||
@@ -204,7 +229,7 @@ func (jd *JobData) AddNodeScope(metric string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
var maxScope MetricScope = MetricScopeInvalid
|
maxScope := MetricScopeInvalid
|
||||||
for scope := range scopes {
|
for scope := range scopes {
|
||||||
maxScope = maxScope.Max(scope)
|
maxScope = maxScope.Max(scope)
|
||||||
}
|
}
|
||||||
@@ -266,6 +291,21 @@ func (jd *JobData) AddNodeScope(metric string) bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (jd *JobData) RoundMetricStats() {
|
||||||
|
// TODO: Make Digit-Precision Configurable? (Currently: Fixed to 2 Digits)
|
||||||
|
for _, scopes := range *jd {
|
||||||
|
for _, jm := range scopes {
|
||||||
|
for index := range jm.Series {
|
||||||
|
jm.Series[index].Statistics = MetricStatistics{
|
||||||
|
Avg: (math.Round(jm.Series[index].Statistics.Avg*100) / 100),
|
||||||
|
Min: (math.Round(jm.Series[index].Statistics.Min*100) / 100),
|
||||||
|
Max: (math.Round(jm.Series[index].Statistics.Max*100) / 100),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (jm *JobMetric) AddPercentiles(ps []int) bool {
|
func (jm *JobMetric) AddPercentiles(ps []int) bool {
|
||||||
if jm.StatisticsSeries == nil {
|
if jm.StatisticsSeries == nil {
|
||||||
jm.AddStatisticsSeries()
|
jm.AddStatisticsSeries()
|
||||||
|
|||||||
@@ -39,6 +39,27 @@
|
|||||||
"avg"
|
"avg"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"footprint": {
|
||||||
|
"description": "Is it a footprint metric and what type",
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"avg",
|
||||||
|
"max",
|
||||||
|
"min"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"energy": {
|
||||||
|
"description": "Is it used to calculate job energy",
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"power",
|
||||||
|
"energy"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lowerIsBetter": {
|
||||||
|
"description": "Is lower better.",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"peak": {
|
"peak": {
|
||||||
"description": "Metric peak threshold (Upper metric limit)",
|
"description": "Metric peak threshold (Upper metric limit)",
|
||||||
"type": "number"
|
"type": "number"
|
||||||
@@ -65,6 +86,27 @@
|
|||||||
"description": "Hardware partition name",
|
"description": "Hardware partition name",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"footprint": {
|
||||||
|
"description": "Is it a footprint metric and what type. Overwrite global setting",
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"avg",
|
||||||
|
"max",
|
||||||
|
"min"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"energy": {
|
||||||
|
"description": "Is it used to calculate job energy. Overwrite global",
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"power",
|
||||||
|
"energy"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"lowerIsBetter": {
|
||||||
|
"description": "Is lower better. Overwrite global",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"peak": {
|
"peak": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
@@ -78,6 +120,7 @@
|
|||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
"remove": {
|
"remove": {
|
||||||
|
"description": "Remove this metric for this subcluster",
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -8,6 +8,13 @@
|
|||||||
"description": "Address where the http (or https) server will listen on (for example: 'localhost:80').",
|
"description": "Address where the http (or https) server will listen on (for example: 'localhost:80').",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"apiAllowedIPs": {
|
||||||
|
"description": "Addresses from which secured API endpoints can be reached",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
"user": {
|
"user": {
|
||||||
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
|
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
@@ -40,7 +47,7 @@
|
|||||||
"description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).",
|
"description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"job-archive": {
|
"archive": {
|
||||||
"description": "Configuration keys for job-archive",
|
"description": "Configuration keys for job-archive",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -127,6 +134,45 @@
|
|||||||
"description": "Do not show running jobs shorter than X seconds.",
|
"description": "Do not show running jobs shorter than X seconds.",
|
||||||
"type": "integer"
|
"type": "integer"
|
||||||
},
|
},
|
||||||
|
"emission-constant": {
|
||||||
|
"description": ".",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"cron-frequency": {
|
||||||
|
"description": "Frequency of cron job workers.",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"duration-worker": {
|
||||||
|
"description": "Duration Update Worker [Defaults to '5m']",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"footprint-worker": {
|
||||||
|
"description": "Metric-Footprint Update Worker [Defaults to '10m']",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"enable-resampling": {
|
||||||
|
"description": "Enable dynamic zoom in frontend metric plots.",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"trigger": {
|
||||||
|
"description": "Trigger next zoom level at less than this many visible datapoints.",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"resolutions": {
|
||||||
|
"description": "Array of resampling target resolutions, in seconds.",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"trigger",
|
||||||
|
"resolutions"
|
||||||
|
]
|
||||||
|
},
|
||||||
"jwts": {
|
"jwts": {
|
||||||
"description": "For JWT token authentication.",
|
"description": "For JWT token authentication.",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -156,6 +202,23 @@
|
|||||||
"max-age"
|
"max-age"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"oidc": {
|
||||||
|
"provider": {
|
||||||
|
"description": "",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"syncUserOnLogin": {
|
||||||
|
"description": "",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"updateUserOnLogin": {
|
||||||
|
"description": "",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"provider"
|
||||||
|
]
|
||||||
|
},
|
||||||
"ldap": {
|
"ldap": {
|
||||||
"description": "For LDAP Authentication and user synchronisation.",
|
"description": "For LDAP Authentication and user synchronisation.",
|
||||||
"type": "object",
|
"type": "object",
|
||||||
@@ -345,6 +408,14 @@
|
|||||||
"description": "Initial metric shown in system view",
|
"description": "Initial metric shown in system view",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
"job_view_showFootprint": {
|
||||||
|
"description": "Option to toggle footprint ui in single job view",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"job_list_usePaging": {
|
||||||
|
"description": "Option to switch from continous scroll to paging",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"analysis_view_histogramMetrics": {
|
"analysis_view_histogramMetrics": {
|
||||||
"description": "Metrics to show as job count histograms in analysis view",
|
"description": "Metrics to show as job count histograms in analysis view",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
@@ -374,16 +445,8 @@
|
|||||||
"minItems": 1
|
"minItems": 1
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"job_view_polarPlotMetrics": {
|
|
||||||
"description": "Metrics shown in polar plot of single job view",
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"minItems": 1
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"job_view_selectedMetrics": {
|
"job_view_selectedMetrics": {
|
||||||
"description": "",
|
"description": "Initial metrics shown as plots in single job view",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
@@ -416,10 +479,11 @@
|
|||||||
"plot_view_showRoofline",
|
"plot_view_showRoofline",
|
||||||
"plot_view_showStatTable",
|
"plot_view_showStatTable",
|
||||||
"system_view_selectedMetric",
|
"system_view_selectedMetric",
|
||||||
|
"job_view_showFootprint",
|
||||||
|
"job_list_usePaging",
|
||||||
"analysis_view_histogramMetrics",
|
"analysis_view_histogramMetrics",
|
||||||
"analysis_view_scatterPlotMetrics",
|
"analysis_view_scatterPlotMetrics",
|
||||||
"job_view_nodestats_selectedMetrics",
|
"job_view_nodestats_selectedMetrics",
|
||||||
"job_view_polarPlotMetrics",
|
|
||||||
"job_view_selectedMetrics",
|
"job_view_selectedMetrics",
|
||||||
"plot_general_colorscheme",
|
"plot_general_colorscheme",
|
||||||
"plot_list_selectedMetrics"
|
"plot_list_selectedMetrics"
|
||||||
|
|||||||
@@ -42,11 +42,11 @@ type User struct {
|
|||||||
Username string `json:"username"`
|
Username string `json:"username"`
|
||||||
Password string `json:"-"`
|
Password string `json:"-"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
|
Email string `json:"email"`
|
||||||
Roles []string `json:"roles"`
|
Roles []string `json:"roles"`
|
||||||
|
Projects []string `json:"projects"`
|
||||||
AuthType AuthType `json:"authType"`
|
AuthType AuthType `json:"authType"`
|
||||||
AuthSource AuthSource `json:"authSource"`
|
AuthSource AuthSource `json:"authSource"`
|
||||||
Email string `json:"email"`
|
|
||||||
Projects []string `json:"projects"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u *User) HasProject(project string) bool {
|
func (u *User) HasProject(project string) bool {
|
||||||
|
|||||||
@@ -1,65 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
)
|
|
||||||
|
|
||||||
// type Accelerator struct {
|
|
||||||
// ID string `json:"id"`
|
|
||||||
// Type string `json:"type"`
|
|
||||||
// Model string `json:"model"`
|
|
||||||
// }
|
|
||||||
|
|
||||||
// type Topology struct {
|
|
||||||
// Node []int `json:"node"`
|
|
||||||
// Socket [][]int `json:"socket"`
|
|
||||||
// MemoryDomain [][]int `json:"memoryDomain"`
|
|
||||||
// Die [][]int `json:"die"`
|
|
||||||
// Core [][]int `json:"core"`
|
|
||||||
// Accelerators []*Accelerator `json:"accelerators"`
|
|
||||||
// }
|
|
||||||
|
|
||||||
type SubCluster struct {
|
|
||||||
Name string `json:"name"`
|
|
||||||
Nodes string `json:"nodes"`
|
|
||||||
NumberOfNodes int `json:"numberOfNodes"`
|
|
||||||
ProcessorType string `json:"processorType"`
|
|
||||||
SocketsPerNode int `json:"socketsPerNode"`
|
|
||||||
CoresPerSocket int `json:"coresPerSocket"`
|
|
||||||
ThreadsPerCore int `json:"threadsPerCore"`
|
|
||||||
FlopRateScalar int `json:"flopRateScalar"`
|
|
||||||
FlopRateSimd int `json:"flopRateSimd"`
|
|
||||||
MemoryBandwidth int `json:"memoryBandwidth"`
|
|
||||||
Topology *schema.Topology `json:"topology"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// type SubClusterConfig struct {
|
|
||||||
// Name string `json:"name"`
|
|
||||||
// Peak float64 `json:"peak"`
|
|
||||||
// Normal float64 `json:"normal"`
|
|
||||||
// Caution float64 `json:"caution"`
|
|
||||||
// Alert float64 `json:"alert"`
|
|
||||||
// }
|
|
||||||
|
|
||||||
type MetricConfig struct {
|
|
||||||
Name string `json:"name"`
|
|
||||||
Unit string `json:"unit"`
|
|
||||||
Scope schema.MetricScope `json:"scope"`
|
|
||||||
Aggregation string `json:"aggregation"`
|
|
||||||
Timestep int `json:"timestep"`
|
|
||||||
Peak float64 `json:"peak"`
|
|
||||||
Normal float64 `json:"normal"`
|
|
||||||
Caution float64 `json:"caution"`
|
|
||||||
Alert float64 `json:"alert"`
|
|
||||||
SubClusters []*schema.SubClusterConfig `json:"subClusters"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Cluster struct {
|
|
||||||
Name string `json:"name"`
|
|
||||||
MetricConfig []*MetricConfig `json:"metricConfig"`
|
|
||||||
SubClusters []*SubCluster `json:"subClusters"`
|
|
||||||
}
|
|
||||||
@@ -1,166 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
)
|
|
||||||
|
|
||||||
var Clusters []*Cluster
|
|
||||||
var nodeLists map[string]map[string]archive.NodeList
|
|
||||||
|
|
||||||
func initClusterConfig() error {
|
|
||||||
|
|
||||||
Clusters = []*Cluster{}
|
|
||||||
nodeLists = map[string]map[string]archive.NodeList{}
|
|
||||||
|
|
||||||
for _, c := range ar.GetClusters() {
|
|
||||||
|
|
||||||
cluster, err := ar.LoadClusterCfg(c)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(cluster.Name) == 0 ||
|
|
||||||
len(cluster.MetricConfig) == 0 ||
|
|
||||||
len(cluster.SubClusters) == 0 {
|
|
||||||
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, mc := range cluster.MetricConfig {
|
|
||||||
if len(mc.Name) == 0 {
|
|
||||||
return errors.New("cluster.metricConfig.name should not be empty")
|
|
||||||
}
|
|
||||||
if mc.Timestep < 1 {
|
|
||||||
return errors.New("cluster.metricConfig.timestep should not be smaller than one")
|
|
||||||
}
|
|
||||||
|
|
||||||
// For backwards compability...
|
|
||||||
if mc.Scope == "" {
|
|
||||||
mc.Scope = schema.MetricScopeNode
|
|
||||||
}
|
|
||||||
if !mc.Scope.Valid() {
|
|
||||||
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Clusters = append(Clusters, cluster)
|
|
||||||
|
|
||||||
nodeLists[cluster.Name] = make(map[string]archive.NodeList)
|
|
||||||
for _, sc := range cluster.SubClusters {
|
|
||||||
if sc.Nodes == "" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
nl, err := archive.ParseNodeList(sc.Nodes)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
|
|
||||||
}
|
|
||||||
nodeLists[cluster.Name][sc.Name] = nl
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetCluster(cluster string) *Cluster {
|
|
||||||
|
|
||||||
for _, c := range Clusters {
|
|
||||||
if c.Name == cluster {
|
|
||||||
return c
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetSubCluster(cluster, subcluster string) *SubCluster {
|
|
||||||
|
|
||||||
for _, c := range Clusters {
|
|
||||||
if c.Name == cluster {
|
|
||||||
for _, p := range c.SubClusters {
|
|
||||||
if p.Name == subcluster {
|
|
||||||
return p
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetMetricConfig(cluster, metric string) *MetricConfig {
|
|
||||||
|
|
||||||
for _, c := range Clusters {
|
|
||||||
if c.Name == cluster {
|
|
||||||
for _, m := range c.MetricConfig {
|
|
||||||
if m.Name == metric {
|
|
||||||
return m
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// AssignSubCluster sets the `job.subcluster` property of the job based
|
|
||||||
// on its cluster and resources.
|
|
||||||
func AssignSubCluster(job *BaseJob) error {
|
|
||||||
|
|
||||||
cluster := GetCluster(job.Cluster)
|
|
||||||
if cluster == nil {
|
|
||||||
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
|
|
||||||
}
|
|
||||||
|
|
||||||
if job.SubCluster != "" {
|
|
||||||
for _, sc := range cluster.SubClusters {
|
|
||||||
if sc.Name == job.SubCluster {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(job.Resources) == 0 {
|
|
||||||
return fmt.Errorf("job without any resources/hosts")
|
|
||||||
}
|
|
||||||
|
|
||||||
host0 := job.Resources[0].Hostname
|
|
||||||
for sc, nl := range nodeLists[job.Cluster] {
|
|
||||||
if nl != nil && nl.Contains(host0) {
|
|
||||||
job.SubCluster = sc
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if cluster.SubClusters[0].Nodes == "" {
|
|
||||||
job.SubCluster = cluster.SubClusters[0].Name
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetSubClusterByNode(cluster, hostname string) (string, error) {
|
|
||||||
|
|
||||||
for sc, nl := range nodeLists[cluster] {
|
|
||||||
if nl != nil && nl.Contains(hostname) {
|
|
||||||
return sc, nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
c := GetCluster(cluster)
|
|
||||||
if c == nil {
|
|
||||||
return "", fmt.Errorf("unkown cluster: %#v", cluster)
|
|
||||||
}
|
|
||||||
|
|
||||||
if c.SubClusters[0].Nodes == "" {
|
|
||||||
return c.SubClusters[0].Name, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
|
|
||||||
}
|
|
||||||
@@ -1,109 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"io"
|
|
||||||
"math"
|
|
||||||
"strconv"
|
|
||||||
)
|
|
||||||
|
|
||||||
// A custom float type is used so that (Un)MarshalJSON and
|
|
||||||
// (Un)MarshalGQL can be overloaded and NaN/null can be used.
|
|
||||||
// The default behaviour of putting every nullable value behind
|
|
||||||
// a pointer has a bigger overhead.
|
|
||||||
type Float float64
|
|
||||||
|
|
||||||
var NaN Float = Float(math.NaN())
|
|
||||||
var nullAsBytes []byte = []byte("null")
|
|
||||||
|
|
||||||
func (f Float) IsNaN() bool {
|
|
||||||
return math.IsNaN(float64(f))
|
|
||||||
}
|
|
||||||
|
|
||||||
// NaN will be serialized to `null`.
|
|
||||||
func (f Float) MarshalJSON() ([]byte, error) {
|
|
||||||
if f.IsNaN() {
|
|
||||||
return nullAsBytes, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return strconv.AppendFloat(make([]byte, 0, 10), float64(f), 'f', 2, 64), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// `null` will be unserialized to NaN.
|
|
||||||
func (f *Float) UnmarshalJSON(input []byte) error {
|
|
||||||
s := string(input)
|
|
||||||
if s == "null" {
|
|
||||||
*f = NaN
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
val, err := strconv.ParseFloat(s, 64)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
*f = Float(val)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// UnmarshalGQL implements the graphql.Unmarshaler interface.
|
|
||||||
func (f *Float) UnmarshalGQL(v interface{}) error {
|
|
||||||
f64, ok := v.(float64)
|
|
||||||
if !ok {
|
|
||||||
return errors.New("invalid Float scalar")
|
|
||||||
}
|
|
||||||
|
|
||||||
*f = Float(f64)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// MarshalGQL implements the graphql.Marshaler interface.
|
|
||||||
// NaN will be serialized to `null`.
|
|
||||||
func (f Float) MarshalGQL(w io.Writer) {
|
|
||||||
if f.IsNaN() {
|
|
||||||
w.Write(nullAsBytes)
|
|
||||||
} else {
|
|
||||||
w.Write(strconv.AppendFloat(make([]byte, 0, 10), float64(f), 'f', 2, 64))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only used via REST-API, not via GraphQL.
|
|
||||||
// This uses a lot less allocations per series,
|
|
||||||
// but it turns out that the performance increase
|
|
||||||
// from using this is not that big.
|
|
||||||
func (s *Series) MarshalJSON() ([]byte, error) {
|
|
||||||
buf := make([]byte, 0, 512+len(s.Data)*8)
|
|
||||||
buf = append(buf, `{"hostname":"`...)
|
|
||||||
buf = append(buf, s.Hostname...)
|
|
||||||
buf = append(buf, '"')
|
|
||||||
if s.Id != nil {
|
|
||||||
buf = append(buf, `,"id":`...)
|
|
||||||
buf = strconv.AppendInt(buf, int64(*s.Id), 10)
|
|
||||||
}
|
|
||||||
if s.Statistics != nil {
|
|
||||||
buf = append(buf, `,"statistics":{"min":`...)
|
|
||||||
buf = strconv.AppendFloat(buf, s.Statistics.Min, 'f', 2, 64)
|
|
||||||
buf = append(buf, `,"avg":`...)
|
|
||||||
buf = strconv.AppendFloat(buf, s.Statistics.Avg, 'f', 2, 64)
|
|
||||||
buf = append(buf, `,"max":`...)
|
|
||||||
buf = strconv.AppendFloat(buf, s.Statistics.Max, 'f', 2, 64)
|
|
||||||
buf = append(buf, '}')
|
|
||||||
}
|
|
||||||
buf = append(buf, `,"data":[`...)
|
|
||||||
for i := 0; i < len(s.Data); i++ {
|
|
||||||
if i != 0 {
|
|
||||||
buf = append(buf, ',')
|
|
||||||
}
|
|
||||||
|
|
||||||
if s.Data[i].IsNaN() {
|
|
||||||
buf = append(buf, `null`...)
|
|
||||||
} else {
|
|
||||||
buf = strconv.AppendFloat(buf, float64(s.Data[i]), 'f', 2, 32)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buf = append(buf, ']', '}')
|
|
||||||
return buf, nil
|
|
||||||
}
|
|
||||||
@@ -1,142 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"strconv"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
|
||||||
)
|
|
||||||
|
|
||||||
type FsArchiveConfig struct {
|
|
||||||
Path string `json:"path"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type FsArchive struct {
|
|
||||||
path string
|
|
||||||
clusters []string
|
|
||||||
}
|
|
||||||
|
|
||||||
func getPath(
|
|
||||||
job *JobMeta,
|
|
||||||
rootPath string,
|
|
||||||
file string) string {
|
|
||||||
|
|
||||||
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
|
|
||||||
return filepath.Join(
|
|
||||||
rootPath,
|
|
||||||
job.Cluster,
|
|
||||||
lvl1, lvl2,
|
|
||||||
strconv.FormatInt(job.StartTime, 10), file)
|
|
||||||
}
|
|
||||||
|
|
||||||
func loadJobMeta(filename string) (*JobMeta, error) {
|
|
||||||
|
|
||||||
f, err := os.Open(filename)
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("fsBackend loadJobMeta()- %v", err)
|
|
||||||
return &JobMeta{}, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
return DecodeJobMeta(bufio.NewReader(f))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fsa *FsArchive) Init(rawConfig json.RawMessage) error {
|
|
||||||
|
|
||||||
var config FsArchiveConfig
|
|
||||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
|
||||||
log.Errorf("fsBackend Init()- %v", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if config.Path == "" {
|
|
||||||
err := fmt.Errorf("fsBackend Init()- empty path")
|
|
||||||
log.Errorf("fsBackend Init()- %v", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
fsa.path = config.Path
|
|
||||||
|
|
||||||
entries, err := os.ReadDir(fsa.path)
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("fsBackend Init()- %v", err)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, de := range entries {
|
|
||||||
fsa.clusters = append(fsa.clusters, de.Name())
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fsa *FsArchive) Iter() <-chan *JobMeta {
|
|
||||||
|
|
||||||
ch := make(chan *JobMeta)
|
|
||||||
go func() {
|
|
||||||
clustersDir, err := os.ReadDir(fsa.path)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Reading clusters failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, clusterDir := range clustersDir {
|
|
||||||
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name()))
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Reading jobs failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, lvl1Dir := range lvl1Dirs {
|
|
||||||
if !lvl1Dir.IsDir() {
|
|
||||||
// Could be the cluster.json file
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name()))
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Reading jobs failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, lvl2Dir := range lvl2Dirs {
|
|
||||||
dirpath := filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
|
|
||||||
startTimeDirs, err := os.ReadDir(dirpath)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatalf("Reading jobs failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, startTimeDir := range startTimeDirs {
|
|
||||||
if startTimeDir.IsDir() {
|
|
||||||
job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json"))
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
|
|
||||||
} else {
|
|
||||||
ch <- job
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(ch)
|
|
||||||
}()
|
|
||||||
return ch
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fsa *FsArchive) LoadClusterCfg(name string) (*Cluster, error) {
|
|
||||||
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("fsBackend LoadClusterCfg()- %v", err)
|
|
||||||
return &Cluster{}, err
|
|
||||||
}
|
|
||||||
return DecodeCluster(bytes.NewReader(b))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (fsa *FsArchive) GetClusters() []string {
|
|
||||||
return fsa.clusters
|
|
||||||
}
|
|
||||||
@@ -1,162 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Non-Swaggered Comment: BaseJob
|
|
||||||
// Non-Swaggered Comment: Common subset of Job and JobMeta. Use one of those, not this type directly.
|
|
||||||
|
|
||||||
type BaseJob struct {
|
|
||||||
// The unique identifier of a job
|
|
||||||
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
|
|
||||||
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
|
|
||||||
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
|
|
||||||
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
|
|
||||||
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
|
|
||||||
Partition string `json:"partition" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
|
|
||||||
ArrayJobId int64 `json:"arrayJobId" db:"array_job_id" example:"123000"` // The unique identifier of an array job
|
|
||||||
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
|
|
||||||
NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
|
|
||||||
NumAcc int32 `json:"numAcc" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
|
|
||||||
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
|
|
||||||
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
|
|
||||||
SMT int32 `json:"smt" db:"smt" example:"4"` // SMT threads used by job
|
|
||||||
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
|
|
||||||
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
|
|
||||||
Walltime int64 `json:"walltime" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
|
|
||||||
Tags []*schema.Tag `json:"tags"` // List of tags
|
|
||||||
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
|
|
||||||
Resources []*Resource `json:"resources"` // Resources used by job
|
|
||||||
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
|
|
||||||
MetaData map[string]string `json:"metaData"` // Additional information about the job
|
|
||||||
}
|
|
||||||
|
|
||||||
// Non-Swaggered Comment: Job
|
|
||||||
// Non-Swaggered Comment: This type is used as the GraphQL interface and using sqlx as a table row.
|
|
||||||
|
|
||||||
// Job model
|
|
||||||
// @Description Information of a HPC job.
|
|
||||||
type Job struct {
|
|
||||||
// The unique identifier of a job in the database
|
|
||||||
ID int64 `json:"id" db:"id"`
|
|
||||||
BaseJob
|
|
||||||
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
|
|
||||||
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
|
|
||||||
MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64
|
|
||||||
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64
|
|
||||||
MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64
|
|
||||||
LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64
|
|
||||||
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
|
|
||||||
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
|
|
||||||
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64
|
|
||||||
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"` // FileDataVolTotal as Float64
|
|
||||||
}
|
|
||||||
|
|
||||||
// Non-Swaggered Comment: JobMeta
|
|
||||||
// Non-Swaggered Comment: When reading from the database or sending data via GraphQL, the start time can be in the much more
|
|
||||||
// Non-Swaggered Comment: convenient time.Time type. In the `meta.json` files, the start time is encoded as a unix epoch timestamp.
|
|
||||||
// Non-Swaggered Comment: This is why there is this struct, which contains all fields from the regular job struct, but "overwrites"
|
|
||||||
// Non-Swaggered Comment: the StartTime field with one of type int64.
|
|
||||||
// Non-Swaggered Comment: ID *int64 `json:"id,omitempty"` >> never used in the job-archive, only available via REST-API
|
|
||||||
|
|
||||||
// JobMeta model
|
|
||||||
// @Description Meta data information of a HPC job.
|
|
||||||
type JobMeta struct {
|
|
||||||
// The unique identifier of a job in the database
|
|
||||||
ID *int64 `json:"id,omitempty"`
|
|
||||||
BaseJob
|
|
||||||
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0)
|
|
||||||
Statistics map[string]JobStatistics `json:"statistics,omitempty"` // Metric statistics of job
|
|
||||||
}
|
|
||||||
|
|
||||||
const (
|
|
||||||
MonitoringStatusDisabled int32 = 0
|
|
||||||
MonitoringStatusRunningOrArchiving int32 = 1
|
|
||||||
MonitoringStatusArchivingFailed int32 = 2
|
|
||||||
MonitoringStatusArchivingSuccessful int32 = 3
|
|
||||||
)
|
|
||||||
|
|
||||||
var JobDefaults BaseJob = BaseJob{
|
|
||||||
Exclusive: 1,
|
|
||||||
MonitoringStatus: MonitoringStatusRunningOrArchiving,
|
|
||||||
}
|
|
||||||
|
|
||||||
// JobStatistics model
|
|
||||||
// @Description Specification for job metric statistics.
|
|
||||||
type JobStatistics struct {
|
|
||||||
// Metric unit (see schema/unit.schema.json)
|
|
||||||
Unit string `json:"unit" example:"GHz"`
|
|
||||||
Avg float64 `json:"avg" example:"2500" minimum:"0"` // Job metric average
|
|
||||||
Min float64 `json:"min" example:"2000" minimum:"0"` // Job metric minimum
|
|
||||||
Max float64 `json:"max" example:"3000" minimum:"0"` // Job metric maximum
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tag model
|
|
||||||
// @Description Defines a tag using name and type.
|
|
||||||
type Tag struct {
|
|
||||||
// The unique DB identifier of a tag
|
|
||||||
ID int64 `json:"id" db:"id"`
|
|
||||||
Type string `json:"type" db:"tag_type" example:"Debug"` // Tag Type
|
|
||||||
Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resource model
|
|
||||||
// @Description A resource used by a job
|
|
||||||
type Resource struct {
|
|
||||||
Hostname string `json:"hostname"` // Name of the host (= node)
|
|
||||||
HWThreads []int `json:"hwthreads,omitempty"` // List of OS processor ids
|
|
||||||
Accelerators []string `json:"accelerators,omitempty"` // List of of accelerator device ids
|
|
||||||
Configuration string `json:"configuration,omitempty"` // The configuration options of the node
|
|
||||||
}
|
|
||||||
|
|
||||||
type JobState string
|
|
||||||
|
|
||||||
const (
|
|
||||||
JobStateRunning JobState = "running"
|
|
||||||
JobStateCompleted JobState = "completed"
|
|
||||||
JobStateFailed JobState = "failed"
|
|
||||||
JobStateCancelled JobState = "cancelled"
|
|
||||||
JobStateStopped JobState = "stopped"
|
|
||||||
JobStateTimeout JobState = "timeout"
|
|
||||||
JobStatePreempted JobState = "preempted"
|
|
||||||
JobStateOutOfMemory JobState = "out_of_memory"
|
|
||||||
)
|
|
||||||
|
|
||||||
func (e *JobState) UnmarshalGQL(v interface{}) error {
|
|
||||||
str, ok := v.(string)
|
|
||||||
if !ok {
|
|
||||||
return fmt.Errorf("enums must be strings")
|
|
||||||
}
|
|
||||||
|
|
||||||
*e = JobState(str)
|
|
||||||
if !e.Valid() {
|
|
||||||
return errors.New("invalid job state")
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e JobState) MarshalGQL(w io.Writer) {
|
|
||||||
fmt.Fprintf(w, "\"%s\"", e)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (e JobState) Valid() bool {
|
|
||||||
return e == JobStateRunning ||
|
|
||||||
e == JobStateCompleted ||
|
|
||||||
e == JobStateFailed ||
|
|
||||||
e == JobStateCancelled ||
|
|
||||||
e == JobStateStopped ||
|
|
||||||
e == JobStateTimeout ||
|
|
||||||
e == JobStatePreempted ||
|
|
||||||
e == JobStateOutOfMemory
|
|
||||||
}
|
|
||||||
@@ -1,66 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"io"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
)
|
|
||||||
|
|
||||||
func DecodeJobData(r io.Reader) (*JobData, error) {
|
|
||||||
var d JobData
|
|
||||||
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &d, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func DecodeJobMeta(r io.Reader) (*JobMeta, error) {
|
|
||||||
var d JobMeta
|
|
||||||
if err := json.NewDecoder(r).Decode(&d); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &d, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func DecodeCluster(r io.Reader) (*Cluster, error) {
|
|
||||||
var c Cluster
|
|
||||||
if err := json.NewDecoder(r).Decode(&c); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &c, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func EncodeJobData(w io.Writer, d *schema.JobData) error {
|
|
||||||
// Sanitize parameters
|
|
||||||
if err := json.NewEncoder(w).Encode(d); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
|
|
||||||
// Sanitize parameters
|
|
||||||
if err := json.NewEncoder(w).Encode(d); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func EncodeCluster(w io.Writer, c *schema.Cluster) error {
|
|
||||||
// Sanitize parameters
|
|
||||||
if err := json.NewEncoder(w).Encode(c); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -1,371 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"encoding/json"
|
|
||||||
"errors"
|
|
||||||
"flag"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"sync"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
ccunits "github.com/ClusterCockpit/cc-units"
|
|
||||||
)
|
|
||||||
|
|
||||||
const Version = 1
|
|
||||||
|
|
||||||
var ar FsArchive
|
|
||||||
var srcPath string
|
|
||||||
var dstPath string
|
|
||||||
|
|
||||||
func loadJobData(filename string) (*JobData, error) {
|
|
||||||
|
|
||||||
f, err := os.Open(filename)
|
|
||||||
if err != nil {
|
|
||||||
return &JobData{}, fmt.Errorf("fsBackend loadJobData()- %v", err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
return DecodeJobData(bufio.NewReader(f))
|
|
||||||
}
|
|
||||||
|
|
||||||
func ConvertUnitString(us string) schema.Unit {
|
|
||||||
var nu schema.Unit
|
|
||||||
|
|
||||||
if us == "CPI" ||
|
|
||||||
us == "IPC" ||
|
|
||||||
us == "load" ||
|
|
||||||
us == "" {
|
|
||||||
nu.Base = us
|
|
||||||
return nu
|
|
||||||
}
|
|
||||||
u := ccunits.NewUnit(us)
|
|
||||||
p := u.GetPrefix()
|
|
||||||
if p.Prefix() != "" {
|
|
||||||
prefix := p.Prefix()
|
|
||||||
nu.Prefix = prefix
|
|
||||||
}
|
|
||||||
m := u.GetMeasure()
|
|
||||||
d := u.GetUnitDenominator()
|
|
||||||
if d.Short() != "inval" {
|
|
||||||
nu.Base = fmt.Sprintf("%s/%s", m.Short(), d.Short())
|
|
||||||
} else {
|
|
||||||
nu.Base = m.Short()
|
|
||||||
}
|
|
||||||
|
|
||||||
return nu
|
|
||||||
}
|
|
||||||
|
|
||||||
func deepCopyJobMeta(j *JobMeta) schema.JobMeta {
|
|
||||||
var jn schema.JobMeta
|
|
||||||
|
|
||||||
//required properties
|
|
||||||
jn.JobID = j.JobID
|
|
||||||
jn.User = j.User
|
|
||||||
jn.Project = j.Project
|
|
||||||
jn.Cluster = j.Cluster
|
|
||||||
jn.SubCluster = j.SubCluster
|
|
||||||
jn.NumNodes = j.NumNodes
|
|
||||||
jn.Exclusive = j.Exclusive
|
|
||||||
jn.StartTime = j.StartTime
|
|
||||||
jn.State = schema.JobState(j.State)
|
|
||||||
jn.Duration = j.Duration
|
|
||||||
|
|
||||||
for _, ro := range j.Resources {
|
|
||||||
var rn schema.Resource
|
|
||||||
rn.Hostname = ro.Hostname
|
|
||||||
rn.Configuration = ro.Configuration
|
|
||||||
hwt := make([]int, len(ro.HWThreads))
|
|
||||||
if ro.HWThreads != nil {
|
|
||||||
copy(hwt, ro.HWThreads)
|
|
||||||
}
|
|
||||||
rn.HWThreads = hwt
|
|
||||||
acc := make([]string, len(ro.Accelerators))
|
|
||||||
if ro.Accelerators != nil {
|
|
||||||
copy(acc, ro.Accelerators)
|
|
||||||
}
|
|
||||||
rn.Accelerators = acc
|
|
||||||
jn.Resources = append(jn.Resources, &rn)
|
|
||||||
}
|
|
||||||
jn.MetaData = make(map[string]string)
|
|
||||||
|
|
||||||
for k, v := range j.MetaData {
|
|
||||||
jn.MetaData[k] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
jn.Statistics = make(map[string]schema.JobStatistics)
|
|
||||||
for k, v := range j.Statistics {
|
|
||||||
var sn schema.JobStatistics
|
|
||||||
sn.Avg = v.Avg
|
|
||||||
sn.Max = v.Max
|
|
||||||
sn.Min = v.Min
|
|
||||||
tmpUnit := ConvertUnitString(v.Unit)
|
|
||||||
if tmpUnit.Base == "inval" {
|
|
||||||
sn.Unit = schema.Unit{Base: ""}
|
|
||||||
} else {
|
|
||||||
sn.Unit = tmpUnit
|
|
||||||
}
|
|
||||||
jn.Statistics[k] = sn
|
|
||||||
}
|
|
||||||
|
|
||||||
//optional properties
|
|
||||||
jn.Partition = j.Partition
|
|
||||||
jn.ArrayJobId = j.ArrayJobId
|
|
||||||
jn.NumHWThreads = j.NumHWThreads
|
|
||||||
jn.NumAcc = j.NumAcc
|
|
||||||
jn.MonitoringStatus = j.MonitoringStatus
|
|
||||||
jn.SMT = j.SMT
|
|
||||||
jn.Walltime = j.Walltime
|
|
||||||
|
|
||||||
for _, t := range j.Tags {
|
|
||||||
jn.Tags = append(jn.Tags, t)
|
|
||||||
}
|
|
||||||
|
|
||||||
return jn
|
|
||||||
}
|
|
||||||
|
|
||||||
func deepCopyJobData(d *JobData, cluster string, subCluster string) *schema.JobData {
|
|
||||||
var dn = make(schema.JobData)
|
|
||||||
|
|
||||||
for k, v := range *d {
|
|
||||||
// fmt.Printf("Metric %s\n", k)
|
|
||||||
dn[k] = make(map[schema.MetricScope]*schema.JobMetric)
|
|
||||||
|
|
||||||
for mk, mv := range v {
|
|
||||||
// fmt.Printf("Scope %s\n", mk)
|
|
||||||
var mn schema.JobMetric
|
|
||||||
tmpUnit := ConvertUnitString(mv.Unit)
|
|
||||||
if tmpUnit.Base == "inval" {
|
|
||||||
mn.Unit = schema.Unit{Base: ""}
|
|
||||||
} else {
|
|
||||||
mn.Unit = tmpUnit
|
|
||||||
}
|
|
||||||
|
|
||||||
mn.Timestep = mv.Timestep
|
|
||||||
|
|
||||||
for _, v := range mv.Series {
|
|
||||||
var sn schema.Series
|
|
||||||
sn.Hostname = v.Hostname
|
|
||||||
if v.Id != nil {
|
|
||||||
var id = new(string)
|
|
||||||
|
|
||||||
if mk == schema.MetricScopeAccelerator {
|
|
||||||
s := GetSubCluster(cluster, subCluster)
|
|
||||||
var err error
|
|
||||||
|
|
||||||
*id, err = s.Topology.GetAcceleratorID(*v.Id)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
*id = fmt.Sprint(*v.Id)
|
|
||||||
}
|
|
||||||
sn.Id = id
|
|
||||||
}
|
|
||||||
if v.Statistics != nil {
|
|
||||||
sn.Statistics = schema.MetricStatistics{
|
|
||||||
Avg: v.Statistics.Avg,
|
|
||||||
Min: v.Statistics.Min,
|
|
||||||
Max: v.Statistics.Max}
|
|
||||||
}
|
|
||||||
|
|
||||||
sn.Data = make([]schema.Float, len(v.Data))
|
|
||||||
copy(sn.Data, v.Data)
|
|
||||||
mn.Series = append(mn.Series, sn)
|
|
||||||
}
|
|
||||||
|
|
||||||
dn[k][mk] = &mn
|
|
||||||
}
|
|
||||||
// fmt.Printf("FINISH %s\n", k)
|
|
||||||
}
|
|
||||||
|
|
||||||
return &dn
|
|
||||||
}
|
|
||||||
|
|
||||||
func deepCopyClusterConfig(co *Cluster) schema.Cluster {
|
|
||||||
var cn schema.Cluster
|
|
||||||
|
|
||||||
cn.Name = co.Name
|
|
||||||
for _, sco := range co.SubClusters {
|
|
||||||
var scn schema.SubCluster
|
|
||||||
scn.Name = sco.Name
|
|
||||||
scn.Nodes = sco.Nodes
|
|
||||||
scn.ProcessorType = sco.ProcessorType
|
|
||||||
scn.SocketsPerNode = sco.SocketsPerNode
|
|
||||||
scn.CoresPerSocket = sco.CoresPerSocket
|
|
||||||
scn.ThreadsPerCore = sco.ThreadsPerCore
|
|
||||||
scn.FlopRateScalar = schema.MetricValue{
|
|
||||||
Unit: schema.Unit{Base: "F/s", Prefix: "G"},
|
|
||||||
Value: float64(sco.FlopRateScalar)}
|
|
||||||
scn.FlopRateSimd = schema.MetricValue{
|
|
||||||
Unit: schema.Unit{Base: "F/s", Prefix: "G"},
|
|
||||||
Value: float64(sco.FlopRateSimd)}
|
|
||||||
scn.MemoryBandwidth = schema.MetricValue{
|
|
||||||
Unit: schema.Unit{Base: "B/s", Prefix: "G"},
|
|
||||||
Value: float64(sco.MemoryBandwidth)}
|
|
||||||
scn.Topology = *sco.Topology
|
|
||||||
cn.SubClusters = append(cn.SubClusters, &scn)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, mco := range co.MetricConfig {
|
|
||||||
var mcn schema.MetricConfig
|
|
||||||
mcn.Name = mco.Name
|
|
||||||
mcn.Scope = mco.Scope
|
|
||||||
if mco.Aggregation == "" {
|
|
||||||
fmt.Println("cluster.json - Property aggregation missing! Please review file!")
|
|
||||||
mcn.Aggregation = "sum"
|
|
||||||
} else {
|
|
||||||
mcn.Aggregation = mco.Aggregation
|
|
||||||
}
|
|
||||||
mcn.Timestep = mco.Timestep
|
|
||||||
tmpUnit := ConvertUnitString(mco.Unit)
|
|
||||||
if tmpUnit.Base == "inval" {
|
|
||||||
mcn.Unit = schema.Unit{Base: ""}
|
|
||||||
} else {
|
|
||||||
mcn.Unit = tmpUnit
|
|
||||||
}
|
|
||||||
mcn.Peak = mco.Peak
|
|
||||||
mcn.Normal = mco.Normal
|
|
||||||
mcn.Caution = mco.Caution
|
|
||||||
mcn.Alert = mco.Alert
|
|
||||||
mcn.SubClusters = mco.SubClusters
|
|
||||||
|
|
||||||
cn.MetricConfig = append(cn.MetricConfig, &mcn)
|
|
||||||
}
|
|
||||||
|
|
||||||
return cn
|
|
||||||
}
|
|
||||||
|
|
||||||
func convertJob(job *JobMeta) {
|
|
||||||
// check if source data is available, otherwise skip job
|
|
||||||
src_data_path := getPath(job, srcPath, "data.json")
|
|
||||||
info, err := os.Stat(src_data_path)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
if info.Size() == 0 {
|
|
||||||
fmt.Printf("Skip path %s, filesize is 0 Bytes.", src_data_path)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
path := getPath(job, dstPath, "meta.json")
|
|
||||||
err = os.MkdirAll(filepath.Dir(path), 0750)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
f, err := os.Create(path)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
jmn := deepCopyJobMeta(job)
|
|
||||||
if err = EncodeJobMeta(f, &jmn); err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
if err = f.Close(); err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
f, err = os.Create(getPath(job, dstPath, "data.json"))
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var jd *JobData
|
|
||||||
jd, err = loadJobData(src_data_path)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
jdn := deepCopyJobData(jd, job.Cluster, job.SubCluster)
|
|
||||||
if err := EncodeJobData(f, jdn); err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
if err := f.Close(); err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
var flagLogLevel, flagConfigFile string
|
|
||||||
var flagLogDateTime, debug bool
|
|
||||||
|
|
||||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
|
||||||
flag.BoolVar(&debug, "debug", false, "Set this flag to force sequential execution for debugging")
|
|
||||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
|
||||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
|
||||||
flag.StringVar(&srcPath, "src", "./var/job-archive", "Specify the source job archive path")
|
|
||||||
flag.StringVar(&dstPath, "dst", "./var/job-archive-new", "Specify the destination job archive path")
|
|
||||||
flag.Parse()
|
|
||||||
|
|
||||||
if _, err := os.Stat(filepath.Join(srcPath, "version.txt")); !errors.Is(err, os.ErrNotExist) {
|
|
||||||
log.Fatal("Archive version exists!")
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Init(flagLogLevel, flagLogDateTime)
|
|
||||||
config.Init(flagConfigFile)
|
|
||||||
srcConfig := fmt.Sprintf("{\"path\": \"%s\"}", srcPath)
|
|
||||||
err := ar.Init(json.RawMessage(srcConfig))
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = initClusterConfig()
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
// setup new job archive
|
|
||||||
err = os.Mkdir(dstPath, 0750)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range Clusters {
|
|
||||||
path := fmt.Sprintf("%s/%s", dstPath, c.Name)
|
|
||||||
fmt.Println(path)
|
|
||||||
err = os.Mkdir(path, 0750)
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
cn := deepCopyClusterConfig(c)
|
|
||||||
|
|
||||||
f, err := os.Create(fmt.Sprintf("%s/%s/cluster.json", dstPath, c.Name))
|
|
||||||
if err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
if err := EncodeCluster(f, &cn); err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
if err := f.Close(); err != nil {
|
|
||||||
log.Fatal(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
|
|
||||||
for job := range ar.Iter() {
|
|
||||||
if debug {
|
|
||||||
fmt.Printf("Job %d\n", job.JobID)
|
|
||||||
convertJob(job)
|
|
||||||
} else {
|
|
||||||
job := job
|
|
||||||
wg.Add(1)
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
convertJob(job)
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
wg.Wait()
|
|
||||||
os.WriteFile(filepath.Join(dstPath, "version.txt"), []byte(fmt.Sprintf("%d", Version)), 0644)
|
|
||||||
}
|
|
||||||
@@ -1,65 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main

import (
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// JobData is the old archive layout: metric name -> scope -> metric payload.
type JobData map[string]map[schema.MetricScope]*JobMetric

// JobMetric is the old on-disk representation of one metric at one scope.
// Unit is still a plain string here; the new schema uses a structured unit.
type JobMetric struct {
	Unit             string             `json:"unit"`
	Scope            schema.MetricScope `json:"scope"`
	Timestep         int                `json:"timestep"`
	Series           []Series           `json:"series"`
	StatisticsSeries *StatsSeries       `json:"statisticsSeries"`
}

// Series holds the measurement series of one metric on one host.
// Id is numeric in the old format; the new schema stores it as a string.
type Series struct {
	Hostname   string            `json:"hostname"`
	Id         *int              `json:"id,omitempty"`
	Statistics *MetricStatistics `json:"statistics"`
	Data       []schema.Float    `json:"data"`
}

// MetricStatistics carries the precomputed summary values of a series.
type MetricStatistics struct {
	Avg float64 `json:"avg"`
	Min float64 `json:"min"`
	Max float64 `json:"max"`
}

// StatsSeries is an aggregated statistics series across hosts.
// NOTE(review): Float is used unqualified here (unlike schema.Float
// above) — presumably defined elsewhere in this package; verify.
type StatsSeries struct {
	Mean        []Float         `json:"mean"`
	Min         []Float         `json:"min"`
	Max         []Float         `json:"max"`
	Percentiles map[int][]Float `json:"percentiles,omitempty"`
}

// type MetricScope string

// const (
// 	MetricScopeInvalid MetricScope = "invalid_scope"

// 	MetricScopeNode         MetricScope = "node"
// 	MetricScopeSocket       MetricScope = "socket"
// 	MetricScopeMemoryDomain MetricScope = "memoryDomain"
// 	MetricScopeCore         MetricScope = "core"
// 	MetricScopeHWThread     MetricScope = "hwthread"

// 	MetricScopeAccelerator MetricScope = "accelerator"
// )

// var metricScopeGranularity map[MetricScope]int = map[MetricScope]int{
// 	MetricScopeNode:         10,
// 	MetricScopeSocket:       5,
// 	MetricScopeMemoryDomain: 3,
// 	MetricScopeCore:         2,
// 	MetricScopeHWThread:     1,

// 	MetricScopeAccelerator: 5, // Special/Randomly choosen

// 	MetricScopeInvalid: -1,
// }
650
web/frontend/package-lock.json
generated
650
web/frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user