mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-11-15 22:53:49 +01:00
Compare commits
540 Commits
v1.2.2
...
nats-clien
| Author | SHA1 | Date | |
|---|---|---|---|
| 18376aa64b | |||
| a5fdccc764 | |||
|
|
cbc49669d0 | ||
|
|
78bb638fd6 | ||
|
|
7a61bae471 | ||
|
|
e1b992526e | ||
|
|
b6b37ee68b | ||
|
|
43cb1f1bff | ||
|
|
f158eaa29c | ||
|
|
86d85f12be | ||
|
|
dd470d49ec | ||
|
|
95d8062b00 | ||
|
|
6247150e9c | ||
|
5266644725
|
|||
|
81d9e96552
|
|||
|
|
4ec9f06114 | ||
|
0033e9f6c0
|
|||
|
571652c314
|
|||
|
|
7ec233e18a | ||
|
|
13c9a12336 | ||
|
|
83d472ecd6 | ||
|
|
c21da6512a | ||
|
|
4b4374e0df | ||
|
|
407276a04d | ||
|
|
64f60905b4 | ||
|
|
9e6072fed2 | ||
|
|
a3e5c424fd | ||
|
|
6683a350aa | ||
|
|
05bfa9b546 | ||
|
|
735988decb | ||
|
|
d0580592be | ||
|
|
817076bdbf | ||
|
|
736236e9ca | ||
|
|
3f4114c51b | ||
|
|
5c2c493c56 | ||
|
|
2c383ebea1 | ||
|
|
91e73450cf | ||
|
|
e55798944e | ||
|
|
5ea11a5ad2 | ||
|
|
2a3383e9e6 | ||
|
|
e871703724 | ||
|
|
1ee367d7be | ||
|
|
bce536b9b4 | ||
|
|
7c9182e0b0 | ||
|
|
aa915d639d | ||
|
|
9489ebc7d6 | ||
|
2a5c525193
|
|||
|
9e2d981c60
|
|||
|
|
53dfe9e4f5 | ||
|
48e95fbdb0
|
|||
|
fd94d85edf
|
|||
|
f2d1a85afb
|
|||
|
0bdbcb8bab
|
|||
|
|
7b91a819be | ||
| bc89025924 | |||
|
|
16bcaef4c3 | ||
|
|
fcbfa451f2 | ||
|
|
559ce53ca4 | ||
|
|
ee2c5b58d7 | ||
|
|
d98d998106 | ||
|
212c45e070
|
|||
|
143fa9b6ed
|
|||
|
4849928288
|
|||
|
|
9248ee8868 | ||
|
|
1616d96732 | ||
| 0bbedd1600 | |||
|
|
c7e49644d8 | ||
|
010c903c74
|
|||
|
e4d12e3537
|
|||
|
051cc8384e
|
|||
|
49a94170d2
|
|||
|
|
42e8e37bd4 | ||
|
|
5d2c350ce2 | ||
|
|
85dc0362c1 | ||
|
|
01c06728eb | ||
|
|
257250714d | ||
|
|
3b769c3059 | ||
|
|
a7395ed45b | ||
|
|
ab07c7928f | ||
|
|
b0c0d15505 | ||
|
|
fcf50790da | ||
|
|
1e43654607 | ||
|
|
4fecbe820d | ||
|
|
763c9dfa6b | ||
|
9de5879786
|
|||
|
|
9396e7492c | ||
|
3ac3415178
|
|||
|
1aae1c59d0
|
|||
|
907e80a01c
|
|||
|
|
8a10b69716 | ||
|
|
1a3cf7edd6 | ||
|
|
76d0fc979b | ||
|
|
a42d8ece35 | ||
|
|
93377f53fc | ||
|
|
c853d74ba0 | ||
|
|
0b9f74f4f4 | ||
|
|
5da6baf828 | ||
|
5766945006
|
|||
|
a53d473b58
|
|||
|
|
d1207ad80e | ||
|
|
e2efe71b33 | ||
|
|
2aef6ed9c0 | ||
|
|
fcb6db0603 | ||
| 01b1136316 | |||
|
|
2512fe9e75 | ||
|
|
f89b5cd2ec | ||
|
|
ab284ed208 | ||
|
|
00a578657c | ||
|
|
38ce40ae7d | ||
| e1be6c7138 | |||
| 28539e60b0 | |||
|
adb11b3ed0
|
|||
|
|
f1e6dedd44 | ||
|
|
8ea1454c06 | ||
| 81b8d578f2 | |||
|
|
16b11db39c | ||
| 0d923cc920 | |||
| c523e93564 | |||
| d588798ea1 | |||
| a11f165f2a | |||
|
|
d4f487d554 | ||
|
|
93d5a0e532 | ||
|
|
00ddc462d2 | ||
|
|
5f4a74f8ba | ||
|
|
a8eff6fbd1 | ||
|
|
baa7367ebe | ||
|
|
69f8a34aac | ||
|
|
21b3a67988 | ||
|
|
d89574ce73 | ||
| ddeac6b9d9 | |||
| 17906ec0eb | |||
|
|
311c088d3d | ||
| a2584d6083 | |||
| 35bd7739c6 | |||
| 7f43c88a39 | |||
|
|
fc1c54a141 | ||
|
|
2af111c584 | ||
| c093cca8b1 | |||
|
|
2bb1b78ba4 | ||
| 3ab26172c4 | |||
| cdd45ce88b | |||
|
210a7d3136
|
|||
|
92ec64d80f
|
|||
|
ff37f71fdb
|
|||
|
6056341525
|
|||
|
|
075612f5bd | ||
| 1a87ed8210 | |||
|
|
c05ffeb16d | ||
| ee3710c5ed | |||
| 4327c4b1f7 | |||
| 492e56a098 | |||
| f0257a2784 | |||
| ec1ead89ab | |||
|
|
ae53e87aba | ||
|
|
939dd2320a | ||
|
|
2c8b73e2e2 | ||
|
|
eabc6212ea | ||
|
|
c120d6517f | ||
|
|
597ee1dad7 | ||
|
|
c4a901504d | ||
|
|
f5cc5d07fd | ||
|
|
8a0e6c921c | ||
|
|
bf1bff9ace | ||
|
|
06f24e988f | ||
|
|
ae327f545e | ||
|
|
35012b18c5 | ||
|
|
9688bad622 | ||
|
|
447b8d3372 | ||
|
|
01102cb9b0 | ||
|
|
934d1a6114 | ||
|
|
6f74c8cb77 | ||
|
|
63b9e619a4 | ||
|
|
82e28f26d7 | ||
|
|
ca9fd96baa | ||
|
|
39b22267d6 | ||
|
|
60d7984d66 | ||
|
|
33d219d2ac | ||
|
|
85a77e05af | ||
|
|
3dfeabcec6 | ||
|
|
673fdc443c | ||
|
|
2f6e5a7648 | ||
|
|
2cbe8e9517 | ||
|
|
2f0460d6ec | ||
|
|
37f4ed7770 | ||
|
|
e3104c61cb | ||
|
|
bc434ee8cb | ||
|
|
f4102b948e | ||
|
|
ed991de11a | ||
|
|
322e161064 | ||
|
|
1adc741cc2 | ||
|
|
4eff87bbf7 | ||
|
|
fc6970d08a | ||
|
|
f616c7e1c6 | ||
|
|
89ec749172 | ||
|
|
182f0f2c64 | ||
|
|
e3681495ce | ||
|
|
37415fa261 | ||
|
|
7243dbe763 | ||
|
|
0ff5c4bedd | ||
|
|
f047f89ad5 | ||
|
|
0eb0aa1d3b | ||
|
|
6019891591 | ||
|
|
615281601c | ||
|
|
82baf5d384 | ||
|
|
6fe93ecb7e | ||
|
|
b3222f3523 | ||
|
|
3b94863521 | ||
|
|
582dc8bf46 | ||
|
|
a9868fd275 | ||
|
|
218e56576a | ||
|
|
c50e79375a | ||
|
|
dcb8308f35 | ||
|
|
183b310696 | ||
|
|
c7d0c86d52 | ||
|
|
48225662b1 | ||
|
|
f53fc088ec | ||
|
|
05517fcbcd | ||
|
|
18af51b0a4 | ||
|
|
ede3da7a87 | ||
|
|
8e3327ef6a | ||
|
|
827f6daabc | ||
|
|
2567442321 | ||
|
|
9cf5478519 | ||
|
|
e5275311c2 | ||
|
|
21e4870e4c | ||
|
|
beba7c8d2e | ||
|
|
fe35313305 | ||
|
|
d7a8bbf40b | ||
|
|
f1893c596e | ||
|
|
6367c1ab4d | ||
|
|
9579887fc4 | ||
|
|
e29be2f140 | ||
|
|
2736b5d1ef | ||
|
|
ff52fb16b6 | ||
|
|
ccbf3867e1 | ||
|
|
f0de422c6e | ||
|
|
64cc19b252 | ||
|
|
26226009f0 | ||
|
|
d10e09da02 | ||
|
|
00a2e58fee | ||
|
|
b1cb45dfe6 | ||
|
|
a2951d1f05 | ||
|
|
c0b1e97602 | ||
|
|
71621a9dc4 | ||
|
|
b3ed2afebe | ||
|
|
704620baff | ||
|
|
8feb805167 | ||
|
|
065b32755a | ||
|
|
1b5f4bff2c | ||
|
|
8e1c5a485f | ||
| 5fa6c9db35 | |||
| 5482b9be2c | |||
|
|
7400273b0a | ||
|
|
0b7cdde4a0 | ||
|
|
d5382aec4f | ||
|
|
df484dc816 | ||
|
|
7ea4086807 | ||
|
|
b04bf6a951 | ||
| 7c33dcf630 | |||
| 5e65e21f0b | |||
| 53ca38ce53 | |||
|
|
398e3c1b91 | ||
| 508978d586 | |||
| e267481f71 | |||
|
|
193bee5ac8 | ||
| f58efa2871 | |||
| 6568b6d723 | |||
|
|
4b1b34d8a7 | ||
| 39c09f8565 | |||
|
|
275a77807e | ||
|
|
6443541a79 | ||
|
|
5eb6f7d307 | ||
|
|
bce2a66177 | ||
|
|
7602641909 | ||
|
|
54f3a261c5 | ||
|
|
906bac965f | ||
|
|
4ec1de6900 | ||
|
|
8ded131666 | ||
| 47b14f932e | |||
|
|
838ebb3f69 | ||
| c459724114 | |||
| b0c9d1164d | |||
| 7c51d88501 | |||
| 5b03cf826b | |||
| f305863616 | |||
| db5809d522 | |||
|
|
83df6f015c | ||
| e7231b0e13 | |||
|
|
cff60eb51c | ||
|
f914a312f5
|
|||
| 56ebb301ca | |||
|
|
a59df12595 | ||
|
|
5cc7fc6ccb | ||
|
|
55027cb630 | ||
|
|
036eba68e1 | ||
|
|
d34e0d9348 | ||
|
|
31765ce0ef | ||
|
|
9fe7cdca92 | ||
|
|
adc3502b6b | ||
|
|
95fe369648 | ||
|
|
01845a0cb7 | ||
|
|
708eaf4178 | ||
|
|
d629a58712 | ||
|
|
90886b63d6 | ||
|
|
084f89fa32 | ||
|
|
ceb3a095d8 | ||
|
|
1758275f11 | ||
|
|
e74e506ffe | ||
|
|
599a36466a | ||
|
|
613e128cab | ||
|
|
e4f8022b7a | ||
|
|
5603c41900 | ||
| a8a27c9b51 | |||
|
|
b70de5a4be | ||
|
|
b1fd07cd30 | ||
|
|
6ab2e02fe6 | ||
|
|
5535c5780c | ||
|
|
49e0a2c055 | ||
|
5e074dad10
|
|||
|
d6a88896d0
|
|||
|
5c99f5f8bb
|
|||
|
e1faba0ff2
|
|||
|
ba2f406bc0
|
|||
|
9b6db4684a
|
|||
|
|
561fd41d5d | ||
|
|
ce9995dac7 | ||
|
|
0afaea9513 | ||
|
|
9b5c6e3164 | ||
|
|
e6ebec8c1e | ||
|
|
2551921ed6 | ||
|
|
e02575aad7 | ||
|
|
ff3502c87a | ||
|
|
017f9b2140 | ||
|
|
c80d3a6958 | ||
|
|
3ca1127685 | ||
|
|
18369da5bc | ||
|
|
e65100cdc8 | ||
|
|
6a1cb51c2f | ||
|
c4d93e492b
|
|||
|
c2f72f72ac
|
|||
|
721b6b2afa
|
|||
|
b6f011c669
|
|||
|
801607fc16
|
|||
|
01a4d33514
|
|||
|
e348ec74fd
|
|||
|
0458675608
|
|||
|
c61ffce0e9
|
|||
|
68a97dc980
|
|||
|
a07d167390
|
|||
|
|
a8721dcc69 | ||
|
|
68cf952ac6 | ||
|
|
e14d6a81fe | ||
|
|
a4912893a8 | ||
|
0adfb631ef
|
|||
|
b64ce1f67f
|
|||
|
e8e3b1595d
|
|||
|
f1427d5272
|
|||
|
|
bf6b87d65c | ||
|
|
0240997257 | ||
|
|
f1e341f0b9 | ||
|
a54acb8c42
|
|||
|
c6ede67589
|
|||
|
|
11176da5d8 | ||
|
|
0a604336c4 | ||
|
|
be9df7649f | ||
|
|
63fb923995 | ||
|
|
3afe40083d | ||
|
|
9d4767539c | ||
|
ac9bba8b5b
|
|||
|
80c46bea7f
|
|||
|
|
614f694777 | ||
|
|
1072d7b449 | ||
|
1b70596735
|
|||
|
|
61eebc9fbd | ||
|
b05909969f
|
|||
|
bd89ce7cc9
|
|||
|
130613b717
|
|||
|
b3c1f39a0e
|
|||
|
97c807cd33
|
|||
|
aede5f71ec
|
|||
|
786770f56a
|
|||
|
|
74d4f00784 | ||
|
d61c4235dc
|
|||
|
e8794b8c79
|
|||
|
552da005dc
|
|||
|
|
51452d2e68 | ||
|
5c5484b4d2
|
|||
|
|
9974a851e8 | ||
| 6c0bfc6c35 | |||
|
|
41bbd203cc | ||
|
|
4344c26bef | ||
|
|
e1c1c06fb2 | ||
|
|
70e63764ff | ||
|
|
d10f3e3af6 | ||
|
|
a4397d5447 | ||
|
|
320c87a1db | ||
|
|
8d1228c9e8 | ||
|
|
420bec7c46 | ||
|
|
ba1658beac | ||
|
|
575753038b | ||
|
|
061c9f0979 | ||
|
|
b48d1b8ad6 | ||
| dff7aeefb8 | |||
| 54f7980162 | |||
|
|
684cb5a376 | ||
|
|
597bccc080 | ||
|
|
72557fd0bf | ||
|
|
0b2f2214f9 | ||
|
|
ef51e69ffb | ||
|
|
c9eb40f455 | ||
|
|
b66750339d | ||
|
|
136460567c | ||
|
|
f80123c85d | ||
|
|
a22340196f | ||
|
|
cbaeffde2c | ||
| b67f5436f8 | |||
| b637ddeb28 | |||
|
|
a20b7eacd6 | ||
| 6df639a0c3 | |||
|
|
d4a9887532 | ||
|
|
79b08a181d | ||
|
|
758cef1bd3 | ||
| fb8bbea99d | |||
| 9b261a4778 | |||
|
|
aafa29db8b | ||
|
|
896c39f9bc | ||
|
|
3a97ff7f57 | ||
|
|
7518c41fab | ||
|
|
8cb00a5340 | ||
|
|
baa51db26c | ||
|
|
fc260b2291 | ||
|
|
43ebb01b63 | ||
|
|
067dc0df5d | ||
| 6828c97415 | |||
| 50401e0030 | |||
| c3d2508693 | |||
| 642fd5cc91 | |||
| e8fb5a0030 | |||
|
|
0dee5073c6 | ||
|
|
b9b452f043 | ||
|
|
ddd3fad1c6 | ||
|
|
1f5723a97e | ||
|
|
5a177c952d | ||
|
|
86e456d152 | ||
| 03895f9e45 | |||
| 5c79f44055 | |||
| 83c38e74db | |||
| 1e5f2944cf | |||
|
|
e45ecbdef7 | ||
| c65694b36c | |||
| 0005469101 | |||
|
|
60b56bd41a | ||
|
|
81fe492655 | ||
|
|
849b7e038d | ||
|
|
82f5257cf1 | ||
|
|
e347659db4 | ||
|
|
7940317857 | ||
|
|
58415ab5c3 | ||
|
|
1176974a78 | ||
|
|
ce792426e6 | ||
| e92e727279 | |||
| f761900a3e | |||
| 32a57661fd | |||
|
|
5004e44934 | ||
| 99d55f05f8 | |||
| 9fd839fad8 | |||
|
|
1c7cc9e16f | ||
| 06d01962a6 | |||
| 2c2c1accb5 | |||
| 105b7eabf0 | |||
|
|
de1d83e1a9 | ||
|
|
ce97780741 | ||
| e81e56ea1d | |||
| aa6336ea1e | |||
| dd887cbb1f | |||
|
|
860053be67 | ||
| 5a4671b7b1 | |||
|
|
ec581e3509 | ||
| e7ae9dd06d | |||
| 0c7f55ff8d | |||
| bcd7f47409 | |||
| 476caebe7f | |||
|
|
dadc81c026 | ||
|
|
cc719d0ae5 | ||
|
|
53af79cf0d | ||
|
|
f534ad66e1 | ||
|
|
2d8cf02296 | ||
|
|
71386f8466 | ||
|
|
c897c8e56b | ||
|
|
2036069051 | ||
|
|
be6c63e526 | ||
|
|
a2af9c152a | ||
|
|
63f3dc926c | ||
|
|
21dde870c6 | ||
|
|
04f37a85ce | ||
| 10a332083b | |||
|
|
6818d1de62 | ||
|
|
1b10b75e25 | ||
|
|
b829a5aafe | ||
|
|
07073e290a | ||
|
|
ee6d286cd7 | ||
|
|
119637cb9b | ||
|
|
ee4097a2dd | ||
|
|
1185737eaa | ||
|
|
7d14086e54 | ||
|
|
78494cd30e | ||
|
|
ead5c54bcb | ||
|
|
b5b355c16c | ||
|
|
3067d7b250 | ||
|
|
9bc36152d9 | ||
|
|
c1b944b838 | ||
|
|
175a88f1c4 | ||
|
|
aac3e7d2f4 | ||
|
|
c0488b8cbe | ||
|
|
d66703c4d0 | ||
|
|
173975aadd | ||
|
|
d97fa37d2c | ||
|
|
782262b52e | ||
|
|
b8213ef6be | ||
|
|
e34623b1ce | ||
|
|
4e375ff32b | ||
|
|
f7529be3ea | ||
|
|
1aa9720405 | ||
|
|
709880ff5a | ||
|
|
6b78b4e12b | ||
|
|
f342a65aba | ||
|
|
dc860f8fd9 | ||
|
|
f8f900151a | ||
|
|
8d409eed0f | ||
|
|
dc86523cce | ||
|
|
506d112cce | ||
|
|
eb7f92282d | ||
|
|
3468e987b6 | ||
|
|
5acd9ece7f | ||
|
|
8bc43baf2c | ||
|
|
a2c99fb56d | ||
|
|
9689f95ea1 | ||
|
|
84d6b48353 | ||
|
|
bf64fc5213 | ||
|
|
d9f9c8aaf5 |
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.19.x
|
||||
go-version: 1.22.x
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
- name: Build, Vet & Test
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -17,3 +17,5 @@ var/job.db-wal
|
||||
|
||||
dist/
|
||||
*.db
|
||||
internal/repository/testdata/job.db-shm
|
||||
internal/repository/testdata/job.db-wal
|
||||
|
||||
@@ -34,19 +34,6 @@ builds:
|
||||
main: ./tools/archive-manager
|
||||
tags:
|
||||
- static_build
|
||||
- env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
goamd64:
|
||||
- v3
|
||||
id: "archive-migration"
|
||||
binary: archive-migration
|
||||
main: ./tools/archive-migration
|
||||
tags:
|
||||
- static_build
|
||||
- env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
@@ -70,7 +57,7 @@ archives:
|
||||
{{- else }}{{ .Arch }}{{ end }}
|
||||
{{- if .Arm }}v{{ .Arm }}{{ end }}
|
||||
checksum:
|
||||
name_template: 'checksums.txt'
|
||||
name_template: "checksums.txt"
|
||||
snapshot:
|
||||
name_template: "{{ incpatch .Version }}-next"
|
||||
changelog:
|
||||
@@ -100,7 +87,7 @@ changelog:
|
||||
release:
|
||||
draft: false
|
||||
footer: |
|
||||
Supports job archive version 1 and database version 6.
|
||||
Supports job archive version 2 and database version 8.
|
||||
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
|
||||
|
||||
# vim: set ts=2 sw=2 tw=0 fo=cnqoj
|
||||
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 NHR@FAU, University Erlangen-Nuremberg
|
||||
Copyright (c) NHR@FAU, University Erlangen-Nuremberg
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
120
Makefile
120
Makefile
@@ -2,7 +2,7 @@ TARGET = ./cc-backend
|
||||
VAR = ./var
|
||||
CFG = config.json .env
|
||||
FRONTEND = ./web/frontend
|
||||
VERSION = 1.2.2
|
||||
VERSION = 1.4.2
|
||||
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
||||
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
|
||||
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
|
||||
@@ -22,13 +22,23 @@ SVELTE_COMPONENTS = status \
|
||||
header
|
||||
|
||||
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
|
||||
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
|
||||
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/analysis/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/header/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/job/*.svelte)
|
||||
|
||||
.PHONY: clean distclean test tags frontend $(TARGET)
|
||||
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
|
||||
|
||||
.NOTPARALLEL:
|
||||
|
||||
@@ -40,6 +50,15 @@ frontend:
|
||||
$(info ===> BUILD frontend)
|
||||
cd web/frontend && npm install && npm run build
|
||||
|
||||
swagger:
|
||||
$(info ===> GENERATE swagger)
|
||||
@go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api
|
||||
@mv ./api/docs.go ./internal/api/docs.go
|
||||
|
||||
graphql:
|
||||
$(info ===> GENERATE graphql)
|
||||
@go run github.com/99designs/gqlgen
|
||||
|
||||
clean:
|
||||
$(info ===> CLEAN)
|
||||
@go clean
|
||||
@@ -76,90 +95,3 @@ config.json:
|
||||
$(SVELTE_TARGETS): $(SVELTE_SRC)
|
||||
$(info ===> BUILD frontend)
|
||||
cd web/frontend && npm install && npm run build
|
||||
|
||||
install: $(TARGET)
|
||||
@WORKSPACE=$(PREFIX)
|
||||
@if [ -z "$${WORKSPACE}" ]; then exit 1; fi
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
|
||||
@install -Dpm 755 $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
||||
@install -Dpm 600 configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
||||
|
||||
.ONESHELL:
|
||||
.PHONY: RPM
|
||||
RPM: build/package/cc-backend.spec
|
||||
@WORKSPACE="$${PWD}"
|
||||
@SPECFILE="$${WORKSPACE}/build/package/cc-backend.spec"
|
||||
# Setup RPM build tree
|
||||
@eval $$(rpm --eval "ARCH='%{_arch}' RPMDIR='%{_rpmdir}' SOURCEDIR='%{_sourcedir}' SPECDIR='%{_specdir}' SRPMDIR='%{_srcrpmdir}' BUILDDIR='%{_builddir}'")
|
||||
@mkdir --parents --verbose "$${RPMDIR}" "$${SOURCEDIR}" "$${SPECDIR}" "$${SRPMDIR}" "$${BUILDDIR}"
|
||||
# Create source tarball
|
||||
@COMMITISH="HEAD"
|
||||
@VERS=$$(git describe --tags $${COMMITISH})
|
||||
@VERS=$${VERS#v}
|
||||
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
||||
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
|
||||
@eval $$(rpmspec --query --queryformat "NAME='%{name}' VERSION='%{version}' RELEASE='%{release}' NVR='%{NVR}' NVRA='%{NVRA}'" --define="VERS $${VERS}" "$${SPECFILE}")
|
||||
@PREFIX="$${NAME}-$${VERSION}"
|
||||
@FORMAT="tar.gz"
|
||||
@SRCFILE="$${SOURCEDIR}/$${PREFIX}.$${FORMAT}"
|
||||
@git archive --verbose --format "$${FORMAT}" --prefix="$${PREFIX}/" --output="$${SRCFILE}" $${COMMITISH}
|
||||
# Build RPM and SRPM
|
||||
@rpmbuild -ba --define="VERS $${VERS}" --rmsource --clean "$${SPECFILE}"
|
||||
# Report RPMs and SRPMs when in GitHub Workflow
|
||||
@if [ "$${GITHUB_ACTIONS}" = true ]; then
|
||||
@ RPMFILE="$${RPMDIR}/$${ARCH}/$${NVRA}.rpm"
|
||||
@ SRPMFILE="$${SRPMDIR}/$${NVR}.src.rpm"
|
||||
@ echo "RPM: $${RPMFILE}"
|
||||
@ echo "SRPM: $${SRPMFILE}"
|
||||
@ echo "::set-output name=SRPM::$${SRPMFILE}"
|
||||
@ echo "::set-output name=RPM::$${RPMFILE}"
|
||||
@fi
|
||||
|
||||
.ONESHELL:
|
||||
.PHONY: DEB
|
||||
DEB: build/package/cc-backend.deb.control
|
||||
@BASEDIR=$${PWD}
|
||||
@WORKSPACE=$${PWD}/.dpkgbuild
|
||||
@DEBIANDIR=$${WORKSPACE}/debian
|
||||
@DEBIANBINDIR=$${WORKSPACE}/DEBIAN
|
||||
@mkdir --parents --verbose $$WORKSPACE $$DEBIANBINDIR
|
||||
#@mkdir --parents --verbose $$DEBIANDIR
|
||||
@CONTROLFILE="$${BASEDIR}/build/package/cc-backend.deb.control"
|
||||
@COMMITISH="HEAD"
|
||||
@VERS=$$(git describe --tags --abbrev=0 $${COMMITISH})
|
||||
@VERS=$${VERS#v}
|
||||
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
||||
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
|
||||
@ARCH=$$(uname -m)
|
||||
@ARCH=$$(echo $$ARCH | sed -e s+'_'+'-'+g)
|
||||
@if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi
|
||||
@PREFIX="$${NAME}-$${VERSION}_$${ARCH}"
|
||||
@SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$${WORKSPACE}"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//')
|
||||
@SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')"
|
||||
#@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control
|
||||
@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control
|
||||
@mkdir --parents --verbose "$${WORKSPACE}"/$(VAR)
|
||||
@touch "$${WORKSPACE}"/$(VAR)/job.db
|
||||
@cd web/frontend && yarn install && yarn build && cd -
|
||||
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
|
||||
@cp $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
||||
@chmod 0755 $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
||||
@mkdir --parents --verbose $${WORKSPACE}/etc/$(TARGET)
|
||||
@cp configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
||||
@chmod 0600 $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/systemd/system
|
||||
@cp build/package/$(TARGET).service $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
|
||||
@chmod 0644 $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
|
||||
@mkdir --parents --verbose $${WORKSPACE}/etc/default
|
||||
@cp build/package/$(TARGET).config $${WORKSPACE}/etc/default/$(TARGET)
|
||||
@chmod 0600 $${WORKSPACE}/etc/default/$(TARGET)
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/sysusers.d
|
||||
@cp build/package/$(TARGET).sysusers $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
|
||||
@chmod 0644 $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
|
||||
@DEB_FILE="cc-metric-store_$${VERS}_$${ARCH}.deb"
|
||||
@dpkg-deb -b $${WORKSPACE} "$$DEB_FILE"
|
||||
@rm -r "$${WORKSPACE}"
|
||||
@if [ "$${GITHUB_ACTIONS}" = "true" ]; then
|
||||
@ echo "::set-output name=DEB::$${DEB_FILE}"
|
||||
@fi
|
||||
|
||||
200
README.md
200
README.md
@@ -1,12 +1,16 @@
|
||||
# NOTE
|
||||
Please have a look at the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for breaking changes!
|
||||
|
||||
Please have a look at the [Release
|
||||
Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
|
||||
for breaking changes!
|
||||
|
||||
# ClusterCockpit REST and GraphQL API backend
|
||||
|
||||
[](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)
|
||||
|
||||
This is a Golang backend implementation for a REST and GraphQL API according to
|
||||
the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications). It also
|
||||
the [ClusterCockpit
|
||||
specifications](https://github.com/ClusterCockpit/cc-specifications). It also
|
||||
includes a web interface for ClusterCockpit. This implementation replaces the
|
||||
previous PHP Symfony based ClusterCockpit web interface. The reasons for
|
||||
switching from PHP Symfony to a Golang based solution are explained
|
||||
@@ -14,31 +18,31 @@ switching from PHP Symfony to a Golang based solution are explained
|
||||
|
||||
## Overview
|
||||
|
||||
This is a Golang web backend for the ClusterCockpit job-specific performance
|
||||
monitoring framework. It provides a REST API for integrating ClusterCockpit with
|
||||
an HPC cluster batch system and external analysis scripts. Data exchange between
|
||||
the web front-end and the back-end is based on a GraphQL API. The web frontend
|
||||
is also served by the backend using [Svelte](https://svelte.dev/) components.
|
||||
Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using
|
||||
[Bootstrap Icons](https://icons.getbootstrap.com/).
|
||||
|
||||
This is a Golang web backend for the ClusterCockpit job-specific performance monitoring framework.
|
||||
It provides a REST API for integrating ClusterCockpit with an HPC cluster batch system and external analysis scripts.
|
||||
Data exchange between the web front-end and the back-end is based on a GraphQL API.
|
||||
The web frontend is also served by the backend using [Svelte](https://svelte.dev/) components.
|
||||
Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/).
|
||||
|
||||
The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by default.
|
||||
Optionally it can use a MySQL/MariaDB database server.
|
||||
While there are metric data backends for the InfluxDB and Prometheus time series databases, the only tested and supported setup is to use cc-metric-store as the metric data backend.
|
||||
Documentation on how to integrate ClusterCockpit with other time series databases will be added in the future.
|
||||
The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by
|
||||
default. Optionally it can use a MySQL/MariaDB database server. While there are
|
||||
metric data backends for the InfluxDB and Prometheus time series databases, the
|
||||
only tested and supported setup is to use cc-metric-store as the metric data
|
||||
backend. Documentation on how to integrate ClusterCockpit with other time series
|
||||
databases will be added in the future.
|
||||
|
||||
Completed batch jobs are stored in a file-based job archive according to
|
||||
[this specification] (https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||
[this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||
The backend supports authentication via local accounts, an external LDAP
|
||||
directory, and JWT tokens. Authorization for APIs is implemented with
|
||||
[JWT](https://jwt.io/) tokens created with public/private key encryption.
|
||||
|
||||
You find more detailed information here:
|
||||
* `./configs/README.md`: Infos about configuration and setup of cc-backend.
|
||||
* `./init/README.md`: Infos on how to setup cc-backend as systemd service on Linux.
|
||||
* `./tools/README.md`: Infos on the JWT authorizatin token workflows in ClusterCockpit.
|
||||
* `./docs`: You can find further documentation here. There is also a Hands-on tutorial that is recommended to get familiar with the ClusterCockpit setup.
|
||||
You find a detailed documentation on the [ClusterCockpit
|
||||
Webpage](https://clustercockpit.org).
|
||||
|
||||
**NOTE**
|
||||
## Build requirements
|
||||
|
||||
ClusterCockpit requires a current version of the golang toolchain and node.js.
|
||||
You can check `go.mod` to see what is the current minimal golang version needed.
|
||||
@@ -49,7 +53,7 @@ on the Go standard library, it is crucial for security and performance to use a
|
||||
current version of golang. In addition, an old golang toolchain may limit the supported
|
||||
versions of third-party packages.
|
||||
|
||||
## How to try ClusterCockpit with a demo setup.
|
||||
## How to try ClusterCockpit with a demo setup
|
||||
|
||||
We provide a shell script that downloads demo data and automatically starts the
|
||||
cc-backend. You will need `wget`, `go`, `node`, `npm` in your path to
|
||||
@@ -61,31 +65,37 @@ cd ./cc-backend
|
||||
./startDemo.sh
|
||||
```
|
||||
|
||||
You can also try the demo using the lates release binary.
|
||||
You can also try the demo using the latest release binary.
|
||||
Create a folder and put the release binary `cc-backend` into this folder.
|
||||
Execute the following steps:
|
||||
```
|
||||
$ ./cc-backend -init
|
||||
$ vim config.json (Add a second cluster entry and name the clusters alex and fritz)
|
||||
$ wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
|
||||
$ tar xf job-archive-demo.tar
|
||||
$ ./cc-backend -init-db -add-user demo:admin:demo -loglevel info
|
||||
$ ./cc-backend -server -dev -loglevel info
|
||||
|
||||
``` shell
|
||||
./cc-backend -init
|
||||
vim config.json (Add a second cluster entry and name the clusters alex and fritz)
|
||||
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
|
||||
tar xf job-archive-demo.tar
|
||||
./cc-backend -init-db -add-user demo:admin:demo -loglevel info
|
||||
./cc-backend -server -dev -loglevel info
|
||||
```
|
||||
|
||||
You can access the web interface at http://localhost:8080.
|
||||
You can access the web interface at [http://localhost:8080](http://localhost:8080).
|
||||
Credentials for login are `demo:demo`.
|
||||
Please note that some views do not work without a metric backend (e.g., the
|
||||
Analysis, Systems and Status views).
|
||||
|
||||
## Howto build and run
|
||||
## How to build and run
|
||||
|
||||
There is a Makefile to automate the build of cc-backend. The Makefile supports the following targets:
|
||||
* `$ make`: Initialize `var` directory and build svelte frontend and backend binary. Note that there is no proper prerequesite handling. Any change of frontend source files will result in a complete rebuild.
|
||||
* `$ make clean`: Clean go build cache and remove binary.
|
||||
* `$ make test`: Run the tests that are also run in the GitHub workflow setup.
|
||||
There is a Makefile to automate the build of cc-backend. The Makefile supports
|
||||
the following targets:
|
||||
|
||||
* `make`: Initialize `var` directory and build svelte frontend and backend
|
||||
binary. Note that there is no proper prerequisite handling. Any change of
|
||||
frontend source files will result in a complete rebuild.
|
||||
* `make clean`: Clean go build cache and remove binary.
|
||||
* `make test`: Run the tests that are also run in the GitHub workflow setup.
|
||||
|
||||
A common workflow for setting up cc-backend from scratch is:
|
||||
|
||||
```sh
|
||||
git clone https://github.com/ClusterCockpit/cc-backend.git
|
||||
|
||||
@@ -116,89 +126,43 @@ ln -s <your-existing-job-archive> ./var/job-archive
|
||||
./cc-backend -help
|
||||
```
|
||||
|
||||
### Run as systemd daemon
|
||||
|
||||
To run this program as a daemon, cc-backend comes with a [example systemd setup](./init/README.md).
|
||||
|
||||
## Configuration and setup
|
||||
|
||||
cc-backend can be used as a local web interface for an existing job archive or
|
||||
as a server for the ClusterCockpit monitoring framework.
|
||||
|
||||
Create your job archive according to [this specification] (https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||
At least one cluster directory with a valid `cluster.json` file is required. If
|
||||
you configure the job archive from scratch, you must also create the job
|
||||
archive version file that contains the job archive version as an integer.
|
||||
You can retrieve the currently supported version by running the following
|
||||
command:
|
||||
```
|
||||
$ ./cc-backend -version
|
||||
```
|
||||
It is ok to have no jobs in the job archive.
|
||||
|
||||
### Configuration
|
||||
|
||||
A configuration file in JSON format must be specified with `-config` to override the default settings.
|
||||
By default, a `config.json` file located in the current directory of the `cc-backend` process will be loaded even without the `-config` flag.
|
||||
Documentation of all supported configuration and command line options can be found [here](./configs/README.md).
|
||||
|
||||
## Database initialization and migration
|
||||
|
||||
Each `cc-backend` version supports a specific database version.
|
||||
At startup, the version of the sqlite database is checked and `cc-backend` terminates if the version does not match.
|
||||
`cc-backend` supports the migration of the database schema to the required version with the command line option `-migrate-db`.
|
||||
If the database file does not exist yet, it will be created and initialized with the command line option `-migrate-db`.
|
||||
If you want to use a newer database version with an older version of cc-backend, you can downgrade a database with the external tool [migrate](https://github.com/golang-migrate/migrate).
|
||||
In this case, you must specify the path to the migration files in a current source tree: `./internal/repository/migrations/`.
|
||||
|
||||
## Development and testing
|
||||
When making changes to the REST or GraphQL API, the appropriate code generators must be used.
|
||||
You must always rebuild `cc-backend` after updating the API files.
|
||||
|
||||
### Update GraphQL schema
|
||||
|
||||
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API.
|
||||
The schema can be found in `./api/schema.graphqls`.
|
||||
After changing it, you need to run `go run github.com/99designs/gqlgen`, which will update `./internal/graph/model`.
|
||||
If new resolvers are needed, they will be added to `./internal/graph/schema.resolvers.go`, where you will then need to implement them.
|
||||
If you start `cc-backend` with the `-dev` flag, the GraphQL Playground UI is available at http://localhost:8080/playground.
|
||||
|
||||
### Update Swagger UI
|
||||
|
||||
This project integrates [swagger ui] (https://swagger.io/tools/swagger-ui/) to document and test its REST API.
|
||||
The swagger documentation files can be found in `./api/`.
|
||||
You can generate the swagger-ui configuration by running `go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api `.
|
||||
You need to move the created `./api/docs.go` to `./internal/api/docs.go`.
|
||||
If you start cc-backend with the `-dev` flag, the Swagger interface is available
|
||||
at http://localhost:8080/swagger/.
|
||||
You must enter a JWT key for a user with the API role.
|
||||
|
||||
**NOTE**
|
||||
|
||||
The user who owns the JWT key must not be logged into the same browser (have a
|
||||
running session), or the Swagger requests will not work. It is recommended to
|
||||
create a separate user that has only the API role.
|
||||
|
||||
## Development and testing
|
||||
In case the REST or GraphQL API is changed the according code generators have to be used.
|
||||
|
||||
## Project file structure
|
||||
|
||||
- [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api) contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in [./api/openapi.yaml](./api/openapi.yaml).
|
||||
- [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend) contains `main.go` for the main application.
|
||||
- [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs) contains documentation about configuration and command line options and required environment variables. A sample configuration file is provided.
|
||||
- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs) contains more in-depth documentation.
|
||||
- [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init) contains an example of setting up systemd for production use.
|
||||
- [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal) contains library source code that is not intended for use by others.
|
||||
- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg) contains Go packages that can be used by other projects.
|
||||
- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools) Additional command line helper tools.
|
||||
- [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager) Commands for getting infos about and existing job archive.
|
||||
- [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration) Tool to migrate from previous to current job archive version.
|
||||
- [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey) Tool to convert external pubkey for use in `cc-backend`.
|
||||
- [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair) contains a small application to generate a compatible JWT keypair. You find documentation on how to use it [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
|
||||
- [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web) Server-side templates and frontend-related files:
|
||||
- [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend) Svelte components and static assets for the frontend UI
|
||||
- [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates) Server-side Go templates
|
||||
- [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml) Configures the behaviour and generation of [gqlgen](https://github.com/99designs/gqlgen).
|
||||
- [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh) is a shell script that sets up demo data, and builds and starts `cc-backend`.
|
||||
|
||||
* [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
|
||||
contains the API schema files for the REST and GraphQL APIs. The REST API is
|
||||
documented in the OpenAPI 3.0 format in
|
||||
[./api/openapi.yaml](./api/openapi.yaml).
|
||||
* [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
|
||||
contains `main.go` for the main application.
|
||||
* [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
|
||||
contains documentation about configuration and command line options and required
|
||||
environment variables. A sample configuration file is provided.
|
||||
* [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs)
|
||||
contains more in-depth documentation.
|
||||
* [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
|
||||
contains an example of setting up systemd for production use.
|
||||
* [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
|
||||
contains library source code that is not intended for use by others.
|
||||
* [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
|
||||
contains Go packages that can be used by other projects.
|
||||
* [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
|
||||
Additional command line helper tools.
|
||||
* [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
|
||||
Commands for getting infos about and existing job archive.
|
||||
* [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
|
||||
Tool to convert external pubkey for use in `cc-backend`.
|
||||
* [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
|
||||
contains a small application to generate a compatible JWT keypair. You find
|
||||
documentation on how to use it
|
||||
[here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
|
||||
* [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web)
|
||||
Server-side templates and frontend-related files:
|
||||
* [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
|
||||
Svelte components and static assets for the frontend UI
|
||||
* [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
|
||||
Server-side Go templates
|
||||
* [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
|
||||
Configures the behaviour and generation of
|
||||
[gqlgen](https://github.com/99designs/gqlgen).
|
||||
* [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh)
|
||||
is a shell script that sets up demo data, and builds and starts `cc-backend`.
|
||||
|
||||
@@ -1,40 +1,46 @@
|
||||
# `cc-backend` version 1.2.2
|
||||
# `cc-backend` version 1.4.2
|
||||
|
||||
Supports job archive version 1 and database version 6.
|
||||
Supports job archive version 2 and database version 8.
|
||||
|
||||
This is a minor release of `cc-backend`, the API backend and frontend
|
||||
This is a small bug fix release of `cc-backend`, the API backend and frontend
|
||||
implementation of ClusterCockpit.
|
||||
For release specific notes visit the [ClusterCockpit Documentation](https://clusterockpit.org/docs/release/).
|
||||
|
||||
** Breaking changes **
|
||||
## Breaking changes
|
||||
|
||||
* The LDAP configuration option `user_filter` was changed and now should not include
|
||||
the uid wildcard. Example:
|
||||
- Old: `"user_filter": "(&(objectclass=posixAccount)(uid=*))"`
|
||||
- New: `"user_filter": "(&(objectclass=posixAccount))"`
|
||||
- You need to perform a database migration. Depending on your database size the
|
||||
migration might require several hours!
|
||||
- You need to adapt the `cluster.json` configuration files in the job-archive,
|
||||
add new required attributes to the metric list and after that edit
|
||||
`./job-archive/version.txt` to version 2. Only metrics that have the footprint
|
||||
attribute set can be filtered and show up in the footprint UI and polar plot.
|
||||
- Continuous scrolling is default now in all job lists. You can change this back
|
||||
to paging globally, also every user can configure to use paging or continuous
|
||||
scrolling individually.
|
||||
- Tags have a scope now. Existing tags will get global scope in the database
|
||||
migration.
|
||||
|
||||
* The aggregate job statistic core hours is now computed using the job table
|
||||
column `num_hwthreads`. In a future release this column will be renamed to
|
||||
`num_cores`. For correct display of core hours `num_hwthreads` must be correctly
|
||||
filled on job start. If your existing jobs do not provide the correct value in
|
||||
this column then you can set this with one SQL INSERT statement. This only applies
|
||||
if you have exclusive jobs, only. Please be aware that we treat this column as
|
||||
it is the number of cores. In case you have SMT enabled and `num_hwthreads`
|
||||
is not the number of cores the core hours will be too high by a factor!
|
||||
## New features
|
||||
|
||||
* The jwts key is now mandatory in config.json. It has to set max-age for
|
||||
validity. Some key names have changed, please refer to
|
||||
[config documentation](./configs/README.md) for details.
|
||||
- Tags have a scope now. Tags created by a basic user are only visible by that
|
||||
user. Tags created by an admin/support role can be configured to be visible by
|
||||
all users (global scope) or only be admin/support role.
|
||||
- Re-sampling support for running (requires a recent `cc-metric-store`) and
|
||||
archived jobs. This greatly speeds up loading of large or very long jobs. You
|
||||
need to add the new configuration key `enable-resampling` to the `config.json`
|
||||
file.
|
||||
- For finished jobs a total job energy is shown in the job view.
|
||||
- Continuous scrolling in job lists is default now.
|
||||
- All database queries (especially for sqlite) were optimized resulting in
|
||||
dramatically faster load times.
|
||||
- A performance and energy footprint can be freely configured on a per
|
||||
subcluster base. One can filter for footprint statistics for running and
|
||||
finished jobs.
|
||||
|
||||
* The following API endpoints are only accessible from IPs registered using the apiAllowedIPs configuration option:
|
||||
- `/users/` [GET, POST, DELETE]
|
||||
- `/user/{id}` [POST]
|
||||
## Known issues
|
||||
|
||||
** NOTE **
|
||||
If you are using the sqlite3 backend the `PRAGMA` option `foreign_keys` must be
|
||||
explicitly set to ON. If using the sqlite3 console it is per default set to
|
||||
OFF! On every console session you must set:
|
||||
```
|
||||
sqlite> PRAGMA foreign_keys = ON;
|
||||
|
||||
```
|
||||
Otherwise if you delete jobs the jobtag relation table will not be updated accordingly!
|
||||
- Currently energy footprint metrics of type energy are ignored for calculating
|
||||
total energy.
|
||||
- Resampling for running jobs only works with cc-metric-store
|
||||
- With energy footprint metrics of type power the unit is ignored and it is
|
||||
assumed the metric has the unit Watt.
|
||||
|
||||
@@ -18,6 +18,7 @@ type Job {
|
||||
numNodes: Int!
|
||||
numHWThreads: Int!
|
||||
numAcc: Int!
|
||||
energy: Float!
|
||||
SMT: Int!
|
||||
exclusive: Int!
|
||||
partition: String!
|
||||
@@ -27,7 +28,8 @@ type Job {
|
||||
tags: [Tag!]!
|
||||
resources: [Resource!]!
|
||||
concurrentJobs: JobLinkResultList
|
||||
|
||||
footprint: [FootprintValue]
|
||||
energyFootprint: [EnergyFootprintValue]
|
||||
metaData: Any
|
||||
userData: User
|
||||
}
|
||||
@@ -40,7 +42,6 @@ type JobLink {
|
||||
type Cluster {
|
||||
name: String!
|
||||
partitions: [String!]! # Slurm partitions
|
||||
metricConfig: [MetricConfig!]!
|
||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||
}
|
||||
|
||||
@@ -56,9 +57,24 @@ type SubCluster {
|
||||
flopRateSimd: MetricValue!
|
||||
memoryBandwidth: MetricValue!
|
||||
topology: Topology!
|
||||
metricConfig: [MetricConfig!]!
|
||||
footprint: [String!]!
|
||||
}
|
||||
|
||||
type FootprintValue {
|
||||
name: String!
|
||||
stat: String!
|
||||
value: Float!
|
||||
}
|
||||
|
||||
type EnergyFootprintValue {
|
||||
hardware: String!
|
||||
metric: String!
|
||||
value: Float!
|
||||
}
|
||||
|
||||
type MetricValue {
|
||||
name: String
|
||||
unit: Unit!
|
||||
value: Float!
|
||||
}
|
||||
@@ -97,6 +113,7 @@ type MetricConfig {
|
||||
normal: Float
|
||||
caution: Float!
|
||||
alert: Float!
|
||||
lowerIsBetter: Boolean
|
||||
subClusters: [SubClusterConfig!]!
|
||||
}
|
||||
|
||||
@@ -104,6 +121,7 @@ type Tag {
|
||||
id: ID!
|
||||
type: String!
|
||||
name: String!
|
||||
scope: String!
|
||||
}
|
||||
|
||||
type Resource {
|
||||
@@ -145,9 +163,10 @@ type MetricStatistics {
|
||||
}
|
||||
|
||||
type StatsSeries {
|
||||
mean: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
mean: [NullableFloat!]!
|
||||
median: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type MetricFootprints {
|
||||
@@ -175,6 +194,28 @@ type NodeMetrics {
|
||||
metrics: [JobMetricWithName!]!
|
||||
}
|
||||
|
||||
type NodesResultList {
|
||||
items: [NodeMetrics!]!
|
||||
offset: Int
|
||||
limit: Int
|
||||
count: Int
|
||||
totalNodes: Int
|
||||
hasNextPage: Boolean
|
||||
}
|
||||
|
||||
type ClusterSupport {
|
||||
cluster: String!
|
||||
subClusters: [String!]!
|
||||
}
|
||||
|
||||
type GlobalMetricListItem {
|
||||
name: String!
|
||||
unit: Unit!
|
||||
scope: MetricScope!
|
||||
footprint: String
|
||||
availability: [ClusterSupport!]!
|
||||
}
|
||||
|
||||
type Count {
|
||||
name: String!
|
||||
count: Int!
|
||||
@@ -186,27 +227,34 @@ type User {
|
||||
email: String!
|
||||
}
|
||||
|
||||
input MetricStatItem {
|
||||
metricName: String!
|
||||
range: FloatRange!
|
||||
}
|
||||
|
||||
type Query {
|
||||
clusters: [Cluster!]! # List of all clusters
|
||||
tags: [Tag!]! # List of all tags
|
||||
globalMetrics: [GlobalMetricListItem!]!
|
||||
|
||||
user(username: String!): User
|
||||
allocatedNodes(cluster: String!): [Count!]!
|
||||
|
||||
job(id: ID!): Job
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
|
||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||
|
||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||
jobsStatistics(filter: [JobFilter!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
|
||||
jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: String, numMetricBins: Int): [JobsStatistics!]!
|
||||
|
||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
||||
|
||||
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
||||
nodeMetricsList(cluster: String!, subCluster: String!, nodeFilter: String!, scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!, page: PageRequest, resolution: Int): NodesResultList!
|
||||
}
|
||||
|
||||
type Mutation {
|
||||
createTag(type: String!, name: String!): Tag!
|
||||
createTag(type: String!, name: String!, scope: String!): Tag!
|
||||
deleteTag(id: ID!): ID!
|
||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
@@ -215,7 +263,7 @@ type Mutation {
|
||||
}
|
||||
|
||||
type IntRangeOutput { from: Int!, to: Int! }
|
||||
type TimeRangeOutput { from: Time!, to: Time! }
|
||||
type TimeRangeOutput { range: String, from: Time!, to: Time! }
|
||||
|
||||
input JobFilter {
|
||||
tags: [ID!]
|
||||
@@ -227,6 +275,7 @@ input JobFilter {
|
||||
cluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
energy: FloatRange
|
||||
|
||||
minRunningFor: Int
|
||||
|
||||
@@ -236,17 +285,14 @@ input JobFilter {
|
||||
|
||||
startTime: TimeRange
|
||||
state: [JobState!]
|
||||
flopsAnyAvg: FloatRange
|
||||
memBwAvg: FloatRange
|
||||
loadAvg: FloatRange
|
||||
memUsedMax: FloatRange
|
||||
|
||||
metricStats: [MetricStatItem!]
|
||||
exclusive: Int
|
||||
node: StringInput
|
||||
}
|
||||
|
||||
input OrderByInput {
|
||||
field: String!
|
||||
type: String!,
|
||||
order: SortDirectionEnum! = ASC
|
||||
}
|
||||
|
||||
@@ -264,15 +310,20 @@ input StringInput {
|
||||
in: [String!]
|
||||
}
|
||||
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
input FloatRange { from: Float!, to: Float! }
|
||||
input TimeRange { from: Time, to: Time }
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
input TimeRange { range: String, from: Time, to: Time }
|
||||
|
||||
input FloatRange {
|
||||
from: Float!
|
||||
to: Float!
|
||||
}
|
||||
|
||||
type JobResultList {
|
||||
items: [Job!]!
|
||||
offset: Int
|
||||
limit: Int
|
||||
count: Int
|
||||
hasNextPage: Boolean
|
||||
}
|
||||
|
||||
type JobLinkResultList {
|
||||
@@ -286,6 +337,20 @@ type HistoPoint {
|
||||
value: Int!
|
||||
}
|
||||
|
||||
type MetricHistoPoints {
|
||||
metric: String!
|
||||
unit: String!
|
||||
stat: String
|
||||
data: [MetricHistoPoint!]
|
||||
}
|
||||
|
||||
type MetricHistoPoint {
|
||||
bin: Int
|
||||
count: Int!
|
||||
min: Int
|
||||
max: Int
|
||||
}
|
||||
|
||||
type JobsStatistics {
|
||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||
@@ -303,6 +368,7 @@ type JobsStatistics {
|
||||
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
||||
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
||||
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
|
||||
}
|
||||
|
||||
input PageRequest {
|
||||
|
||||
647
api/swagger.json
647
api/swagger.json
File diff suppressed because it is too large
Load Diff
463
api/swagger.yaml
463
api/swagger.yaml
@@ -23,6 +23,10 @@ definitions:
|
||||
description: Tag Name
|
||||
example: Testjob
|
||||
type: string
|
||||
scope:
|
||||
description: Tag Scope for Frontend Display
|
||||
example: global
|
||||
type: string
|
||||
type:
|
||||
description: Tag Type
|
||||
example: Debug
|
||||
@@ -50,6 +54,15 @@ definitions:
|
||||
msg:
|
||||
type: string
|
||||
type: object
|
||||
api.EditMetaRequest:
|
||||
properties:
|
||||
key:
|
||||
example: jobScript
|
||||
type: string
|
||||
value:
|
||||
example: bash script
|
||||
type: string
|
||||
type: object
|
||||
api.ErrorResponse:
|
||||
properties:
|
||||
error:
|
||||
@@ -59,6 +72,14 @@ definitions:
|
||||
description: Statustext of Errorcode
|
||||
type: string
|
||||
type: object
|
||||
api.GetClustersApiResponse:
|
||||
properties:
|
||||
clusters:
|
||||
description: Array of clusters
|
||||
items:
|
||||
$ref: '#/definitions/schema.Cluster'
|
||||
type: array
|
||||
type: object
|
||||
api.GetJobApiResponse:
|
||||
properties:
|
||||
data:
|
||||
@@ -93,74 +114,91 @@ definitions:
|
||||
type: object
|
||||
api.StartJobApiResponse:
|
||||
properties:
|
||||
id:
|
||||
description: Database ID of new job
|
||||
type: integer
|
||||
msg:
|
||||
type: string
|
||||
type: object
|
||||
api.StopJobApiRequest:
|
||||
properties:
|
||||
cluster:
|
||||
description: Cluster of job
|
||||
example: fritz
|
||||
type: string
|
||||
jobId:
|
||||
description: Cluster Job ID of job
|
||||
example: 123000
|
||||
type: integer
|
||||
jobState:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.JobState'
|
||||
description: Final job state
|
||||
example: completed
|
||||
startTime:
|
||||
description: Start Time of job as epoch
|
||||
example: 1649723812
|
||||
type: integer
|
||||
stopTime:
|
||||
description: Stop Time of job as epoch
|
||||
example: 1649763839
|
||||
type: integer
|
||||
required:
|
||||
- jobState
|
||||
- stopTime
|
||||
type: object
|
||||
schema.Accelerator:
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
type: object
|
||||
schema.Cluster:
|
||||
properties:
|
||||
metricConfig:
|
||||
items:
|
||||
$ref: '#/definitions/schema.MetricConfig'
|
||||
type: array
|
||||
name:
|
||||
type: string
|
||||
subClusters:
|
||||
items:
|
||||
$ref: '#/definitions/schema.SubCluster'
|
||||
type: array
|
||||
type: object
|
||||
schema.Job:
|
||||
description: Information of a HPC job.
|
||||
properties:
|
||||
arrayJobId:
|
||||
description: The unique identifier of an array job
|
||||
example: 123000
|
||||
type: integer
|
||||
cluster:
|
||||
description: The unique identifier of a cluster
|
||||
example: fritz
|
||||
type: string
|
||||
concurrentJobs:
|
||||
$ref: '#/definitions/schema.JobLinkResultList'
|
||||
duration:
|
||||
description: Duration of job in seconds (Min > 0)
|
||||
example: 43200
|
||||
minimum: 1
|
||||
type: integer
|
||||
energy:
|
||||
type: number
|
||||
energyFootprint:
|
||||
additionalProperties:
|
||||
type: number
|
||||
type: object
|
||||
exclusive:
|
||||
description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
|
||||
of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
|
||||
jobs of same user'
|
||||
example: 1
|
||||
maximum: 2
|
||||
minimum: 0
|
||||
type: integer
|
||||
footprint:
|
||||
additionalProperties:
|
||||
type: number
|
||||
type: object
|
||||
id:
|
||||
description: The unique identifier of a job in the database
|
||||
type: integer
|
||||
jobId:
|
||||
description: The unique identifier of a job
|
||||
example: 123000
|
||||
type: integer
|
||||
jobState:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.JobState'
|
||||
description: Final state of job
|
||||
enum:
|
||||
- completed
|
||||
- failed
|
||||
@@ -172,67 +210,50 @@ definitions:
|
||||
metaData:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Additional information about the job
|
||||
type: object
|
||||
monitoringStatus:
|
||||
description: 'State of monitoring system during job run: 0 - Disabled, 1 -
|
||||
Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
|
||||
example: 1
|
||||
maximum: 3
|
||||
minimum: 0
|
||||
type: integer
|
||||
numAcc:
|
||||
description: Number of accelerators used (Min > 0)
|
||||
example: 2
|
||||
minimum: 1
|
||||
type: integer
|
||||
numHwthreads:
|
||||
description: NumCores int32 `json:"numCores" db:"num_cores"
|
||||
example:"20" minimum:"1"` //
|
||||
Number of HWThreads used (Min > 0)
|
||||
example: 20
|
||||
minimum: 1
|
||||
type: integer
|
||||
numNodes:
|
||||
description: Number of nodes used (Min > 0)
|
||||
example: 2
|
||||
minimum: 1
|
||||
type: integer
|
||||
partition:
|
||||
description: The Slurm partition to which the job was submitted
|
||||
example: main
|
||||
type: string
|
||||
project:
|
||||
description: The unique identifier of a project
|
||||
example: abcd200
|
||||
type: string
|
||||
resources:
|
||||
description: Resources used by job
|
||||
items:
|
||||
$ref: '#/definitions/schema.Resource'
|
||||
type: array
|
||||
smt:
|
||||
description: SMT threads used by job
|
||||
example: 4
|
||||
type: integer
|
||||
startTime:
|
||||
description: Start time as 'time.Time' data type
|
||||
type: string
|
||||
subCluster:
|
||||
description: The unique identifier of a sub cluster
|
||||
example: main
|
||||
type: string
|
||||
tags:
|
||||
description: List of tags
|
||||
items:
|
||||
$ref: '#/definitions/schema.Tag'
|
||||
type: array
|
||||
user:
|
||||
description: The unique identifier of a user
|
||||
example: abcd100h
|
||||
type: string
|
||||
walltime:
|
||||
description: Requested walltime of job in seconds (Min > 0)
|
||||
example: 86400
|
||||
minimum: 1
|
||||
type: integer
|
||||
@@ -257,39 +278,40 @@ definitions:
|
||||
description: Meta data information of a HPC job.
|
||||
properties:
|
||||
arrayJobId:
|
||||
description: The unique identifier of an array job
|
||||
example: 123000
|
||||
type: integer
|
||||
cluster:
|
||||
description: The unique identifier of a cluster
|
||||
example: fritz
|
||||
type: string
|
||||
concurrentJobs:
|
||||
$ref: '#/definitions/schema.JobLinkResultList'
|
||||
duration:
|
||||
description: Duration of job in seconds (Min > 0)
|
||||
example: 43200
|
||||
minimum: 1
|
||||
type: integer
|
||||
energy:
|
||||
type: number
|
||||
energyFootprint:
|
||||
additionalProperties:
|
||||
type: number
|
||||
type: object
|
||||
exclusive:
|
||||
description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
|
||||
of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
|
||||
jobs of same user'
|
||||
example: 1
|
||||
maximum: 2
|
||||
minimum: 0
|
||||
type: integer
|
||||
footprint:
|
||||
additionalProperties:
|
||||
type: number
|
||||
type: object
|
||||
id:
|
||||
description: The unique identifier of a job in the database
|
||||
type: integer
|
||||
jobId:
|
||||
description: The unique identifier of a job
|
||||
example: 123000
|
||||
type: integer
|
||||
jobState:
|
||||
allOf:
|
||||
- $ref: '#/definitions/schema.JobState'
|
||||
description: Final state of job
|
||||
enum:
|
||||
- completed
|
||||
- failed
|
||||
@@ -301,74 +323,56 @@ definitions:
|
||||
metaData:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Additional information about the job
|
||||
type: object
|
||||
monitoringStatus:
|
||||
description: 'State of monitoring system during job run: 0 - Disabled, 1 -
|
||||
Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
|
||||
example: 1
|
||||
maximum: 3
|
||||
minimum: 0
|
||||
type: integer
|
||||
numAcc:
|
||||
description: Number of accelerators used (Min > 0)
|
||||
example: 2
|
||||
minimum: 1
|
||||
type: integer
|
||||
numHwthreads:
|
||||
description: NumCores int32 `json:"numCores" db:"num_cores"
|
||||
example:"20" minimum:"1"` //
|
||||
Number of HWThreads used (Min > 0)
|
||||
example: 20
|
||||
minimum: 1
|
||||
type: integer
|
||||
numNodes:
|
||||
description: Number of nodes used (Min > 0)
|
||||
example: 2
|
||||
minimum: 1
|
||||
type: integer
|
||||
partition:
|
||||
description: The Slurm partition to which the job was submitted
|
||||
example: main
|
||||
type: string
|
||||
project:
|
||||
description: The unique identifier of a project
|
||||
example: abcd200
|
||||
type: string
|
||||
resources:
|
||||
description: Resources used by job
|
||||
items:
|
||||
$ref: '#/definitions/schema.Resource'
|
||||
type: array
|
||||
smt:
|
||||
description: SMT threads used by job
|
||||
example: 4
|
||||
type: integer
|
||||
startTime:
|
||||
description: Start epoch time stamp in seconds (Min > 0)
|
||||
example: 1649723812
|
||||
minimum: 1
|
||||
type: integer
|
||||
statistics:
|
||||
additionalProperties:
|
||||
$ref: '#/definitions/schema.JobStatistics'
|
||||
description: Metric statistics of job
|
||||
type: object
|
||||
subCluster:
|
||||
description: The unique identifier of a sub cluster
|
||||
example: main
|
||||
type: string
|
||||
tags:
|
||||
description: List of tags
|
||||
items:
|
||||
$ref: '#/definitions/schema.Tag'
|
||||
type: array
|
||||
user:
|
||||
description: The unique identifier of a user
|
||||
example: abcd100h
|
||||
type: string
|
||||
walltime:
|
||||
description: Requested walltime of job in seconds (Min > 0)
|
||||
example: 86400
|
||||
minimum: 1
|
||||
type: integer
|
||||
@@ -427,6 +431,37 @@ definitions:
|
||||
unit:
|
||||
$ref: '#/definitions/schema.Unit'
|
||||
type: object
|
||||
schema.MetricConfig:
|
||||
properties:
|
||||
aggregation:
|
||||
type: string
|
||||
alert:
|
||||
type: number
|
||||
caution:
|
||||
type: number
|
||||
energy:
|
||||
type: string
|
||||
footprint:
|
||||
type: string
|
||||
lowerIsBetter:
|
||||
type: boolean
|
||||
name:
|
||||
type: string
|
||||
normal:
|
||||
type: number
|
||||
peak:
|
||||
type: number
|
||||
scope:
|
||||
$ref: '#/definitions/schema.MetricScope'
|
||||
subClusters:
|
||||
items:
|
||||
$ref: '#/definitions/schema.SubClusterConfig'
|
||||
type: array
|
||||
timestep:
|
||||
type: integer
|
||||
unit:
|
||||
$ref: '#/definitions/schema.Unit'
|
||||
type: object
|
||||
schema.MetricScope:
|
||||
enum:
|
||||
- invalid_scope
|
||||
@@ -454,22 +489,25 @@ definitions:
|
||||
min:
|
||||
type: number
|
||||
type: object
|
||||
schema.MetricValue:
|
||||
properties:
|
||||
unit:
|
||||
$ref: '#/definitions/schema.Unit'
|
||||
value:
|
||||
type: number
|
||||
type: object
|
||||
schema.Resource:
|
||||
description: A resource used by a job
|
||||
properties:
|
||||
accelerators:
|
||||
description: List of of accelerator device ids
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
configuration:
|
||||
description: The configuration options of the node
|
||||
type: string
|
||||
hostname:
|
||||
description: Name of the host (= node)
|
||||
type: string
|
||||
hwthreads:
|
||||
description: List of OS processor ids
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
@@ -497,6 +535,10 @@ definitions:
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
median:
|
||||
items:
|
||||
type: number
|
||||
type: array
|
||||
min:
|
||||
items:
|
||||
type: number
|
||||
@@ -508,21 +550,112 @@ definitions:
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
schema.SubCluster:
|
||||
properties:
|
||||
coresPerSocket:
|
||||
type: integer
|
||||
energyFootprint:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
flopRateScalar:
|
||||
$ref: '#/definitions/schema.MetricValue'
|
||||
flopRateSimd:
|
||||
$ref: '#/definitions/schema.MetricValue'
|
||||
footprint:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
memoryBandwidth:
|
||||
$ref: '#/definitions/schema.MetricValue'
|
||||
metricConfig:
|
||||
items:
|
||||
$ref: '#/definitions/schema.MetricConfig'
|
||||
type: array
|
||||
name:
|
||||
type: string
|
||||
nodes:
|
||||
type: string
|
||||
processorType:
|
||||
type: string
|
||||
socketsPerNode:
|
||||
type: integer
|
||||
threadsPerCore:
|
||||
type: integer
|
||||
topology:
|
||||
$ref: '#/definitions/schema.Topology'
|
||||
type: object
|
||||
schema.SubClusterConfig:
|
||||
properties:
|
||||
alert:
|
||||
type: number
|
||||
caution:
|
||||
type: number
|
||||
energy:
|
||||
type: string
|
||||
footprint:
|
||||
type: string
|
||||
lowerIsBetter:
|
||||
type: boolean
|
||||
name:
|
||||
type: string
|
||||
normal:
|
||||
type: number
|
||||
peak:
|
||||
type: number
|
||||
remove:
|
||||
type: boolean
|
||||
type: object
|
||||
schema.Tag:
|
||||
description: Defines a tag using name and type.
|
||||
properties:
|
||||
id:
|
||||
description: The unique DB identifier of a tag
|
||||
type: integer
|
||||
name:
|
||||
description: Tag Name
|
||||
example: Testjob
|
||||
type: string
|
||||
scope:
|
||||
example: global
|
||||
type: string
|
||||
type:
|
||||
description: Tag Type
|
||||
example: Debug
|
||||
type: string
|
||||
type: object
|
||||
schema.Topology:
|
||||
properties:
|
||||
accelerators:
|
||||
items:
|
||||
$ref: '#/definitions/schema.Accelerator'
|
||||
type: array
|
||||
core:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
die:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
memoryDomain:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
node:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
socket:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
type: object
|
||||
schema.Unit:
|
||||
properties:
|
||||
base:
|
||||
@@ -543,6 +676,43 @@ info:
|
||||
title: ClusterCockpit REST API
|
||||
version: 1.0.0
|
||||
paths:
|
||||
/clusters/:
|
||||
get:
|
||||
description: Get a list of all cluster configs. Specific cluster can be requested
|
||||
using query parameter.
|
||||
parameters:
|
||||
- description: Job Cluster
|
||||
in: query
|
||||
name: cluster
|
||||
type: string
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Array of clusters
|
||||
schema:
|
||||
$ref: '#/definitions/api.GetClustersApiResponse'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"403":
|
||||
description: Forbidden
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Lists all cluster configs
|
||||
tags:
|
||||
- Cluster query
|
||||
/jobs/:
|
||||
get:
|
||||
description: |-
|
||||
@@ -609,6 +779,57 @@ paths:
|
||||
tags:
|
||||
- Job query
|
||||
/jobs/{id}:
|
||||
get:
|
||||
description: |-
|
||||
Job to get is specified by database ID
|
||||
Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||
parameters:
|
||||
- description: Database ID of Job
|
||||
in: path
|
||||
name: id
|
||||
required: true
|
||||
type: integer
|
||||
- description: Include all available metrics
|
||||
in: query
|
||||
name: all-metrics
|
||||
type: boolean
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Job resource
|
||||
schema:
|
||||
$ref: '#/definitions/api.GetJobApiResponse'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"403":
|
||||
description: Forbidden
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"404":
|
||||
description: Resource not found
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"422":
|
||||
description: 'Unprocessable Entity: finding job failed: sql: no rows in
|
||||
result set'
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Get job meta and optional all metric data
|
||||
tags:
|
||||
- Job query
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
@@ -663,7 +884,7 @@ paths:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Get complete job meta and metric data
|
||||
summary: Get job meta and configurable metric data
|
||||
tags:
|
||||
- Job query
|
||||
/jobs/delete_job/:
|
||||
@@ -810,6 +1031,53 @@ paths:
|
||||
summary: Remove a job from the sql database
|
||||
tags:
|
||||
- Job remove
|
||||
/jobs/edit_meta/{id}:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: |-
|
||||
Edit key value pairs in job metadata json
|
||||
If a key already exists its content will be overwritten
|
||||
parameters:
|
||||
- description: Job Database ID
|
||||
in: path
|
||||
name: id
|
||||
required: true
|
||||
type: integer
|
||||
- description: Kay value pair to add
|
||||
in: body
|
||||
name: request
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/api.EditMetaRequest'
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Updated job resource
|
||||
schema:
|
||||
$ref: '#/definitions/schema.Job'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"404":
|
||||
description: Job does not exist
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Edit meta-data json
|
||||
tags:
|
||||
- Job add and modify
|
||||
/jobs/start_job/:
|
||||
post:
|
||||
consumes:
|
||||
@@ -906,68 +1174,13 @@ paths:
|
||||
summary: Marks job as completed and triggers archiving
|
||||
tags:
|
||||
- Job add and modify
|
||||
/jobs/stop_job/{id}:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: |-
|
||||
Job to stop is specified by database ID. Only stopTime and final state are required in request body.
|
||||
Returns full job resource information according to 'JobMeta' scheme.
|
||||
parameters:
|
||||
- description: Database ID of Job
|
||||
in: path
|
||||
name: id
|
||||
required: true
|
||||
type: integer
|
||||
- description: stopTime and final state in request body
|
||||
in: body
|
||||
name: request
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/api.StopJobApiRequest'
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Job resource
|
||||
schema:
|
||||
$ref: '#/definitions/schema.JobMeta'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"403":
|
||||
description: Forbidden
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"404":
|
||||
description: Resource not found
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"422":
|
||||
description: 'Unprocessable Entity: finding job failed: sql: no rows in
|
||||
result set'
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Marks job as completed and triggers archiving
|
||||
tags:
|
||||
- Job add and modify
|
||||
/jobs/tag_job/{id}:
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: |-
|
||||
Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
||||
Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
|
||||
If tagged job is already finished: Tag will be written directly to respective archive files.
|
||||
parameters:
|
||||
- description: Job Database ID
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
CC_USER=clustercockpit
|
||||
|
||||
CC_GROUP=clustercockpit
|
||||
|
||||
CC_HOME=/tmp
|
||||
|
||||
LOG_DIR=/var/log
|
||||
|
||||
DATA_DIR=/var/run/cc-backend
|
||||
|
||||
MAX_OPEN_FILES=10000
|
||||
|
||||
CONF_DIR=/etc/cc-backend
|
||||
|
||||
CONF_FILE=/etc/cc-backend/cc-backend.json
|
||||
|
||||
RESTART_ON_UPGRADE=true
|
||||
@@ -1,12 +0,0 @@
|
||||
Package: cc-backend
|
||||
Version: {VERSION}
|
||||
Installed-Size: {INSTALLED_SIZE}
|
||||
Architecture: {ARCH}
|
||||
Maintainer: thomas.gruber@fau.de
|
||||
Depends: libc6 (>= 2.2.1)
|
||||
Build-Depends: debhelper-compat (= 13), git, golang-go, npm, yarn
|
||||
Description: ClusterCockpit backend and web frontend
|
||||
Homepage: https://github.com/ClusterCockpit/cc-backend
|
||||
Source: cc-backend
|
||||
Rules-Requires-Root: no
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
[Unit]
|
||||
Description=ClusterCockpit backend and web frontend (cc-backend)
|
||||
Documentation=https://github.com/ClusterCockpit/cc-backend
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
EnvironmentFile=/etc/default/cc-backend
|
||||
Type=simple
|
||||
User=clustercockpit
|
||||
Group=clustercockpit
|
||||
Restart=on-failure
|
||||
TimeoutStopSec=100
|
||||
LimitNOFILE=infinity
|
||||
ExecStart=/usr/bin/cc-backend --config ${CONF_FILE}
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,70 +0,0 @@
|
||||
Name: cc-backend
|
||||
Version: %{VERS}
|
||||
Release: 1%{?dist}
|
||||
Summary: ClusterCockpit backend and web frontend
|
||||
|
||||
License: MIT
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
|
||||
#BuildRequires: go-toolset
|
||||
#BuildRequires: systemd-rpm-macros
|
||||
#BuildRequires: npm
|
||||
|
||||
Provides: %{name} = %{version}
|
||||
|
||||
%description
|
||||
ClusterCockpit backend and web frontend
|
||||
|
||||
%global debug_package %{nil}
|
||||
|
||||
%prep
|
||||
%autosetup
|
||||
|
||||
|
||||
%build
|
||||
#CURRENT_TIME=$(date +%Y-%m-%d:T%H:%M:\%S)
|
||||
#LD_FLAGS="-s -X main.buildTime=${CURRENT_TIME} -X main.version=%{VERS}"
|
||||
mkdir ./var
|
||||
touch ./var/job.db
|
||||
cd web/frontend && yarn install && yarn build && cd -
|
||||
go build -ldflags="-s -X main.version=%{VERS}" ./cmd/cc-backend
|
||||
|
||||
|
||||
%install
|
||||
# Install cc-backend
|
||||
#make PREFIX=%{buildroot} install
|
||||
install -Dpm 755 cc-backend %{buildroot}/%{_bindir}/%{name}
|
||||
install -Dpm 0600 configs/config.json %{buildroot}%{_sysconfdir}/%{name}/%{name}.json
|
||||
# Integrate into system
|
||||
install -Dpm 0644 build/package/%{name}.service %{buildroot}%{_unitdir}/%{name}.service
|
||||
install -Dpm 0600 build/package/%{name}.config %{buildroot}%{_sysconfdir}/default/%{name}
|
||||
install -Dpm 0644 build/package/%{name}.sysusers %{buildroot}%{_sysusersdir}/%{name}.conf
|
||||
|
||||
|
||||
%check
|
||||
# go test should be here... :)
|
||||
|
||||
%pre
|
||||
%sysusers_create_package scripts/%{name}.sysusers
|
||||
|
||||
%post
|
||||
%systemd_post %{name}.service
|
||||
|
||||
%preun
|
||||
%systemd_preun %{name}.service
|
||||
|
||||
%files
|
||||
# Binary
|
||||
%attr(-,clustercockpit,clustercockpit) %{_bindir}/%{name}
|
||||
# Config
|
||||
%dir %{_sysconfdir}/%{name}
|
||||
%attr(0600,clustercockpit,clustercockpit) %config(noreplace) %{_sysconfdir}/%{name}/%{name}.json
|
||||
# Systemd
|
||||
%{_unitdir}/%{name}.service
|
||||
%{_sysconfdir}/default/%{name}
|
||||
%{_sysusersdir}/%{name}.conf
|
||||
|
||||
%changelog
|
||||
* Mon Mar 07 2022 Thomas Gruber - 0.1
|
||||
- Initial metric store implementation
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
#Type Name ID GECOS Home directory Shell
|
||||
u clustercockpit - "User for ClusterCockpit" /run/cc-backend /sbin/nologin
|
||||
33
cmd/cc-backend/cli.go
Normal file
33
cmd/cc-backend/cli.go
Normal file
@@ -0,0 +1,33 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import "flag"
|
||||
|
||||
var (
|
||||
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
|
||||
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
)
|
||||
|
||||
func cliInit() {
|
||||
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
|
||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
||||
flag.Parse()
|
||||
}
|
||||
85
cmd/cc-backend/init.go
Normal file
85
cmd/cc-backend/init.go
Normal file
@@ -0,0 +1,85 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
)
|
||||
|
||||
const envString = `
|
||||
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||
# You can generate your own keypair using the gen-keypair tool
|
||||
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||
|
||||
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||
`
|
||||
|
||||
const configString = `
|
||||
{
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "name",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2023-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
func initEnv() {
|
||||
if util.CheckFileExists("var") {
|
||||
fmt.Print("Directory ./var already exists. Exiting!\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
|
||||
log.Fatalf("Writing config.json failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
|
||||
log.Fatalf("Writing .env failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.Mkdir("var", 0o777); err != nil {
|
||||
log.Fatalf("Mkdir var failed: %s", err.Error())
|
||||
}
|
||||
|
||||
err := repository.MigrateDB("sqlite3", "./var/job.db")
|
||||
if err != nil {
|
||||
log.Fatalf("Initialize job.db failed: %s", err.Error())
|
||||
}
|
||||
}
|
||||
@@ -1,157 +1,52 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql/handler"
|
||||
"github.com/99designs/gqlgen/graphql/playground"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/runtimeEnv"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/taskManager"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
"github.com/go-co-op/gocron"
|
||||
"github.com/google/gops/agent"
|
||||
"github.com/gorilla/handlers"
|
||||
"github.com/gorilla/mux"
|
||||
httpSwagger "github.com/swaggo/http-swagger"
|
||||
|
||||
_ "github.com/go-sql-driver/mysql"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
const logoString = `
|
||||
____ _ _ ____ _ _ _
|
||||
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
|
||||
_____ _ _ ____ _ _ _
|
||||
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
|
||||
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
|
||||
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
|
||||
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
||||
\_____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
||||
|_|
|
||||
`
|
||||
|
||||
const envString = `
|
||||
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||
# You can generate your own keypair using the gen-keypair tool
|
||||
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||
|
||||
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||
`
|
||||
|
||||
const configString = `
|
||||
{
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "name",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2023-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
`
|
||||
|
||||
var (
|
||||
date string
|
||||
commit string
|
||||
version string
|
||||
)
|
||||
|
||||
func initEnv() {
|
||||
if util.CheckFileExists("var") {
|
||||
fmt.Print("Directory ./var already exists. Exiting!\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if err := os.WriteFile("config.json", []byte(configString), 0666); err != nil {
|
||||
log.Fatalf("Writing config.json failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.WriteFile(".env", []byte(envString), 0666); err != nil {
|
||||
log.Fatalf("Writing .env failed: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := os.Mkdir("var", 0777); err != nil {
|
||||
log.Fatalf("Mkdir var failed: %s", err.Error())
|
||||
}
|
||||
|
||||
err := repository.MigrateDB("sqlite3", "./var/job.db")
|
||||
if err != nil {
|
||||
log.Fatalf("Initialize job.db failed: %s", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
var flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagDev, flagVersion, flagLogDateTime bool
|
||||
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
|
||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
||||
flag.Parse()
|
||||
cliInit()
|
||||
|
||||
if flagVersion {
|
||||
fmt.Print(logoString)
|
||||
@@ -166,14 +61,6 @@ func main() {
|
||||
// Apply config flags for pkg/log
|
||||
log.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
if flagInit {
|
||||
initEnv()
|
||||
fmt.Print("Succesfully setup environment!\n")
|
||||
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
|
||||
fmt.Print("Add your job-archive at ./var/job-archive.\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// See https://github.com/google/gops (Runtime overhead is almost zero)
|
||||
if flagGops {
|
||||
if err := agent.Listen(agent.Options{}); err != nil {
|
||||
@@ -205,19 +92,35 @@ func main() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if flagRevertDB {
|
||||
err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if flagForceDB {
|
||||
err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
||||
db := repository.GetConnection()
|
||||
|
||||
var authentication *auth.Authentication
|
||||
if flagInit {
|
||||
initEnv()
|
||||
fmt.Print("Successfully setup environment!\n")
|
||||
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
|
||||
fmt.Print("Add your job-archive at ./var/job-archive.\n")
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
var err error
|
||||
if authentication, err = auth.Init(); err != nil {
|
||||
log.Fatalf("auth initialization failed: %v", err)
|
||||
}
|
||||
|
||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
|
||||
authentication.SessionMaxAge = d
|
||||
}
|
||||
auth.Init()
|
||||
|
||||
if flagNewUser != "" {
|
||||
parts := strings.SplitN(flagNewUser, ":", 3)
|
||||
@@ -239,12 +142,14 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
if flagSyncLDAP {
|
||||
if authentication.LdapAuth == nil {
|
||||
if authHandle.LdapAuth == nil {
|
||||
log.Fatal("cannot sync: LDAP authentication is not configured")
|
||||
}
|
||||
|
||||
if err := authentication.LdapAuth.Sync(); err != nil {
|
||||
if err := authHandle.LdapAuth.Sync(); err != nil {
|
||||
log.Fatalf("LDAP sync failed: %v", err)
|
||||
}
|
||||
log.Info("LDAP sync successfull")
|
||||
@@ -261,13 +166,14 @@ func main() {
|
||||
log.Warnf("user '%s' does not have the API role", user.Username)
|
||||
}
|
||||
|
||||
jwt, err := authentication.JwtAuth.ProvideJWT(user)
|
||||
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
|
||||
}
|
||||
|
||||
fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
|
||||
}
|
||||
|
||||
} else if flagNewUser != "" || flagDelUser != "" {
|
||||
log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
|
||||
}
|
||||
@@ -276,7 +182,7 @@ func main() {
|
||||
log.Fatalf("failed to initialize archive: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
||||
if err := metricdata.Init(); err != nil {
|
||||
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
|
||||
}
|
||||
|
||||
@@ -296,214 +202,16 @@ func main() {
|
||||
return
|
||||
}
|
||||
|
||||
// Setup the http.Handler/Router used by the server
|
||||
jobRepo := repository.GetJobRepository()
|
||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
||||
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
if os.Getenv("DEBUG") != "1" {
|
||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||
switch e := err.(type) {
|
||||
case string:
|
||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||
case error:
|
||||
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
||||
}
|
||||
|
||||
return errors.New("MAIN > Internal server error (panic)")
|
||||
})
|
||||
}
|
||||
|
||||
api := &api.RestApi{
|
||||
JobRepository: jobRepo,
|
||||
Resolver: resolver,
|
||||
MachineStateDir: config.Keys.MachineStateDir,
|
||||
Authentication: authentication,
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
|
||||
|
||||
r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo})
|
||||
}).Methods(http.MethodGet)
|
||||
r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||
})
|
||||
r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||
})
|
||||
|
||||
secured := r.PathPrefix("/").Subrouter()
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
r.Handle("/login", authentication.Login(
|
||||
// On success:
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
})
|
||||
})).Methods(http.MethodPost)
|
||||
|
||||
r.Handle("/jwt-login", authentication.Login(
|
||||
// On success:
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
})
|
||||
}))
|
||||
|
||||
r.Handle("/logout", authentication.Logout(
|
||||
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Bye - ClusterCockpit",
|
||||
MsgType: "alert-info",
|
||||
Message: "Logout successful",
|
||||
Build: buildInfo,
|
||||
})
|
||||
}))).Methods(http.MethodPost)
|
||||
|
||||
secured.Use(func(next http.Handler) http.Handler {
|
||||
return authentication.Auth(
|
||||
// On success;
|
||||
next,
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Authentication failed - ClusterCockpit",
|
||||
MsgType: "alert-danger",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
if flagDev {
|
||||
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||
}
|
||||
secured.Handle("/query", graphQLEndpoint)
|
||||
|
||||
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
||||
})
|
||||
|
||||
// Mount all /monitoring/... and /api/... routes.
|
||||
routerConfig.SetupRoutes(secured, buildInfo)
|
||||
api.MountRoutes(secured)
|
||||
|
||||
if config.Keys.EmbedStaticFiles {
|
||||
if i, err := os.Stat("./var/img"); err == nil {
|
||||
if i.IsDir() {
|
||||
log.Info("Use local directory for static images")
|
||||
r.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||
}
|
||||
}
|
||||
r.PathPrefix("/").Handler(web.ServeFiles())
|
||||
} else {
|
||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||
}
|
||||
|
||||
r.Use(handlers.CompressHandler)
|
||||
r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||
r.Use(handlers.CORS(
|
||||
handlers.AllowCredentials(),
|
||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"})))
|
||||
handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
} else {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
}
|
||||
})
|
||||
archiver.Start(repository.GetJobRepository())
|
||||
taskManager.Start()
|
||||
serverInit()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
server := http.Server{
|
||||
ReadTimeout: 10 * time.Second,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
Handler: handler,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
log.Fatalf("starting http listener failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
||||
go func() {
|
||||
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
|
||||
}()
|
||||
}
|
||||
|
||||
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
||||
if err != nil {
|
||||
log.Fatalf("loading X509 keypair failed: %v", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
CipherSuites: []uint16{
|
||||
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
},
|
||||
MinVersion: tls.VersionTLS12,
|
||||
PreferServerCipherSuites: true,
|
||||
})
|
||||
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
|
||||
} else {
|
||||
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
|
||||
}
|
||||
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actual http server can be started.
|
||||
if err = runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
log.Fatalf("error while preparing server start: %s", err.Error())
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatalf("starting server failed: %v", err)
|
||||
}
|
||||
serverStart()
|
||||
}()
|
||||
|
||||
wg.Add(1)
|
||||
@@ -514,117 +222,15 @@ func main() {
|
||||
<-sigs
|
||||
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
|
||||
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
server.Shutdown(context.Background())
|
||||
serverShutdown()
|
||||
|
||||
// Then, wait for any async archivings still pending...
|
||||
api.JobRepository.WaitForArchiving()
|
||||
taskManager.Shutdown()
|
||||
}()
|
||||
|
||||
s := gocron.NewScheduler(time.Local)
|
||||
|
||||
if config.Keys.StopJobsExceedingWalltime > 0 {
|
||||
log.Info("Register undead jobs service")
|
||||
|
||||
s.Every(1).Day().At("3:00").Do(func() {
|
||||
err = jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
||||
}
|
||||
runtime.GC()
|
||||
})
|
||||
}
|
||||
|
||||
var cfg struct {
|
||||
Compression int `json:"compression"`
|
||||
Retention schema.Retention `json:"retention"`
|
||||
}
|
||||
|
||||
cfg.Retention.IncludeDB = true
|
||||
|
||||
if err = json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
}
|
||||
|
||||
switch cfg.Retention.Policy {
|
||||
case "delete":
|
||||
log.Info("Register retention delete service")
|
||||
|
||||
s.Every(1).Day().At("4:00").Do(func() {
|
||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().CleanUp(jobs)
|
||||
|
||||
if cfg.Retention.IncludeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %s", err.Error())
|
||||
}
|
||||
}
|
||||
})
|
||||
case "move":
|
||||
log.Info("Register retention move service")
|
||||
|
||||
s.Every(1).Day().At("4:00").Do(func() {
|
||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().Move(jobs, cfg.Retention.Location)
|
||||
|
||||
if cfg.Retention.IncludeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %v", err)
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
if cfg.Compression > 0 {
|
||||
log.Info("Register compression service")
|
||||
|
||||
s.Every(1).Day().At("5:00").Do(func() {
|
||||
var jobs []*schema.Job
|
||||
|
||||
ar := archive.GetHandle()
|
||||
startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
|
||||
lastTime := ar.CompressLast(startTime)
|
||||
if startTime == lastTime {
|
||||
log.Info("Compression Service - Complete archive run")
|
||||
jobs, err = jobRepo.FindJobsBetween(0, startTime)
|
||||
|
||||
} else {
|
||||
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for compression jobs: %v", err)
|
||||
}
|
||||
ar.Compress(jobs)
|
||||
})
|
||||
}
|
||||
|
||||
s.StartAsync()
|
||||
|
||||
if os.Getenv("GOGC") == "" {
|
||||
debug.SetGCPercent(25)
|
||||
}
|
||||
runtimeEnv.SystemdNotifiy(true, "running")
|
||||
wg.Wait()
|
||||
log.Print("Gracefull shutdown completed!")
|
||||
log.Print("Graceful shutdown completed!")
|
||||
}
|
||||
|
||||
318
cmd/cc-backend/server.go
Normal file
318
cmd/cc-backend/server.go
Normal file
@@ -0,0 +1,318 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql/handler"
|
||||
"github.com/99designs/gqlgen/graphql/playground"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
"github.com/gorilla/handlers"
|
||||
"github.com/gorilla/mux"
|
||||
httpSwagger "github.com/swaggo/http-swagger"
|
||||
)
|
||||
|
||||
var (
|
||||
router *mux.Router
|
||||
server *http.Server
|
||||
apiHandle *api.RestApi
|
||||
)
|
||||
|
||||
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
json.NewEncoder(rw).Encode(map[string]string{
|
||||
"status": http.StatusText(http.StatusUnauthorized),
|
||||
"error": err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
func serverInit() {
|
||||
// Setup the http.Handler/Router used by the server
|
||||
graph.Init()
|
||||
resolver := graph.GetResolverInstance()
|
||||
graphQLEndpoint := handler.NewDefaultServer(
|
||||
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
|
||||
if os.Getenv("DEBUG") != "1" {
|
||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||
switch e := err.(type) {
|
||||
case string:
|
||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||
case error:
|
||||
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
||||
}
|
||||
|
||||
return errors.New("MAIN > Internal server error (panic)")
|
||||
})
|
||||
}
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
apiHandle = api.New()
|
||||
|
||||
router = mux.NewRouter()
|
||||
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
|
||||
|
||||
info := map[string]interface{}{}
|
||||
info["hasOpenIDConnect"] = false
|
||||
|
||||
if config.Keys.OpenIDConfig != nil {
|
||||
openIDConnect := auth.NewOIDC(authHandle)
|
||||
openIDConnect.RegisterEndpoints(router)
|
||||
info["hasOpenIDConnect"] = true
|
||||
}
|
||||
|
||||
router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
log.Debugf("##%v##", info)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
||||
}).Methods(http.MethodGet)
|
||||
router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||
})
|
||||
router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||
})
|
||||
|
||||
secured := router.PathPrefix("/").Subrouter()
|
||||
securedapi := router.PathPrefix("/api").Subrouter()
|
||||
userapi := router.PathPrefix("/userapi").Subrouter()
|
||||
configapi := router.PathPrefix("/config").Subrouter()
|
||||
frontendapi := router.PathPrefix("/frontend").Subrouter()
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
router.Handle("/login", authHandle.Login(
|
||||
// On success: Handled within Login()
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
})).Methods(http.MethodPost)
|
||||
|
||||
router.Handle("/jwt-login", authHandle.Login(
|
||||
// On success: Handled within Login()
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}))
|
||||
|
||||
router.Handle("/logout", authHandle.Logout(
|
||||
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Bye - ClusterCockpit",
|
||||
MsgType: "alert-info",
|
||||
Message: "Logout successful",
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}))).Methods(http.MethodPost)
|
||||
|
||||
secured.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.Auth(
|
||||
// On success;
|
||||
next,
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Authentication failed - ClusterCockpit",
|
||||
MsgType: "alert-danger",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
Redirect: r.RequestURI,
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
securedapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
|
||||
userapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthUserApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
|
||||
configapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthConfigApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
|
||||
frontendapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthFrontendApi(
|
||||
// On success;
|
||||
next,
|
||||
// On failure: JSON Response
|
||||
onFailureResponse)
|
||||
})
|
||||
}
|
||||
|
||||
if flagDev {
|
||||
router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||
}
|
||||
secured.Handle("/query", graphQLEndpoint)
|
||||
|
||||
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
||||
})
|
||||
|
||||
// Mount all /monitoring/... and /api/... routes.
|
||||
routerConfig.SetupRoutes(secured, buildInfo)
|
||||
apiHandle.MountApiRoutes(securedapi)
|
||||
apiHandle.MountUserApiRoutes(userapi)
|
||||
apiHandle.MountConfigApiRoutes(configapi)
|
||||
apiHandle.MountFrontendApiRoutes(frontendapi)
|
||||
|
||||
if config.Keys.EmbedStaticFiles {
|
||||
if i, err := os.Stat("./var/img"); err == nil {
|
||||
if i.IsDir() {
|
||||
log.Info("Use local directory for static images")
|
||||
router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||
}
|
||||
}
|
||||
router.PathPrefix("/").Handler(web.ServeFiles())
|
||||
} else {
|
||||
router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||
}
|
||||
|
||||
router.Use(handlers.CompressHandler)
|
||||
router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||
router.Use(handlers.CORS(
|
||||
handlers.AllowCredentials(),
|
||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"})))
|
||||
}
|
||||
|
||||
func serverStart() {
|
||||
handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
} else {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
}
|
||||
})
|
||||
|
||||
server = &http.Server{
|
||||
ReadTimeout: 20 * time.Second,
|
||||
WriteTimeout: 20 * time.Second,
|
||||
Handler: handler,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
log.Fatalf("starting http listener failed: %v", err)
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
||||
go func() {
|
||||
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
|
||||
}()
|
||||
}
|
||||
|
||||
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(
|
||||
config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
||||
if err != nil {
|
||||
log.Fatalf("loading X509 keypair failed: %v", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
CipherSuites: []uint16{
|
||||
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
},
|
||||
MinVersion: tls.VersionTLS12,
|
||||
PreferServerCipherSuites: true,
|
||||
})
|
||||
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
|
||||
} else {
|
||||
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
|
||||
}
|
||||
//
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actual http server can be started.
|
||||
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
log.Fatalf("error while preparing server start: %s", err.Error())
|
||||
}
|
||||
|
||||
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatalf("starting server failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func serverShutdown() {
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
server.Shutdown(context.Background())
|
||||
|
||||
// Then, wait for any async archivings still pending...
|
||||
archiver.WaitForArchiving()
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
## Intro
|
||||
|
||||
cc-backend requires a configuration file that specifies the cluster systems to be used.
|
||||
To override the default, specify the location of a json configuration file with the `-config <file path>` command line option.
|
||||
All security-related configurations, e.g. keys and passwords, are set using
|
||||
environment variables.
|
||||
It is supported to set these by means of a `.env` file in the project root.
|
||||
|
||||
## Configuration Options
|
||||
|
||||
* `addr`: Type string. Address where the http (or https) server will listen on (for example: 'localhost:80'). Default `:8080`.
|
||||
* `apiAllowedIPs`: Type string array. Addresses from which the secured API endpoints (/users and other auth related endpoints) can be reached
|
||||
* `user`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
|
||||
* `group`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
|
||||
* `disable-authentication`: Type bool. Disable authentication (for everything: API, Web-UI, ...). Default `false`.
|
||||
* `embed-static-files`: Type bool. If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not. Default `true`.
|
||||
* `static-files`: Type string. Folder where static assets can be found, if `embed-static-files` is `false`. No default.
|
||||
* `db-driver`: Type string. 'sqlite3' or 'mysql' (mysql will work for mariadb as well). Default `sqlite3`.
|
||||
* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
|
||||
* `job-archive`: Type object.
|
||||
- `kind`: Type string. At them moment only file is supported as value.
|
||||
- `path`: Type string. Path to the job-archive. Default: `./var/job-archive`.
|
||||
- `compression`: Type integer. Setup automatic compression for jobs older than number of days.
|
||||
- `retention`: Type object.
|
||||
- `policy`: Type string (required). Retention policy. Possible values none, delete,
|
||||
move.
|
||||
- `includeDB`: Type boolean. Also remove jobs from database.
|
||||
- `age`: Type integer. Act on jobs with startTime older than age (in days).
|
||||
- `location`: Type string. The target directory for retention. Only applicable for retention policy move.
|
||||
* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
|
||||
* `validate`: Type bool. Validate all input json documents against json schema.
|
||||
* `session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
|
||||
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
|
||||
* `redirect-http-to`: Type string. If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url.
|
||||
* `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail!
|
||||
* `stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`.
|
||||
* `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
|
||||
* `jwts`: Type object (required). For JWT Authentication.
|
||||
- `max-age`: Type string (required). Configure how long a token is valid. As string parsable by time.ParseDuration().
|
||||
- `cookieName`: Type string. Cookie that should be checked for a JWT token.
|
||||
- `vaidateUser`: Type boolean. Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.
|
||||
- `trustedIssuer`: Type string. Issuer that should be accepted when validating external JWTs.
|
||||
- `syncUserOnLogin`: Type boolean. Add non-existent user to DB at login attempt with values provided in JWT.
|
||||
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
|
||||
- `url`: Type string (required). URL of LDAP directory server.
|
||||
- `user_base`: Type string (required). Base DN of user tree root.
|
||||
- `search_dn`: Type string (required). DN for authenticating LDAP admin account with general read rights.
|
||||
- `user_bind`: Type string (required). Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
|
||||
- `user_filter`: Type string (required). Filter to extract users for syncing.
|
||||
- `username_attr`: Type string. Attribute with full user name. Defaults to `gecos` if not provided.
|
||||
- `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.
|
||||
- `sync_del_old_users`: Type boolean. Delete obsolete users in database.
|
||||
- `syncUserOnLogin`: Type boolean. Add non-existent user to DB at login attempt if user exists in Ldap directory.
|
||||
* `clusters`: Type array of objects (required)
|
||||
- `name`: Type string. The name of the cluster.
|
||||
- `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string)
|
||||
- `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example:
|
||||
```
|
||||
"filterRanges": {
|
||||
"numNodes": { "from": 1, "to": 64 },
|
||||
"duration": { "from": 0, "to": 86400 },
|
||||
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
|
||||
}
|
||||
```
|
||||
* `ui-defaults`: Type object. Default configuration for ui views. If overwritten, all options must be provided! Most options can be overwritten by the user via the web interface.
|
||||
- `analysis_view_histogramMetrics`: Type string array. Metrics to show as job count histograms in analysis view. Default `["flops_any", "mem_bw", "mem_used"]`.
|
||||
- `analysis_view_scatterPlotMetrics`: Type array of string array. Initial
|
||||
scatter plot configuration in analysis view. Default `[["flops_any", "mem_bw"], ["flops_any", "cpu_load"], ["cpu_load", "mem_bw"]]`.
|
||||
- `job_view_nodestats_selectedMetrics`: Type string array. Initial metrics shown in node statistics table of single job view. Default `["flops_any", "mem_bw", "mem_used"]`.
|
||||
- `job_view_polarPlotMetrics`: Type string array. Metrics shown in polar plot of single job view. Default `["flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"]`.
|
||||
- `job_view_selectedMetrics`: Type string array. Default `["flops_any", "mem_bw", "mem_used"]`.
|
||||
- `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`.
|
||||
- `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`.
|
||||
- `plot_general_lineWidth`: Type int. Initial linewidth. Default `3`.
|
||||
- `plot_list_jobsPerPage`: Type int. Jobs shown per page in job lists. Default `50`.
|
||||
- `plot_list_selectedMetrics`: Type string array. Initial metric plots shown in jobs lists. Default `"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"`.
|
||||
- `plot_view_plotsPerRow`: Type int. Number of plots per row in single job view. Default `3`.
|
||||
- `plot_view_showPolarplot`: Type bool. Option to toggle polar plot in single job view. Default `true`.
|
||||
- `plot_view_showRoofline`: Type bool. Option to toggle roofline plot in single job view. Default `true`.
|
||||
- `plot_view_showStatTable`: Type bool. Option to toggle the node statistic table in single job view. Default `true`.
|
||||
- `system_view_selectedMetric`: Type string. Initial metric shown in system view. Default `cpu_load`.
|
||||
|
||||
Some of the `ui-defaults` values can be appended by `:<clustername>` in order to have different settings depending on the current cluster. Those are notably `job_view_nodestats_selectedMetrics`, `job_view_polarPlotMetrics`, `job_view_selectedMetrics` and `plot_list_selectedMetrics`.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
An example env file is found in this directory. Copy it to `.env` in the project root and adapt it for your needs.
|
||||
|
||||
* `JWT_PUBLIC_KEY` and `JWT_PRIVATE_KEY`: Base64 encoded Ed25519 keys used for JSON Web Token (JWT) authentication. You can generate your own keypair using `go run ./cmd/gen-keypair/gen-keypair.go`. More information in [README_TOKENS.md](./README_TOKENS.md).
|
||||
* `SESSION_KEY`: Some random bytes used as secret for cookie-based sessions.
|
||||
* `LDAP_ADMIN_PASSWORD`: The LDAP admin user password (optional).
|
||||
* `CROSS_LOGIN_JWT_HS512_KEY`: Used for token based logins via another authentication service.
|
||||
* `LOGLEVEL`: Can be `err`, `warn`, `info` or `debug` (optional, `warn` by default). Can be used to reduce logging.
|
||||
@@ -1,51 +0,0 @@
|
||||
## Introduction
|
||||
|
||||
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs.
|
||||
JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.
|
||||
This information can be verified and trusted because it is digitally signed.
|
||||
In ClusterCockpit JWTs are signed using a public/private key pair using ECDSA.
|
||||
Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it.
|
||||
Expiration of the generated tokens as well as the max. length of a browser session can be configured in the `config.json` file described [here](./README.md).
|
||||
|
||||
The [Ed25519](https://ed25519.cr.yp.to/) algorithm for signatures was used because it is compatible with other tools that require authentication, such as NATS.io, and because these elliptic-curve methods provide simillar security with smaller keys compared to something like RSA. They are sligthly more expensive to validate, but that effect is negligible.
|
||||
|
||||
## JWT Payload
|
||||
|
||||
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
|
||||
Currently ClusterCockpit sets the following claims:
|
||||
* `iat`: Issued at claim. The “iat” claim is used to identify the the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
|
||||
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
|
||||
* `roles`: An array of strings specifying the roles set for the subject.
|
||||
* `exp`: Expiration date of the token (only if explicitly configured)
|
||||
|
||||
It is important to know that JWTs are not encrypted, only signed. This means that outsiders cannot create new JWTs or modify existing ones, but they are able to read out the username.
|
||||
|
||||
## Workflow
|
||||
|
||||
1. Create a new ECDSA Public/private keypair:
|
||||
```
|
||||
$ go build ./cmd/gen-keypair/
|
||||
$ ./gen-keypair
|
||||
```
|
||||
2. Add keypair in your `.env` file. A template can be found in `./configs`.
|
||||
|
||||
When a user logs in via the `/login` page using a browser, a session cookie (secured using the random bytes in the `SESSION_KEY` env. variable you shoud change as well) is used for all requests after the successfull login. The JWTs make it easier to use the APIs of ClusterCockpit using scripts or other external programs. The token is specified n the `Authorization` HTTP header using the [Bearer schema](https://datatracker.ietf.org/doc/html/rfc6750) (there is an example below). Tokens can be issued to users from the configuration view in the Web-UI or the command line. In order to use the token for API endpoints such as `/api/jobs/start_job/`, the user that executes it needs to have the `api` role. Regular users can only perform read-only queries and only look at data connected to jobs they started themselves.
|
||||
|
||||
## cc-metric-store
|
||||
|
||||
The [cc-metric-store](https://github.com/ClusterCockpit/cc-metric-store) also uses JWTs for authentication. As it does not issue new tokens, it does not need to kown the private key. The public key of the keypair that is used to generate the JWTs that grant access to the `cc-metric-store` can be specified in its `config.json`. When configuring the `metricDataRepository` object in the `cluster.json` file, you can put a token issued by ClusterCockpit itself.
|
||||
|
||||
## Setup user and JWT token for REST API authorization
|
||||
|
||||
1. Create user:
|
||||
```
|
||||
$ ./cc-backend --add-user <username>:api:<password> --no-server
|
||||
```
|
||||
2. Issue token for user:
|
||||
```
|
||||
$ ./cc-backend --jwt <username> --no-server
|
||||
```
|
||||
3. Use issued token token on client side:
|
||||
```
|
||||
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
|
||||
```
|
||||
@@ -1,56 +1,67 @@
|
||||
{
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "fritz",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "alex",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
"addr": "127.0.0.1:8080",
|
||||
"short-running-jobs-duration": 300,
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
},
|
||||
"enable-resampling": {
|
||||
"trigger": 30,
|
||||
"resolutions": [
|
||||
600,
|
||||
300,
|
||||
120,
|
||||
60
|
||||
]
|
||||
},
|
||||
"emission-constant": 317,
|
||||
"clusters": [
|
||||
{
|
||||
"name": "fritz",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "alex",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
69
configs/config-mariadb.json
Normal file
69
configs/config-mariadb.json
Normal file
@@ -0,0 +1,69 @@
|
||||
{
|
||||
"addr": "127.0.0.1:8080",
|
||||
"short-running-jobs-duration": 300,
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
},
|
||||
"db-driver": "mysql",
|
||||
"db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit",
|
||||
"enable-resampling": {
|
||||
"trigger": 30,
|
||||
"resolutions": [
|
||||
600,
|
||||
300,
|
||||
120,
|
||||
60
|
||||
]
|
||||
},
|
||||
"emission-constant": 317,
|
||||
"clusters": [
|
||||
{
|
||||
"name": "fritz",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "alex",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
12
configs/default_metrics.json
Normal file
12
configs/default_metrics.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"clusters": [
|
||||
{
|
||||
"name": "fritz",
|
||||
"default_metrics": "cpu_load, flops_any, core_power, lustre_open, mem_used, mem_bw, net_bytes_in"
|
||||
},
|
||||
{
|
||||
"name": "alex",
|
||||
"default_metrics": "flops_any, mem_bw, mem_used, vectorization_ratio"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -117,10 +117,12 @@ foreach my $ln (split("\n", $topo)) {
|
||||
|
||||
my $node;
|
||||
my @sockets;
|
||||
my @nodeCores;
|
||||
foreach my $socket ( @{$DOMAINS{socket}} ) {
|
||||
push @sockets, "[".join(",", @{$socket})."]";
|
||||
$node .= join(",", @{$socket})
|
||||
push @nodeCores, join(",", @{$socket});
|
||||
}
|
||||
$node = join(",", @nodeCores);
|
||||
$INFO{sockets} = join(",\n", @sockets);
|
||||
|
||||
my @memDomains;
|
||||
@@ -212,9 +214,27 @@ print <<"END";
|
||||
"socketsPerNode": $INFO{socketsPerNode},
|
||||
"coresPerSocket": $INFO{coresPerSocket},
|
||||
"threadsPerCore": $INFO{threadsPerCore},
|
||||
"flopRateScalar": $flopsScalar,
|
||||
"flopRateSimd": $flopsSimd,
|
||||
"memoryBandwidth": $memBw,
|
||||
"flopRateScalar": {
|
||||
"unit": {
|
||||
"base": "F/s",
|
||||
"prefix": "G"
|
||||
},
|
||||
"value": $flopsScalar
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"unit": {
|
||||
"base": "F/s",
|
||||
"prefix": "G"
|
||||
},
|
||||
"value": $flopsSimd
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"unit": {
|
||||
"base": "B/s",
|
||||
"prefix": "G"
|
||||
},
|
||||
"value": $memBw
|
||||
},
|
||||
"nodes": "<FILL IN NODE RANGES>",
|
||||
"topology": {
|
||||
"node": [$node],
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
# Release versions
|
||||
|
||||
Versions are marked according to [semantic versioning] (https://semver.org).
|
||||
Each version embeds the following static assets in the binary:
|
||||
* Web frontend with javascript files and all static assets.
|
||||
* Golang template files for server-side rendering.
|
||||
* JSON schema files for validation.
|
||||
* Database migration files.
|
||||
|
||||
The remaining external assets are:
|
||||
* The SQL database used.
|
||||
* The job archive
|
||||
* The configuration files `config.json` and `.env`.
|
||||
|
||||
The external assets are versioned with integer IDs.
|
||||
This means that each release binary is bound to specific versions of the SQL
|
||||
database and the job archive.
|
||||
The configuration file is checked against the current schema at startup.
|
||||
The `-migrate-db` command line switch can be used to upgrade the SQL database
|
||||
to migrate from a previous version to the latest one.
|
||||
We offer a separate tool `archive-migration` to migrate an existing job archive
|
||||
archive from the previous to the latest version.
|
||||
|
||||
# Versioning of APIs
|
||||
|
||||
cc-backend provides two API backends:
|
||||
* A REST API for querying jobs.
|
||||
* A GraphQL API for data exchange between web frontend and cc-backend.
|
||||
|
||||
The REST API will also be versioned. We still have to decide whether we will also
|
||||
support older REST API versions by versioning the endpoint URLs.
|
||||
The GraphQL API is for internal use and will not be versioned.
|
||||
|
||||
# How to build
|
||||
|
||||
In general it is recommended to use the provided release binary.
|
||||
In case you want to build build `cc-backend` please always use the provided makefile. This will ensure
|
||||
that the frontend is also built correctly and that the version in the binary is encoded in the binary.
|
||||
234
docs/Hands-on.md
234
docs/Hands-on.md
@@ -1,234 +0,0 @@
|
||||
# Hands-on setup ClusterCockpit from scratch (w/o docker)
|
||||
|
||||
## Prerequisites
|
||||
* perl
|
||||
* go
|
||||
* npm
|
||||
* Optional: curl
|
||||
* Script migrateTimestamp.pl
|
||||
|
||||
## Documentation
|
||||
You find READMEs or api docs in
|
||||
* ./cc-backend/configs
|
||||
* ./cc-backend/init
|
||||
* ./cc-backend/api
|
||||
|
||||
## ClusterCockpit configuration files
|
||||
### cc-backend
|
||||
* `./.env` Passwords and Tokens set in the environment
|
||||
* `./config.json` Configuration options for cc-backend
|
||||
|
||||
### cc-metric-store
|
||||
* `./config.json` Optional to overwrite configuration options
|
||||
|
||||
### cc-metric-collector
|
||||
Not yet included in the hands-on setup.
|
||||
|
||||
## Setup Components
|
||||
Start by creating a base folder for all of the following steps.
|
||||
* `mkdir clustercockpit`
|
||||
* `cd clustercockpit`
|
||||
|
||||
### Setup cc-backend
|
||||
* Clone Repository
|
||||
- `git clone https://github.com/ClusterCockpit/cc-backend.git`
|
||||
- `cd cc-backend`
|
||||
* Build
|
||||
- `make`
|
||||
* Activate & configure environment for cc-backend
|
||||
- `cp configs/env-template.txt .env`
|
||||
- Optional: Have a look via `vim .env`
|
||||
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
|
||||
* Back to toplevel `clustercockpit`
|
||||
- `cd ..`
|
||||
* Prepare Datafolder and Database file
|
||||
- `mkdir var`
|
||||
- `./cc-backend -migrate-db`
|
||||
|
||||
### Setup cc-metric-store
|
||||
* Clone Repository
|
||||
- `git clone https://github.com/ClusterCockpit/cc-metric-store.git`
|
||||
- `cd cc-metric-store`
|
||||
* Build Go Executable
|
||||
- `go get`
|
||||
- `go build`
|
||||
* Prepare Datafolders
|
||||
- `mkdir -p var/checkpoints`
|
||||
- `mkdir -p var/archive`
|
||||
* Update Config
|
||||
- `vim config.json`
|
||||
- Exchange existing setting in `metrics` with the following:
|
||||
```
|
||||
"clock": { "frequency": 60, "aggregation": null },
|
||||
"cpi": { "frequency": 60, "aggregation": null },
|
||||
"cpu_load": { "frequency": 60, "aggregation": null },
|
||||
"flops_any": { "frequency": 60, "aggregation": null },
|
||||
"flops_dp": { "frequency": 60, "aggregation": null },
|
||||
"flops_sp": { "frequency": 60, "aggregation": null },
|
||||
"ib_bw": { "frequency": 60, "aggregation": null },
|
||||
"lustre_bw": { "frequency": 60, "aggregation": null },
|
||||
"mem_bw": { "frequency": 60, "aggregation": null },
|
||||
"mem_used": { "frequency": 60, "aggregation": null },
|
||||
"rapl_power": { "frequency": 60, "aggregation": null }
|
||||
```
|
||||
* Back to toplevel `clustercockpit`
|
||||
- `cd ..`
|
||||
|
||||
### Setup Demo Data
|
||||
* `mkdir source-data`
|
||||
* `cd source-data`
|
||||
* Download JobArchive-Source:
|
||||
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz`
|
||||
- `tar xJf job-archive-dev.tar.xz`
|
||||
- `mv ./job-archive ./job-archive-source`
|
||||
- `rm ./job-archive-dev.tar.xz`
|
||||
* Download CC-Metric-Store Checkpoints:
|
||||
- `mkdir -p cc-metric-store-source/checkpoints`
|
||||
- `cd cc-metric-store-source/checkpoints`
|
||||
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz`
|
||||
- `tar xf cc-metric-store-checkpoints.tar.xz`
|
||||
- `rm cc-metric-store-checkpoints.tar.xz`
|
||||
* Back to `source-data`
|
||||
- `cd ../..`
|
||||
* Run timestamp migration script. This may take tens of minutes!
|
||||
- `cp ../migrateTimestamps.pl .`
|
||||
- `./migrateTimestamps.pl`
|
||||
- Expected output:
|
||||
```
|
||||
Starting to update start- and stoptimes in job-archive for emmy
|
||||
Starting to update start- and stoptimes in job-archive for woody
|
||||
Done for job-archive
|
||||
Starting to update checkpoint filenames and data starttimes for emmy
|
||||
Starting to update checkpoint filenames and data starttimes for woody
|
||||
Done for checkpoints
|
||||
```
|
||||
* Copy `cluster.json` files from source to migrated folders
|
||||
- `cp source-data/job-archive-source/emmy/cluster.json cc-backend/var/job-archive/emmy/`
|
||||
- `cp source-data/job-archive-source/woody/cluster.json cc-backend/var/job-archive/woody/`
|
||||
* Initialize Job-Archive in SQLite3 job.db and add demo user
|
||||
- `cd cc-backend`
|
||||
- `./cc-backend -init-db -add-user demo:admin:demo`
|
||||
- Expected output:
|
||||
```
|
||||
<6>[INFO] new user "demo" created (roles: ["admin"], auth-source: 0)
|
||||
<6>[INFO] Building job table...
|
||||
<6>[INFO] A total of 3936 jobs have been registered in 1.791 seconds.
|
||||
```
|
||||
* Back to toplevel `clustercockpit`
|
||||
- `cd ..`
|
||||
|
||||
### Startup both Apps
|
||||
* In cc-backend root: `$./cc-backend -server -dev`
|
||||
- Starts Clustercockpit at `http:localhost:8080`
|
||||
- Log: `<6>[INFO] HTTP server listening at :8080...`
|
||||
- Use local internet browser to access interface
|
||||
- You should see and be able to browse finished Jobs
|
||||
- Metadata is read from SQLite3 database
|
||||
- Metricdata is read from job-archive/JSON-Files
|
||||
- Create User in settings (top-right corner)
|
||||
- Name `apiuser`
|
||||
- Username `apiuser`
|
||||
- Role `API`
|
||||
- Submit & Refresh Page
|
||||
- Create JTW for `apiuser`
|
||||
- In Userlist, press `Gen. JTW` for `apiuser`
|
||||
- Save JWT for later use
|
||||
* In cc-metric-store root: `$./cc-metric-store`
|
||||
- Start the cc-metric-store on `http:localhost:8081`, Log:
|
||||
```
|
||||
2022/07/15 17:17:42 Loading checkpoints newer than 2022-07-13T17:17:42+02:00
|
||||
2022/07/15 17:17:45 Checkpoints loaded (5621 files, 319 MB, that took 3.034652s)
|
||||
2022/07/15 17:17:45 API http endpoint listening on '0.0.0.0:8081'
|
||||
```
|
||||
- Does *not* have a graphical interface
|
||||
- Otpional: Test function by executing:
|
||||
```
|
||||
$ curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -D - "http://localhost:8081/api/query" -d "{ \"cluster\": \"emmy\", \"from\": $(expr $(date +%s) - 60), \"to\": $(date +%s), \"queries\": [{
|
||||
\"metric\": \"flops_any\",
|
||||
\"host\": \"e1111\"
|
||||
}] }"
|
||||
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Date: Fri, 15 Jul 2022 13:57:22 GMT
|
||||
Content-Length: 119
|
||||
{"results":[[JSON-DATA-ARRAY]]}
|
||||
```
|
||||
|
||||
### Development API web interfaces
|
||||
The `-dev` flag enables web interfaces to document and test the apis:
|
||||
* http://localhost:8080/playground - A GraphQL playground. To use it you must have a authenticated session in the same browser.
|
||||
* http://localhost:8080/swagger - A Swagger UI. To use it you have to be logged out, so no user session in the same browser. Use the JWT token with role Api generate previously to authenticate via http header.
|
||||
|
||||
### Use cc-backend API to start job
|
||||
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
|
||||
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
|
||||
* Click the `/job/start_job` endpoint and click the Try it out button.
|
||||
* Enter the following json into the request body text area and fill in a recent start timestamp by executing `date +%s`.:
|
||||
```
|
||||
{
|
||||
"jobId": 100000,
|
||||
"arrayJobId": 0,
|
||||
"user": "ccdemouser",
|
||||
"subCluster": "main",
|
||||
"cluster": "emmy",
|
||||
"startTime": <date +%s>,
|
||||
"project": "ccdemoproject",
|
||||
"resources": [
|
||||
{"hostname": "e0601"},
|
||||
{"hostname": "e0823"},
|
||||
{"hostname": "e0337"},
|
||||
{"hostname": "e1111"}],
|
||||
"numNodes": 4,
|
||||
"numHwthreads": 80,
|
||||
"walltime": 86400
|
||||
}
|
||||
```
|
||||
* The response body should be the database id of the started job, for example:
|
||||
```
|
||||
{
|
||||
"id": 3937
|
||||
}
|
||||
```
|
||||
* Check in ClusterCockpit
|
||||
- User `ccdemouser` should appear in Users-Tab with one running job
|
||||
- It could take up to 5 Minutes until the Job is displayed with some current data (5 Min Short-Job Filter)
|
||||
- Job then is marked with a green `running` tag
|
||||
- Metricdata displayed is read from cc-metric-store!
|
||||
|
||||
|
||||
### Use cc-backend API to stop job
|
||||
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
|
||||
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
|
||||
* Click the `/job/stop_job/{id}` endpoint and click the Try it out button.
|
||||
* Enter the database id at id that was returned by `start_job` and copy the following into the request body. Replace the timestamp with a recent one:
|
||||
```
|
||||
{
|
||||
"cluster": "emmy",
|
||||
"jobState": "completed",
|
||||
"stopTime": <RECENT TS>
|
||||
}
|
||||
```
|
||||
* On success a json document with the job meta data is returned.
|
||||
|
||||
* Check in ClusterCockpit
|
||||
- User `ccdemouser` should appear in Users-Tab with one completed job
|
||||
- Job is no longer marked with a green `running` tag -> Completed!
|
||||
- Metricdata displayed is now read from job-archive!
|
||||
* Check in job-archive
|
||||
- `cd ./cc-backend/var/job-archive/emmy/100/000`
|
||||
- `cd $STARTTIME`
|
||||
- Inspect `meta.json` and `data.json`
|
||||
|
||||
## Helper scripts
|
||||
* In this tarball you can find the perl script `generate_subcluster.pl` that helps to generate the subcluster section for your system.
|
||||
Usage:
|
||||
* Log into an exclusive cluster node.
|
||||
* The LIKWID tools likwid-topology and likwid-bench must be in the PATH!
|
||||
* `$./generate_subcluster.pl` outputs the subcluster section on `stdout`
|
||||
|
||||
Please be aware that
|
||||
* You have to enter the name and node list for the subCluster manually.
|
||||
* GPU detection only works if LIKWID was build with Cuda avalable and you run likwid-topology also with Cuda loaded.
|
||||
* Do not blindly trust the measured peakflops values.
|
||||
* Because the script blindly relies on the CSV format output by likwid-topology this is a fragile undertaking!
|
||||
@@ -1,99 +0,0 @@
|
||||
## Introduction
|
||||
|
||||
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs. JSON
|
||||
Web Token (JWT) is an open standard (RFC 7519) that defines a compact and
|
||||
self-contained way for securely transmitting information between parties as a
|
||||
JSON object. This information can be verified and trusted because it is
|
||||
digitally signed. In ClusterCockpit JWTs are signed using a public/private key
|
||||
pair using ECDSA. Because tokens are signed using public/private key pairs, the
|
||||
signature also certifies that only the party holding the private key is the one
|
||||
that signed it. Token expiration is set to the configuration option MaxAge.
|
||||
|
||||
## JWT Payload
|
||||
|
||||
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
|
||||
Currently ClusterCockpit sets the following claims:
|
||||
* `iat`: Issued at claim. The “iat” claim is used to identify the the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
|
||||
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
|
||||
* `roles`: An array of strings specifying the roles set for the subject.
|
||||
|
||||
## Workflow
|
||||
|
||||
1. Create a new ECDSA Public/private keypair:
|
||||
```
|
||||
$ go build ./tools/gen-keypair.go
|
||||
$ ./gen-keypair
|
||||
```
|
||||
2. Add keypair in your `.env` file. A template can be found in `./configs`.
|
||||
|
||||
There are two usage scenarios:
|
||||
* The APIs are used during a browser session. API accesses are authorized with
|
||||
the active session.
|
||||
* The REST API is used outside a browser session, e.g. by scripts. In this case
|
||||
you have to issue a token manually. This possible from within the
|
||||
configuration view or on the command line. It is recommended to issue a JWT
|
||||
token in this case for a special user that only has the `api` role. By using
|
||||
different users for different purposes a fine grained access control and
|
||||
access revocation management is possible.
|
||||
|
||||
The token is commonly specified in the Authorization HTTP header using the Bearer schema.
|
||||
|
||||
## Setup user and JWT token for REST API authorization
|
||||
|
||||
1. Create user:
|
||||
```
|
||||
$ ./cc-backend --add-user <username>:api:<Password> --no-server
|
||||
```
|
||||
2. Issue token for user:
|
||||
```
|
||||
$ ./cc-backend -jwt <username> -no-server
|
||||
```
|
||||
3. Use issued token token on client side:
|
||||
```
|
||||
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
|
||||
```
|
||||
|
||||
## Accept externally generated JWTs provided via cookie
|
||||
If there is an external service like an AuthAPI that can generate JWTs and hand
|
||||
them over to ClusterCockpit via cookies, CC can be configured to accept them:
|
||||
|
||||
1. `.env`: CC needs a public ed25519 key to verify foreign JWT signatures.
|
||||
Public keys in PEM format can be converted with the instructions in
|
||||
[/tools/convert-pem-pubkey-for-cc](../tools/convert-pem-pubkey-for-cc/Readme.md)
|
||||
.
|
||||
|
||||
```
|
||||
CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc="
|
||||
```
|
||||
|
||||
2. `config.json`: Insert a name for the cookie (set by the external service)
|
||||
containing the JWT so that CC knows where to look at. Define a trusted issuer
|
||||
(JWT claim 'iss'), otherwise it will be rejected. If you want usernames and
|
||||
user roles from JWTs ('sub' and 'roles' claim) to be validated against CC's
|
||||
internal database, you need to enable it here. Unknown users will then be
|
||||
rejected and roles set via JWT will be ignored.
|
||||
|
||||
```json
|
||||
"jwts": {
|
||||
"cookieName": "access_cc",
|
||||
"forceJWTValidationViaDatabase": true,
|
||||
"trustedExternalIssuer": "auth.example.com"
|
||||
}
|
||||
```
|
||||
|
||||
3. Make sure your external service includes the same issuer (`iss`) in its JWTs.
|
||||
Example JWT payload:
|
||||
|
||||
```json
|
||||
{
|
||||
"iat": 1668161471,
|
||||
"nbf": 1668161471,
|
||||
"exp": 1668161531,
|
||||
"sub": "alice",
|
||||
"roles": [
|
||||
"user"
|
||||
],
|
||||
"jti": "a1b2c3d4-1234-5678-abcd-a1b2c3d4e5f6",
|
||||
"iss": "auth.example.com"
|
||||
}
|
||||
```
|
||||
@@ -1,78 +0,0 @@
|
||||
The job archive specifies an exchange format for job meta and performance metric
|
||||
data. It consists of two parts:
|
||||
* a [SQLite database schema](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#sqlite-database-schema) for job meta data and performance statistics
|
||||
* a [Json file format](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#json-file-format) together with a [Directory hierarchy specification](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#directory-hierarchy-specification)
|
||||
|
||||
By using an open, portable and simple specification based on files it is
|
||||
possible to exchange job performance data for research and analysis purposes as
|
||||
well as use it as a robust way for archiving job performance data to disk.
|
||||
|
||||
# SQLite database schema
|
||||
## Introduction
|
||||
|
||||
A SQLite 3 database schema is provided to standardize the job meta data
|
||||
information in a portable way. The schema also includes optional columns for job
|
||||
performance statistics (called a job performance footprint). The database acts
|
||||
as a front end to filter and select subsets of job IDs, that are the keys to get
|
||||
the full job performance data in the job performance tree hierarchy.
|
||||
|
||||
## Database schema
|
||||
|
||||
The schema includes 3 tables: the job table, a tag table and a jobtag table
|
||||
representing the MANY-TO-MANY relation between jobs and tags. The SQL schema is
|
||||
specified
|
||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/schemas/jobs-sqlite.sql).
|
||||
Explanation of the various columns including the JSON datatypes is documented
|
||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/job-meta.schema.json).
|
||||
|
||||
# Directory hierarchy specification
|
||||
|
||||
## Specification
|
||||
|
||||
To manage the number of directories within a single directory a tree approach is
|
||||
used splitting the integer job ID. The job id is split in junks of 1000 each.
|
||||
Usually 2 layers of directories is sufficient but the concept can be used for an
|
||||
arbitrary number of layers.
|
||||
|
||||
For a 2 layer schema this can be achieved with (code example in Perl):
|
||||
``` perl
|
||||
$level1 = $jobID/1000;
|
||||
$level2 = $jobID%1000;
|
||||
$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2);
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
For the job ID 1034871 the directory path is `./1034/871/`.
|
||||
|
||||
# Json file format
|
||||
## Overview
|
||||
|
||||
Every cluster must be configured in a `cluster.json` file.
|
||||
|
||||
The job data consists of two files:
|
||||
* `meta.json`: Contains job meta information and job statistics.
|
||||
* `data.json`: Contains complete job data with time series
|
||||
|
||||
The description of the json format specification is available as [[json
|
||||
schema|https://json-schema.org/]] format file. The latest version of the json
|
||||
schema is part of the `cc-backend` source tree. For external reference it is
|
||||
also available in a separate repository.
|
||||
|
||||
## Specification `cluster.json`
|
||||
|
||||
The json schema specification is available
|
||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/cluster.schema.json).
|
||||
|
||||
## Specification `meta.json`
|
||||
|
||||
The json schema specification is available
|
||||
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-meta.schema.json).
|
||||
|
||||
## Specification `data.json`
|
||||
|
||||
The json schema specification is available
|
||||
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-data.schema.json).
|
||||
Metric time series data is stored for a fixed time step. The time step is set
|
||||
per metric. If no value is available for a metric time series data timestamp
|
||||
`null` is entered.
|
||||
@@ -1,29 +0,0 @@
|
||||
# Overview
|
||||
|
||||
Customizing `cc-backend` means changing the logo, legal texts, and the login
|
||||
template instead of the placeholders. You can also place a text file in `./var`
|
||||
to add dynamic status or notification messages to the clusterCockpit homepage.
|
||||
|
||||
# Replace legal texts
|
||||
|
||||
To replace the `imprint.tmpl` and `privacy.tmpl` legal texts, you can place your
|
||||
version in `./var/`. At startup `cc-backend` will check if `./var/imprint.tmpl` and/or
|
||||
`./var/privacy.tmpl` exist and use them instead of the built-in placeholders.
|
||||
You can use the placeholders in `web/templates` as a blueprint.
|
||||
|
||||
# Replace login template
|
||||
To replace the default login layout and styling, you can place your version in
|
||||
`./var/`. At startup `cc-backend` will check if `./var/login.tmpl` exist and use
|
||||
it instead of the built-in placeholder. You can use the default temaplte
|
||||
`web/templates/login.tmpl` as a blueprint.
|
||||
|
||||
# Replace logo
|
||||
To change the logo displayed in the navigation bar, you can provide the file
|
||||
`logo.png` in the folder `./var/img/`. On startup `cc-backend` will check if the
|
||||
folder exists and use the images provided there instead of the built-in images.
|
||||
You may also place additional images there you use in a custom login template.
|
||||
|
||||
# Add notification banner on homepage
|
||||
To add a notification banner you can add a file `notice.txt` to `./var`. As long
|
||||
as this file is present all text in this file is shown in an info banner on the
|
||||
homepage.
|
||||
@@ -1,78 +0,0 @@
|
||||
In general, an upgrade is nothing more than a replacement of the binary file.
|
||||
All the necessary files, except the database file, the configuration file and
|
||||
the job archive, are embedded in the binary file. It is recommended to use a
|
||||
directory where the file names of the binary files are named with a version
|
||||
indicator. This can be, for example, the date or the Unix epoch time. A symbolic
|
||||
link points to the version to be used. This makes it easier to switch to earlier
|
||||
versions.
|
||||
|
||||
The database and the job archive are versioned. Each release binary supports
|
||||
specific versions of the database and job archive. If a version mismatch is
|
||||
detected, the application is terminated and migration is required.
|
||||
|
||||
**IMPORTANT NOTE**
|
||||
|
||||
It is recommended to make a backup copy of the database before each update. This
|
||||
is mandatory in case the database needs to be migrated. In the case of sqlite,
|
||||
this means to stopping `cc-backend` and copying the sqlite database file
|
||||
somewhere.
|
||||
|
||||
# Migrating the database
|
||||
|
||||
After you have backed up the database, run the following command to migrate the
|
||||
database to the latest version:
|
||||
```
|
||||
$ ./cc-backend -migrate-db
|
||||
```
|
||||
|
||||
The migration files are embedded in the binary and can also be viewed in the cc
|
||||
backend [source tree](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository/migrations).
|
||||
There are separate migration files for both supported
|
||||
database backends.
|
||||
We use the [migrate library](https://github.com/golang-migrate/migrate).
|
||||
|
||||
If something goes wrong, you can check the status and get the current schema
|
||||
(here for sqlite):
|
||||
```
|
||||
$ sqlite3 var/job.db
|
||||
```
|
||||
In the sqlite console execute:
|
||||
```
|
||||
.schema
|
||||
```
|
||||
to get the current databse schema.
|
||||
You can query the current version and whether the migration failed with:
|
||||
```
|
||||
SELECT * FROM schema_migrations;
|
||||
```
|
||||
The first column indicates the current database version and the second column is
|
||||
a dirty flag indicating whether the migration was successful.
|
||||
|
||||
# Migrating the job archive
|
||||
|
||||
Job archive migration requires a separate tool (`archive-migration`), which is
|
||||
part of the cc-backend source tree (build with `go build ./tools/archive-migration`)
|
||||
and is also provided as part of the releases.
|
||||
|
||||
Migration is supported only between two successive releases. The migration tool
|
||||
migrates the existing job archive to a new job archive. This means that there
|
||||
must be enough disk space for two complete job archives. If the tool is called
|
||||
without options:
|
||||
```
|
||||
$ ./archive-migration
|
||||
```
|
||||
|
||||
it is assumed that a job archive exists in `./var/job-archive`. The new job
|
||||
archive is written to `./var/job-archive-new`. Since execution is threaded in case
|
||||
of a fatal error, it is impossible to determine in which job the error occurred.
|
||||
In this case, you can run the tool in debug mode (with the `-debug` flag). In
|
||||
debug mode, threading is disabled and the job ID of each migrated job is output.
|
||||
Jobs with empty files will be skipped. Between multiple runs of the tools, the
|
||||
`job-archive-new` directory must be moved or deleted.
|
||||
|
||||
The `cluster.json` files in `job-archive-new` must be checked for errors, especially
|
||||
whether the aggregation attribute is set correctly for all metrics.
|
||||
|
||||
Migration takes several hours for relatively large job archives (several hundred
|
||||
GB). A versioned job archive contains a version.txt file in the root directory
|
||||
of the job archive. This file contains the version as an unsigned integer.
|
||||
@@ -1,180 +0,0 @@
|
||||
# Overview
|
||||
|
||||
The authentication is implemented in `internal/auth/`. In `auth.go`
|
||||
an interface is defined that any authentication provider must fulfill. It also
|
||||
acts as a dispatcher to delegate the calls to the available authentication
|
||||
providers.
|
||||
|
||||
Two authentication types are available:
|
||||
* JWT authentication for the REST API that does not create a session cookie
|
||||
* Session based authentication using a session cookie
|
||||
|
||||
The most important routines in auth are:
|
||||
* `Login()` Handle POST request to login user and start a new session
|
||||
* `Auth()` Authenticate user and put User Object in context of the request
|
||||
|
||||
The http router calls auth in the following cases:
|
||||
* `r.Handle("/login", authentication.Login( ... )).Methods(http.MethodPost)`:
|
||||
The POST request on the `/login` route will call the Login callback.
|
||||
* `r.Handle("/jwt-login", authentication.Login( ... ))`:
|
||||
Any request on the `/jwt-login` route will call the Login callback. Intended
|
||||
for use for the JWT token based authenticators.
|
||||
* Any route in the secured subrouter will always call Auth(), on success it will
|
||||
call the next handler in the chain, on failure it will render the login
|
||||
template.
|
||||
```
|
||||
secured.Use(func(next http.Handler) http.Handler {
|
||||
return authentication.Auth(
|
||||
// On success;
|
||||
next,
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
// Render login form
|
||||
})
|
||||
})
|
||||
```
|
||||
|
||||
A JWT token can be used to initiate an authenticated user
|
||||
session. This can either happen by calling the login route with a token
|
||||
provided in a header or via a special cookie containing the JWT token.
|
||||
For API routes the access is authenticated on every request using the JWT token
|
||||
and no session is initiated.
|
||||
|
||||
# Login
|
||||
|
||||
The Login function (located in `auth.go`):
|
||||
* Extracts the user name and gets the user from the user database table. In case the
|
||||
user is not found the user object is set to nil.
|
||||
* Iterates over all authenticators and:
|
||||
- Calls its `CanLogin` function which checks if the authentication method is
|
||||
supported for this user.
|
||||
- Calls its `Login` function to authenticate the user. On success a valid user
|
||||
object is returned.
|
||||
- Creates a new session object, stores the user attributes in the session and
|
||||
saves the session.
|
||||
- Starts the `onSuccess` http handler
|
||||
|
||||
## Local authenticator
|
||||
|
||||
This authenticator is applied if
|
||||
```
|
||||
return user != nil && user.AuthSource == AuthViaLocalPassword
|
||||
```
|
||||
|
||||
Compares the password provided by the login form to the password hash stored in
|
||||
the user database table:
|
||||
```
|
||||
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
|
||||
log.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
|
||||
return nil, fmt.Errorf("Authentication failed")
|
||||
}
|
||||
```
|
||||
|
||||
## LDAP authenticator
|
||||
|
||||
This authenticator is applied if the user was found in the database and its
|
||||
AuthSource is LDAP:
|
||||
```
|
||||
if user != nil {
|
||||
if user.AuthSource == schema.AuthViaLDAP {
|
||||
return user, true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If the option `SyncUserOnLogin` is set it tried to sync the user from the LDAP
|
||||
directory. In case this succeeds the user is persisted to the database and can
|
||||
login.
|
||||
|
||||
Gets the LDAP connection and tries a bind with the provided credentials:
|
||||
```
|
||||
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
|
||||
log.Errorf("AUTH/LDAP > Authentication for user %s failed: %v", user.Username, err)
|
||||
return nil, fmt.Errorf("Authentication failed")
|
||||
}
|
||||
```
|
||||
|
||||
## JWT Session authenticator
|
||||
|
||||
Login via JWT token will create a session without password.
|
||||
For login the `X-Auth-Token` header is not supported. This authenticator is
|
||||
applied if the Authorization header or query parameter login-token is present:
|
||||
```
|
||||
return user, r.Header.Get("Authorization") != "" ||
|
||||
r.URL.Query().Get("login-token") != ""
|
||||
```
|
||||
|
||||
The Login function:
|
||||
* Parses the token and checks if it is expired
|
||||
* Check if the signing method is EdDSA or HS256 or HS512
|
||||
* Check if claims are valid and extracts the claims
|
||||
* The following claims have to be present:
|
||||
- `sub`: The subject, in this case this is the username
|
||||
- `exp`: Expiration in Unix epoch time
|
||||
- `roles`: String array with roles of user
|
||||
* In case user does not exist in the database and the option `SyncUserOnLogin`
|
||||
is set add user to user database table with `AuthViaToken` AuthSource.
|
||||
* Return valid user object
|
||||
|
||||
## JWT Cookie Session authenticator
|
||||
|
||||
Login via JWT cookie token will create a session without password.
|
||||
It is first checked if the required configuration options are set:
|
||||
* `trustedIssuer`
|
||||
* `CookieName`
|
||||
|
||||
and optionally the environment variable `CROSS_LOGIN_JWT_PUBLIC_KEY` is set.
|
||||
|
||||
This authenticator is applied if the configured cookie is present:
|
||||
```
|
||||
jwtCookie, err := r.Cookie(cookieName)
|
||||
|
||||
if err == nil && jwtCookie.Value != "" {
|
||||
return true
|
||||
}
|
||||
```
|
||||
|
||||
The Login function:
|
||||
* Extracts and parses the token
|
||||
* Checks if signing method is Ed25519/EdDSA
|
||||
* In case publicKeyCrossLogin is configured:
|
||||
- Check if `iss` issuer claim matched trusted issuer from configuration
|
||||
- Return public cross login key
|
||||
- Otherwise return standard public key
|
||||
* Check if claims are valid
|
||||
* Depending on the option `validateUser` the roles are
|
||||
extracted from JWT token or taken from user object fetched from database
|
||||
* Ask browser to delete the JWT cookie
|
||||
* In case user does not exist in the database and the option `SyncUserOnLogin`
|
||||
is set add user to user database table with `AuthViaToken` AuthSource.
|
||||
* Return valid user object
|
||||
|
||||
# Auth
|
||||
|
||||
The Auth function (located in `auth.go`):
|
||||
* Returns a new http handler function that is defined right away
|
||||
* This handler tries two methods to authenticate a user:
|
||||
- Via a JWT API token in `AuthViaJWT()`
|
||||
- Via a valid session in `AuthViaSession()`
|
||||
* If err is not nil and the user object is valid it puts the user object in the
|
||||
request context and starts the onSuccess http handler
|
||||
* Otherwise it calls the onFailure handler
|
||||
|
||||
## AuthViaJWT
|
||||
|
||||
Implemented in JWTAuthenticator:
|
||||
* Extract token either from header `X-Auth-Token` or `Authorization` with Bearer
|
||||
prefix
|
||||
* Parse token and check if it is valid. The Parse routine will also check if the
|
||||
token is expired.
|
||||
* If the option `validateUser` is set it will ensure the
|
||||
user object exists in the database and takes the roles from the database user
|
||||
* Otherwise the roles are extracted from the roles claim
|
||||
* Returns a valid user object with AuthType set to AuthToken
|
||||
|
||||
## AuthViaSession
|
||||
|
||||
* Extracts session
|
||||
* Get values username, projects, and roles from session
|
||||
* Returns a valid user object with AuthType set to AuthSession
|
||||
@@ -1,33 +0,0 @@
|
||||
## Tips for frontend development
|
||||
|
||||
The frontend assets including the Svelte js files are per default embedded in
|
||||
the bgo binary. To enable a quick turnaround cycle for web development of the
|
||||
frontend disable embedding of static assets in `config.json`:
|
||||
```
|
||||
"embed-static-files": false,
|
||||
"static-files": "./web/frontend/public/",
|
||||
|
||||
```
|
||||
|
||||
Start the node build process (in directory `./web/frontend`) in development mode:
|
||||
```
|
||||
$ npm run dev
|
||||
```
|
||||
|
||||
This will start the build process in listen mode. Whenever you change a source
|
||||
files the depending javascript targets will be automatically rebuild.
|
||||
In case the javascript files are minified you may need to set the production
|
||||
flag by hand to false in `./web/frontend/rollup.config.mjs`:
|
||||
```
|
||||
const production = false
|
||||
```
|
||||
|
||||
Usually this should work automatically.
|
||||
|
||||
Because the files are still served by ./cc-backend you have to reload the view
|
||||
explicitly in your browser.
|
||||
|
||||
A common setup is to have three terminals open:
|
||||
* One running cc-backend (working directory repository root): `./cc-backend -server -dev`
|
||||
* Another running npm in developer mode (working directory `./web/frontend`): `npm run dev`
|
||||
* And the last one editing the frontend source files
|
||||
@@ -1,13 +0,0 @@
|
||||
# Steps to prepare a release
|
||||
|
||||
1. On `hotfix` branch:
|
||||
* Update ReleaseNotes.md
|
||||
* Update version in Makefile
|
||||
* Commit, push, and pull request
|
||||
* Merge in master
|
||||
|
||||
2. On Linux host:
|
||||
* Pull master
|
||||
* Ensure that GitHub Token environment variable `GITHUB_TOKEN` is set
|
||||
* Create release tag: `git tag v1.1.0 -m release`
|
||||
* Execute `goreleaser release`
|
||||
@@ -1,34 +0,0 @@
|
||||
## Overview
|
||||
|
||||
We use the standard golang testing environment.
|
||||
|
||||
The following conventions are used:
|
||||
|
||||
* *White box unit tests*: Tests for internal functionality are placed in files
|
||||
* *Black box unit tests*: Tests for public interfaces are placed in files
|
||||
with `<package name>_test.go` and belong to the package `<package_name>_test`.
|
||||
There only exists one package test file per package.
|
||||
* *Integration tests*: Tests that use multiple componenents are placed in a
|
||||
package test file. These are named `<package name>_test.go` and belong to the
|
||||
package `<package_name>_test`.
|
||||
* *Test assets*: Any required files are placed in a directory `./testdata`
|
||||
within each package directory.
|
||||
|
||||
## Executing tests
|
||||
|
||||
Visual Studio Code has a very good golang test integration.
|
||||
For debugging a test this is the recommended solution.
|
||||
|
||||
The Makefile provided by us has a `test` target that executes:
|
||||
```
|
||||
$ go clean -testcache
|
||||
$ go build ./...
|
||||
$ go vet ./...
|
||||
$ go test ./...
|
||||
```
|
||||
|
||||
Of course the commands can also be used on the command line.
|
||||
For details about golang testing refer to the standard documentation:
|
||||
|
||||
* [Testing package](https://pkg.go.dev/testing)
|
||||
* [go test command](https://pkg.go.dev/cmd/go#hdr-Test_packages)
|
||||
@@ -1,229 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use JSON::PP; # from Perl default install
|
||||
use Time::Local qw( timelocal ); # from Perl default install
|
||||
use Time::Piece; # from Perl default install
|
||||
|
||||
### JSON
|
||||
my $json = JSON::PP->new->allow_nonref;
|
||||
|
||||
### TIME AND DATE
|
||||
# now
|
||||
my $localtime = localtime;
|
||||
my $epochtime = $localtime->epoch;
|
||||
# 5 days ago: Via epoch due to possible reverse month borders
|
||||
my $epochlessfive = $epochtime - (86400 * 5);
|
||||
my $locallessfive = localtime($epochlessfive);
|
||||
# Calc like `date --date 'TZ="Europe/Berlin" 0:00 5 days ago' +%s`)
|
||||
my ($day, $month, $year) = ($locallessfive->mday, $locallessfive->_mon, $locallessfive->year);
|
||||
my $checkpointStart = timelocal(0, 0, 0, $day, $month, $year);
|
||||
# for checkpoints
|
||||
my $halfday = 43200;
|
||||
|
||||
### JOB-ARCHIVE
|
||||
my $archiveTarget = './cc-backend/var/job-archive';
|
||||
my $archiveSrc = './source-data/job-archive-source';
|
||||
my @ArchiveClusters;
|
||||
|
||||
# Gen folder
|
||||
if ( not -d $archiveTarget ){
|
||||
mkdir( $archiveTarget ) or die "Couldn't create $archiveTarget directory, $!";
|
||||
}
|
||||
|
||||
# Get clusters by job-archive/$subfolder
|
||||
opendir my $dh, $archiveSrc or die "can't open directory: $!";
|
||||
while ( readdir $dh ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
|
||||
my $cluster = $_;
|
||||
push @ArchiveClusters, $cluster;
|
||||
}
|
||||
|
||||
# start for jobarchive
|
||||
foreach my $cluster ( @ArchiveClusters ) {
|
||||
print "Starting to update start- and stoptimes in job-archive for $cluster\n";
|
||||
|
||||
my $clusterTarget = "$archiveTarget/$cluster";
|
||||
|
||||
if ( not -d $clusterTarget ){
|
||||
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
|
||||
}
|
||||
|
||||
opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
|
||||
while ( readdir $dhLevel1 ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
my $level1 = $_;
|
||||
|
||||
if ( -d "$archiveSrc/$cluster/$level1" ) {
|
||||
opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
|
||||
while ( readdir $dhLevel2 ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
my $level2 = $_;
|
||||
my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
|
||||
my $jobOrigin = "$jobSource";
|
||||
my $jobTargetL1 = "$clusterTarget/$level1";
|
||||
my $jobTargetL2 = "$jobTargetL1/$level2";
|
||||
|
||||
# check if files are directly accessible (old format) else get subfolders as file and update path
|
||||
if ( ! -e "$jobSource/meta.json") {
|
||||
opendir(D, "$jobSource") || die "Can't open directory $jobSource: $!\n";
|
||||
my @folders = readdir(D);
|
||||
closedir(D);
|
||||
if (!@folders) {
|
||||
next;
|
||||
}
|
||||
|
||||
foreach my $folder ( @folders ) {
|
||||
next if $folder eq '.' or $folder eq '..';
|
||||
$jobSource = "$jobSource/".$folder;
|
||||
}
|
||||
}
|
||||
# check if subfolder contains file, else skip
|
||||
if ( ! -e "$jobSource/meta.json") {
|
||||
print "$jobSource skipped\n";
|
||||
next;
|
||||
}
|
||||
|
||||
open my $metafh, '<', "$jobSource/meta.json" or die "Can't open file $!";
|
||||
my $rawstr = do { local $/; <$metafh> };
|
||||
close($metafh);
|
||||
my $metadata = $json->decode($rawstr);
|
||||
|
||||
# NOTE Start meta.json iteration here
|
||||
# my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT;
|
||||
# Set new startTime: Between 5 days and 1 day before now
|
||||
|
||||
# Remove id from attributes
|
||||
$metadata->{startTime} = $epochtime - (int(rand(432000)) + 86400);
|
||||
$metadata->{stopTime} = $metadata->{startTime} + $metadata->{duration};
|
||||
|
||||
# Add starttime subfolder to target path
|
||||
my $jobTargetL3 = "$jobTargetL2/".$metadata->{startTime};
|
||||
|
||||
if ( not -d $jobTargetL1 ){
|
||||
mkdir( $jobTargetL1 ) or die "Couldn't create $jobTargetL1 directory, $!";
|
||||
}
|
||||
|
||||
if ( not -d $jobTargetL2 ){
|
||||
mkdir( $jobTargetL2 ) or die "Couldn't create $jobTargetL2 directory, $!";
|
||||
}
|
||||
|
||||
# target is not directory
|
||||
if ( not -d $jobTargetL3 ){
|
||||
mkdir( $jobTargetL3 ) or die "Couldn't create $jobTargetL3 directory, $!";
|
||||
|
||||
my $outstr = $json->encode($metadata);
|
||||
open my $metaout, '>', "$jobTargetL3/meta.json" or die "Can't write to file $!";
|
||||
print $metaout $outstr;
|
||||
close($metaout);
|
||||
|
||||
open my $datafh, '<', "$jobSource/data.json" or die "Can't open file $!";
|
||||
my $datastr = do { local $/; <$datafh> };
|
||||
close($datafh);
|
||||
|
||||
open my $dataout, '>', "$jobTargetL3/data.json" or die "Can't write to file $!";
|
||||
print $dataout $datastr;
|
||||
close($dataout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
print "Done for job-archive\n";
|
||||
sleep(1);
|
||||
exit;
|
||||
|
||||
## CHECKPOINTS
|
||||
my $checkpTarget = './cc-metric-store/var/checkpoints';
|
||||
my $checkpSource = './source-data/cc-metric-store-source/checkpoints';
|
||||
my @CheckpClusters;
|
||||
|
||||
# Gen folder
|
||||
if ( not -d $checkpTarget ){
|
||||
mkdir( $checkpTarget ) or die "Couldn't create $checkpTarget directory, $!";
|
||||
}
|
||||
|
||||
# Get clusters by cc-metric-store/$subfolder
|
||||
opendir my $dhc, $checkpSource or die "can't open directory: $!";
|
||||
while ( readdir $dhc ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
|
||||
my $cluster = $_;
|
||||
push @CheckpClusters, $cluster;
|
||||
}
|
||||
closedir($dhc);
|
||||
|
||||
# start for checkpoints
|
||||
foreach my $cluster ( @CheckpClusters ) {
|
||||
print "Starting to update checkpoint filenames and data starttimes for $cluster\n";
|
||||
|
||||
my $clusterTarget = "$checkpTarget/$cluster";
|
||||
|
||||
if ( not -d $clusterTarget ){
|
||||
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
|
||||
}
|
||||
|
||||
opendir my $dhLevel1, "$checkpSource/$cluster" or die "can't open directory: $!";
|
||||
while ( readdir $dhLevel1 ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
# Nodename as level1-folder
|
||||
my $level1 = $_;
|
||||
|
||||
if ( -d "$checkpSource/$cluster/$level1" ) {
|
||||
|
||||
my $nodeSource = "$checkpSource/$cluster/$level1/";
|
||||
my $nodeOrigin = "$nodeSource";
|
||||
my $nodeTarget = "$clusterTarget/$level1";
|
||||
my @files;
|
||||
|
||||
if ( -e "$nodeSource/1609459200.json") { # 1609459200 == First Checkpoint time in latest dump
|
||||
opendir(D, "$nodeSource") || die "Can't open directory $nodeSource: $!\n";
|
||||
while ( readdir D ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
my $nodeFile = $_;
|
||||
push @files, $nodeFile;
|
||||
}
|
||||
closedir(D);
|
||||
my $length = @files;
|
||||
if (!@files || $length != 14) { # needs 14 files == 7 days worth of data
|
||||
next;
|
||||
}
|
||||
} else {
|
||||
next;
|
||||
}
|
||||
|
||||
# sort for integer timestamp-filename-part (moduleless): Guarantees start with index == 0 == 1609459200.json
|
||||
my @sortedFiles = sort { ($a =~ /^([0-9]{10}).json$/)[0] <=> ($b =~ /^([0-9]{10}).json$/)[0] } @files;
|
||||
|
||||
if ( not -d $nodeTarget ){
|
||||
mkdir( $nodeTarget ) or die "Couldn't create $nodeTarget directory, $!";
|
||||
|
||||
while (my ($index, $file) = each(@sortedFiles)) {
|
||||
open my $checkfh, '<', "$nodeSource/$file" or die "Can't open file $!";
|
||||
my $rawstr = do { local $/; <$checkfh> };
|
||||
close($checkfh);
|
||||
my $checkpdata = $json->decode($rawstr);
|
||||
|
||||
my $newTimestamp = $checkpointStart + ($index * $halfday);
|
||||
# Get Diff from old Timestamp
|
||||
my $timeDiff = $newTimestamp - $checkpdata->{from};
|
||||
# Set new timestamp
|
||||
$checkpdata->{from} = $newTimestamp;
|
||||
|
||||
foreach my $metric (keys %{$checkpdata->{metrics}}) {
|
||||
$checkpdata->{metrics}->{$metric}->{start} += $timeDiff;
|
||||
}
|
||||
|
||||
my $outstr = $json->encode($checkpdata);
|
||||
|
||||
open my $checkout, '>', "$nodeTarget/$newTimestamp.json" or die "Can't write to file $!";
|
||||
print $checkout $outstr;
|
||||
close($checkout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir($dhLevel1);
|
||||
}
|
||||
print "Done for checkpoints\n";
|
||||
@@ -1,36 +0,0 @@
|
||||
# Docs for ClusterCockpit Searchbar
|
||||
|
||||
## Usage
|
||||
|
||||
* Searchtags are implemented as `type:<query>` search-string
|
||||
* Types `jobId, jobName, projectId, username, name, arrayJobId` for roles `admin` and `support`
|
||||
* `jobName` is jobName as persisted in `job.meta_data` table-column
|
||||
* `username` is actual account identifier as persisted in `job.user` table-column
|
||||
* `name` is account owners name as persisted in `user.name` table-column
|
||||
* Types `jobId, jobName, projectId, arrayJobId` for role `user`
|
||||
* Examples:
|
||||
* `jobName:myJob12`
|
||||
* `jobId:123456`
|
||||
* `username:abcd100`
|
||||
* `name:Paul`
|
||||
* If no searchTag used: Best guess search with the following hierarchy
|
||||
* `jobId -> username -> name -> projectId -> jobName`
|
||||
* Destinations:
|
||||
* JobId: Job-Table (Allows multiple identical matches, e.g. JobIds from different clusters)
|
||||
* JobName: Job-Table (Allows multiple identical matches, e.g. JobNames from different clusters)
|
||||
* ProjectId: Job-Table
|
||||
* Username: Users-Table
|
||||
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` is active by default and might filter out expected users.
|
||||
* Name: Users-Table
|
||||
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` is active by default and might filter out expected users.
|
||||
* ArrayJobId: Job-Table (Lists all Jobs of Queried ArrayJobId)
|
||||
* Best guess search always redirects to Job-Table or `/monitoring/user/$USER` (first username match)
|
||||
* Unprocessable queries will display messages detailing the cause (Info, Warning, Error)
|
||||
* Spaces trimmed (both for searchTag and queryString)
|
||||
* ` job12` == `job12`
|
||||
* `projectID : abcd ` == `projectId:abcd`
|
||||
* `jobName`- and `name-`queries work with a part of the target-string
|
||||
* `jobName:myjob` for jobName "myjob_cluster1"
|
||||
* `name:Paul` for name "Paul Atreides"
|
||||
|
||||
* JobName GQL Query is resolved as matching the query as a part of the whole metaData-JSON in the SQL DB.
|
||||
128
go.mod
128
go.mod
@@ -1,88 +1,98 @@
|
||||
module github.com/ClusterCockpit/cc-backend
|
||||
|
||||
go 1.18
|
||||
go 1.23.5
|
||||
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.17.36
|
||||
github.com/99designs/gqlgen v0.17.63
|
||||
github.com/ClusterCockpit/cc-lib v0.0.0-20250216162506-b737e48423c5
|
||||
github.com/ClusterCockpit/cc-units v0.4.0
|
||||
github.com/Masterminds/squirrel v1.5.3
|
||||
github.com/go-co-op/gocron v1.25.0
|
||||
github.com/go-ldap/ldap/v3 v3.4.4
|
||||
github.com/go-sql-driver/mysql v1.7.0
|
||||
github.com/golang-jwt/jwt/v4 v4.5.0
|
||||
github.com/golang-migrate/migrate/v4 v4.15.2
|
||||
github.com/google/gops v0.3.27
|
||||
github.com/gorilla/handlers v1.5.1
|
||||
github.com/gorilla/mux v1.8.0
|
||||
github.com/gorilla/sessions v1.2.1
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.12.2
|
||||
github.com/jmoiron/sqlx v1.3.5
|
||||
github.com/mattn/go-sqlite3 v1.14.16
|
||||
github.com/prometheus/client_golang v1.14.0
|
||||
github.com/prometheus/common v0.40.0
|
||||
github.com/Masterminds/squirrel v1.5.4
|
||||
github.com/coreos/go-oidc/v3 v3.11.0
|
||||
github.com/go-co-op/gocron/v2 v2.9.0
|
||||
github.com/go-ldap/ldap/v3 v3.4.8
|
||||
github.com/go-sql-driver/mysql v1.8.1
|
||||
github.com/golang-jwt/jwt/v5 v5.2.1
|
||||
github.com/golang-migrate/migrate/v4 v4.17.1
|
||||
github.com/google/gops v0.3.28
|
||||
github.com/gorilla/handlers v1.5.2
|
||||
github.com/gorilla/mux v1.8.1
|
||||
github.com/gorilla/sessions v1.4.0
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0
|
||||
github.com/jmoiron/sqlx v1.4.0
|
||||
github.com/mattn/go-sqlite3 v1.14.22
|
||||
github.com/prometheus/client_golang v1.20.5
|
||||
github.com/prometheus/common v0.55.0
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
|
||||
github.com/swaggo/http-swagger v1.3.3
|
||||
github.com/swaggo/swag v1.16.1
|
||||
github.com/vektah/gqlparser/v2 v2.5.8
|
||||
golang.org/x/crypto v0.12.0
|
||||
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
|
||||
github.com/swaggo/http-swagger v1.3.4
|
||||
github.com/swaggo/swag v1.16.4
|
||||
github.com/vektah/gqlparser/v2 v2.5.22
|
||||
golang.org/x/crypto v0.33.0
|
||||
golang.org/x/exp v0.0.0-20250215185904-eff6e970281f
|
||||
golang.org/x/oauth2 v0.21.0
|
||||
golang.org/x/time v0.10.0
|
||||
)
|
||||
|
||||
require (
|
||||
filippo.io/edwards25519 v1.1.0 // indirect
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
github.com/agnivade/levenshtein v1.1.1 // indirect
|
||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/containerd/containerd v1.6.18 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/deepmap/oapi-codegen v1.12.4 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.3 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.20.0 // indirect
|
||||
github.com/go-openapi/jsonreference v0.20.2 // indirect
|
||||
github.com/go-openapi/spec v0.20.9 // indirect
|
||||
github.com/go-openapi/swag v0.22.4 // indirect
|
||||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.6 // indirect
|
||||
github.com/expr-lang/expr v1.16.9 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.0.3 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.21.0 // indirect
|
||||
github.com/go-openapi/jsonreference v0.21.0 // indirect
|
||||
github.com/go-openapi/spec v0.21.0 // indirect
|
||||
github.com/go-openapi/swag v0.23.0 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.3 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
||||
github.com/influxdata/line-protocol/v2 v2.2.1 // indirect
|
||||
github.com/jonboulle/clockwork v0.4.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/jpillora/backoff v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/klauspost/compress v1.17.9 // indirect
|
||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/mailru/easyjson v0.9.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
||||
github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/prometheus/client_model v0.3.0 // indirect
|
||||
github.com/prometheus/procfs v0.9.0 // indirect
|
||||
github.com/nats-io/nats.go v1.39.0 // indirect
|
||||
github.com/nats-io/nkeys v0.4.9 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/oapi-codegen/runtime v1.1.1 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/swaggo/files v1.0.0 // indirect
|
||||
github.com/urfave/cli/v2 v2.25.7 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
go.uber.org/atomic v1.10.0 // indirect
|
||||
golang.org/x/mod v0.12.0 // indirect
|
||||
golang.org/x/net v0.14.0 // indirect
|
||||
golang.org/x/oauth2 v0.5.0 // indirect
|
||||
golang.org/x/sys v0.11.0 // indirect
|
||||
golang.org/x/text v0.12.0 // indirect
|
||||
golang.org/x/tools v0.12.0 // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/protobuf v1.30.0 // indirect
|
||||
github.com/sosodev/duration v1.3.1 // indirect
|
||||
github.com/swaggo/files v1.0.1 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.5 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
|
||||
go.uber.org/atomic v1.11.0 // indirect
|
||||
golang.org/x/mod v0.23.0 // indirect
|
||||
golang.org/x/net v0.35.0 // indirect
|
||||
golang.org/x/sync v0.11.0 // indirect
|
||||
golang.org/x/sys v0.30.0 // indirect
|
||||
golang.org/x/text v0.22.0 // indirect
|
||||
golang.org/x/tools v0.30.0 // indirect
|
||||
google.golang.org/protobuf v1.36.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
sigs.k8s.io/yaml v1.3.0 // indirect
|
||||
sigs.k8s.io/yaml v1.4.0 // indirect
|
||||
)
|
||||
|
||||
61
gqlgen.yml
61
gqlgen.yml
@@ -61,23 +61,50 @@ models:
|
||||
fields:
|
||||
partitions:
|
||||
resolver: true
|
||||
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
||||
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
||||
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
||||
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
||||
NullableFloat:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
||||
MetricScope:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
||||
MetricValue:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
||||
JobStatistics:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
||||
GlobalMetricListItem:
|
||||
{
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
|
||||
}
|
||||
ClusterSupport:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
|
||||
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
|
||||
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
||||
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
||||
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
||||
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
||||
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
||||
Resource:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
||||
JobState:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
||||
TimeRange:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
||||
IntRange:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
||||
JobMetric:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
||||
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
|
||||
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
|
||||
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
||||
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
|
||||
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
||||
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
||||
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
||||
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
||||
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
||||
MetricStatistics:
|
||||
{
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
|
||||
}
|
||||
MetricConfig:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
||||
SubClusterConfig:
|
||||
{
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
|
||||
}
|
||||
Accelerator:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
||||
Topology:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
||||
FilterRanges:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
||||
SubCluster:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
||||
StatsSeries:
|
||||
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
||||
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[Unit]
|
||||
Description=ClusterCockpit Web Server (Go edition)
|
||||
Description=ClusterCockpit Web Server
|
||||
Documentation=https://github.com/ClusterCockpit/cc-backend
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -14,13 +14,16 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -117,7 +120,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -144,23 +147,20 @@ func setup(t *testing.T) *api.RestApi {
|
||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||
|
||||
repository.Connect("sqlite3", dbfilepath)
|
||||
db := repository.GetConnection()
|
||||
|
||||
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
||||
if err := metricdata.Init(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
jobRepo := repository.GetJobRepository()
|
||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
||||
archiver.Start(repository.GetJobRepository())
|
||||
auth.Init()
|
||||
graph.Init()
|
||||
|
||||
return &api.RestApi{
|
||||
JobRepository: resolver.Repo,
|
||||
Resolver: resolver,
|
||||
}
|
||||
return api.New()
|
||||
}
|
||||
|
||||
func cleanup() {
|
||||
@@ -175,7 +175,6 @@ func cleanup() {
|
||||
func TestRestApi(t *testing.T) {
|
||||
restapi := setup(t)
|
||||
t.Cleanup(cleanup)
|
||||
|
||||
testData := schema.JobData{
|
||||
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
||||
schema.MetricScopeNode: {
|
||||
@@ -192,12 +191,18 @@ func TestRestApi(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||
return testData, nil
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
restapi.MountRoutes(r)
|
||||
r.PathPrefix("/api").Subrouter()
|
||||
r.StrictSlash(true)
|
||||
restapi.MountApiRoutes(r)
|
||||
|
||||
var TestJobId int64 = 123
|
||||
var TestClusterName string = "testcluster"
|
||||
var TestStartTime int64 = 123456789
|
||||
|
||||
const startJobBody string = `{
|
||||
"jobId": 123,
|
||||
@@ -213,7 +218,7 @@ func TestRestApi(t *testing.T) {
|
||||
"exclusive": 1,
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"tags": [{ "type": "testTagType", "name": "testTagName" }],
|
||||
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
@@ -224,28 +229,33 @@ func TestRestApi(t *testing.T) {
|
||||
"startTime": 123456789
|
||||
}`
|
||||
|
||||
var dbid int64
|
||||
const contextUserKey repository.ContextKey = "user"
|
||||
contextUserValue := &schema.User{
|
||||
Username: "testuser",
|
||||
Projects: make([]string, 0),
|
||||
Roles: []string{"user"},
|
||||
AuthType: 0,
|
||||
AuthSource: 2,
|
||||
}
|
||||
|
||||
if ok := t.Run("StartJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusCreated {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
var res api.StartJobApiResponse
|
||||
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
|
||||
resolver := graph.GetResolverInstance()
|
||||
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
|
||||
job.Tags, err = resolver.Job().Tags(ctx, job)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -269,11 +279,9 @@ func TestRestApi(t *testing.T) {
|
||||
t.Fatalf("unexpected job properties: %#v", job)
|
||||
}
|
||||
|
||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
|
||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
|
||||
t.Fatalf("unexpected tags: %#v", job.Tags)
|
||||
}
|
||||
|
||||
dbid = res.DBID
|
||||
}); !ok {
|
||||
return
|
||||
}
|
||||
@@ -289,17 +297,19 @@ func TestRestApi(t *testing.T) {
|
||||
|
||||
var stoppedJob *schema.Job
|
||||
if ok := t.Run("StopJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
restapi.JobRepository.WaitForArchiving()
|
||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
|
||||
archiver.WaitForArchiving()
|
||||
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -327,7 +337,7 @@ func TestRestApi(t *testing.T) {
|
||||
}
|
||||
|
||||
t.Run("CheckArchive", func(t *testing.T) {
|
||||
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
|
||||
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -341,10 +351,12 @@ func TestRestApi(t *testing.T) {
|
||||
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
|
||||
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusUnprocessableEntity {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
@@ -371,10 +383,12 @@ func TestRestApi(t *testing.T) {
|
||||
}`
|
||||
|
||||
ok := t.Run("StartJobFailed", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusCreated {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
@@ -384,8 +398,10 @@ func TestRestApi(t *testing.T) {
|
||||
t.Fatal("subtest failed")
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
const stopJobBodyFailed string = `{
|
||||
"jobId": 12345,
|
||||
"jobId": 12345,
|
||||
"cluster": "testcluster",
|
||||
|
||||
"jobState": "failed",
|
||||
@@ -393,16 +409,18 @@ func TestRestApi(t *testing.T) {
|
||||
}`
|
||||
|
||||
ok = t.Run("StopJobFailed", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
restapi.JobRepository.WaitForArchiving()
|
||||
archiver.WaitForArchiving()
|
||||
jobid, cluster := int64(12345), "testcluster"
|
||||
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
||||
if err != nil {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
57
internal/api/nats.go
Normal file
57
internal/api/nats.go
Normal file
@@ -0,0 +1,57 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package api
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
lp "github.com/ClusterCockpit/cc-lib/ccMessage"
|
||||
"github.com/ClusterCockpit/cc-lib/sinks"
|
||||
)
|
||||
|
||||
type NatsClient struct {
|
||||
SinkManager sinks.SinkManager
|
||||
SinkChannel chan lp.CCMessage
|
||||
}
|
||||
|
||||
var (
|
||||
initOnce sync.Once
|
||||
ni *NatsClient
|
||||
)
|
||||
|
||||
func Init(wg *sync.WaitGroup) {
|
||||
initOnce.Do(func() {
|
||||
ni = &NatsClient{}
|
||||
var err error
|
||||
|
||||
if len(config.Keys.SinkConfigFile) == 0 {
|
||||
log.Error("Sink configuration file must be set")
|
||||
return
|
||||
}
|
||||
ni.SinkManager, err = sinks.New(wg, config.Keys.SinkConfigFile)
|
||||
if err != nil {
|
||||
log.Error(err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
ni.SinkChannel = make(chan lp.CCMessage, 200)
|
||||
ni.SinkManager.AddInput(ni.SinkChannel)
|
||||
ni.SinkManager.Start()
|
||||
})
|
||||
}
|
||||
|
||||
func Shutdown() {
|
||||
if ni.SinkManager != nil {
|
||||
log.Debug("Shutdown SinkManager...")
|
||||
ni.SinkManager.Close()
|
||||
}
|
||||
}
|
||||
|
||||
func forwardJob(job schema.BaseJob) {
|
||||
payload := lp.NewEvent("start_job", nil , meta map[string]string, event string, tm time.Time)
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -19,12 +19,13 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/util"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -53,69 +54,87 @@ import (
|
||||
|
||||
type RestApi struct {
|
||||
JobRepository *repository.JobRepository
|
||||
Resolver *graph.Resolver
|
||||
Authentication *auth.Authentication
|
||||
MachineStateDir string
|
||||
RepositoryMutex sync.Mutex
|
||||
}
|
||||
|
||||
func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||
r = r.PathPrefix("/api").Subrouter()
|
||||
func New() *RestApi {
|
||||
return &RestApi{
|
||||
JobRepository: repository.GetJobRepository(),
|
||||
MachineStateDir: config.Keys.MachineStateDir,
|
||||
Authentication: auth.GetAuthInstance(),
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) MountApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/jobs/stop_job/{id}", api.stopJobById).Methods(http.MethodPost, http.MethodPut)
|
||||
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
|
||||
|
||||
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
|
||||
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
|
||||
r.HandleFunc("/jobs/edit_meta/{id}", api.editMeta).Methods(http.MethodPost, http.MethodPatch)
|
||||
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/delete_job/", api.deleteJobByRequest).Methods(http.MethodDelete)
|
||||
r.HandleFunc("/jobs/delete_job/{id}", api.deleteJobById).Methods(http.MethodDelete)
|
||||
r.HandleFunc("/jobs/delete_job_before/{ts}", api.deleteJobBefore).Methods(http.MethodDelete)
|
||||
|
||||
r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet)
|
||||
|
||||
if api.MachineStateDir != "" {
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
|
||||
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
|
||||
}
|
||||
|
||||
func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
if api.Authentication != nil {
|
||||
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
|
||||
r.HandleFunc("/roles/", api.getRoles).Methods(http.MethodGet)
|
||||
r.HandleFunc("/users/", api.createUser).Methods(http.MethodPost, http.MethodPut)
|
||||
r.HandleFunc("/users/", api.getUsers).Methods(http.MethodGet)
|
||||
r.HandleFunc("/users/", api.deleteUser).Methods(http.MethodDelete)
|
||||
r.HandleFunc("/user/{id}", api.updateUser).Methods(http.MethodPost)
|
||||
r.HandleFunc("/notice/", api.editNotice).Methods(http.MethodPost)
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) MountFrontendApiRoutes(r *mux.Router) {
|
||||
r.StrictSlash(true)
|
||||
|
||||
if api.Authentication != nil {
|
||||
r.HandleFunc("/jwt/", api.getJWT).Methods(http.MethodGet)
|
||||
r.HandleFunc("/configuration/", api.updateConfiguration).Methods(http.MethodPost)
|
||||
}
|
||||
}
|
||||
|
||||
// StartJobApiResponse model
|
||||
type StartJobApiResponse struct {
|
||||
// Database ID of new job
|
||||
DBID int64 `json:"id"`
|
||||
}
|
||||
|
||||
// DeleteJobApiResponse model
|
||||
type DeleteJobApiResponse struct {
|
||||
Message string `json:"msg"`
|
||||
}
|
||||
|
||||
// UpdateUserApiResponse model
|
||||
type UpdateUserApiResponse struct {
|
||||
// DefaultApiResponse model
|
||||
type DefaultJobApiResponse struct {
|
||||
Message string `json:"msg"`
|
||||
}
|
||||
|
||||
// StopJobApiRequest model
|
||||
type StopJobApiRequest struct {
|
||||
// Stop Time of job as epoch
|
||||
JobId *int64 `json:"jobId" example:"123000"`
|
||||
Cluster *string `json:"cluster" example:"fritz"`
|
||||
StartTime *int64 `json:"startTime" example:"1649723812"`
|
||||
State schema.JobState `json:"jobState" validate:"required" example:"completed"`
|
||||
StopTime int64 `json:"stopTime" validate:"required" example:"1649763839"`
|
||||
State schema.JobState `json:"jobState" validate:"required" example:"completed"` // Final job state
|
||||
JobId *int64 `json:"jobId" example:"123000"` // Cluster Job ID of job
|
||||
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
|
||||
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
|
||||
}
|
||||
|
||||
// DeleteJobApiRequest model
|
||||
@@ -132,6 +151,11 @@ type GetJobsApiResponse struct {
|
||||
Page int `json:"page"` // Page id returned
|
||||
}
|
||||
|
||||
// GetClustersApiResponse model
|
||||
type GetClustersApiResponse struct {
|
||||
Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
|
||||
}
|
||||
|
||||
// ErrorResponse model
|
||||
type ErrorResponse struct {
|
||||
// Statustext of Errorcode
|
||||
@@ -142,8 +166,15 @@ type ErrorResponse struct {
|
||||
// ApiTag model
|
||||
type ApiTag struct {
|
||||
// Tag Type
|
||||
Type string `json:"type" example:"Debug"`
|
||||
Name string `json:"name" example:"Testjob"` // Tag Name
|
||||
Type string `json:"type" example:"Debug"`
|
||||
Name string `json:"name" example:"Testjob"` // Tag Name
|
||||
Scope string `json:"scope" example:"global"` // Tag Scope for Frontend Display
|
||||
}
|
||||
|
||||
// ApiMeta model
|
||||
type EditMetaRequest struct {
|
||||
Key string `json:"key" example:"jobScript"`
|
||||
Value string `json:"value" example:"bash script"`
|
||||
}
|
||||
|
||||
type TagJobApiRequest []*ApiTag
|
||||
@@ -155,10 +186,15 @@ type GetJobApiResponse struct {
|
||||
Data []*JobMetricWithName
|
||||
}
|
||||
|
||||
type GetCompleteJobApiResponse struct {
|
||||
Meta *schema.Job
|
||||
Data schema.JobData
|
||||
}
|
||||
|
||||
type JobMetricWithName struct {
|
||||
Metric *schema.JobMetric `json:"metric"`
|
||||
Name string `json:"name"`
|
||||
Scope schema.MetricScope `json:"scope"`
|
||||
Metric *schema.JobMetric `json:"metric"`
|
||||
}
|
||||
|
||||
type ApiReturnedUser struct {
|
||||
@@ -223,6 +259,55 @@ func securedCheck(r *http.Request) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// getClusters godoc
|
||||
// @summary Lists all cluster configs
|
||||
// @tags Cluster query
|
||||
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
|
||||
// @produce json
|
||||
// @param cluster query string false "Job Cluster"
|
||||
// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /clusters/ [get]
|
||||
func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
bw := bufio.NewWriter(rw)
|
||||
defer bw.Flush()
|
||||
|
||||
var clusters []*schema.Cluster
|
||||
|
||||
if r.URL.Query().Has("cluster") {
|
||||
name := r.URL.Query().Get("cluster")
|
||||
cluster := archive.GetCluster(name)
|
||||
if cluster == nil {
|
||||
handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
clusters = append(clusters, cluster)
|
||||
} else {
|
||||
clusters = archive.Clusters
|
||||
}
|
||||
|
||||
payload := GetClustersApiResponse{
|
||||
Clusters: clusters,
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// getJobs godoc
|
||||
// @summary Lists all jobs
|
||||
// @tags Job query
|
||||
@@ -243,18 +328,10 @@ func securedCheck(r *http.Request) error {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/ [get]
|
||||
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
withMetadata := false
|
||||
filter := &model.JobFilter{}
|
||||
page := &model.PageRequest{ItemsPerPage: 25, Page: 1}
|
||||
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
|
||||
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
|
||||
|
||||
for key, vals := range r.URL.Query() {
|
||||
switch key {
|
||||
@@ -333,7 +410,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
StartTime: job.StartTime.Unix(),
|
||||
}
|
||||
|
||||
res.Tags, err = api.JobRepository.GetTags(&job.ID)
|
||||
res.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -342,10 +419,8 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
|
||||
res.Statistics, err = archive.GetStatistics(job)
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -369,15 +444,14 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
// getJobById godoc
|
||||
// @summary Get complete job meta and metric data
|
||||
// getCompleteJobById godoc
|
||||
// @summary Get job meta and optional all metric data
|
||||
// @tags Job query
|
||||
// @description Job to get is specified by database ID
|
||||
// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||
// @accept json
|
||||
// @produce json
|
||||
// @param id path int true "Database ID of Job"
|
||||
// @param request body api.GetJobApiRequest true "Array of metric names"
|
||||
// @param id path int true "Database ID of Job"
|
||||
// @param all-metrics query bool false "Include all available metrics"
|
||||
// @success 200 {object} api.GetJobApiResponse "Job resource"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
@@ -386,16 +460,8 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/{id} [post]
|
||||
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v",
|
||||
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// @router /jobs/{id} [get]
|
||||
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// Fetch job from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var job *schema.Job
|
||||
@@ -407,13 +473,118 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.FindById(id)
|
||||
job, err = api.JobRepository.FindById(r.Context(), id) // Get Job from Repo by ID
|
||||
} else {
|
||||
handleError(fmt.Errorf("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
|
||||
}
|
||||
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
var scopes []schema.MetricScope
|
||||
|
||||
if job.NumNodes == 1 {
|
||||
scopes = []schema.MetricScope{"core"}
|
||||
} else {
|
||||
scopes = []schema.MetricScope{"node"}
|
||||
}
|
||||
|
||||
var data schema.JobData
|
||||
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
resolution := 0
|
||||
|
||||
for _, mc := range metricConfigs {
|
||||
resolution = max(resolution, mc.Timestep)
|
||||
}
|
||||
|
||||
if r.URL.Query().Get("all-metrics") == "true" {
|
||||
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
|
||||
if err != nil {
|
||||
log.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("/api/job/%s: get job %d", id, job.JobID)
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
bw := bufio.NewWriter(rw)
|
||||
defer bw.Flush()
|
||||
|
||||
payload := GetCompleteJobApiResponse{
|
||||
Meta: job,
|
||||
Data: data,
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// getJobById godoc
|
||||
// @summary Get job meta and configurable metric data
|
||||
// @tags Job query
|
||||
// @description Job to get is specified by database ID
|
||||
// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||
// @accept json
|
||||
// @produce json
|
||||
// @param id path int true "Database ID of Job"
|
||||
// @param request body api.GetJobApiRequest true "Array of metric names"
|
||||
// @success 200 {object} api.GetJobApiResponse "Job resource"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/{id} [post]
|
||||
func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// Fetch job from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var job *schema.Job
|
||||
var err error
|
||||
if ok {
|
||||
id, e := strconv.ParseInt(id, 10, 64)
|
||||
if e != nil {
|
||||
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.FindById(r.Context(), id)
|
||||
} else {
|
||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
handleError(fmt.Errorf("finding job with db id %s failed: %w", id, err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
|
||||
}
|
||||
if _, err = api.JobRepository.FetchMetadata(job); err != nil {
|
||||
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -431,9 +602,16 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
scopes = []schema.MetricScope{"node"}
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadData(job, metrics, scopes, r.Context())
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
resolution := 0
|
||||
|
||||
for _, mc := range metricConfigs {
|
||||
resolution = max(resolution, mc.Timestep)
|
||||
}
|
||||
|
||||
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job data")
|
||||
log.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -464,10 +642,56 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
// editMeta godoc
|
||||
// @summary Edit meta-data json
|
||||
// @tags Job add and modify
|
||||
// @description Edit key value pairs in job metadata json
|
||||
// @description If a key already exists its content will be overwritten
|
||||
// @accept json
|
||||
// @produce json
|
||||
// @param id path int true "Job Database ID"
|
||||
// @param request body api.EditMetaRequest true "Kay value pair to add"
|
||||
// @success 200 {object} schema.Job "Updated job resource"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 404 {object} api.ErrorResponse "Job does not exist"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/edit_meta/{id} [post]
|
||||
func (api *RestApi) editMeta(rw http.ResponseWriter, r *http.Request) {
|
||||
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
var req EditMetaRequest
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
if err := api.JobRepository.UpdateMetadata(job, req.Key, req.Value); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(job)
|
||||
}
|
||||
|
||||
// tagJob godoc
|
||||
// @summary Adds one or more tags to a job
|
||||
// @tags Job add and modify
|
||||
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
|
||||
// @description Tag Scope for frontend visibility will default to "global" if none entered, other options: "admin" or specific username.
|
||||
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
|
||||
// @accept json
|
||||
// @produce json
|
||||
@@ -481,26 +705,19 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/tag_job/{id} [post]
|
||||
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
id, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
job, err := api.JobRepository.FindById(iid)
|
||||
job, err := api.JobRepository.FindById(r.Context(), id)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags, err = api.JobRepository.GetTags(&job.ID)
|
||||
job.Tags, err = api.JobRepository.GetTags(repository.GetUserFromContext(r.Context()), &job.ID)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
@@ -513,16 +730,17 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
for _, tag := range req {
|
||||
tagId, err := api.JobRepository.AddTagOrCreate(job.ID, tag.Type, tag.Name)
|
||||
tagId, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), job.ID, tag.Type, tag.Name, tag.Scope)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
job.Tags = append(job.Tags, &schema.Tag{
|
||||
ID: tagId,
|
||||
Type: tag.Type,
|
||||
Name: tag.Name,
|
||||
ID: tagId,
|
||||
Type: tag.Type,
|
||||
Name: tag.Name,
|
||||
Scope: tag.Scope,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -548,13 +766,6 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/start_job/ [post]
|
||||
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
req := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||
@@ -597,7 +808,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||
|
||||
for _, tag := range req.Tags {
|
||||
if _, err := api.JobRepository.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
||||
if _, err := api.JobRepository.AddTagOrCreate(repository.GetUserFromContext(r.Context()), id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
handleError(fmt.Errorf("adding tag to new job %d failed: %w", id, err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
@@ -607,68 +818,11 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
|
||||
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusCreated)
|
||||
json.NewEncoder(rw).Encode(StartJobApiResponse{
|
||||
DBID: id,
|
||||
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||
Message: "success",
|
||||
})
|
||||
}
|
||||
|
||||
// stopJobById godoc
|
||||
// @summary Marks job as completed and triggers archiving
|
||||
// @tags Job add and modify
|
||||
// @description Job to stop is specified by database ID. Only stopTime and final state are required in request body.
|
||||
// @description Returns full job resource information according to 'JobMeta' scheme.
|
||||
// @accept json
|
||||
// @produce json
|
||||
// @param id path int true "Database ID of Job"
|
||||
// @param request body api.StopJobApiRequest true "stopTime and final state in request body"
|
||||
// @success 200 {object} schema.JobMeta "Job resource"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/stop_job/{id} [post]
|
||||
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse request body: Only StopTime and State
|
||||
req := StopJobApiRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch job (that will be stopped) from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var job *schema.Job
|
||||
var err error
|
||||
if ok {
|
||||
id, e := strconv.ParseInt(id, 10, 64)
|
||||
if e != nil {
|
||||
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.FindById(id)
|
||||
} else {
|
||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
api.checkAndHandleStopJob(rw, job, req)
|
||||
}
|
||||
|
||||
// stopJobByRequest godoc
|
||||
// @summary Marks job as completed and triggers archiving
|
||||
// @tags Job add and modify
|
||||
@@ -686,13 +840,6 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/stop_job/ [post]
|
||||
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse request body
|
||||
req := StopJobApiRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
@@ -708,8 +855,8 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
// log.Printf("loading db job for stopJobByRequest... : stopJobApiRequest=%v", req)
|
||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
@@ -734,11 +881,6 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/delete_job/{id} [delete]
|
||||
func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch job (that will be stopped) from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var err error
|
||||
@@ -760,7 +902,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
|
||||
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||
Message: fmt.Sprintf("Successfully deleted job %s", id),
|
||||
})
|
||||
}
|
||||
@@ -782,12 +924,6 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/delete_job/ [delete]
|
||||
func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse request body
|
||||
req := DeleteJobApiRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
@@ -804,7 +940,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
|
||||
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
@@ -818,7 +953,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
|
||||
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||
Message: fmt.Sprintf("Successfully deleted job %d", job.ID),
|
||||
})
|
||||
}
|
||||
@@ -839,11 +974,6 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/delete_job_before/{ts} [delete]
|
||||
func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil && !user.HasRole(schema.RoleApi) {
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
var cnt int
|
||||
// Fetch job (that will be stopped) from db
|
||||
id, ok := mux.Vars(r)["ts"]
|
||||
@@ -867,21 +997,20 @@ func (api *RestApi) deleteJobBefore(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
json.NewEncoder(rw).Encode(DeleteJobApiResponse{
|
||||
json.NewEncoder(rw).Encode(DefaultJobApiResponse{
|
||||
Message: fmt.Sprintf("Successfully deleted %d jobs", cnt),
|
||||
})
|
||||
}
|
||||
|
||||
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
|
||||
|
||||
// Sanity checks
|
||||
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
|
||||
handleError(errors.New("stopTime must be larger than startTime and only running jobs can be stopped"), http.StatusBadRequest, rw)
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d and only running jobs can be stopped (state is: %s)", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix(), job.State), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if req.State != "" && !req.State.Valid() {
|
||||
handleError(fmt.Errorf("invalid job state: %#v", req.State), http.StatusBadRequest, rw)
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : invalid requested job state: %#v", job.JobID, job.ID, job.Cluster, req.State), http.StatusBadRequest, rw)
|
||||
return
|
||||
} else if req.State == "" {
|
||||
req.State = schema.JobStateCompleted
|
||||
@@ -891,11 +1020,11 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
job.Duration = int32(req.StopTime - job.StartTime.Unix())
|
||||
job.State = req.State
|
||||
if err := api.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
handleError(fmt.Errorf("marking job as stopped failed: %w", err), http.StatusInternalServerError, rw)
|
||||
handleError(fmt.Errorf("jobId %d (id %d) on %s : marking job as '%s' (duration: %d) in DB failed: %w", job.JobID, job.ID, job.Cluster, job.State, job.Duration, err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime)
|
||||
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s, duration=%d, state=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||
|
||||
// Send a response (with status OK). This means that erros that happen from here on forward
|
||||
// can *NOT* be communicated to the client. If reading from a MetricDataRepository or
|
||||
@@ -910,7 +1039,7 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
api.JobRepository.TriggerArchiving(job)
|
||||
archiver.TriggerArchiving(job)
|
||||
}
|
||||
|
||||
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
@@ -938,7 +1067,8 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
} `json:"error"`
|
||||
}
|
||||
|
||||
data, err := api.Resolver.Query().JobMetrics(r.Context(), id, metrics, scopes)
|
||||
resolver := graph.GetResolverInstance()
|
||||
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||
if err != nil {
|
||||
json.NewEncoder(rw).Encode(Respone{
|
||||
Error: &struct {
|
||||
@@ -1015,12 +1145,13 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||
Password: password,
|
||||
Email: email,
|
||||
Projects: []string{project},
|
||||
Roles: []string{role}}); err != nil {
|
||||
Roles: []string{role},
|
||||
}); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Write([]byte(fmt.Sprintf("User %v successfully created!\n", username)))
|
||||
fmt.Fprintf(rw, "User %v successfully created!\n", username)
|
||||
}
|
||||
|
||||
// deleteUser godoc
|
||||
@@ -1165,6 +1296,69 @@ func (api *RestApi) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
// editNotice godoc
|
||||
// @summary Updates or empties the notice box content
|
||||
// @tags User
|
||||
// @description Modifies the content of notice.txt, shown as notice box on the homepage.
|
||||
// @description If more than one formValue is set then only the highest priority field is used.
|
||||
// @description Only accessible from IPs registered with apiAllowedIPs configuration option.
|
||||
// @accept mpfd
|
||||
// @produce plain
|
||||
// @param new-content formData string false "Priority 1: New content to display"
|
||||
// @success 200 {string} string "Success Response Message"
|
||||
// @failure 400 {string} string "Bad Request"
|
||||
// @failure 401 {string} string "Unauthorized"
|
||||
// @failure 403 {string} string "Forbidden"
|
||||
// @failure 422 {string} string "Unprocessable Entity: The user could not be updated"
|
||||
// @failure 500 {string} string "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /notice/ [post]
|
||||
func (api *RestApi) editNotice(rw http.ResponseWriter, r *http.Request) {
|
||||
err := securedCheck(r)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
http.Error(rw, "Only admins are allowed to update the notice.txt file", http.StatusForbidden)
|
||||
return
|
||||
}
|
||||
|
||||
// Get Value
|
||||
newContent := r.FormValue("new-content")
|
||||
|
||||
// Check FIle
|
||||
noticeExists := util.CheckFileExists("./var/notice.txt")
|
||||
if !noticeExists {
|
||||
ntxt, err := os.Create("./var/notice.txt")
|
||||
if err != nil {
|
||||
log.Errorf("Creating ./var/notice.txt failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
return
|
||||
}
|
||||
ntxt.Close()
|
||||
}
|
||||
|
||||
if newContent != "" {
|
||||
if err := os.WriteFile("./var/notice.txt", []byte(newContent), 0o666); err != nil {
|
||||
log.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
return
|
||||
} else {
|
||||
rw.Write([]byte("Update Notice Content Success"))
|
||||
}
|
||||
} else {
|
||||
if err := os.WriteFile("./var/notice.txt", []byte(""), 0o666); err != nil {
|
||||
log.Errorf("Writing to ./var/notice.txt failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
return
|
||||
} else {
|
||||
rw.Write([]byte("Empty Notice Content Success"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (api *RestApi) getJWT(rw http.ResponseWriter, r *http.Request) {
|
||||
err := securedCheck(r)
|
||||
if err != nil {
|
||||
@@ -1225,7 +1419,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
|
||||
rw.Header().Set("Content-Type", "text/plain")
|
||||
key, value := r.FormValue("key"), r.FormValue("value")
|
||||
|
||||
fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
|
||||
// fmt.Printf("REST > KEY: %#v\nVALUE: %#v\n", key, value)
|
||||
|
||||
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, repository.GetUserFromContext(r.Context())); err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
|
||||
|
||||
94
internal/archiver/archiveWorker.go
Normal file
94
internal/archiver/archiveWorker.go
Normal file
@@ -0,0 +1,94 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
var (
|
||||
archivePending sync.WaitGroup
|
||||
archiveChannel chan *schema.Job
|
||||
jobRepo *repository.JobRepository
|
||||
)
|
||||
|
||||
func Start(r *repository.JobRepository) {
|
||||
archiveChannel = make(chan *schema.Job, 128)
|
||||
jobRepo = r
|
||||
|
||||
go archivingWorker()
|
||||
}
|
||||
|
||||
// Archiving worker thread
|
||||
func archivingWorker() {
|
||||
for {
|
||||
select {
|
||||
case job, ok := <-archiveChannel:
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
start := time.Now()
|
||||
// not using meta data, called to load JobMeta into Cache?
|
||||
// will fail if job meta not in repository
|
||||
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||
// TODO: Maybe use context with cancel/timeout here
|
||||
jobMeta, err := ArchiveJob(job, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
stmt := sq.Update("job").Where("job.id = ?", job.ID)
|
||||
|
||||
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
// Update the jobs database entry one last time:
|
||||
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
|
||||
if err := jobRepo.Execute(stmt); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||
archivePending.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
func TriggerArchiving(job *schema.Job) {
|
||||
if archiveChannel == nil {
|
||||
log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
|
||||
}
|
||||
|
||||
archivePending.Add(1)
|
||||
archiveChannel <- job
|
||||
}
|
||||
|
||||
// Wait for background thread to finish pending archiving operations
|
||||
func WaitForArchiving() {
|
||||
// close channel and wait for worker to process remaining jobs
|
||||
archivePending.Wait()
|
||||
}
|
||||
82
internal/archiver/archiver.go
Normal file
82
internal/archiver/archiver.go
Normal file
@@ -0,0 +1,82 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
// FIXME: Add a config option for this
|
||||
if job.NumNodes <= 8 {
|
||||
// This will add the native scope if core scope is not available
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
|
||||
if job.NumAcc > 0 {
|
||||
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||
}
|
||||
|
||||
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
|
||||
if err != nil {
|
||||
log.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// This should never happen ?
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
},
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
// If the file based archive is disabled,
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if config.Keys.DisableArchive {
|
||||
return jobMeta, nil
|
||||
}
|
||||
|
||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -10,10 +10,14 @@ import (
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
@@ -26,19 +30,37 @@ type Authenticator interface {
|
||||
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
||||
}
|
||||
|
||||
type Authentication struct {
|
||||
sessionStore *sessions.CookieStore
|
||||
SessionMaxAge time.Duration
|
||||
var (
|
||||
initOnce sync.Once
|
||||
authInstance *Authentication
|
||||
)
|
||||
|
||||
authenticators []Authenticator
|
||||
var ipUserLimiters sync.Map
|
||||
|
||||
func getIPUserLimiter(ip, username string) *rate.Limiter {
|
||||
key := ip + ":" + username
|
||||
limiter, ok := ipUserLimiters.Load(key)
|
||||
if !ok {
|
||||
newLimiter := rate.NewLimiter(rate.Every(time.Hour/10), 10)
|
||||
ipUserLimiters.Store(key, newLimiter)
|
||||
return newLimiter
|
||||
}
|
||||
return limiter.(*rate.Limiter)
|
||||
}
|
||||
|
||||
type Authentication struct {
|
||||
sessionStore *sessions.CookieStore
|
||||
LdapAuth *LdapAuthenticator
|
||||
JwtAuth *JWTAuthenticator
|
||||
LocalAuth *LocalAuthenticator
|
||||
authenticators []Authenticator
|
||||
SessionMaxAge time.Duration
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthViaSession(
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, error) {
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
session, err := auth.sessionStore.Get(r, "session")
|
||||
if err != nil {
|
||||
log.Error("Error while getting session store")
|
||||
@@ -62,81 +84,161 @@ func (auth *Authentication) AuthViaSession(
|
||||
}, nil
|
||||
}
|
||||
|
||||
func Init() (*Authentication, error) {
|
||||
auth := &Authentication{}
|
||||
func Init() {
|
||||
initOnce.Do(func() {
|
||||
authInstance = &Authentication{}
|
||||
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing authentication -> decoding session key failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
}
|
||||
|
||||
if config.Keys.LdapConfig != nil {
|
||||
ldapAuth := &LdapAuthenticator{}
|
||||
if err := ldapAuth.Init(); err != nil {
|
||||
log.Warn("Error while initializing authentication -> ldapAuth init failed")
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||
}
|
||||
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
auth.LdapAuth = ldapAuth
|
||||
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
|
||||
}
|
||||
} else {
|
||||
log.Info("Missing LDAP configuration: No LDAP support!")
|
||||
}
|
||||
|
||||
if config.Keys.JwtConfig != nil {
|
||||
auth.JwtAuth = &JWTAuthenticator{}
|
||||
if err := auth.JwtAuth.Init(); err != nil {
|
||||
log.Error("Error while initializing authentication -> jwtAuth init failed")
|
||||
return nil, err
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
log.Fatal("Error while initializing authentication -> decoding session key failed")
|
||||
}
|
||||
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||
}
|
||||
|
||||
jwtSessionAuth := &JWTSessionAuthenticator{}
|
||||
if err := jwtSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtSessionAuth init failed: No JWT login support!")
|
||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err == nil {
|
||||
authInstance.SessionMaxAge = d
|
||||
}
|
||||
|
||||
if config.Keys.LdapConfig != nil {
|
||||
ldapAuth := &LdapAuthenticator{}
|
||||
if err := ldapAuth.Init(); err != nil {
|
||||
log.Warn("Error while initializing authentication -> ldapAuth init failed")
|
||||
} else {
|
||||
authInstance.LdapAuth = ldapAuth
|
||||
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
|
||||
}
|
||||
} else {
|
||||
auth.authenticators = append(auth.authenticators, jwtSessionAuth)
|
||||
log.Info("Missing LDAP configuration: No LDAP support!")
|
||||
}
|
||||
|
||||
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
||||
if err := jwtCookieSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
||||
if config.Keys.JwtConfig != nil {
|
||||
authInstance.JwtAuth = &JWTAuthenticator{}
|
||||
if err := authInstance.JwtAuth.Init(); err != nil {
|
||||
log.Fatal("Error while initializing authentication -> jwtAuth init failed")
|
||||
}
|
||||
|
||||
jwtSessionAuth := &JWTSessionAuthenticator{}
|
||||
if err := jwtSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtSessionAuth init failed: No JWT login support!")
|
||||
} else {
|
||||
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
|
||||
}
|
||||
|
||||
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
||||
if err := jwtCookieSessionAuth.Init(); err != nil {
|
||||
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
||||
} else {
|
||||
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
|
||||
}
|
||||
} else {
|
||||
auth.authenticators = append(auth.authenticators, jwtCookieSessionAuth)
|
||||
log.Info("Missing JWT configuration: No JWT token support!")
|
||||
}
|
||||
} else {
|
||||
log.Info("Missing JWT configuration: No JWT token support!")
|
||||
|
||||
authInstance.LocalAuth = &LocalAuthenticator{}
|
||||
if err := authInstance.LocalAuth.Init(); err != nil {
|
||||
log.Fatal("Error while initializing authentication -> localAuth init failed")
|
||||
}
|
||||
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
|
||||
})
|
||||
}
|
||||
|
||||
func GetAuthInstance() *Authentication {
|
||||
if authInstance == nil {
|
||||
log.Fatal("Authentication module not initialized!")
|
||||
}
|
||||
|
||||
auth.LocalAuth = &LocalAuthenticator{}
|
||||
if err := auth.LocalAuth.Init(); err != nil {
|
||||
log.Error("Error while initializing authentication -> localAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
|
||||
return authInstance
|
||||
}
|
||||
|
||||
return auth, nil
|
||||
func handleTokenUser(tokenUser *schema.User) {
|
||||
r := repository.GetUserRepository()
|
||||
dbUser, err := r.GetUser(tokenUser.Username)
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
|
||||
} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
|
||||
if err := r.AddUser(tokenUser); err != nil {
|
||||
log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
|
||||
}
|
||||
} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
|
||||
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
|
||||
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func handleOIDCUser(OIDCUser *schema.User) {
|
||||
r := repository.GetUserRepository()
|
||||
dbUser, err := r.GetUser(OIDCUser.Username)
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
|
||||
} else if err == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
|
||||
if err := r.AddUser(OIDCUser); err != nil {
|
||||
log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
|
||||
}
|
||||
} else if err == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
|
||||
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
|
||||
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
|
||||
session, err := auth.sessionStore.New(r, "session")
|
||||
if err != nil {
|
||||
log.Errorf("session creation failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return err
|
||||
}
|
||||
|
||||
if auth.SessionMaxAge != 0 {
|
||||
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
||||
}
|
||||
if config.Keys.HttpsCertFile == "" && config.Keys.HttpsKeyFile == "" {
|
||||
session.Options.Secure = false
|
||||
}
|
||||
session.Options.SameSite = http.SameSiteStrictMode
|
||||
session.Values["username"] = user.Username
|
||||
session.Values["projects"] = user.Projects
|
||||
session.Values["roles"] = user.Roles
|
||||
if err := auth.sessionStore.Save(r, rw, session); err != nil {
|
||||
log.Warnf("session save failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) Login(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error)) http.Handler {
|
||||
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
username := r.FormValue("username")
|
||||
var dbUser *schema.User
|
||||
ip, _, err := net.SplitHostPort(r.RemoteAddr)
|
||||
if err != nil {
|
||||
ip = r.RemoteAddr
|
||||
}
|
||||
|
||||
username := r.FormValue("username")
|
||||
|
||||
limiter := getIPUserLimiter(ip, username)
|
||||
if !limiter.Allow() {
|
||||
log.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
|
||||
onfailure(rw, r, errors.New("Too many login attempts, try again in a few minutes."))
|
||||
return
|
||||
}
|
||||
|
||||
var dbUser *schema.User
|
||||
if username != "" {
|
||||
var err error
|
||||
dbUser, err = repository.GetUserRepository().GetUser(username)
|
||||
@@ -161,28 +263,19 @@ func (auth *Authentication) Login(
|
||||
return
|
||||
}
|
||||
|
||||
session, err := auth.sessionStore.New(r, "session")
|
||||
if err != nil {
|
||||
log.Errorf("session creation failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if auth.SessionMaxAge != 0 {
|
||||
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
||||
}
|
||||
session.Values["username"] = user.Username
|
||||
session.Values["projects"] = user.Projects
|
||||
session.Values["roles"] = user.Roles
|
||||
if err := auth.sessionStore.Save(r, rw, session); err != nil {
|
||||
log.Warnf("session save failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
if err := auth.SaveSession(rw, r, user); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
|
||||
if r.FormValue("redirect") != "" {
|
||||
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
@@ -193,37 +286,146 @@ func (auth *Authentication) Login(
|
||||
|
||||
func (auth *Authentication) Auth(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error)) http.Handler {
|
||||
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("authentication failed: %s", err.Error())
|
||||
log.Infof("auth -> authentication failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
if user == nil {
|
||||
user, err = auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("authentication failed: %s", err.Error())
|
||||
log.Infof("auth -> authentication failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if user != nil {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
log.Debug("authentication failed")
|
||||
log.Info("auth -> authentication failed")
|
||||
onfailure(rw, r, errors.New("unauthorized (please login first)"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
switch {
|
||||
case len(user.Roles) == 1:
|
||||
if user.HasRole(schema.RoleApi) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
case len(user.Roles) >= 2:
|
||||
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
default:
|
||||
log.Info("auth api -> authentication failed: missing role")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
}
|
||||
}
|
||||
log.Info("auth api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthUserApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth user api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
switch {
|
||||
case len(user.Roles) == 1:
|
||||
if user.HasRole(schema.RoleApi) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
case len(user.Roles) >= 2:
|
||||
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
default:
|
||||
log.Info("auth user api -> authentication failed: missing role")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
}
|
||||
}
|
||||
log.Info("auth user api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthConfigApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth config api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil && user.HasRole(schema.RoleAdmin) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
log.Info("auth config api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthFrontendApi(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
log.Infof("auth frontend api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
log.Info("auth frontend api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
session, err := auth.sessionStore.Get(r, "session")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -17,7 +17,7 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/golang-jwt/jwt/v4"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
type JWTAuthenticator struct {
|
||||
@@ -49,8 +49,8 @@ func (ja *JWTAuthenticator) Init() error {
|
||||
|
||||
func (ja *JWTAuthenticator) AuthViaJWT(
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, error) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
rawtoken := r.Header.Get("X-Auth-Token")
|
||||
if rawtoken == "" {
|
||||
rawtoken = r.Header.Get("Authorization")
|
||||
@@ -73,9 +73,9 @@ func (ja *JWTAuthenticator) AuthViaJWT(
|
||||
log.Warn("Error while parsing JWT token")
|
||||
return nil, err
|
||||
}
|
||||
if err := token.Claims.Valid(); err != nil {
|
||||
if !token.Valid {
|
||||
log.Warn("jwt token claims are not valid")
|
||||
return nil, err
|
||||
return nil, errors.New("jwt token claims are not valid")
|
||||
}
|
||||
|
||||
// Token is valid, extract payload
|
||||
@@ -88,7 +88,6 @@ func (ja *JWTAuthenticator) AuthViaJWT(
|
||||
if config.Keys.JwtConfig.ValidateUser {
|
||||
ur := repository.GetUserRepository()
|
||||
user, err := ur.GetUser(sub)
|
||||
|
||||
// Deny any logins for unknown usernames
|
||||
if err != nil {
|
||||
log.Warn("Could not find user from JWT in internal database.")
|
||||
@@ -117,7 +116,6 @@ func (ja *JWTAuthenticator) AuthViaJWT(
|
||||
|
||||
// Generate a new JWT that can be used for authentication
|
||||
func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
|
||||
|
||||
if ja.privateKey == nil {
|
||||
return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -17,7 +17,7 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/golang-jwt/jwt/v4"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
type JWTCookieSessionAuthenticator struct {
|
||||
@@ -90,8 +90,8 @@ func (ja *JWTCookieSessionAuthenticator) CanLogin(
|
||||
user *schema.User,
|
||||
username string,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, bool) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, bool) {
|
||||
jc := config.Keys.JwtConfig
|
||||
cookieName := ""
|
||||
if jc.CookieName != "" {
|
||||
@@ -113,8 +113,8 @@ func (ja *JWTCookieSessionAuthenticator) CanLogin(
|
||||
func (ja *JWTCookieSessionAuthenticator) Login(
|
||||
user *schema.User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, error) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
jc := config.Keys.JwtConfig
|
||||
jwtCookie, err := r.Cookie(jc.CookieName)
|
||||
var rawtoken string
|
||||
@@ -144,10 +144,9 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check token validity and extract paypload
|
||||
if err := token.Claims.Valid(); err != nil {
|
||||
if !token.Valid {
|
||||
log.Warn("jwt token claims are not valid")
|
||||
return nil, err
|
||||
return nil, errors.New("jwt token claims are not valid")
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
@@ -199,10 +198,8 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
||||
AuthSource: schema.AuthViaToken,
|
||||
}
|
||||
|
||||
if jc.SyncUserOnLogin {
|
||||
if err := repository.GetUserRepository().AddUser(user); err != nil {
|
||||
log.Errorf("Error while adding user '%s' to DB", user.Username)
|
||||
}
|
||||
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -17,7 +17,7 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/golang-jwt/jwt/v4"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
type JWTSessionAuthenticator struct {
|
||||
@@ -44,8 +44,8 @@ func (ja *JWTSessionAuthenticator) CanLogin(
|
||||
user *schema.User,
|
||||
username string,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, bool) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, bool) {
|
||||
return user, r.Header.Get("Authorization") != "" ||
|
||||
r.URL.Query().Get("login-token") != ""
|
||||
}
|
||||
@@ -53,8 +53,8 @@ func (ja *JWTSessionAuthenticator) CanLogin(
|
||||
func (ja *JWTSessionAuthenticator) Login(
|
||||
user *schema.User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, error) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
rawtoken := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")
|
||||
if rawtoken == "" {
|
||||
rawtoken = r.URL.Query().Get("login-token")
|
||||
@@ -71,9 +71,9 @@ func (ja *JWTSessionAuthenticator) Login(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err = token.Claims.Valid(); err != nil {
|
||||
if !token.Valid {
|
||||
log.Warn("jwt token claims are not valid")
|
||||
return nil, err
|
||||
return nil, errors.New("jwt token claims are not valid")
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
@@ -138,10 +138,8 @@ func (ja *JWTSessionAuthenticator) Login(
|
||||
AuthSource: schema.AuthViaToken,
|
||||
}
|
||||
|
||||
if config.Keys.JwtConfig.SyncUserOnLogin {
|
||||
if err := repository.GetUserRepository().AddUser(user); err != nil {
|
||||
log.Errorf("Error while adding user '%s' to DB", user.Username)
|
||||
}
|
||||
if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -10,7 +10,6 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
@@ -21,7 +20,7 @@ import (
|
||||
|
||||
type LdapAuthenticator struct {
|
||||
syncPassword string
|
||||
UserAttr string
|
||||
UserAttr string
|
||||
}
|
||||
|
||||
var _ Authenticator = (*LdapAuthenticator)(nil)
|
||||
@@ -34,33 +33,6 @@ func (la *LdapAuthenticator) Init() error {
|
||||
|
||||
lc := config.Keys.LdapConfig
|
||||
|
||||
if lc.SyncInterval != "" {
|
||||
interval, err := time.ParseDuration(lc.SyncInterval)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v",
|
||||
lc.SyncInterval)
|
||||
return err
|
||||
}
|
||||
|
||||
if interval == 0 {
|
||||
log.Info("Sync interval is zero")
|
||||
return nil
|
||||
}
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(interval)
|
||||
for t := range ticker.C {
|
||||
log.Printf("sync started at %s", t.Format(time.RFC3339))
|
||||
if err := la.Sync(); err != nil {
|
||||
log.Errorf("sync failed: %s", err.Error())
|
||||
}
|
||||
log.Print("sync done")
|
||||
}
|
||||
}()
|
||||
} else {
|
||||
log.Info("LDAP configuration key sync_interval invalid")
|
||||
}
|
||||
|
||||
if lc.UserAttr != "" {
|
||||
la.UserAttr = lc.UserAttr
|
||||
} else {
|
||||
@@ -74,8 +46,8 @@ func (la *LdapAuthenticator) CanLogin(
|
||||
user *schema.User,
|
||||
username string,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, bool) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, bool) {
|
||||
lc := config.Keys.LdapConfig
|
||||
|
||||
if user != nil {
|
||||
@@ -138,8 +110,8 @@ func (la *LdapAuthenticator) CanLogin(
|
||||
func (la *LdapAuthenticator) Login(
|
||||
user *schema.User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*schema.User, error) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
l, err := la.getLdapConnection(false)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting ldap connection")
|
||||
@@ -238,7 +210,6 @@ func (la *LdapAuthenticator) Sync() error {
|
||||
}
|
||||
|
||||
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
|
||||
|
||||
lc := config.Keys.LdapConfig
|
||||
conn, err := ldap.DialURL(lc.Url)
|
||||
if err != nil {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
196
internal/auth/oidc.go
Normal file
196
internal/auth/oidc.go
Normal file
@@ -0,0 +1,196 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package auth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/coreos/go-oidc/v3/oidc"
|
||||
"github.com/gorilla/mux"
|
||||
"golang.org/x/oauth2"
|
||||
)
|
||||
|
||||
type OIDC struct {
|
||||
client *oauth2.Config
|
||||
provider *oidc.Provider
|
||||
authentication *Authentication
|
||||
clientID string
|
||||
}
|
||||
|
||||
func randString(nByte int) (string, error) {
|
||||
b := make([]byte, nByte)
|
||||
if _, err := io.ReadFull(rand.Reader, b); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return base64.RawURLEncoding.EncodeToString(b), nil
|
||||
}
|
||||
|
||||
func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value string) {
|
||||
c := &http.Cookie{
|
||||
Name: name,
|
||||
Value: value,
|
||||
MaxAge: int(time.Hour.Seconds()),
|
||||
Secure: r.TLS != nil,
|
||||
HttpOnly: true,
|
||||
}
|
||||
http.SetCookie(w, c)
|
||||
}
|
||||
|
||||
func NewOIDC(a *Authentication) *OIDC {
|
||||
provider, err := oidc.NewProvider(context.Background(), config.Keys.OpenIDConfig.Provider)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
clientID := os.Getenv("OID_CLIENT_ID")
|
||||
if clientID == "" {
|
||||
log.Warn("environment variable 'OID_CLIENT_ID' not set (Open ID connect auth will not work)")
|
||||
}
|
||||
clientSecret := os.Getenv("OID_CLIENT_SECRET")
|
||||
if clientSecret == "" {
|
||||
log.Warn("environment variable 'OID_CLIENT_SECRET' not set (Open ID connect auth will not work)")
|
||||
}
|
||||
|
||||
client := &oauth2.Config{
|
||||
ClientID: clientID,
|
||||
ClientSecret: clientSecret,
|
||||
Endpoint: provider.Endpoint(),
|
||||
RedirectURL: "oidc-callback",
|
||||
Scopes: []string{oidc.ScopeOpenID, "profile", "email"},
|
||||
}
|
||||
|
||||
oa := &OIDC{provider: provider, client: client, clientID: clientID, authentication: a}
|
||||
|
||||
return oa
|
||||
}
|
||||
|
||||
func (oa *OIDC) RegisterEndpoints(r *mux.Router) {
|
||||
r.HandleFunc("/oidc-login", oa.OAuth2Login)
|
||||
r.HandleFunc("/oidc-callback", oa.OAuth2Callback)
|
||||
}
|
||||
|
||||
func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
|
||||
c, err := r.Cookie("state")
|
||||
if err != nil {
|
||||
http.Error(rw, "state cookie not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
state := c.Value
|
||||
|
||||
c, err = r.Cookie("verifier")
|
||||
if err != nil {
|
||||
http.Error(rw, "verifier cookie not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
codeVerifier := c.Value
|
||||
|
||||
_ = r.ParseForm()
|
||||
if r.Form.Get("state") != state {
|
||||
http.Error(rw, "State invalid", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
code := r.Form.Get("code")
|
||||
if code == "" {
|
||||
http.Error(rw, "Code not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
token, err := oa.client.Exchange(context.Background(), code, oauth2.VerifierOption(codeVerifier))
|
||||
if err != nil {
|
||||
http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
userInfo, err := oa.provider.UserInfo(context.Background(), oauth2.StaticTokenSource(token))
|
||||
if err != nil {
|
||||
http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// // Extract the ID Token from OAuth2 token.
|
||||
// rawIDToken, ok := token.Extra("id_token").(string)
|
||||
// if !ok {
|
||||
// http.Error(rw, "Cannot access idToken", http.StatusInternalServerError)
|
||||
// }
|
||||
//
|
||||
// verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID})
|
||||
// // Parse and verify ID Token payload.
|
||||
// idToken, err := verifier.Verify(context.Background(), rawIDToken)
|
||||
// if err != nil {
|
||||
// http.Error(rw, "Failed to extract idToken: "+err.Error(), http.StatusInternalServerError)
|
||||
// }
|
||||
|
||||
projects := make([]string, 0)
|
||||
|
||||
// Extract custom claims
|
||||
var claims struct {
|
||||
Username string `json:"preferred_username"`
|
||||
Name string `json:"name"`
|
||||
Profile struct {
|
||||
Client struct {
|
||||
Roles []string `json:"roles"`
|
||||
} `json:"clustercockpit"`
|
||||
} `json:"resource_access"`
|
||||
}
|
||||
if err := userInfo.Claims(&claims); err != nil {
|
||||
http.Error(rw, "Failed to extract Claims: "+err.Error(), http.StatusInternalServerError)
|
||||
}
|
||||
|
||||
var roles []string
|
||||
for _, r := range claims.Profile.Client.Roles {
|
||||
switch r {
|
||||
case "user":
|
||||
roles = append(roles, schema.GetRoleString(schema.RoleUser))
|
||||
case "admin":
|
||||
roles = append(roles, schema.GetRoleString(schema.RoleAdmin))
|
||||
}
|
||||
}
|
||||
|
||||
if len(roles) == 0 {
|
||||
roles = append(roles, schema.GetRoleString(schema.RoleUser))
|
||||
}
|
||||
|
||||
user := &schema.User{
|
||||
Username: claims.Username,
|
||||
Name: claims.Name,
|
||||
Roles: roles,
|
||||
Projects: projects,
|
||||
AuthSource: schema.AuthViaOIDC,
|
||||
}
|
||||
|
||||
if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
|
||||
handleOIDCUser(user)
|
||||
}
|
||||
|
||||
oa.authentication.SaveSession(rw, r, user)
|
||||
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(ctx))
|
||||
}
|
||||
|
||||
func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
|
||||
state, err := randString(16)
|
||||
if err != nil {
|
||||
http.Error(rw, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
setCallbackCookie(rw, r, "state", state)
|
||||
|
||||
// use PKCE to protect against CSRF attacks
|
||||
codeVerifier := oauth2.GenerateVerifier()
|
||||
setCallbackCookie(rw, r, "verifier", codeVerifier)
|
||||
|
||||
// Redirect user to consent page to ask for permission
|
||||
url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.S256ChallengeOption(codeVerifier))
|
||||
http.Redirect(rw, r, url, http.StatusFound)
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -29,8 +29,9 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
|
||||
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
||||
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"job_view_showFootprint": true,
|
||||
"job_list_usePaging": false,
|
||||
"plot_general_colorBackground": true,
|
||||
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
||||
"plot_general_lineWidth": 3,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
44
internal/config/default_metrics.go
Normal file
44
internal/config/default_metrics.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type DefaultMetricsCluster struct {
|
||||
Name string `json:"name"`
|
||||
DefaultMetrics string `json:"default_metrics"`
|
||||
}
|
||||
|
||||
type DefaultMetricsConfig struct {
|
||||
Clusters []DefaultMetricsCluster `json:"clusters"`
|
||||
}
|
||||
|
||||
func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
|
||||
filePath := "configs/default_metrics.json"
|
||||
if _, err := os.Stat(filePath); os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
data, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var cfg DefaultMetricsConfig
|
||||
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
func ParseMetricsString(s string) []string {
|
||||
parts := strings.Split(s, ",")
|
||||
var metrics []string
|
||||
for _, p := range parts {
|
||||
trimmed := strings.TrimSpace(p)
|
||||
if trimmed != "" {
|
||||
metrics = append(metrics, trimmed)
|
||||
}
|
||||
}
|
||||
return metrics
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
@@ -16,11 +16,23 @@ type Count struct {
|
||||
Count int `json:"count"`
|
||||
}
|
||||
|
||||
type EnergyFootprintValue struct {
|
||||
Hardware string `json:"hardware"`
|
||||
Metric string `json:"metric"`
|
||||
Value float64 `json:"value"`
|
||||
}
|
||||
|
||||
type FloatRange struct {
|
||||
From float64 `json:"from"`
|
||||
To float64 `json:"to"`
|
||||
}
|
||||
|
||||
type FootprintValue struct {
|
||||
Name string `json:"name"`
|
||||
Stat string `json:"stat"`
|
||||
Value float64 `json:"value"`
|
||||
}
|
||||
|
||||
type Footprints struct {
|
||||
TimeWeights *TimeWeights `json:"timeWeights"`
|
||||
Metrics []*MetricFootprints `json:"metrics"`
|
||||
@@ -46,16 +58,14 @@ type JobFilter struct {
|
||||
Cluster *StringInput `json:"cluster,omitempty"`
|
||||
Partition *StringInput `json:"partition,omitempty"`
|
||||
Duration *schema.IntRange `json:"duration,omitempty"`
|
||||
Energy *FloatRange `json:"energy,omitempty"`
|
||||
MinRunningFor *int `json:"minRunningFor,omitempty"`
|
||||
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
|
||||
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
|
||||
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
|
||||
StartTime *schema.TimeRange `json:"startTime,omitempty"`
|
||||
State []schema.JobState `json:"state,omitempty"`
|
||||
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg,omitempty"`
|
||||
MemBwAvg *FloatRange `json:"memBwAvg,omitempty"`
|
||||
LoadAvg *FloatRange `json:"loadAvg,omitempty"`
|
||||
MemUsedMax *FloatRange `json:"memUsedMax,omitempty"`
|
||||
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
|
||||
Exclusive *int `json:"exclusive,omitempty"`
|
||||
Node *StringInput `json:"node,omitempty"`
|
||||
}
|
||||
@@ -78,29 +88,31 @@ type JobMetricWithName struct {
|
||||
}
|
||||
|
||||
type JobResultList struct {
|
||||
Items []*schema.Job `json:"items"`
|
||||
Offset *int `json:"offset,omitempty"`
|
||||
Limit *int `json:"limit,omitempty"`
|
||||
Count *int `json:"count,omitempty"`
|
||||
Items []*schema.Job `json:"items"`
|
||||
Offset *int `json:"offset,omitempty"`
|
||||
Limit *int `json:"limit,omitempty"`
|
||||
Count *int `json:"count,omitempty"`
|
||||
HasNextPage *bool `json:"hasNextPage,omitempty"`
|
||||
}
|
||||
|
||||
type JobsStatistics struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
TotalJobs int `json:"totalJobs"`
|
||||
RunningJobs int `json:"runningJobs"`
|
||||
ShortJobs int `json:"shortJobs"`
|
||||
TotalWalltime int `json:"totalWalltime"`
|
||||
TotalNodes int `json:"totalNodes"`
|
||||
TotalNodeHours int `json:"totalNodeHours"`
|
||||
TotalCores int `json:"totalCores"`
|
||||
TotalCoreHours int `json:"totalCoreHours"`
|
||||
TotalAccs int `json:"totalAccs"`
|
||||
TotalAccHours int `json:"totalAccHours"`
|
||||
HistDuration []*HistoPoint `json:"histDuration"`
|
||||
HistNumNodes []*HistoPoint `json:"histNumNodes"`
|
||||
HistNumCores []*HistoPoint `json:"histNumCores"`
|
||||
HistNumAccs []*HistoPoint `json:"histNumAccs"`
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
TotalJobs int `json:"totalJobs"`
|
||||
RunningJobs int `json:"runningJobs"`
|
||||
ShortJobs int `json:"shortJobs"`
|
||||
TotalWalltime int `json:"totalWalltime"`
|
||||
TotalNodes int `json:"totalNodes"`
|
||||
TotalNodeHours int `json:"totalNodeHours"`
|
||||
TotalCores int `json:"totalCores"`
|
||||
TotalCoreHours int `json:"totalCoreHours"`
|
||||
TotalAccs int `json:"totalAccs"`
|
||||
TotalAccHours int `json:"totalAccHours"`
|
||||
HistDuration []*HistoPoint `json:"histDuration"`
|
||||
HistNumNodes []*HistoPoint `json:"histNumNodes"`
|
||||
HistNumCores []*HistoPoint `json:"histNumCores"`
|
||||
HistNumAccs []*HistoPoint `json:"histNumAccs"`
|
||||
HistMetrics []*MetricHistoPoints `json:"histMetrics"`
|
||||
}
|
||||
|
||||
type MetricFootprints struct {
|
||||
@@ -108,14 +120,46 @@ type MetricFootprints struct {
|
||||
Data []schema.Float `json:"data"`
|
||||
}
|
||||
|
||||
type MetricHistoPoint struct {
|
||||
Bin *int `json:"bin,omitempty"`
|
||||
Count int `json:"count"`
|
||||
Min *int `json:"min,omitempty"`
|
||||
Max *int `json:"max,omitempty"`
|
||||
}
|
||||
|
||||
type MetricHistoPoints struct {
|
||||
Metric string `json:"metric"`
|
||||
Unit string `json:"unit"`
|
||||
Stat *string `json:"stat,omitempty"`
|
||||
Data []*MetricHistoPoint `json:"data,omitempty"`
|
||||
}
|
||||
|
||||
type MetricStatItem struct {
|
||||
MetricName string `json:"metricName"`
|
||||
Range *FloatRange `json:"range"`
|
||||
}
|
||||
|
||||
type Mutation struct {
|
||||
}
|
||||
|
||||
type NodeMetrics struct {
|
||||
Host string `json:"host"`
|
||||
SubCluster string `json:"subCluster"`
|
||||
Metrics []*JobMetricWithName `json:"metrics"`
|
||||
}
|
||||
|
||||
type NodesResultList struct {
|
||||
Items []*NodeMetrics `json:"items"`
|
||||
Offset *int `json:"offset,omitempty"`
|
||||
Limit *int `json:"limit,omitempty"`
|
||||
Count *int `json:"count,omitempty"`
|
||||
TotalNodes *int `json:"totalNodes,omitempty"`
|
||||
HasNextPage *bool `json:"hasNextPage,omitempty"`
|
||||
}
|
||||
|
||||
type OrderByInput struct {
|
||||
Field string `json:"field"`
|
||||
Type string `json:"type"`
|
||||
Order SortDirectionEnum `json:"order"`
|
||||
}
|
||||
|
||||
@@ -124,6 +168,9 @@ type PageRequest struct {
|
||||
Page int `json:"page"`
|
||||
}
|
||||
|
||||
type Query struct {
|
||||
}
|
||||
|
||||
type StringInput struct {
|
||||
Eq *string `json:"eq,omitempty"`
|
||||
Neq *string `json:"neq,omitempty"`
|
||||
@@ -134,8 +181,9 @@ type StringInput struct {
|
||||
}
|
||||
|
||||
type TimeRangeOutput struct {
|
||||
From time.Time `json:"from"`
|
||||
To time.Time `json:"to"`
|
||||
Range *string `json:"range,omitempty"`
|
||||
From time.Time `json:"from"`
|
||||
To time.Time `json:"to"`
|
||||
}
|
||||
|
||||
type TimeWeights struct {
|
||||
|
||||
@@ -1,15 +1,39 @@
|
||||
package graph
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
// This file will not be regenerated automatically.
|
||||
//
|
||||
// It serves as dependency injection for your app, add any dependencies you require here.
|
||||
var (
|
||||
initOnce sync.Once
|
||||
resolverInstance *Resolver
|
||||
)
|
||||
|
||||
type Resolver struct {
|
||||
DB *sqlx.DB
|
||||
Repo *repository.JobRepository
|
||||
}
|
||||
|
||||
func Init() {
|
||||
initOnce.Do(func() {
|
||||
db := repository.GetConnection()
|
||||
resolverInstance = &Resolver{
|
||||
DB: db.DB, Repo: repository.GetJobRepository(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func GetResolverInstance() *Resolver {
|
||||
if resolverInstance == nil {
|
||||
log.Fatal("Authentication module not initialized!")
|
||||
}
|
||||
|
||||
return resolverInstance
|
||||
}
|
||||
|
||||
@@ -2,18 +2,22 @@ package graph
|
||||
|
||||
// This file will be automatically regenerated based on the schema, any resolver implementations
|
||||
// will be copied through when generating and any unknown code will be moved to the end.
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.36
|
||||
// Code generated by github.com/99designs/gqlgen version v0.17.57
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
@@ -27,15 +31,12 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
||||
|
||||
// Tags is the resolver for the tags field.
|
||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(&obj.ID)
|
||||
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
|
||||
}
|
||||
|
||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
|
||||
if obj.State == schema.JobStateRunning {
|
||||
obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
|
||||
}
|
||||
|
||||
// FIXME: Make the hardcoded duration configurable
|
||||
if obj.Exclusive != 1 && obj.Duration > 600 {
|
||||
return r.Repo.FindConcurrentJobs(ctx, obj)
|
||||
}
|
||||
@@ -43,8 +44,72 @@ func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*mod
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Footprint is the resolver for the footprint field.
|
||||
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
|
||||
rawFootprint, err := r.Repo.FetchFootprint(obj)
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching job footprint data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []*model.FootprintValue{}
|
||||
for name, value := range rawFootprint {
|
||||
|
||||
parts := strings.Split(name, "_")
|
||||
statPart := parts[len(parts)-1]
|
||||
nameParts := parts[:len(parts)-1]
|
||||
|
||||
res = append(res, &model.FootprintValue{
|
||||
Name: strings.Join(nameParts, "_"),
|
||||
Stat: statPart,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
|
||||
return res, err
|
||||
}
|
||||
|
||||
// EnergyFootprint is the resolver for the energyFootprint field.
|
||||
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
|
||||
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
|
||||
if err != nil {
|
||||
log.Warn("Error while fetching job energy footprint data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
res := []*model.EnergyFootprintValue{}
|
||||
for name, value := range rawEnergyFootprint {
|
||||
// Suboptimal: Nearly hardcoded metric name expectations
|
||||
matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
|
||||
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
|
||||
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
|
||||
matchCore := regexp.MustCompile(`core|Core|CORE`)
|
||||
|
||||
hwType := ""
|
||||
switch test := name; { // NOtice ';' for var declaration
|
||||
case matchCpu.MatchString(test):
|
||||
hwType = "CPU"
|
||||
case matchAcc.MatchString(test):
|
||||
hwType = "Accelerator"
|
||||
case matchMem.MatchString(test):
|
||||
hwType = "Memory"
|
||||
case matchCore.MatchString(test):
|
||||
hwType = "Core"
|
||||
default:
|
||||
hwType = "Other"
|
||||
}
|
||||
|
||||
res = append(res, &model.EnergyFootprintValue{
|
||||
Hardware: hwType,
|
||||
Metric: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
||||
return res, err
|
||||
}
|
||||
|
||||
// MetaData is the resolver for the metaData field.
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
|
||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
|
||||
return r.Repo.FetchMetadata(obj)
|
||||
}
|
||||
|
||||
@@ -53,15 +118,20 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
|
||||
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
|
||||
}
|
||||
|
||||
// Name is the resolver for the name field.
|
||||
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
|
||||
panic(fmt.Errorf("not implemented: Name - name"))
|
||||
}
|
||||
|
||||
// CreateTag is the resolver for the createTag field.
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
||||
id, err := r.Repo.CreateTag(typeArg, name)
|
||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
|
||||
id, err := r.Repo.CreateTag(typeArg, name, scope)
|
||||
if err != nil {
|
||||
log.Warn("Error while creating tag")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
|
||||
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
||||
}
|
||||
|
||||
// DeleteTag is the resolver for the deleteTag field.
|
||||
@@ -71,6 +141,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
|
||||
|
||||
// AddTagsToJob is the resolver for the addTagsToJob field.
|
||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
// Selectable Tags Pre-Filtered by Scope in Frontend: No backend check required
|
||||
jid, err := strconv.ParseInt(job, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while adding tag to job")
|
||||
@@ -85,7 +156,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
|
||||
if tags, err = r.Repo.AddTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
|
||||
log.Warn("Error while adding tag")
|
||||
return nil, err
|
||||
}
|
||||
@@ -96,6 +167,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
||||
|
||||
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
|
||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||
// Removable Tags Pre-Filtered by Scope in Frontend: No backend check required
|
||||
jid, err := strconv.ParseInt(job, 10, 64)
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing job id")
|
||||
@@ -110,7 +182,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
|
||||
if tags, err = r.Repo.RemoveTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
|
||||
log.Warn("Error while removing tag")
|
||||
return nil, err
|
||||
}
|
||||
@@ -136,7 +208,12 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
|
||||
|
||||
// Tags is the resolver for the tags field.
|
||||
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||
return r.Repo.GetTags(nil)
|
||||
return r.Repo.GetTags(repository.GetUserFromContext(ctx), nil)
|
||||
}
|
||||
|
||||
// GlobalMetrics is the resolver for the globalMetrics field.
|
||||
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
|
||||
return archive.GlobalMetricList, nil
|
||||
}
|
||||
|
||||
// User is the resolver for the user field.
|
||||
@@ -171,7 +248,7 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
job, err := r.Repo.FindById(numericId)
|
||||
job, err := r.Repo.FindById(ctx, numericId)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
@@ -187,14 +264,24 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
}
|
||||
|
||||
// JobMetrics is the resolver for the jobMetrics field.
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
|
||||
if resolution == nil { // Load from Config
|
||||
if config.Keys.EnableResampling != nil {
|
||||
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||
resolution = &defaultRes
|
||||
} else { // Set 0 (Loads configured metric timestep)
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
job, err := r.Query().Job(ctx, id)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying job for metrics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
|
||||
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job data")
|
||||
return nil, err
|
||||
@@ -240,14 +327,41 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &model.JobResultList{Items: jobs, Count: &count}, nil
|
||||
if !config.Keys.UiDefaults["job_list_usePaging"].(bool) {
|
||||
hasNextPage := false
|
||||
// page.Page += 1 : Simple, but expensive
|
||||
// Example Page 4 @ 10 IpP : Does item 41 exist?
|
||||
// Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 @ 10 IpP exists.
|
||||
nextPage := &model.PageRequest{
|
||||
ItemsPerPage: 1,
|
||||
Page: ((page.Page * page.ItemsPerPage) + 1),
|
||||
}
|
||||
|
||||
nextJobs, err := r.Repo.QueryJobs(ctx, filter, nextPage, order)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying next jobs")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(nextJobs) == 1 {
|
||||
hasNextPage = true
|
||||
}
|
||||
|
||||
return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
|
||||
} else {
|
||||
return &model.JobResultList{Items: jobs, Count: &count}, nil
|
||||
}
|
||||
}
|
||||
|
||||
// JobsStatistics is the resolver for the jobsStatistics field.
|
||||
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) ([]*model.JobsStatistics, error) {
|
||||
var err error
|
||||
var stats []*model.JobsStatistics
|
||||
|
||||
// Top Level Defaults
|
||||
var defaultDurationBins string = "1h"
|
||||
var defaultMetricBins int = 10
|
||||
|
||||
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
||||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
||||
if groupBy == nil {
|
||||
@@ -281,8 +395,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
}
|
||||
|
||||
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
|
||||
|
||||
if numDurationBins == nil {
|
||||
numDurationBins = &defaultDurationBins
|
||||
}
|
||||
|
||||
if groupBy == nil {
|
||||
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
|
||||
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0], numDurationBins)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -291,6 +410,22 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
||||
}
|
||||
}
|
||||
|
||||
if requireField(ctx, "histMetrics") {
|
||||
|
||||
if numMetricBins == nil {
|
||||
numMetricBins = &defaultMetricBins
|
||||
}
|
||||
|
||||
if groupBy == nil {
|
||||
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0], numMetricBins)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
return nil, errors.New("metric histograms only implemented without groupBy argument")
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
@@ -302,8 +437,8 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
|
||||
// NodeMetrics is the resolver for the nodeMetrics field.
|
||||
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user != nil && !user.HasRole(schema.RoleAdmin) {
|
||||
return nil, errors.New("you need to be an administrator for this query")
|
||||
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
@@ -312,9 +447,9 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
}
|
||||
}
|
||||
|
||||
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading node data")
|
||||
log.Warn("error while loading node data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -324,7 +459,10 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
Host: hostname,
|
||||
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
||||
}
|
||||
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
|
||||
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
|
||||
if err != nil {
|
||||
log.Warnf("error in nodeMetrics resolver: %s", err)
|
||||
}
|
||||
|
||||
for metric, scopedMetrics := range metrics {
|
||||
for _, scopedMetric := range scopedMetrics {
|
||||
@@ -342,6 +480,68 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
return nodeMetrics, nil
|
||||
}
|
||||
|
||||
// NodeMetricsList is the resolver for the nodeMetricsList field.
|
||||
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
|
||||
if resolution == nil { // Load from Config
|
||||
if config.Keys.EnableResampling != nil {
|
||||
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||
resolution = &defaultRes
|
||||
} else { // Set 0 (Loads configured metric timestep)
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, mc := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, totalNodes, hasNextPage, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, *resolution, from, to, page, ctx)
|
||||
if err != nil {
|
||||
log.Warn("error while loading node data")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nodeMetricsList := make([]*model.NodeMetrics, 0, len(data))
|
||||
for hostname, metrics := range data {
|
||||
host := &model.NodeMetrics{
|
||||
Host: hostname,
|
||||
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
||||
}
|
||||
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
|
||||
if err != nil {
|
||||
log.Warnf("error in nodeMetrics resolver: %s", err)
|
||||
}
|
||||
|
||||
for metric, scopedMetrics := range metrics {
|
||||
for scope, scopedMetric := range scopedMetrics {
|
||||
host.Metrics = append(host.Metrics, &model.JobMetricWithName{
|
||||
Name: metric,
|
||||
Scope: scope,
|
||||
Metric: scopedMetric,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
nodeMetricsList = append(nodeMetricsList, host)
|
||||
}
|
||||
|
||||
nodeMetricsListResult := &model.NodesResultList{
|
||||
Items: nodeMetricsList,
|
||||
TotalNodes: &totalNodes,
|
||||
HasNextPage: &hasNextPage,
|
||||
}
|
||||
|
||||
return nodeMetricsListResult, nil
|
||||
}
|
||||
|
||||
// NumberOfNodes is the resolver for the numberOfNodes field.
|
||||
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
|
||||
nodeList, err := archive.ParseNodeList(obj.Nodes)
|
||||
@@ -357,6 +557,9 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
|
||||
// Job returns generated.JobResolver implementation.
|
||||
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
||||
|
||||
// MetricValue returns generated.MetricValueResolver implementation.
|
||||
func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricValueResolver{r} }
|
||||
|
||||
// Mutation returns generated.MutationResolver implementation.
|
||||
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
||||
|
||||
@@ -368,6 +571,7 @@ func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subCluste
|
||||
|
||||
type clusterResolver struct{ *Resolver }
|
||||
type jobResolver struct{ *Resolver }
|
||||
type metricValueResolver struct{ *Resolver }
|
||||
type mutationResolver struct{ *Resolver }
|
||||
type queryResolver struct{ *Resolver }
|
||||
type subClusterResolver struct{ *Resolver }
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
|
||||
"github.com/99designs/gqlgen/graphql"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -24,8 +24,8 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
rows int, cols int,
|
||||
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
||||
|
||||
minX float64, minY float64, maxX float64, maxY float64,
|
||||
) ([][]float64, error) {
|
||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||
if err != nil {
|
||||
log.Error("Error while querying jobs for roofline")
|
||||
@@ -47,7 +47,14 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
continue
|
||||
}
|
||||
|
||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
// resolution := 0
|
||||
|
||||
// for _, mc := range metricConfigs {
|
||||
// resolution = max(resolution, mc.Timestep)
|
||||
// }
|
||||
|
||||
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
|
||||
return nil, err
|
||||
@@ -120,7 +127,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
|
||||
continue
|
||||
}
|
||||
|
||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
log.Error("Error while loading averages for footprint")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -8,9 +8,9 @@ import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
@@ -42,8 +42,8 @@ func HandleImportFlag(flag string) error {
|
||||
}
|
||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||
dec.DisallowUnknownFields()
|
||||
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err = dec.Decode(&jobMeta); err != nil {
|
||||
job := schema.JobMeta{BaseJob: schema.JobDefaults}
|
||||
if err = dec.Decode(&job); err != nil {
|
||||
log.Warn("Error while decoding raw json metadata for import")
|
||||
return err
|
||||
}
|
||||
@@ -67,32 +67,60 @@ func HandleImportFlag(flag string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// checkJobData(&jobData)
|
||||
job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
|
||||
// if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
|
||||
// if err != nil {
|
||||
// log.Warn("Error while finding job in jobRepository")
|
||||
// return err
|
||||
// }
|
||||
//
|
||||
// return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist")
|
||||
// }
|
||||
//
|
||||
job := schema.Job{
|
||||
BaseJob: jobMeta.BaseJob,
|
||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
||||
StartTimeUnix: jobMeta.StartTime,
|
||||
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: Other metrics...
|
||||
job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
|
||||
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
|
||||
job.MemUsedMax = loadJobStat(&jobMeta, "mem_used")
|
||||
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
|
||||
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
|
||||
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
|
||||
job.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
job.EnergyFootprint = make(map[string]float64)
|
||||
var totalEnergy float64
|
||||
var energy float64
|
||||
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
|
||||
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
|
||||
}
|
||||
} else {
|
||||
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
|
||||
}
|
||||
|
||||
job.EnergyFootprint[fp] = energy
|
||||
totalEnergy += energy
|
||||
}
|
||||
|
||||
job.Energy = (math.Round(totalEnergy*100) / 100)
|
||||
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
@@ -110,7 +138,7 @@ func HandleImportFlag(flag string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
|
||||
if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
|
||||
log.Error("Error while importing job")
|
||||
return err
|
||||
}
|
||||
@@ -122,8 +150,8 @@ func HandleImportFlag(flag string) error {
|
||||
}
|
||||
|
||||
for _, tag := range job.Tags {
|
||||
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
||||
log.Error("Error while adding or creating tag")
|
||||
if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||
log.Error("Error while adding or creating tag on import")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -82,7 +82,7 @@ func setup(t *testing.T) *repository.JobRepository {
|
||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -7,6 +7,7 @@ package importer
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -16,6 +17,11 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
const (
|
||||
addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
|
||||
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
|
||||
)
|
||||
|
||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||
// repopulate them using the jobs found in `archive`.
|
||||
func InitDB() error {
|
||||
@@ -60,13 +66,58 @@ func InitDB() error {
|
||||
StartTimeUnix: jobMeta.StartTime,
|
||||
}
|
||||
|
||||
// TODO: Other metrics...
|
||||
job.LoadAvg = loadJobStat(jobMeta, "cpu_load")
|
||||
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
|
||||
job.MemUsedMax = loadJobStat(jobMeta, "mem_used")
|
||||
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
|
||||
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
|
||||
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
job.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
log.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
job.EnergyFootprint = make(map[string]float64)
|
||||
var totalEnergy float64
|
||||
var energy float64
|
||||
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
|
||||
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
|
||||
}
|
||||
} else {
|
||||
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||
}
|
||||
|
||||
job.EnergyFootprint[fp] = energy
|
||||
totalEnergy += energy
|
||||
}
|
||||
|
||||
job.Energy = (math.Round(totalEnergy*100) / 100)
|
||||
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
@@ -88,7 +139,8 @@ func InitDB() error {
|
||||
continue
|
||||
}
|
||||
|
||||
id, err := r.TransactionAdd(t, job)
|
||||
id, err := r.TransactionAddNamed(t,
|
||||
repository.NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
@@ -99,7 +151,9 @@ func InitDB() error {
|
||||
tagstr := tag.Name + ":" + tag.Type
|
||||
tagId, ok := tags[tagstr]
|
||||
if !ok {
|
||||
tagId, err = r.TransactionAddTag(t, tag)
|
||||
tagId, err = r.TransactionAdd(t,
|
||||
addTagQuery,
|
||||
tag.Name, tag.Type)
|
||||
if err != nil {
|
||||
log.Errorf("Error adding tag: %v", err)
|
||||
errorOccured++
|
||||
@@ -108,7 +162,9 @@ func InitDB() error {
|
||||
tags[tagstr] = tagId
|
||||
}
|
||||
|
||||
r.TransactionSetTag(t, id, tagId)
|
||||
r.TransactionAdd(t,
|
||||
setTagQuery,
|
||||
id, tagId)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
@@ -150,18 +206,6 @@ func SanityChecks(job *schema.BaseJob) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
if metric == "mem_used" {
|
||||
return stats.Max
|
||||
} else {
|
||||
return stats.Avg
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0
|
||||
}
|
||||
|
||||
func checkJobData(d *schema.JobData) error {
|
||||
for _, scopes := range *d {
|
||||
// var newUnit schema.Unit
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
1486
internal/importer/testdata/cluster-fritz.json
vendored
1486
internal/importer/testdata/cluster-fritz.json
vendored
File diff suppressed because it is too large
Load Diff
307
internal/metricDataDispatcher/dataLoader.go
Normal file
307
internal/metricDataDispatcher/dataLoader.go
Normal file
@@ -0,0 +1,307 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricDataDispatcher
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/resampler"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||
|
||||
func cacheKey(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
) string {
|
||||
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
|
||||
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
|
||||
job.ID, job.State, metrics, scopes, resolution)
|
||||
}
|
||||
|
||||
// Fetches the metric data for a job.
|
||||
func LoadData(job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
) (schema.JobData, error) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
|
||||
if job.State == schema.JobStateRunning ||
|
||||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
||||
config.Keys.DisableArchive {
|
||||
|
||||
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
|
||||
if err != nil {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
}
|
||||
|
||||
if scopes == nil {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
cluster := archive.GetCluster(job.Cluster)
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
|
||||
if err != nil {
|
||||
if len(jd) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
// return err, 0, 0 // Reactivating will block archiving on one partial error
|
||||
} else {
|
||||
log.Error("Error while loading job data from metric repository")
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
size = jd.Size()
|
||||
} else {
|
||||
var jd_temp schema.JobData
|
||||
jd_temp, err = archive.GetHandle().LoadJobData(job)
|
||||
if err != nil {
|
||||
log.Error("Error while loading job data from archive")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
//Deep copy the cached archive hashmap
|
||||
jd = metricdata.DeepCopy(jd_temp)
|
||||
|
||||
//Resampling for archived data.
|
||||
//Pass the resolution from frontend here.
|
||||
for _, v := range jd {
|
||||
for _, v_ := range v {
|
||||
timestep := 0
|
||||
for i := 0; i < len(v_.Series); i += 1 {
|
||||
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
|
||||
if err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
v_.Timestep = timestep
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid sending unrequested data to the client:
|
||||
if metrics != nil || scopes != nil {
|
||||
if metrics == nil {
|
||||
metrics = make([]string, 0, len(jd))
|
||||
for k := range jd {
|
||||
metrics = append(metrics, k)
|
||||
}
|
||||
}
|
||||
|
||||
res := schema.JobData{}
|
||||
for _, metric := range metrics {
|
||||
if perscope, ok := jd[metric]; ok {
|
||||
if len(perscope) > 1 {
|
||||
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
||||
for _, scope := range scopes {
|
||||
if jm, ok := perscope[scope]; ok {
|
||||
subset[scope] = jm
|
||||
}
|
||||
}
|
||||
|
||||
if len(subset) > 0 {
|
||||
perscope = subset
|
||||
}
|
||||
}
|
||||
|
||||
res[metric] = perscope
|
||||
}
|
||||
}
|
||||
jd = res
|
||||
}
|
||||
size = jd.Size()
|
||||
}
|
||||
|
||||
ttl = 5 * time.Hour
|
||||
if job.State == schema.JobStateRunning {
|
||||
ttl = 2 * time.Minute
|
||||
}
|
||||
|
||||
// FIXME: Review: Is this really necessary or correct.
|
||||
// Note: Lines 147-170 formerly known as prepareJobData(jobData, scopes)
|
||||
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
|
||||
// to be available at the scope 'node'. If a job has a lot of nodes,
|
||||
// statisticsSeries should be available so that a min/median/max Graph can be
|
||||
// used instead of a lot of single lines.
|
||||
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
|
||||
// Existing (archived) StatsSeries can be 'min/mean/max'!
|
||||
const maxSeriesSize int = 15
|
||||
for _, scopes := range jd {
|
||||
for _, jm := range scopes {
|
||||
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
|
||||
continue
|
||||
}
|
||||
|
||||
jm.AddStatisticsSeries()
|
||||
}
|
||||
}
|
||||
|
||||
nodeScopeRequested := false
|
||||
for _, scope := range scopes {
|
||||
if scope == schema.MetricScopeNode {
|
||||
nodeScopeRequested = true
|
||||
}
|
||||
}
|
||||
|
||||
if nodeScopeRequested {
|
||||
jd.AddNodeScope("flops_any")
|
||||
jd.AddNodeScope("mem_bw")
|
||||
}
|
||||
|
||||
return jd, ttl, size
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
log.Error("Error in returned dataset")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(schema.JobData), nil
|
||||
}
|
||||
|
||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||
func LoadAverages(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
data [][]schema.Float,
|
||||
ctx context.Context,
|
||||
) error {
|
||||
if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
|
||||
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
|
||||
}
|
||||
|
||||
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
|
||||
if err != nil {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
|
||||
return err
|
||||
}
|
||||
|
||||
for i, m := range metrics {
|
||||
nodes, ok := stats[m]
|
||||
if !ok {
|
||||
data[i] = append(data[i], schema.NaN)
|
||||
continue
|
||||
}
|
||||
|
||||
sum := 0.0
|
||||
for _, node := range nodes {
|
||||
sum += node.Avg
|
||||
}
|
||||
data[i] = append(data[i], schema.Float(sum))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for the classic node/system view. Returns a map of nodes to a map of metrics.
|
||||
func LoadNodeData(
|
||||
cluster string,
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
repo, err := metricdata.GetMetricDataRepo(cluster)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func LoadNodeListData(
|
||||
cluster, subCluster, nodeFilter string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
repo, err := metricdata.GetMetricDataRepo(cluster)
|
||||
if err != nil {
|
||||
return nil, 0, false, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, totalNodes, hasNextPage, err := repo.LoadNodeListData(cluster, subCluster, nodeFilter, metrics, scopes, resolution, from, to, page, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, totalNodes, hasNextPage, err
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
|
||||
const maxSeriesSize int = 8
|
||||
for _, jd := range data {
|
||||
for _, scopes := range jd {
|
||||
for _, jm := range scopes {
|
||||
if jm.StatisticsSeries != nil || len(jm.Series) < maxSeriesSize {
|
||||
continue
|
||||
}
|
||||
jm.AddStatisticsSeries()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, totalNodes, hasNextPage, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, totalNodes, hasNextPage, nil
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -11,10 +11,12 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
@@ -32,32 +34,33 @@ type CCMetricStoreConfig struct {
|
||||
}
|
||||
|
||||
type CCMetricStore struct {
|
||||
here2there map[string]string
|
||||
there2here map[string]string
|
||||
client http.Client
|
||||
jwt string
|
||||
url string
|
||||
queryEndpoint string
|
||||
client http.Client
|
||||
here2there map[string]string
|
||||
there2here map[string]string
|
||||
}
|
||||
|
||||
type ApiQueryRequest struct {
|
||||
Cluster string `json:"cluster"`
|
||||
Queries []ApiQuery `json:"queries"`
|
||||
ForAllNodes []string `json:"for-all-nodes"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
WithStats bool `json:"with-stats"`
|
||||
WithData bool `json:"with-data"`
|
||||
Queries []ApiQuery `json:"queries"`
|
||||
ForAllNodes []string `json:"for-all-nodes"`
|
||||
}
|
||||
|
||||
type ApiQuery struct {
|
||||
Type *string `json:"type,omitempty"`
|
||||
SubType *string `json:"subtype,omitempty"`
|
||||
Metric string `json:"metric"`
|
||||
Hostname string `json:"host"`
|
||||
Aggregate bool `json:"aggreg"`
|
||||
Type *string `json:"type,omitempty"`
|
||||
Resolution int `json:"resolution"`
|
||||
TypeIds []string `json:"type-ids,omitempty"`
|
||||
SubType *string `json:"subtype,omitempty"`
|
||||
SubTypeIds []string `json:"subtype-ids,omitempty"`
|
||||
Aggregate bool `json:"aggreg"`
|
||||
}
|
||||
|
||||
type ApiQueryResponse struct {
|
||||
@@ -66,17 +69,17 @@ type ApiQueryResponse struct {
|
||||
}
|
||||
|
||||
type ApiMetricData struct {
|
||||
Error *string `json:"error"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
Data []schema.Float `json:"data"`
|
||||
Avg schema.Float `json:"avg"`
|
||||
Min schema.Float `json:"min"`
|
||||
Max schema.Float `json:"max"`
|
||||
Error *string `json:"error"`
|
||||
Data []schema.Float `json:"data"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
Resolution int `json:"resolution"`
|
||||
Avg schema.Float `json:"avg"`
|
||||
Min schema.Float `json:"min"`
|
||||
Max schema.Float `json:"max"`
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
|
||||
|
||||
var config CCMetricStoreConfig
|
||||
if err := json.Unmarshal(rawConfig, &config); err != nil {
|
||||
log.Warn("Error while unmarshaling raw json config")
|
||||
@@ -122,15 +125,15 @@ func (ccms *CCMetricStore) toLocalName(metric string) string {
|
||||
|
||||
func (ccms *CCMetricStore) doRequest(
|
||||
ctx context.Context,
|
||||
body *ApiQueryRequest) (*ApiQueryResponse, error) {
|
||||
|
||||
body *ApiQueryRequest,
|
||||
) (*ApiQueryResponse, error) {
|
||||
buf := &bytes.Buffer{}
|
||||
if err := json.NewEncoder(buf).Encode(body); err != nil {
|
||||
log.Warn("Error while encoding request body")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
|
||||
if err != nil {
|
||||
log.Warn("Error while building request body")
|
||||
return nil, err
|
||||
@@ -139,6 +142,13 @@ func (ccms *CCMetricStore) doRequest(
|
||||
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
|
||||
}
|
||||
|
||||
// versioning the cc-metric-store query API.
|
||||
// v2 = data with resampling
|
||||
// v1 = data without resampling
|
||||
q := req.URL.Query()
|
||||
q.Add("version", "v2")
|
||||
req.URL.RawQuery = q.Encode()
|
||||
|
||||
res, err := ccms.client.Do(req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
@@ -162,9 +172,10 @@ func (ccms *CCMetricStore) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
|
||||
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
) (schema.JobData, error) {
|
||||
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
|
||||
if err != nil {
|
||||
log.Warn("Error while building queries")
|
||||
return nil, err
|
||||
@@ -186,7 +197,7 @@ func (ccms *CCMetricStore) LoadData(
|
||||
}
|
||||
|
||||
var errors []string
|
||||
var jobData schema.JobData = make(schema.JobData)
|
||||
jobData := make(schema.JobData)
|
||||
for i, row := range resBody.Results {
|
||||
query := req.Queries[i]
|
||||
metric := ccms.toLocalName(query.Metric)
|
||||
@@ -196,17 +207,22 @@ func (ccms *CCMetricStore) LoadData(
|
||||
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
|
||||
}
|
||||
|
||||
res := mc.Timestep
|
||||
if len(row) > 0 {
|
||||
res = row[0].Resolution
|
||||
}
|
||||
|
||||
jobMetric, ok := jobData[metric][scope]
|
||||
if !ok {
|
||||
jobMetric = &schema.JobMetric{
|
||||
Unit: mc.Unit,
|
||||
Timestep: mc.Timestep,
|
||||
Timestep: res,
|
||||
Series: make([]schema.Series, 0),
|
||||
}
|
||||
jobData[metric][scope] = jobMetric
|
||||
}
|
||||
|
||||
for _, res := range row {
|
||||
for ndx, res := range row {
|
||||
if res.Error != nil {
|
||||
/* Build list for "partial errors", if any */
|
||||
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
|
||||
@@ -216,12 +232,11 @@ func (ccms *CCMetricStore) LoadData(
|
||||
id := (*string)(nil)
|
||||
if query.Type != nil {
|
||||
id = new(string)
|
||||
*id = query.TypeIds[0]
|
||||
*id = query.TypeIds[ndx]
|
||||
}
|
||||
|
||||
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||
// TODO: use schema.Float instead of float64?
|
||||
// This is done because regular float64 can not be JSONed when NaN.
|
||||
// "schema.Float()" because regular float64 can not be JSONed when NaN.
|
||||
res.Avg = schema.Float(0)
|
||||
res.Min = schema.Float(0)
|
||||
res.Max = schema.Float(0)
|
||||
@@ -252,7 +267,6 @@ func (ccms *CCMetricStore) LoadData(
|
||||
/* Returns list for "partial errors" */
|
||||
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||
}
|
||||
|
||||
return jobData, nil
|
||||
}
|
||||
|
||||
@@ -267,8 +281,9 @@ var (
|
||||
func (ccms *CCMetricStore) buildQueries(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) {
|
||||
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
|
||||
assignedScope := []schema.MetricScope{}
|
||||
|
||||
@@ -313,12 +328,18 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
|
||||
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
|
||||
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
|
||||
if scope != schema.MetricScopeAccelerator {
|
||||
// Skip all other catched cases
|
||||
continue
|
||||
}
|
||||
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
|
||||
continue
|
||||
@@ -331,11 +352,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
}
|
||||
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: host.Accelerators,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -344,11 +366,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
// HWThread -> HWThead
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -359,11 +382,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
||||
for _, core := range cores {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Core[core]),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Core[core]),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
}
|
||||
@@ -375,11 +399,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
||||
for _, socket := range sockets {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
}
|
||||
@@ -389,11 +414,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
// HWThread -> Node
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(hwthreads),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -403,11 +429,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
|
||||
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -417,11 +444,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
||||
cores, _ := topology.GetCoresFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -431,11 +459,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
|
||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -445,11 +474,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
|
||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -459,11 +489,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: false,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -473,11 +504,12 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Aggregate: true,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -486,8 +518,9 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
// Node -> Node
|
||||
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
@@ -504,9 +537,17 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
func (ccms *CCMetricStore) LoadStats(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
ctx context.Context,
|
||||
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
|
||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) // #166 Add scope shere for analysis view accelerator normalization?
|
||||
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
// resolution := 9000
|
||||
|
||||
// for _, mc := range metricConfigs {
|
||||
// resolution = min(resolution, mc.Timestep)
|
||||
// }
|
||||
|
||||
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
|
||||
if err != nil {
|
||||
log.Warn("Error while building query")
|
||||
return nil, err
|
||||
@@ -566,8 +607,8 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
|
||||
ctx context.Context,
|
||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
req := ApiQueryRequest{
|
||||
Cluster: cluster,
|
||||
From: from.Unix(),
|
||||
@@ -584,8 +625,9 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
for _, node := range nodes {
|
||||
for _, metric := range metrics {
|
||||
req.Queries = append(req.Queries, ApiQuery{
|
||||
Hostname: node,
|
||||
Metric: ccms.toRemoteName(metric),
|
||||
Hostname: node,
|
||||
Metric: ccms.toRemoteName(metric),
|
||||
Resolution: 60, // Default for Node Queries
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -593,7 +635,7 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
|
||||
resBody, err := ccms.doRequest(ctx, &req)
|
||||
if err != nil {
|
||||
log.Error("Error while performing request")
|
||||
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -651,8 +693,446 @@ func (ccms *CCMetricStore) LoadNodeData(
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func intToStringSlice(is []int) []string {
|
||||
func (ccms *CCMetricStore) LoadNodeListData(
|
||||
cluster, subCluster, nodeFilter string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
|
||||
// 0) Init additional vars
|
||||
var totalNodes int = 0
|
||||
var hasNextPage bool = false
|
||||
|
||||
// 1) Get list of all nodes
|
||||
var nodes []string
|
||||
if subCluster != "" {
|
||||
scNodes := archive.NodeLists[cluster][subCluster]
|
||||
nodes = scNodes.PrintList()
|
||||
} else {
|
||||
subClusterNodeLists := archive.NodeLists[cluster]
|
||||
for _, nodeList := range subClusterNodeLists {
|
||||
nodes = append(nodes, nodeList.PrintList()...)
|
||||
}
|
||||
}
|
||||
|
||||
// 2) Filter nodes
|
||||
if nodeFilter != "" {
|
||||
filteredNodes := []string{}
|
||||
for _, node := range nodes {
|
||||
if strings.Contains(node, nodeFilter) {
|
||||
filteredNodes = append(filteredNodes, node)
|
||||
}
|
||||
}
|
||||
nodes = filteredNodes
|
||||
}
|
||||
|
||||
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
|
||||
totalNodes = len(nodes)
|
||||
sort.Strings(nodes)
|
||||
|
||||
// 3) Apply paging
|
||||
if len(nodes) > page.ItemsPerPage {
|
||||
start := (page.Page - 1) * page.ItemsPerPage
|
||||
end := start + page.ItemsPerPage
|
||||
if end > len(nodes) {
|
||||
end = len(nodes)
|
||||
hasNextPage = false
|
||||
} else {
|
||||
hasNextPage = true
|
||||
}
|
||||
nodes = nodes[start:end]
|
||||
}
|
||||
|
||||
// Note: Order of node data is not guaranteed after this point, but contents match page and filter criteria
|
||||
|
||||
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
||||
if err != nil {
|
||||
log.Warn("Error while building queries")
|
||||
return nil, totalNodes, hasNextPage, err
|
||||
}
|
||||
|
||||
req := ApiQueryRequest{
|
||||
Cluster: cluster,
|
||||
Queries: queries,
|
||||
From: from.Unix(),
|
||||
To: to.Unix(),
|
||||
WithStats: true,
|
||||
WithData: true,
|
||||
}
|
||||
|
||||
resBody, err := ccms.doRequest(ctx, &req)
|
||||
if err != nil {
|
||||
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
|
||||
return nil, totalNodes, hasNextPage, err
|
||||
}
|
||||
|
||||
var errors []string
|
||||
data := make(map[string]schema.JobData)
|
||||
for i, row := range resBody.Results {
|
||||
var query ApiQuery
|
||||
if resBody.Queries != nil {
|
||||
query = resBody.Queries[i]
|
||||
} else {
|
||||
query = req.Queries[i]
|
||||
}
|
||||
// qdata := res[0]
|
||||
metric := ccms.toLocalName(query.Metric)
|
||||
scope := assignedScope[i]
|
||||
mc := archive.GetMetricConfig(cluster, metric)
|
||||
|
||||
res := mc.Timestep
|
||||
if len(row) > 0 {
|
||||
res = row[0].Resolution
|
||||
}
|
||||
|
||||
// Init Nested Map Data Structures If Not Found
|
||||
hostData, ok := data[query.Hostname]
|
||||
if !ok {
|
||||
hostData = make(schema.JobData)
|
||||
data[query.Hostname] = hostData
|
||||
}
|
||||
|
||||
metricData, ok := hostData[metric]
|
||||
if !ok {
|
||||
metricData = make(map[schema.MetricScope]*schema.JobMetric)
|
||||
data[query.Hostname][metric] = metricData
|
||||
}
|
||||
|
||||
scopeData, ok := metricData[scope]
|
||||
if !ok {
|
||||
scopeData = &schema.JobMetric{
|
||||
Unit: mc.Unit,
|
||||
Timestep: res,
|
||||
Series: make([]schema.Series, 0),
|
||||
}
|
||||
data[query.Hostname][metric][scope] = scopeData
|
||||
}
|
||||
|
||||
for ndx, res := range row {
|
||||
if res.Error != nil {
|
||||
/* Build list for "partial errors", if any */
|
||||
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
|
||||
continue
|
||||
}
|
||||
|
||||
id := (*string)(nil)
|
||||
if query.Type != nil {
|
||||
id = new(string)
|
||||
*id = query.TypeIds[ndx]
|
||||
}
|
||||
|
||||
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||
// "schema.Float()" because regular float64 can not be JSONed when NaN.
|
||||
res.Avg = schema.Float(0)
|
||||
res.Min = schema.Float(0)
|
||||
res.Max = schema.Float(0)
|
||||
}
|
||||
|
||||
scopeData.Series = append(scopeData.Series, schema.Series{
|
||||
Hostname: query.Hostname,
|
||||
Id: id,
|
||||
Statistics: schema.MetricStatistics{
|
||||
Avg: float64(res.Avg),
|
||||
Min: float64(res.Min),
|
||||
Max: float64(res.Max),
|
||||
},
|
||||
Data: res.Data,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
if len(errors) != 0 {
|
||||
/* Returns list of "partial errors" */
|
||||
return data, totalNodes, hasNextPage, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
|
||||
}
|
||||
|
||||
return data, totalNodes, hasNextPage, nil
|
||||
}
|
||||
|
||||
func (ccms *CCMetricStore) buildNodeQueries(
|
||||
cluster string,
|
||||
subCluster string,
|
||||
nodes []string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
) ([]ApiQuery, []schema.MetricScope, error) {
|
||||
|
||||
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
||||
assignedScope := []schema.MetricScope{}
|
||||
|
||||
// Get Topol before loop if subCluster given
|
||||
var subClusterTopol *schema.SubCluster
|
||||
var scterr error
|
||||
if subCluster != "" {
|
||||
subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)
|
||||
if scterr != nil {
|
||||
// TODO: Log
|
||||
return nil, nil, scterr
|
||||
}
|
||||
}
|
||||
|
||||
for _, metric := range metrics {
|
||||
remoteName := ccms.toRemoteName(metric)
|
||||
mc := archive.GetMetricConfig(cluster, metric)
|
||||
if mc == nil {
|
||||
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
|
||||
log.Infof("metric '%s' is not specified for cluster '%s'", metric, cluster)
|
||||
continue
|
||||
}
|
||||
|
||||
// Avoid duplicates...
|
||||
handledScopes := make([]schema.MetricScope, 0, 3)
|
||||
|
||||
scopesLoop:
|
||||
for _, requestedScope := range scopes {
|
||||
nativeScope := mc.Scope
|
||||
|
||||
scope := nativeScope.Max(requestedScope)
|
||||
for _, s := range handledScopes {
|
||||
if scope == s {
|
||||
continue scopesLoop
|
||||
}
|
||||
}
|
||||
handledScopes = append(handledScopes, scope)
|
||||
|
||||
for _, hostname := range nodes {
|
||||
|
||||
// If no subCluster given, get it by node
|
||||
if subCluster == "" {
|
||||
subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)
|
||||
if scnerr != nil {
|
||||
return nil, nil, scnerr
|
||||
}
|
||||
subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)
|
||||
if scterr != nil {
|
||||
return nil, nil, scterr
|
||||
}
|
||||
}
|
||||
|
||||
// Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable
|
||||
// Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable
|
||||
topology := subClusterTopol.Topology
|
||||
acceleratorIds := topology.GetAcceleratorIDs()
|
||||
|
||||
// Moved check here if metric matches hardware specs
|
||||
if nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 {
|
||||
continue scopesLoop
|
||||
}
|
||||
|
||||
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
|
||||
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
|
||||
if scope != schema.MetricScopeAccelerator {
|
||||
// Skip all other catched cases
|
||||
continue
|
||||
}
|
||||
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: false,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: acceleratorIds,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
|
||||
continue
|
||||
}
|
||||
|
||||
// Accelerator -> Node
|
||||
if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode {
|
||||
if len(acceleratorIds) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: true,
|
||||
Type: &acceleratorString,
|
||||
TypeIds: acceleratorIds,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// HWThread -> HWThead
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: false,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Node),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// HWThread -> Core
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
|
||||
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||
for _, core := range cores {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Core[core]),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// HWThread -> Socket
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||
for _, socket := range sockets {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Socket[socket]),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// HWThread -> Node
|
||||
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: true,
|
||||
Type: &hwthreadString,
|
||||
TypeIds: intToStringSlice(topology.Node),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// Core -> Core
|
||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
|
||||
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: false,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// Core -> Node
|
||||
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
|
||||
cores, _ := topology.GetCoresFromHWThreads(topology.Node)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: true,
|
||||
Type: &coreString,
|
||||
TypeIds: intToStringSlice(cores),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// MemoryDomain -> MemoryDomain
|
||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
|
||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: false,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// MemoryDoman -> Node
|
||||
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
|
||||
sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: true,
|
||||
Type: &memoryDomainString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// Socket -> Socket
|
||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: false,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// Socket -> Node
|
||||
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
|
||||
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Aggregate: true,
|
||||
Type: &socketString,
|
||||
TypeIds: intToStringSlice(sockets),
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
// Node -> Node
|
||||
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: hostname,
|
||||
Resolution: resolution,
|
||||
})
|
||||
assignedScope = append(assignedScope, scope)
|
||||
continue
|
||||
}
|
||||
|
||||
return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return queries, assignedScope, nil
|
||||
}
|
||||
|
||||
func intToStringSlice(is []int) []string {
|
||||
ss := make([]string, len(is))
|
||||
for i, x := range is {
|
||||
ss[i] = strconv.Itoa(x)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
@@ -60,7 +61,8 @@ func (idb *InfluxDBv2DataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
ctx context.Context,
|
||||
resolution int) (schema.JobData, error) {
|
||||
|
||||
measurementsConds := make([]string, 0, len(metrics))
|
||||
for _, m := range metrics {
|
||||
@@ -311,3 +313,21 @@ func (idb *InfluxDBv2DataRepository) LoadNodeData(
|
||||
|
||||
return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
||||
}
|
||||
|
||||
func (idb *InfluxDBv2DataRepository) LoadNodeListData(
|
||||
cluster, subCluster, nodeFilter string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
|
||||
var totalNodes int = 0
|
||||
var hasNextPage bool = false
|
||||
// TODO : Implement to be used in NodeList-View
|
||||
log.Infof("LoadNodeListData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodeFilter %v, scopes %v", cluster, metrics, nodeFilter, scopes)
|
||||
|
||||
return nil, totalNodes, hasNextPage, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -8,13 +8,11 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
@@ -24,22 +22,21 @@ type MetricDataRepository interface {
|
||||
Init(rawConfig json.RawMessage) error
|
||||
|
||||
// Return the JobData for the given job, only with the requested metrics.
|
||||
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
|
||||
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
|
||||
|
||||
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
|
||||
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
|
||||
|
||||
// Return a map of hosts to a map of metrics at the requested scopes for that node.
|
||||
// Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
|
||||
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
|
||||
|
||||
// Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
|
||||
LoadNodeListData(cluster, subCluster, nodeFilter string, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, page *model.PageRequest, ctx context.Context) (map[string]schema.JobData, int, bool, error)
|
||||
}
|
||||
|
||||
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
|
||||
|
||||
var useArchive bool
|
||||
|
||||
func Init(disableArchive bool) error {
|
||||
|
||||
useArchive = !disableArchive
|
||||
func Init() error {
|
||||
for _, cluster := range config.Keys.Clusters {
|
||||
if cluster.MetricDataRepository != nil {
|
||||
var kind struct {
|
||||
@@ -74,283 +71,13 @@ func Init(disableArchive bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
|
||||
|
||||
// Fetches the metric data for a job.
|
||||
func LoadData(job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
|
||||
if job.State == schema.JobStateRunning ||
|
||||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
|
||||
!useArchive {
|
||||
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
|
||||
if !ok {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
|
||||
}
|
||||
|
||||
if scopes == nil {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
cluster := archive.GetCluster(job.Cluster)
|
||||
for _, mc := range cluster.MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
jd, err = repo.LoadData(job, metrics, scopes, ctx)
|
||||
if err != nil {
|
||||
if len(jd) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading job data from metric repository")
|
||||
return err, 0, 0
|
||||
}
|
||||
}
|
||||
size = jd.Size()
|
||||
} else {
|
||||
jd, err = archive.GetHandle().LoadJobData(job)
|
||||
if err != nil {
|
||||
log.Error("Error while loading job data from archive")
|
||||
return err, 0, 0
|
||||
}
|
||||
|
||||
// Avoid sending unrequested data to the client:
|
||||
if metrics != nil || scopes != nil {
|
||||
if metrics == nil {
|
||||
metrics = make([]string, 0, len(jd))
|
||||
for k := range jd {
|
||||
metrics = append(metrics, k)
|
||||
}
|
||||
}
|
||||
|
||||
res := schema.JobData{}
|
||||
for _, metric := range metrics {
|
||||
if perscope, ok := jd[metric]; ok {
|
||||
if len(perscope) > 1 {
|
||||
subset := make(map[schema.MetricScope]*schema.JobMetric)
|
||||
for _, scope := range scopes {
|
||||
if jm, ok := perscope[scope]; ok {
|
||||
subset[scope] = jm
|
||||
}
|
||||
}
|
||||
|
||||
if len(subset) > 0 {
|
||||
perscope = subset
|
||||
}
|
||||
}
|
||||
|
||||
res[metric] = perscope
|
||||
}
|
||||
}
|
||||
jd = res
|
||||
}
|
||||
size = jd.Size()
|
||||
}
|
||||
|
||||
ttl = 5 * time.Hour
|
||||
if job.State == schema.JobStateRunning {
|
||||
ttl = 2 * time.Minute
|
||||
}
|
||||
|
||||
prepareJobData(job, jd, scopes)
|
||||
|
||||
return jd, ttl, size
|
||||
})
|
||||
|
||||
if err, ok := data.(error); ok {
|
||||
log.Error("Error in returned dataset")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data.(schema.JobData), nil
|
||||
}
|
||||
|
||||
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
|
||||
func LoadAverages(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
data [][]schema.Float,
|
||||
ctx context.Context) error {
|
||||
|
||||
if job.State != schema.JobStateRunning && useArchive {
|
||||
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
|
||||
}
|
||||
|
||||
repo, ok := metricDataRepos[job.Cluster]
|
||||
if !ok {
|
||||
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
|
||||
}
|
||||
|
||||
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
|
||||
if err != nil {
|
||||
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
|
||||
return err
|
||||
}
|
||||
|
||||
for i, m := range metrics {
|
||||
nodes, ok := stats[m]
|
||||
if !ok {
|
||||
data[i] = append(data[i], schema.NaN)
|
||||
continue
|
||||
}
|
||||
|
||||
sum := 0.0
|
||||
for _, node := range nodes {
|
||||
sum += node.Avg
|
||||
}
|
||||
data[i] = append(data[i], schema.Float(sum))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for the node/system view. Returns a map of nodes to a map of metrics.
|
||||
func LoadNodeData(
|
||||
cluster string,
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
|
||||
func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
|
||||
var err error
|
||||
repo, ok := metricDataRepos[cluster]
|
||||
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, m.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
log.Warnf("partial error: %s", err.Error())
|
||||
} else {
|
||||
log.Error("Error while loading node data from metric repository")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if data == nil {
|
||||
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func cacheKey(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope) string {
|
||||
|
||||
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
|
||||
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]",
|
||||
job.ID, job.State, metrics, scopes)
|
||||
}
|
||||
|
||||
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
|
||||
// to be available at the scope 'node'. If a job has a lot of nodes,
|
||||
// statisticsSeries should be available so that a min/mean/max Graph can be
|
||||
// used instead of a lot of single lines.
|
||||
func prepareJobData(
|
||||
job *schema.Job,
|
||||
jobData schema.JobData,
|
||||
scopes []schema.MetricScope) {
|
||||
|
||||
const maxSeriesSize int = 15
|
||||
for _, scopes := range jobData {
|
||||
for _, jm := range scopes {
|
||||
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
|
||||
continue
|
||||
}
|
||||
|
||||
jm.AddStatisticsSeries()
|
||||
}
|
||||
}
|
||||
|
||||
nodeScopeRequested := false
|
||||
for _, scope := range scopes {
|
||||
if scope == schema.MetricScopeNode {
|
||||
nodeScopeRequested = true
|
||||
}
|
||||
}
|
||||
|
||||
if nodeScopeRequested {
|
||||
jobData.AddNodeScope("flops_any")
|
||||
jobData.AddNodeScope("mem_bw")
|
||||
}
|
||||
}
|
||||
|
||||
// Writes a running job to the job-archive
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
|
||||
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
// TODO: Talk about this! What resolutions to store data at...
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
if job.NumNodes <= 8 {
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
|
||||
jobData, err := LoadData(job, allMetrics, scopes, ctx)
|
||||
if err != nil {
|
||||
log.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobMeta := &schema.JobMeta{
|
||||
BaseJob: job.BaseJob,
|
||||
StartTime: job.StartTime.Unix(),
|
||||
Statistics: make(map[string]schema.JobStatistics),
|
||||
}
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// TODO/FIXME: Calc average for non-node metrics as well!
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
jobMeta.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
},
|
||||
Avg: avg / float64(job.NumNodes),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
// If the file based archive is disabled,
|
||||
// only return the JobMeta structure as the
|
||||
// statistics in there are needed.
|
||||
if !useArchive {
|
||||
return jobMeta, nil
|
||||
}
|
||||
|
||||
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
|
||||
return repo, err
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
@@ -166,10 +167,10 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
|
||||
var rt http.RoundTripper = nil
|
||||
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
|
||||
prom_pw := promcfg.Secret(prom_pw)
|
||||
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper)
|
||||
rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
|
||||
} else {
|
||||
if config.Username != "" {
|
||||
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.")
|
||||
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
|
||||
}
|
||||
}
|
||||
// init client
|
||||
@@ -204,8 +205,8 @@ func (pdb *PrometheusDataRepository) FormatQuery(
|
||||
metric string,
|
||||
scope schema.MetricScope,
|
||||
nodes []string,
|
||||
cluster string) (string, error) {
|
||||
|
||||
cluster string,
|
||||
) (string, error) {
|
||||
args := PromQLArgs{}
|
||||
if len(nodes) > 0 {
|
||||
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
|
||||
@@ -233,12 +234,13 @@ func (pdb *PrometheusDataRepository) RowToSeries(
|
||||
from time.Time,
|
||||
step int64,
|
||||
steps int64,
|
||||
row *promm.SampleStream) schema.Series {
|
||||
row *promm.SampleStream,
|
||||
) schema.Series {
|
||||
ts := from.Unix()
|
||||
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
|
||||
// init array of expected length with NaN
|
||||
values := make([]schema.Float, steps+1)
|
||||
for i, _ := range values {
|
||||
for i := range values {
|
||||
values[i] = schema.NaN
|
||||
}
|
||||
// copy recorded values from prom sample pair
|
||||
@@ -263,8 +265,9 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
) (schema.JobData, error) {
|
||||
// TODO respect requested scope
|
||||
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
|
||||
scopes = append(scopes, schema.MetricScopeNode)
|
||||
@@ -306,7 +309,6 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
|
||||
return nil, errors.New("Prometheus query error")
|
||||
@@ -335,7 +337,7 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
pdb.RowToSeries(from, step, steps, row))
|
||||
}
|
||||
// only add metric if at least one host returned data
|
||||
if !ok && len(jobMetric.Series) > 0{
|
||||
if !ok && len(jobMetric.Series) > 0 {
|
||||
jobData[metric][scope] = jobMetric
|
||||
}
|
||||
// sort by hostname to get uniform coloring
|
||||
@@ -351,12 +353,12 @@ func (pdb *PrometheusDataRepository) LoadData(
|
||||
func (pdb *PrometheusDataRepository) LoadStats(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
|
||||
ctx context.Context,
|
||||
) (map[string]map[string]schema.MetricStatistics, error) {
|
||||
// map of metrics of nodes of stats
|
||||
stats := map[string]map[string]schema.MetricStatistics{}
|
||||
|
||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
||||
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job for stats")
|
||||
return nil, err
|
||||
@@ -376,7 +378,8 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
metrics, nodes []string,
|
||||
scopes []schema.MetricScope,
|
||||
from, to time.Time,
|
||||
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
ctx context.Context,
|
||||
) (map[string]map[string][]*schema.JobMetric, error) {
|
||||
t0 := time.Now()
|
||||
// Map of hosts of metrics of value slices
|
||||
data := make(map[string]map[string][]*schema.JobMetric)
|
||||
@@ -411,7 +414,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
Step: time.Duration(metricConfig.Timestep * 1e9),
|
||||
}
|
||||
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
|
||||
return nil, errors.New("Prometheus query error")
|
||||
@@ -445,3 +447,21 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
|
||||
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (pdb *PrometheusDataRepository) LoadNodeListData(
|
||||
cluster, subCluster, nodeFilter string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
|
||||
var totalNodes int = 0
|
||||
var hasNextPage bool = false
|
||||
// TODO : Implement to be used in NodeList-View
|
||||
log.Infof("LoadNodeListData unimplemented for PrometheusDataRepository, Args: cluster %s, metrics %v, nodeFilter %v, scopes %v", cluster, metrics, nodeFilter, scopes)
|
||||
|
||||
return nil, totalNodes, hasNextPage, errors.New("METRICDATA/INFLUXV2 > unimplemented for PrometheusDataRepository")
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -9,10 +9,11 @@ import (
|
||||
"encoding/json"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
)
|
||||
|
||||
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
@@ -27,9 +28,10 @@ func (tmdr *TestMetricDataRepository) LoadData(
|
||||
job *schema.Job,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context) (schema.JobData, error) {
|
||||
ctx context.Context,
|
||||
resolution int) (schema.JobData, error) {
|
||||
|
||||
return TestLoadDataCallback(job, metrics, scopes, ctx)
|
||||
return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
|
||||
}
|
||||
|
||||
func (tmdr *TestMetricDataRepository) LoadStats(
|
||||
@@ -48,3 +50,62 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
|
||||
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
func (tmdr *TestMetricDataRepository) LoadNodeListData(
|
||||
cluster, subCluster, nodeFilter string,
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
page *model.PageRequest,
|
||||
ctx context.Context,
|
||||
) (map[string]schema.JobData, int, bool, error) {
|
||||
|
||||
panic("TODO")
|
||||
}
|
||||
|
||||
func DeepCopy(jd_temp schema.JobData) schema.JobData {
|
||||
var jd schema.JobData
|
||||
|
||||
jd = make(schema.JobData, len(jd_temp))
|
||||
for k, v := range jd_temp {
|
||||
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
|
||||
for k_, v_ := range v {
|
||||
jd[k][k_] = new(schema.JobMetric)
|
||||
jd[k][k_].Series = make([]schema.Series, len(v_.Series))
|
||||
for i := 0; i < len(v_.Series); i += 1 {
|
||||
jd[k][k_].Series[i].Data = make([]schema.Float, len(v_.Series[i].Data))
|
||||
copy(jd[k][k_].Series[i].Data, v_.Series[i].Data)
|
||||
jd[k][k_].Series[i].Hostname = v_.Series[i].Hostname
|
||||
jd[k][k_].Series[i].Id = v_.Series[i].Id
|
||||
jd[k][k_].Series[i].Statistics.Avg = v_.Series[i].Statistics.Avg
|
||||
jd[k][k_].Series[i].Statistics.Min = v_.Series[i].Statistics.Min
|
||||
jd[k][k_].Series[i].Statistics.Max = v_.Series[i].Statistics.Max
|
||||
}
|
||||
jd[k][k_].Timestep = v_.Timestep
|
||||
jd[k][k_].Unit.Base = v_.Unit.Base
|
||||
jd[k][k_].Unit.Prefix = v_.Unit.Prefix
|
||||
if v_.StatisticsSeries != nil {
|
||||
// Init Slices
|
||||
jd[k][k_].StatisticsSeries = new(schema.StatsSeries)
|
||||
jd[k][k_].StatisticsSeries.Max = make([]schema.Float, len(v_.StatisticsSeries.Max))
|
||||
jd[k][k_].StatisticsSeries.Min = make([]schema.Float, len(v_.StatisticsSeries.Min))
|
||||
jd[k][k_].StatisticsSeries.Median = make([]schema.Float, len(v_.StatisticsSeries.Median))
|
||||
jd[k][k_].StatisticsSeries.Mean = make([]schema.Float, len(v_.StatisticsSeries.Mean))
|
||||
// Copy Data
|
||||
copy(jd[k][k_].StatisticsSeries.Max, v_.StatisticsSeries.Max)
|
||||
copy(jd[k][k_].StatisticsSeries.Min, v_.StatisticsSeries.Min)
|
||||
copy(jd[k][k_].StatisticsSeries.Median, v_.StatisticsSeries.Median)
|
||||
copy(jd[k][k_].StatisticsSeries.Mean, v_.StatisticsSeries.Mean)
|
||||
// Handle Percentiles
|
||||
for k__, v__ := range v_.StatisticsSeries.Percentiles {
|
||||
jd[k][k_].StatisticsSeries.Percentiles[k__] = make([]schema.Float, len(v__))
|
||||
copy(jd[k][k_].StatisticsSeries.Percentiles[k__], v__)
|
||||
}
|
||||
} else {
|
||||
jd[k][k_].StatisticsSeries = v_.StatisticsSeries
|
||||
}
|
||||
}
|
||||
}
|
||||
return jd
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
@@ -29,14 +29,10 @@ var (
|
||||
)
|
||||
|
||||
type JobRepository struct {
|
||||
DB *sqlx.DB
|
||||
driver string
|
||||
|
||||
DB *sqlx.DB
|
||||
stmtCache *sq.StmtCache
|
||||
cache *lrucache.Cache
|
||||
|
||||
archiveChannel chan *schema.Job
|
||||
archivePending sync.WaitGroup
|
||||
driver string
|
||||
}
|
||||
|
||||
func GetJobRepository() *JobRepository {
|
||||
@@ -47,47 +43,48 @@ func GetJobRepository() *JobRepository {
|
||||
DB: db.DB,
|
||||
driver: db.Driver,
|
||||
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
archiveChannel: make(chan *schema.Job, 128),
|
||||
stmtCache: sq.NewStmtCache(db.DB),
|
||||
cache: lrucache.New(1024 * 1024),
|
||||
}
|
||||
// start archiving worker
|
||||
go jobRepoInstance.archivingWorker()
|
||||
})
|
||||
return jobRepoInstance
|
||||
}
|
||||
|
||||
var jobColumns []string = []string{
|
||||
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
|
||||
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
|
||||
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
|
||||
"job.duration", "job.walltime", "job.resources", // "job.meta_data",
|
||||
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
|
||||
}
|
||||
|
||||
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
|
||||
job := &schema.Job{}
|
||||
|
||||
if err := row.Scan(
|
||||
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
|
||||
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
|
||||
&job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil {
|
||||
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
|
||||
log.Warnf("Error while scanning rows (Job): %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
|
||||
log.Warn("Error while unmarhsaling raw resources json")
|
||||
log.Warn("Error while unmarshaling raw resources json")
|
||||
return nil, err
|
||||
}
|
||||
job.RawResources = nil
|
||||
|
||||
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
|
||||
// return nil, err
|
||||
// }
|
||||
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
|
||||
log.Warnf("Error while unmarshaling raw footprint json: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
job.RawFootprint = nil
|
||||
|
||||
job.StartTime = time.Unix(job.StartTimeUnix, 0)
|
||||
if job.Duration == 0 && job.State == schema.JobStateRunning {
|
||||
// Always ensure accurate duration for running jobs
|
||||
if job.State == schema.JobStateRunning {
|
||||
job.Duration = int32(time.Since(job.StartTime).Seconds())
|
||||
}
|
||||
|
||||
job.RawResources = nil
|
||||
return job, nil
|
||||
}
|
||||
|
||||
@@ -206,239 +203,84 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
|
||||
return err
|
||||
}
|
||||
|
||||
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil {
|
||||
if _, err = sq.Update("job").
|
||||
Set("meta_data", job.RawMetaData).
|
||||
Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
|
||||
return nil
|
||||
return archive.UpdateMetadata(job, job.MetaData)
|
||||
}
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) Find(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64) (*schema.Job, error) {
|
||||
|
||||
func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
cachekey := fmt.Sprintf("footprint:%d", job.ID)
|
||||
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||
job.Footprint = cached.(map[string]float64)
|
||||
return job.Footprint, nil
|
||||
}
|
||||
|
||||
log.Debugf("Timer Find %s", time.Since(start))
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindAll(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64) ([]*schema.Job, error) {
|
||||
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
|
||||
log.Warn("Error while scanning for job footprint")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobs := make([]*schema.Job, 0, 10)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
log.Debugf("Timer FindAll %s", time.Since(start))
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// FindById executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindConcurrentJobs(
|
||||
ctx context.Context,
|
||||
job *schema.Job) (*model.JobLinkResultList, error) {
|
||||
if job == nil {
|
||||
if len(job.RawFootprint) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
query = query.Where("cluster = ?", job.Cluster)
|
||||
var startTime int64
|
||||
var stopTime int64
|
||||
|
||||
startTime = job.StartTimeUnix
|
||||
hostname := job.Resources[0].Hostname
|
||||
|
||||
if job.State == schema.JobStateRunning {
|
||||
stopTime = time.Now().Unix()
|
||||
} else {
|
||||
stopTime = startTime + int64(job.Duration)
|
||||
}
|
||||
|
||||
// Add 200s overlap for jobs start time at the end
|
||||
startTimeTail := startTime + 10
|
||||
stopTimeTail := stopTime - 200
|
||||
startTimeFront := startTime + 200
|
||||
|
||||
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTime)
|
||||
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
|
||||
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
|
||||
log.Warn("Error while unmarshaling raw footprint json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
items := make([]*model.JobLink, 0, 10)
|
||||
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
|
||||
r.cache.Put(cachekey, job.Footprint, len(job.Footprint), 24*time.Hour)
|
||||
log.Debugf("Timer FetchFootprint %s", time.Since(start))
|
||||
return job.Footprint, nil
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
|
||||
start := time.Now()
|
||||
cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
|
||||
if cached := r.cache.Get(cachekey, nil); cached != nil {
|
||||
job.EnergyFootprint = cached.(map[string]float64)
|
||||
return job.EnergyFootprint, nil
|
||||
}
|
||||
|
||||
rows, err = queryRunning.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
|
||||
log.Warn("Error while scanning for job energy_footprint")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
if len(job.RawEnergyFootprint) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
cnt := len(items)
|
||||
|
||||
return &model.JobLinkResultList{
|
||||
ListQuery: &queryString,
|
||||
Items: items,
|
||||
Count: &cnt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Start inserts a new job in the table, returning the unique job ID.
|
||||
// Statistics are not transfered!
|
||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
||||
if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
|
||||
log.Warn("Error while unmarshaling raw energy footprint json")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
||||
}
|
||||
|
||||
res, err := r.DB.NamedExec(`INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, `+"`partition`"+`, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data
|
||||
);`, job)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return res.LastInsertId()
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) Stop(
|
||||
jobId int64,
|
||||
duration int32,
|
||||
state schema.JobState,
|
||||
monitoringStatus int32) (err error) {
|
||||
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
|
||||
log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
|
||||
return job.EnergyFootprint, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
var cnt int
|
||||
qs := fmt.Sprintf("SELECT count(*) FROM job WHERE job.start_time < %d", startTime)
|
||||
err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement
|
||||
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime)
|
||||
q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
|
||||
q.RunWith(r.DB).QueryRow().Scan(cnt)
|
||||
qd := sq.Delete("job").Where("job.start_time < ?", startTime)
|
||||
_, err := qd.RunWith(r.DB).Exec()
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err)
|
||||
s, _, _ := qd.ToSql()
|
||||
log.Errorf(" DeleteJobsBefore(%d) with %s: error %#v", startTime, s, err)
|
||||
} else {
|
||||
log.Debugf("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
|
||||
}
|
||||
@@ -446,127 +288,34 @@ func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
|
||||
}
|
||||
|
||||
func (r *JobRepository) DeleteJobById(id int64) error {
|
||||
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id)
|
||||
qd := sq.Delete("job").Where("job.id = ?", id)
|
||||
_, err := qd.RunWith(r.DB).Exec()
|
||||
|
||||
if err != nil {
|
||||
log.Errorf("DeleteJobById(%d): error %#v", id, err)
|
||||
s, _, _ := qd.ToSql()
|
||||
log.Errorf("DeleteJobById(%d) with %s : error %#v", id, s, err)
|
||||
} else {
|
||||
log.Debugf("DeleteJobById(%d): Success", id)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", job)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) MarkArchived(
|
||||
jobId int64,
|
||||
monitoringStatus int32,
|
||||
metricStats map[string]schema.JobStatistics) error {
|
||||
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
for metric, stats := range metricStats {
|
||||
switch metric {
|
||||
case "flops_any":
|
||||
stmt = stmt.Set("flops_any_avg", stats.Avg)
|
||||
case "mem_used":
|
||||
stmt = stmt.Set("mem_used_max", stats.Max)
|
||||
case "mem_bw":
|
||||
stmt = stmt.Set("mem_bw_avg", stats.Avg)
|
||||
case "load":
|
||||
case "cpu_load":
|
||||
stmt = stmt.Set("load_avg", stats.Avg)
|
||||
case "net_bw":
|
||||
stmt = stmt.Set("net_bw_avg", stats.Avg)
|
||||
case "file_bw":
|
||||
stmt = stmt.Set("file_bw_avg", stats.Avg)
|
||||
default:
|
||||
log.Debugf("MarkArchived() Metric '%v' unknown", metric)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
log.Warn("Error while marking job as archived")
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Archiving worker thread
|
||||
func (r *JobRepository) archivingWorker() {
|
||||
for {
|
||||
select {
|
||||
case job, ok := <-r.archiveChannel:
|
||||
if !ok {
|
||||
break
|
||||
}
|
||||
start := time.Now()
|
||||
// not using meta data, called to load JobMeta into Cache?
|
||||
// will fail if job meta not in repository
|
||||
if _, err := r.FetchMetadata(job); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||
// TODO: Maybe use context with cancel/timeout here
|
||||
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
|
||||
if err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
continue
|
||||
}
|
||||
|
||||
// Update the jobs database entry one last time:
|
||||
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
|
||||
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
|
||||
continue
|
||||
}
|
||||
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||
log.Printf("archiving job (dbid: %d) successful", job.ID)
|
||||
r.archivePending.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Trigger async archiving
|
||||
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
|
||||
r.archivePending.Add(1)
|
||||
r.archiveChannel <- job
|
||||
}
|
||||
|
||||
// Wait for background thread to finish pending archiving operations
|
||||
func (r *JobRepository) WaitForArchiving() {
|
||||
// close channel and wait for worker to process remaining jobs
|
||||
r.archivePending.Wait()
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
|
||||
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
|
||||
return searchterm, "", "", ""
|
||||
} else { // Has to have letters and logged-in user for other guesses
|
||||
if user != nil {
|
||||
// Find username in jobs (match)
|
||||
uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false)
|
||||
// Find username by username in job table (match)
|
||||
uresult, _ := r.FindColumnValue(user, searchterm, "job", "hpc_user", "hpc_user", false)
|
||||
if uresult != "" {
|
||||
return "", uresult, "", ""
|
||||
}
|
||||
// Find username by name (like)
|
||||
nresult, _ := r.FindColumnValue(user, searchterm, "user", "username", "name", true)
|
||||
// Find username by real name in hpc_user table (like)
|
||||
nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true)
|
||||
if nresult != "" {
|
||||
return "", nresult, "", ""
|
||||
}
|
||||
// Find projectId in jobs (match)
|
||||
// Find projectId by projectId in job table (match)
|
||||
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
|
||||
if presult != "" {
|
||||
return "", "", presult, ""
|
||||
@@ -577,8 +326,10 @@ func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm
|
||||
}
|
||||
}
|
||||
|
||||
var ErrNotFound = errors.New("no such jobname, project or user")
|
||||
var ErrForbidden = errors.New("not authorized")
|
||||
var (
|
||||
ErrNotFound = errors.New("no such jobname, project or user")
|
||||
ErrForbidden = errors.New("not authorized")
|
||||
)
|
||||
|
||||
func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) {
|
||||
compareStr := " = ?"
|
||||
@@ -646,7 +397,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
|
||||
start := time.Now()
|
||||
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
|
||||
parts := []string{}
|
||||
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
|
||||
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
|
||||
return nil, 0, 1000
|
||||
}
|
||||
|
||||
@@ -662,7 +413,6 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
|
||||
// AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host.
|
||||
// Hosts with zero jobs running on them will not show up!
|
||||
func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
|
||||
|
||||
start := time.Now()
|
||||
subclusters := make(map[string]map[string]int)
|
||||
rows, err := sq.Select("resources", "subcluster").From("job").
|
||||
@@ -704,8 +454,8 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
|
||||
return subclusters, nil
|
||||
}
|
||||
|
||||
// FIXME: Set duration to requested walltime?
|
||||
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||
|
||||
start := time.Now()
|
||||
res, err := sq.Update("job").
|
||||
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
|
||||
@@ -733,8 +483,47 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
|
||||
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
|
||||
query := sq.Select(jobColumns...).From("job").
|
||||
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
|
||||
Where("job.job_state = 'running'").
|
||||
Where("job.duration > 600")
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobs := make([]*schema.Job, 0, 50)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
|
||||
log.Infof("Return job count %d", len(jobs))
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateDuration() error {
|
||||
stmnt := sq.Update("job").
|
||||
Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
|
||||
Where("job_state = 'running'")
|
||||
|
||||
_, err := stmnt.RunWith(r.stmtCache).Exec()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
|
||||
var query sq.SelectBuilder
|
||||
|
||||
if startTimeBegin == startTimeEnd || startTimeBegin > startTimeEnd {
|
||||
@@ -772,27 +561,109 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
|
||||
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
|
||||
) VALUES (
|
||||
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
|
||||
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
|
||||
);`
|
||||
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", job)
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while NamedJobInsert")
|
||||
return 0, err
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return id, nil
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
}
|
||||
|
||||
func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
|
||||
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) MarkArchived(
|
||||
stmt sq.UpdateBuilder,
|
||||
monitoringStatus int32,
|
||||
) sq.UpdateBuilder {
|
||||
return stmt.Set("monitoring_status", monitoringStatus)
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateEnergy(
|
||||
stmt sq.UpdateBuilder,
|
||||
jobMeta *schema.JobMeta,
|
||||
) (sq.UpdateBuilder, error) {
|
||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return stmt, err
|
||||
}
|
||||
energyFootprint := make(map[string]float64)
|
||||
var totalEnergy float64
|
||||
var energy float64
|
||||
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
|
||||
// FIXME: Needs sum as stats type
|
||||
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
|
||||
energy = math.Round(((LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
|
||||
}
|
||||
} else {
|
||||
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||
}
|
||||
|
||||
energyFootprint[fp] = energy
|
||||
totalEnergy += energy
|
||||
}
|
||||
|
||||
var rawFootprint []byte
|
||||
if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
|
||||
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return stmt, err
|
||||
}
|
||||
|
||||
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) UpdateFootprint(
|
||||
stmt sq.UpdateBuilder,
|
||||
jobMeta *schema.JobMeta,
|
||||
) (sq.UpdateBuilder, error) {
|
||||
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
log.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return stmt, err
|
||||
}
|
||||
footprint := make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
var statType string
|
||||
for _, gm := range archive.GlobalMetricList {
|
||||
if gm.Name == fp {
|
||||
statType = gm.Footprint
|
||||
}
|
||||
}
|
||||
|
||||
if statType != "avg" && statType != "min" && statType != "max" {
|
||||
log.Warnf("unknown statType for footprint update: %s", statType)
|
||||
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
|
||||
}
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
footprint[name] = LoadJobStat(jobMeta, fp, statType)
|
||||
}
|
||||
|
||||
var rawFootprint []byte
|
||||
if rawFootprint, err = json.Marshal(footprint); err != nil {
|
||||
log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return stmt, err
|
||||
}
|
||||
|
||||
return stmt.Set("footprint", string(rawFootprint)), nil
|
||||
}
|
||||
|
||||
75
internal/repository/jobCreate.go
Normal file
75
internal/repository/jobCreate.go
Normal file
@@ -0,0 +1,75 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||
) VALUES (
|
||||
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||
);`
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
|
||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while NamedJobInsert")
|
||||
return 0, err
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// Start inserts a new job in the table, returning the unique job ID.
|
||||
// Statistics are not transfered!
|
||||
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
|
||||
}
|
||||
|
||||
return r.InsertJob(job)
|
||||
}
|
||||
|
||||
// Stop updates the job with the database id jobId using the provided arguments.
|
||||
func (r *JobRepository) Stop(
|
||||
jobId int64,
|
||||
duration int32,
|
||||
state schema.JobState,
|
||||
monitoringStatus int32,
|
||||
) (err error) {
|
||||
stmt := sq.Update("job").
|
||||
Set("job_state", state).
|
||||
Set("duration", duration).
|
||||
Set("monitoring_status", monitoringStatus).
|
||||
Where("job.id = ?", jobId)
|
||||
|
||||
_, err = stmt.RunWith(r.stmtCache).Exec()
|
||||
return
|
||||
}
|
||||
261
internal/repository/jobFind.go
Normal file
261
internal/repository/jobFind.go
Normal file
@@ -0,0 +1,261 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) Find(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) (*schema.Job, error) {
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
|
||||
|
||||
log.Debugf("Timer Find %s", time.Since(start))
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// Find executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the batch job id, the cluster name,
|
||||
// and the start time of the job in UNIX epoch time seconds.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindAll(
|
||||
jobId *int64,
|
||||
cluster *string,
|
||||
startTime *int64,
|
||||
) ([]*schema.Job, error) {
|
||||
start := time.Now()
|
||||
q := sq.Select(jobColumns...).From("job").
|
||||
Where("job.job_id = ?", *jobId)
|
||||
|
||||
if cluster != nil {
|
||||
q = q.Where("job.cluster = ?", *cluster)
|
||||
}
|
||||
if startTime != nil {
|
||||
q = q.Where("job.start_time = ?", *startTime)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
jobs := make([]*schema.Job, 0, 10)
|
||||
for rows.Next() {
|
||||
job, err := scanJob(rows)
|
||||
if err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
jobs = append(jobs, job)
|
||||
}
|
||||
log.Debugf("Timer FindAll %s", time.Since(start))
|
||||
return jobs, nil
|
||||
}
|
||||
|
||||
// FindById executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
|
||||
q, qerr := SecurityCheck(ctx, q)
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByIdWithUser executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id. The user is passed directly,
|
||||
// instead as part of the context.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
|
||||
q, qerr := SecurityCheckWithUser(user, q)
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByIdDirect executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the database id.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").Where("job.id = ?", jobId)
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// FindByJobId executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the slurm id and the clustername.
|
||||
// It returns a pointer to a schema.Job data structure and an error variable.
|
||||
// To check if no job was found test err == sql.ErrNoRows
|
||||
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
|
||||
q := sq.Select(jobColumns...).
|
||||
From("job").
|
||||
Where("job.job_id = ?", jobId).
|
||||
Where("job.cluster = ?", cluster).
|
||||
Where("job.start_time = ?", startTime)
|
||||
|
||||
q, qerr := SecurityCheck(ctx, q)
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
return scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
}
|
||||
|
||||
// IsJobOwner executes a SQL query to find a specific batch job.
|
||||
// The job is queried using the slurm id,a username and the cluster.
|
||||
// It returns a bool.
|
||||
// If job was found, user is owner: test err != sql.ErrNoRows
|
||||
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
|
||||
q := sq.Select("id").
|
||||
From("job").
|
||||
Where("job.job_id = ?", jobId).
|
||||
Where("job.hpc_user = ?", user).
|
||||
Where("job.cluster = ?", cluster).
|
||||
Where("job.start_time = ?", startTime)
|
||||
|
||||
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
|
||||
return err != sql.ErrNoRows
|
||||
}
|
||||
|
||||
func (r *JobRepository) FindConcurrentJobs(
|
||||
ctx context.Context,
|
||||
job *schema.Job,
|
||||
) (*model.JobLinkResultList, error) {
|
||||
if job == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
query = query.Where("cluster = ?", job.Cluster)
|
||||
var startTime int64
|
||||
var stopTime int64
|
||||
|
||||
startTime = job.StartTimeUnix
|
||||
hostname := job.Resources[0].Hostname
|
||||
|
||||
if job.State == schema.JobStateRunning {
|
||||
stopTime = time.Now().Unix()
|
||||
} else {
|
||||
stopTime = startTime + int64(job.Duration)
|
||||
}
|
||||
|
||||
// Add 200s overlap for jobs start time at the end
|
||||
startTimeTail := startTime + 10
|
||||
stopTimeTail := stopTime - 200
|
||||
startTimeFront := startTime + 200
|
||||
|
||||
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTime)
|
||||
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
|
||||
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
|
||||
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
|
||||
|
||||
rows, err := query.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
items := make([]*model.JobLink, 0, 10)
|
||||
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
rows, err = queryRunning.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running query: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var id, jobId, startTime sql.NullInt64
|
||||
|
||||
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
|
||||
items = append(items,
|
||||
&model.JobLink{
|
||||
ID: fmt.Sprint(id.Int64),
|
||||
JobID: int(jobId.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
cnt := len(items)
|
||||
|
||||
return &model.JobLinkResultList{
|
||||
ListQuery: &queryString,
|
||||
Items: items,
|
||||
Count: &cnt,
|
||||
}, nil
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -22,8 +22,8 @@ func (r *JobRepository) QueryJobs(
|
||||
ctx context.Context,
|
||||
filters []*model.JobFilter,
|
||||
page *model.PageRequest,
|
||||
order *model.OrderByInput) ([]*schema.Job, error) {
|
||||
|
||||
order *model.OrderByInput,
|
||||
) ([]*schema.Job, error) {
|
||||
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
@@ -31,14 +31,28 @@ func (r *JobRepository) QueryJobs(
|
||||
|
||||
if order != nil {
|
||||
field := toSnakeCase(order.Field)
|
||||
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
|
||||
if order.Type == "col" {
|
||||
// "col": Fixed column name query
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
|
||||
}
|
||||
} else {
|
||||
// "foot": Order by footprint JSON field values
|
||||
// Verify and Search Only in Valid Jsons
|
||||
query = query.Where("JSON_VALID(meta_data)")
|
||||
switch order.Order {
|
||||
case model.SortDirectionEnumAsc:
|
||||
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
|
||||
case model.SortDirectionEnumDesc:
|
||||
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
|
||||
default:
|
||||
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,9 +87,10 @@ func (r *JobRepository) QueryJobs(
|
||||
|
||||
func (r *JobRepository) CountJobs(
|
||||
ctx context.Context,
|
||||
filters []*model.JobFilter) (int, error) {
|
||||
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
|
||||
filters []*model.JobFilter,
|
||||
) (int, error) {
|
||||
// DISTICT count for tags filters, does not affect other queries
|
||||
query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
|
||||
if qerr != nil {
|
||||
return 0, qerr
|
||||
}
|
||||
@@ -92,35 +107,43 @@ func (r *JobRepository) CountJobs(
|
||||
return count, nil
|
||||
}
|
||||
|
||||
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||
user := GetUserFromContext(ctx)
|
||||
func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||
if user == nil {
|
||||
var qnil sq.SelectBuilder
|
||||
return qnil, fmt.Errorf("user context is nil!")
|
||||
} else if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleApi}) { // Admin & Co. : All jobs
|
||||
return qnil, fmt.Errorf("user context is nil")
|
||||
}
|
||||
|
||||
switch {
|
||||
case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
|
||||
return query, nil
|
||||
} else if user.HasRole(schema.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs
|
||||
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
|
||||
return query, nil
|
||||
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
|
||||
if len(user.Projects) != 0 {
|
||||
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
|
||||
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
|
||||
} else {
|
||||
log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
|
||||
return query.Where("job.user = ?", user.Username), nil
|
||||
return query.Where("job.hpc_user = ?", user.Username), nil
|
||||
}
|
||||
} else if user.HasRole(schema.RoleUser) { // User : Only personal jobs
|
||||
return query.Where("job.user = ?", user.Username), nil
|
||||
} else {
|
||||
// Shortterm compatibility: Return User-Query if no roles:
|
||||
return query.Where("job.user = ?", user.Username), nil
|
||||
// // On the longterm: Return Error instead of fallback:
|
||||
// var qnil sq.SelectBuilder
|
||||
// return qnil, fmt.Errorf("user '%s' with unknown roles [%#v]", user.Username, user.Roles)
|
||||
case user.HasRole(schema.RoleUser): // User : Only personal jobs
|
||||
return query.Where("job.hpc_user = ?", user.Username), nil
|
||||
default: // No known Role, return error
|
||||
var qnil sq.SelectBuilder
|
||||
return qnil, fmt.Errorf("user has no or unknown roles")
|
||||
}
|
||||
}
|
||||
|
||||
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
|
||||
user := GetUserFromContext(ctx)
|
||||
|
||||
return SecurityCheckWithUser(user, query)
|
||||
}
|
||||
|
||||
// Build a sq.SelectBuilder out of a schema.JobFilter.
|
||||
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if filter.Tags != nil {
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
|
||||
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
|
||||
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
|
||||
}
|
||||
if filter.JobID != nil {
|
||||
query = buildStringCondition("job.job_id", filter.JobID, query)
|
||||
@@ -129,26 +152,25 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
||||
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
|
||||
}
|
||||
if filter.User != nil {
|
||||
query = buildStringCondition("job.user", filter.User, query)
|
||||
query = buildStringCondition("job.hpc_user", filter.User, query)
|
||||
}
|
||||
if filter.Project != nil {
|
||||
query = buildStringCondition("job.project", filter.Project, query)
|
||||
}
|
||||
if filter.JobName != nil {
|
||||
query = buildStringCondition("job.meta_data", filter.JobName, query)
|
||||
query = buildMetaJsonCondition("jobName", filter.JobName, query)
|
||||
}
|
||||
if filter.Cluster != nil {
|
||||
query = buildStringCondition("job.cluster", filter.Cluster, query)
|
||||
}
|
||||
if filter.Partition != nil {
|
||||
query = buildStringCondition("job.partition", filter.Partition, query)
|
||||
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
|
||||
}
|
||||
if filter.StartTime != nil {
|
||||
query = buildTimeCondition("job.start_time", filter.StartTime, query)
|
||||
}
|
||||
if filter.Duration != nil {
|
||||
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
|
||||
query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
|
||||
query = buildIntCondition("job.duration", filter.Duration, query)
|
||||
}
|
||||
if filter.MinRunningFor != nil {
|
||||
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
|
||||
@@ -174,17 +196,13 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
||||
if filter.Node != nil {
|
||||
query = buildStringCondition("job.resources", filter.Node, query)
|
||||
}
|
||||
if filter.FlopsAnyAvg != nil {
|
||||
query = buildFloatCondition("job.flops_any_avg", filter.FlopsAnyAvg, query)
|
||||
if filter.Energy != nil {
|
||||
query = buildFloatCondition("job.energy", filter.Energy, query)
|
||||
}
|
||||
if filter.MemBwAvg != nil {
|
||||
query = buildFloatCondition("job.mem_bw_avg", filter.MemBwAvg, query)
|
||||
}
|
||||
if filter.LoadAvg != nil {
|
||||
query = buildFloatCondition("job.load_avg", filter.LoadAvg, query)
|
||||
}
|
||||
if filter.MemUsedMax != nil {
|
||||
query = buildFloatCondition("job.mem_used_max", filter.MemUsedMax, query)
|
||||
if filter.MetricStats != nil {
|
||||
for _, ms := range filter.MetricStats {
|
||||
query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
|
||||
}
|
||||
}
|
||||
return query
|
||||
}
|
||||
@@ -193,6 +211,10 @@ func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuild
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
}
|
||||
|
||||
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
}
|
||||
|
||||
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if cond.From != nil && cond.To != nil {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
|
||||
@@ -200,13 +222,32 @@ func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBui
|
||||
return query.Where("? <= "+field, cond.From.Unix())
|
||||
} else if cond.To != nil {
|
||||
return query.Where(field+" <= ?", cond.To.Unix())
|
||||
} else if cond.Range != "" {
|
||||
now := time.Now().Unix()
|
||||
var then int64
|
||||
switch cond.Range {
|
||||
case "last6h":
|
||||
then = now - (60 * 60 * 6)
|
||||
case "last24h":
|
||||
then = now - (60 * 60 * 24)
|
||||
case "last7d":
|
||||
then = now - (60 * 60 * 24 * 7)
|
||||
case "last30d":
|
||||
then = now - (60 * 60 * 24 * 30)
|
||||
default:
|
||||
log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
|
||||
return query
|
||||
}
|
||||
return query.Where(field+" BETWEEN ? AND ?", then, now)
|
||||
} else {
|
||||
return query
|
||||
}
|
||||
}
|
||||
|
||||
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
// Verify and Search Only in Valid Jsons
|
||||
query = query.Where("JSON_VALID(footprint)")
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
|
||||
}
|
||||
|
||||
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
@@ -227,16 +268,38 @@ func buildStringCondition(field string, cond *model.StringInput, query sq.Select
|
||||
}
|
||||
if cond.In != nil {
|
||||
queryElements := make([]string, len(cond.In))
|
||||
for i, val := range cond.In {
|
||||
queryElements[i] = val
|
||||
}
|
||||
copy(queryElements, cond.In)
|
||||
return query.Where(sq.Or{sq.Eq{field: queryElements}})
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
// Verify and Search Only in Valid Jsons
|
||||
query = query.Where("JSON_VALID(meta_data)")
|
||||
// add "AND" Sql query Block for field match
|
||||
if cond.Eq != nil {
|
||||
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") = ?", *cond.Eq)
|
||||
}
|
||||
if cond.Neq != nil {
|
||||
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") != ?", *cond.Neq)
|
||||
}
|
||||
if cond.StartsWith != nil {
|
||||
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
|
||||
}
|
||||
if cond.EndsWith != nil {
|
||||
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint("%", *cond.EndsWith))
|
||||
}
|
||||
if cond.Contains != nil {
|
||||
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
|
||||
}
|
||||
return query
|
||||
}
|
||||
|
||||
var (
|
||||
matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
|
||||
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
|
||||
)
|
||||
|
||||
func toSnakeCase(str string) string {
|
||||
for _, c := range str {
|
||||
@@ -1,13 +1,15 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
@@ -30,7 +32,7 @@ func TestFind(t *testing.T) {
|
||||
func TestFindById(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
job, err := r.FindById(5)
|
||||
job, err := r.FindById(getContext(t), 5)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -45,7 +47,19 @@ func TestFindById(t *testing.T) {
|
||||
func TestGetTags(t *testing.T) {
|
||||
r := setup(t)
|
||||
|
||||
tags, counts, err := r.CountTags(nil)
|
||||
const contextUserKey ContextKey = "user"
|
||||
contextUserValue := &schema.User{
|
||||
Username: "testuser",
|
||||
Projects: make([]string, 0),
|
||||
Roles: []string{"user"},
|
||||
AuthType: 0,
|
||||
AuthSource: 2,
|
||||
}
|
||||
|
||||
ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)
|
||||
|
||||
// Test Tag has Scope "global"
|
||||
tags, counts, err := r.CountTags(GetUserFromContext(ctx))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -16,7 +16,7 @@ import (
|
||||
"github.com/golang-migrate/migrate/v4/source/iofs"
|
||||
)
|
||||
|
||||
const Version uint = 6
|
||||
const Version uint = 8
|
||||
|
||||
//go:embed migrations/*
|
||||
var migrationFiles embed.FS
|
||||
@@ -57,7 +57,7 @@ func checkDBVersion(backend string, db *sql.DB) error {
|
||||
log.Fatalf("unsupported database backend: %s", backend)
|
||||
}
|
||||
|
||||
v, _, err := m.Version()
|
||||
v, dirty, err := m.Version()
|
||||
if err != nil {
|
||||
if err == migrate.ErrNilVersion {
|
||||
log.Warn("Legacy database without version or missing database file!")
|
||||
@@ -68,18 +68,18 @@ func checkDBVersion(backend string, db *sql.DB) error {
|
||||
|
||||
if v < Version {
|
||||
return fmt.Errorf("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
|
||||
} else if v > Version {
|
||||
return fmt.Errorf("unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool", v, Version)
|
||||
}
|
||||
|
||||
if v > Version {
|
||||
return fmt.Errorf("unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool", v, Version)
|
||||
if dirty {
|
||||
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func MigrateDB(backend string, db string) error {
|
||||
var m *migrate.Migrate
|
||||
|
||||
func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) {
|
||||
switch backend {
|
||||
case "sqlite3":
|
||||
d, err := iofs.New(migrationFiles, "migrations/sqlite3")
|
||||
@@ -89,22 +89,39 @@ func MigrateDB(backend string, db string) error {
|
||||
|
||||
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
|
||||
if err != nil {
|
||||
return err
|
||||
return m, err
|
||||
}
|
||||
case "mysql":
|
||||
d, err := iofs.New(migrationFiles, "migrations/mysql")
|
||||
if err != nil {
|
||||
return err
|
||||
return m, err
|
||||
}
|
||||
|
||||
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
|
||||
if err != nil {
|
||||
return err
|
||||
return m, err
|
||||
}
|
||||
default:
|
||||
log.Fatalf("unsupported database backend: %s", backend)
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func MigrateDB(backend string, db string) error {
|
||||
m, err := getMigrateInstance(backend, db)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
v, dirty, err := m.Version()
|
||||
|
||||
log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
|
||||
|
||||
if dirty {
|
||||
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
|
||||
}
|
||||
|
||||
if err := m.Up(); err != nil {
|
||||
if err == migrate.ErrNoChange {
|
||||
log.Info("DB already up to date!")
|
||||
@@ -116,3 +133,35 @@ func MigrateDB(backend string, db string) error {
|
||||
m.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
func RevertDB(backend string, db string) error {
|
||||
m, err := getMigrateInstance(backend, db)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := m.Migrate(Version - 1); err != nil {
|
||||
if err == migrate.ErrNoChange {
|
||||
log.Info("DB already up to date!")
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
m.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
func ForceDB(backend string, db string) error {
|
||||
m, err := getMigrateInstance(backend, db)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := m.Force(int(Version)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
ALTER TABLE tag MODIFY id INTEGER;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -0,0 +1,3 @@
|
||||
SET FOREIGN_KEY_CHECKS = 0;
|
||||
ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT;
|
||||
SET FOREIGN_KEY_CHECKS = 1;
|
||||
@@ -0,0 +1,83 @@
|
||||
ALTER TABLE job DROP energy;
|
||||
ALTER TABLE job DROP energy_footprint;
|
||||
ALTER TABLE job ADD COLUMN flops_any_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_used_max;
|
||||
ALTER TABLE job ADD COLUMN load_avg;
|
||||
ALTER TABLE job ADD COLUMN net_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN net_data_vol_total;
|
||||
ALTER TABLE job ADD COLUMN file_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN file_data_vol_total;
|
||||
|
||||
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
|
||||
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
|
||||
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
|
||||
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
|
||||
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
|
||||
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
|
||||
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
|
||||
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
|
||||
|
||||
ALTER TABLE job DROP footprint;
|
||||
-- Do not use reserved keywords anymore
|
||||
RENAME TABLE hpc_user TO `user`;
|
||||
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
|
||||
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_subcluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_user;
|
||||
DROP INDEX IF EXISTS jobs_user_starttime;
|
||||
DROP INDEX IF EXISTS jobs_user_duration;
|
||||
DROP INDEX IF EXISTS jobs_user_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_project;
|
||||
DROP INDEX IF EXISTS jobs_project_user;
|
||||
DROP INDEX IF EXISTS jobs_project_starttime;
|
||||
DROP INDEX IF EXISTS jobs_project_duration;
|
||||
DROP INDEX IF EXISTS jobs_project_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_starttime;
|
||||
DROP INDEX IF EXISTS jobs_duration;
|
||||
DROP INDEX IF EXISTS jobs_numnodes;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numnodes_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numacc_starttime;
|
||||
DROP INDEX IF EXISTS jobs_energy_starttime;
|
||||
123
internal/repository/migrations/mysql/08_add-footprint.up.sql
Normal file
123
internal/repository/migrations/mysql/08_add-footprint.up.sql
Normal file
@@ -0,0 +1,123 @@
|
||||
DROP INDEX IF EXISTS job_stats ON job;
|
||||
DROP INDEX IF EXISTS job_by_user ON job;
|
||||
DROP INDEX IF EXISTS job_by_starttime ON job;
|
||||
DROP INDEX IF EXISTS job_by_job_id ON job;
|
||||
DROP INDEX IF EXISTS job_list ON job;
|
||||
DROP INDEX IF EXISTS job_list_user ON job;
|
||||
DROP INDEX IF EXISTS job_list_users ON job;
|
||||
DROP INDEX IF EXISTS job_list_users_start ON job;
|
||||
|
||||
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
|
||||
ALTER TABLE job ADD COLUMN energy_footprint JSON;
|
||||
|
||||
ALTER TABLE job ADD COLUMN footprint JSON;
|
||||
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
|
||||
|
||||
-- Do not use reserved keywords anymore
|
||||
RENAME TABLE `user` TO hpc_user;
|
||||
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
|
||||
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
|
||||
|
||||
ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
|
||||
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
|
||||
|
||||
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
|
||||
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
|
||||
|
||||
ALTER TABLE job DROP flops_any_avg;
|
||||
ALTER TABLE job DROP mem_bw_avg;
|
||||
ALTER TABLE job DROP mem_used_max;
|
||||
ALTER TABLE job DROP load_avg;
|
||||
ALTER TABLE job DROP net_bw_avg;
|
||||
ALTER TABLE job DROP net_data_vol_total;
|
||||
ALTER TABLE job DROP file_bw_avg;
|
||||
ALTER TABLE job DROP file_data_vol_total;
|
||||
|
||||
-- Indices for: Single filters, combined filters, sorting, sorting with filters
|
||||
-- Cluster Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
|
||||
-- Cluster+Partition Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||
|
||||
-- Cluster+Partition+Jobstate Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
|
||||
-- Cluster+Partition+Jobstate Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||
-- Cluster+JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||
|
||||
-- User Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
|
||||
-- User Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||
|
||||
-- Project Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||
-- Project Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||
|
||||
-- JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||
-- JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||
|
||||
-- ArrayJob Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||
|
||||
-- Sorting without active filters
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||
|
||||
-- Optimize DB index usage
|
||||
103
internal/repository/migrations/sqlite3/08_add-footprint.down.sql
Normal file
103
internal/repository/migrations/sqlite3/08_add-footprint.down.sql
Normal file
@@ -0,0 +1,103 @@
|
||||
ALTER TABLE job DROP energy;
|
||||
ALTER TABLE job DROP energy_footprint;
|
||||
ALTER TABLE job ADD COLUMN flops_any_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN mem_used_max;
|
||||
ALTER TABLE job ADD COLUMN load_avg;
|
||||
ALTER TABLE job ADD COLUMN net_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN net_data_vol_total;
|
||||
ALTER TABLE job ADD COLUMN file_bw_avg;
|
||||
ALTER TABLE job ADD COLUMN file_data_vol_total;
|
||||
|
||||
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
|
||||
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
|
||||
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
|
||||
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
|
||||
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
|
||||
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
|
||||
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
|
||||
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
|
||||
|
||||
ALTER TABLE job DROP footprint;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_subcluster;
|
||||
DROP INDEX IF EXISTS jobs_cluster_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_energy;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
|
||||
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_user;
|
||||
DROP INDEX IF EXISTS jobs_user_starttime;
|
||||
DROP INDEX IF EXISTS jobs_user_duration;
|
||||
DROP INDEX IF EXISTS jobs_user_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_user_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_user_numacc;
|
||||
DROP INDEX IF EXISTS jobs_user_energy;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_project;
|
||||
DROP INDEX IF EXISTS jobs_project_user;
|
||||
DROP INDEX IF EXISTS jobs_project_starttime;
|
||||
DROP INDEX IF EXISTS jobs_project_duration;
|
||||
DROP INDEX IF EXISTS jobs_project_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_project_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_project_numacc;
|
||||
DROP INDEX IF EXISTS jobs_project_energy;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_jobstate;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_user;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_project;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_starttime;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_duration;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_jobstate_numacc;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
|
||||
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_starttime;
|
||||
DROP INDEX IF EXISTS jobs_duration;
|
||||
DROP INDEX IF EXISTS jobs_numnodes;
|
||||
DROP INDEX IF EXISTS jobs_numhwthreads;
|
||||
DROP INDEX IF EXISTS jobs_numacc;
|
||||
DROP INDEX IF EXISTS jobs_energy;
|
||||
|
||||
DROP INDEX IF EXISTS jobs_duration_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numnodes_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numhwthreads_starttime;
|
||||
DROP INDEX IF EXISTS jobs_numacc_starttime;
|
||||
DROP INDEX IF EXISTS jobs_energy_starttime;
|
||||
142
internal/repository/migrations/sqlite3/08_add-footprint.up.sql
Normal file
142
internal/repository/migrations/sqlite3/08_add-footprint.up.sql
Normal file
@@ -0,0 +1,142 @@
|
||||
DROP INDEX IF EXISTS job_stats;
|
||||
DROP INDEX IF EXISTS job_by_user;
|
||||
DROP INDEX IF EXISTS job_by_starttime;
|
||||
DROP INDEX IF EXISTS job_by_job_id;
|
||||
DROP INDEX IF EXISTS job_list;
|
||||
DROP INDEX IF EXISTS job_list_user;
|
||||
DROP INDEX IF EXISTS job_list_users;
|
||||
DROP INDEX IF EXISTS job_list_users_start;
|
||||
|
||||
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
|
||||
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
|
||||
|
||||
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
|
||||
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
|
||||
|
||||
-- Do not use reserved keywords anymore
|
||||
ALTER TABLE "user" RENAME TO hpc_user;
|
||||
ALTER TABLE job RENAME COLUMN "user" TO hpc_user;
|
||||
ALTER TABLE job RENAME COLUMN "partition" TO cluster_partition;
|
||||
|
||||
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
|
||||
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
|
||||
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
|
||||
|
||||
ALTER TABLE job DROP flops_any_avg;
|
||||
ALTER TABLE job DROP mem_bw_avg;
|
||||
ALTER TABLE job DROP mem_used_max;
|
||||
ALTER TABLE job DROP load_avg;
|
||||
ALTER TABLE job DROP net_bw_avg;
|
||||
ALTER TABLE job DROP net_data_vol_total;
|
||||
ALTER TABLE job DROP file_bw_avg;
|
||||
ALTER TABLE job DROP file_data_vol_total;
|
||||
|
||||
-- Indices for: Single filters, combined filters, sorting, sorting with filters
|
||||
-- Cluster Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
|
||||
-- Cluster+Partition Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
|
||||
|
||||
-- Cluster+Partition+Jobstate Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
|
||||
-- Cluster+Partition+Jobstate Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||
-- Cluster+JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
|
||||
|
||||
-- User Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
|
||||
-- User Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
|
||||
|
||||
-- Project Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||
-- Project Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
|
||||
|
||||
-- JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||
-- JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
|
||||
|
||||
-- ArrayJob Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||
|
||||
-- Sorting without active filters
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||
|
||||
-- Optimize DB index usage
|
||||
PRAGMA optimize;
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -55,7 +55,7 @@ func BenchmarkDB_FindJobById(b *testing.B) {
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_, err := db.FindById(jobId)
|
||||
_, err := db.FindById(getContext(b), jobId)
|
||||
noErr(b, err)
|
||||
}
|
||||
})
|
||||
@@ -111,7 +111,7 @@ func BenchmarkDB_QueryJobs(b *testing.B) {
|
||||
user := "mppi133h"
|
||||
filter.User = &model.StringInput{Eq: &user}
|
||||
page := &model.PageRequest{ItemsPerPage: 50, Page: 1}
|
||||
order := &model.OrderByInput{Field: "startTime", Order: model.SortDirectionEnumDesc}
|
||||
order := &model.OrderByInput{Field: "startTime", Type: "col", Order: model.SortDirectionEnumDesc}
|
||||
|
||||
b.Run("QueryJobs", func(b *testing.B) {
|
||||
db := setup(b)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -12,13 +12,16 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
// GraphQL validation should make sure that no unkown values can be specified.
|
||||
var groupBy2column = map[model.Aggregate]string{
|
||||
model.AggregateUser: "job.user",
|
||||
model.AggregateUser: "job.hpc_user",
|
||||
model.AggregateProject: "job.project",
|
||||
model.AggregateCluster: "job.cluster",
|
||||
}
|
||||
@@ -37,8 +40,8 @@ var sortBy2column = map[model.SortByAggregate]string{
|
||||
func (r *JobRepository) buildCountQuery(
|
||||
filter []*model.JobFilter,
|
||||
kind string,
|
||||
col string) sq.SelectBuilder {
|
||||
|
||||
col string,
|
||||
) sq.SelectBuilder {
|
||||
var query sq.SelectBuilder
|
||||
|
||||
if col != "" {
|
||||
@@ -65,16 +68,16 @@ func (r *JobRepository) buildCountQuery(
|
||||
|
||||
func (r *JobRepository) buildStatsQuery(
|
||||
filter []*model.JobFilter,
|
||||
col string) sq.SelectBuilder {
|
||||
|
||||
col string,
|
||||
) sq.SelectBuilder {
|
||||
var query sq.SelectBuilder
|
||||
castType := r.getCastType()
|
||||
|
||||
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
||||
|
||||
if col != "" {
|
||||
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select(col, "COUNT(job.id) as totalJobs",
|
||||
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
|
||||
@@ -82,10 +85,9 @@ func (r *JobRepository) buildStatsQuery(
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
|
||||
).From("job").GroupBy(col)
|
||||
|
||||
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
|
||||
} else {
|
||||
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
|
||||
query = sq.Select("COUNT(job.id)",
|
||||
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
|
||||
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
|
||||
@@ -104,15 +106,15 @@ func (r *JobRepository) buildStatsQuery(
|
||||
return query
|
||||
}
|
||||
|
||||
func (r *JobRepository) getUserName(ctx context.Context, id string) string {
|
||||
user := GetUserFromContext(ctx)
|
||||
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
|
||||
if name != "" {
|
||||
return name
|
||||
} else {
|
||||
return "-"
|
||||
}
|
||||
}
|
||||
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
|
||||
// user := GetUserFromContext(ctx)
|
||||
// name, _ := r.FindColumnValue(user, id, "hpc_user", "name", "username", false)
|
||||
// if name != "" {
|
||||
// return name
|
||||
// } else {
|
||||
// return "-"
|
||||
// }
|
||||
// }
|
||||
|
||||
func (r *JobRepository) getCastType() string {
|
||||
var castType string
|
||||
@@ -134,8 +136,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
filter []*model.JobFilter,
|
||||
page *model.PageRequest,
|
||||
sortBy *model.SortByAggregate,
|
||||
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
|
||||
groupBy *model.Aggregate,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
col := groupBy2column[*groupBy]
|
||||
query := r.buildStatsQuery(filter, col)
|
||||
@@ -164,14 +166,20 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
|
||||
for rows.Next() {
|
||||
var id sql.NullString
|
||||
var name sql.NullString
|
||||
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
|
||||
if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
||||
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if id.Valid {
|
||||
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
|
||||
var personName string
|
||||
|
||||
if name.Valid {
|
||||
personName = name.String
|
||||
}
|
||||
|
||||
if jobs.Valid {
|
||||
totalJobs = int(jobs.Int64)
|
||||
@@ -201,12 +209,12 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
totalAccHours = int(accHours.Int64)
|
||||
}
|
||||
|
||||
if col == "job.user" {
|
||||
name := r.getUserName(ctx, id.String)
|
||||
if col == "job.hpc_user" {
|
||||
// name := r.getUserName(ctx, id.String)
|
||||
stats = append(stats,
|
||||
&model.JobsStatistics{
|
||||
ID: id.String,
|
||||
Name: name,
|
||||
Name: personName,
|
||||
TotalJobs: totalJobs,
|
||||
TotalWalltime: totalWalltime,
|
||||
TotalNodes: totalNodes,
|
||||
@@ -214,7 +222,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
TotalCores: totalCores,
|
||||
TotalCoreHours: totalCoreHours,
|
||||
TotalAccs: totalAccs,
|
||||
TotalAccHours: totalAccHours})
|
||||
TotalAccHours: totalAccHours,
|
||||
})
|
||||
} else {
|
||||
stats = append(stats,
|
||||
&model.JobsStatistics{
|
||||
@@ -226,7 +235,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
TotalCores: totalCores,
|
||||
TotalCoreHours: totalCoreHours,
|
||||
TotalAccs: totalAccs,
|
||||
TotalAccHours: totalAccHours})
|
||||
TotalAccHours: totalAccHours,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -237,8 +247,8 @@ func (r *JobRepository) JobsStatsGrouped(
|
||||
|
||||
func (r *JobRepository) JobsStats(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter) ([]*model.JobsStatistics, error) {
|
||||
|
||||
filter []*model.JobFilter,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
query := r.buildStatsQuery(filter, "")
|
||||
query, err := SecurityCheck(ctx, query)
|
||||
@@ -273,18 +283,36 @@ func (r *JobRepository) JobsStats(
|
||||
TotalWalltime: int(walltime.Int64),
|
||||
TotalNodeHours: totalNodeHours,
|
||||
TotalCoreHours: totalCoreHours,
|
||||
TotalAccHours: totalAccHours})
|
||||
TotalAccHours: totalAccHours,
|
||||
})
|
||||
}
|
||||
|
||||
log.Debugf("Timer JobStats %s", time.Since(start))
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func LoadJobStat(job *schema.JobMeta, metric string, statType string) float64 {
|
||||
if stats, ok := job.Statistics[metric]; ok {
|
||||
switch statType {
|
||||
case "avg":
|
||||
return stats.Avg
|
||||
case "max":
|
||||
return stats.Max
|
||||
case "min":
|
||||
return stats.Min
|
||||
default:
|
||||
log.Errorf("Unknown stat type %s", statType)
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0
|
||||
}
|
||||
|
||||
func (r *JobRepository) JobCountGrouped(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
||||
|
||||
groupBy *model.Aggregate,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
col := groupBy2column[*groupBy]
|
||||
query := r.buildCountQuery(filter, "", col)
|
||||
@@ -311,7 +339,8 @@ func (r *JobRepository) JobCountGrouped(
|
||||
stats = append(stats,
|
||||
&model.JobsStatistics{
|
||||
ID: id.String,
|
||||
TotalJobs: int(cnt.Int64)})
|
||||
TotalJobs: int(cnt.Int64),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -324,8 +353,8 @@ func (r *JobRepository) AddJobCountGrouped(
|
||||
filter []*model.JobFilter,
|
||||
groupBy *model.Aggregate,
|
||||
stats []*model.JobsStatistics,
|
||||
kind string) ([]*model.JobsStatistics, error) {
|
||||
|
||||
kind string,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
col := groupBy2column[*groupBy]
|
||||
query := r.buildCountQuery(filter, kind, col)
|
||||
@@ -372,8 +401,8 @@ func (r *JobRepository) AddJobCount(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
stats []*model.JobsStatistics,
|
||||
kind string) ([]*model.JobsStatistics, error) {
|
||||
|
||||
kind string,
|
||||
) ([]*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
query := r.buildCountQuery(filter, kind, "")
|
||||
query, err := SecurityCheck(ctx, query)
|
||||
@@ -416,15 +445,41 @@ func (r *JobRepository) AddJobCount(
|
||||
func (r *JobRepository) AddHistograms(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
stat *model.JobsStatistics) (*model.JobsStatistics, error) {
|
||||
stat *model.JobsStatistics,
|
||||
durationBins *string,
|
||||
) (*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
|
||||
var targetBinCount int
|
||||
var targetBinSize int
|
||||
switch {
|
||||
case *durationBins == "1m": // 1 Minute Bins + Max 60 Bins -> Max 60 Minutes
|
||||
targetBinCount = 60
|
||||
targetBinSize = 60
|
||||
case *durationBins == "10m": // 10 Minute Bins + Max 72 Bins -> Max 12 Hours
|
||||
targetBinCount = 72
|
||||
targetBinSize = 600
|
||||
case *durationBins == "1h": // 1 Hour Bins + Max 48 Bins -> Max 48 Hours
|
||||
targetBinCount = 48
|
||||
targetBinSize = 3600
|
||||
case *durationBins == "6h": // 6 Hour Bins + Max 12 Bins -> Max 3 Days
|
||||
targetBinCount = 12
|
||||
targetBinSize = 21600
|
||||
case *durationBins == "12h": // 12 hour Bins + Max 14 Bins -> Max 7 Days
|
||||
targetBinCount = 14
|
||||
targetBinSize = 43200
|
||||
default: // 24h
|
||||
targetBinCount = 24
|
||||
targetBinSize = 3600
|
||||
}
|
||||
|
||||
castType := r.getCastType()
|
||||
var err error
|
||||
value := fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
|
||||
stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter)
|
||||
// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
|
||||
value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), targetBinSize, castType)
|
||||
stat.HistDuration, err = r.jobsDurationStatisticsHistogram(ctx, value, filter, targetBinSize, &targetBinCount)
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job statistics histogram: running jobs")
|
||||
log.Warn("Error while loading job statistics histogram: job duration")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -450,12 +505,47 @@ func (r *JobRepository) AddHistograms(
|
||||
return stat, nil
|
||||
}
|
||||
|
||||
// Requires thresholds for metric from config for cluster? Of all clusters and use largest? split to 10 + 1 for artifacts?
|
||||
func (r *JobRepository) AddMetricHistograms(
|
||||
ctx context.Context,
|
||||
filter []*model.JobFilter,
|
||||
metrics []string,
|
||||
stat *model.JobsStatistics,
|
||||
targetBinCount *int,
|
||||
) (*model.JobsStatistics, error) {
|
||||
start := time.Now()
|
||||
|
||||
// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
|
||||
for _, f := range filter {
|
||||
if f.State != nil {
|
||||
if len(f.State) == 1 && f.State[0] == "running" {
|
||||
stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter, targetBinCount)
|
||||
log.Debugf("Timer AddMetricHistograms %s", time.Since(start))
|
||||
return stat, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// All other cases: Query and make bins in sqlite directly
|
||||
for _, m := range metrics {
|
||||
metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter, targetBinCount)
|
||||
if err != nil {
|
||||
log.Warnf("Error while loading job metric statistics histogram: %s", m)
|
||||
continue
|
||||
}
|
||||
stat.HistMetrics = append(stat.HistMetrics, metricHisto)
|
||||
}
|
||||
|
||||
log.Debugf("Timer AddMetricHistograms %s", time.Since(start))
|
||||
return stat, nil
|
||||
}
|
||||
|
||||
// `value` must be the column grouped by, but renamed to "value"
|
||||
func (r *JobRepository) jobsStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
value string,
|
||||
filters []*model.JobFilter) ([]*model.HistoPoint, error) {
|
||||
|
||||
filters []*model.JobFilter,
|
||||
) ([]*model.HistoPoint, error) {
|
||||
start := time.Now()
|
||||
query, qerr := SecurityCheck(ctx,
|
||||
sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
||||
@@ -475,6 +565,7 @@ func (r *JobRepository) jobsStatisticsHistogram(
|
||||
}
|
||||
|
||||
points := make([]*model.HistoPoint, 0)
|
||||
// is it possible to introduce zero values here? requires info about bincount
|
||||
for rows.Next() {
|
||||
point := model.HistoPoint{}
|
||||
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
||||
@@ -487,3 +578,296 @@ func (r *JobRepository) jobsStatisticsHistogram(
|
||||
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
|
||||
return points, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) jobsDurationStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
value string,
|
||||
filters []*model.JobFilter,
|
||||
binSizeSeconds int,
|
||||
targetBinCount *int,
|
||||
) ([]*model.HistoPoint, error) {
|
||||
start := time.Now()
|
||||
query, qerr := SecurityCheck(ctx,
|
||||
sq.Select(value, "COUNT(job.id) AS count").From("job"))
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
// Setup Array
|
||||
points := make([]*model.HistoPoint, 0)
|
||||
for i := 1; i <= *targetBinCount; i++ {
|
||||
point := model.HistoPoint{Value: i * binSizeSeconds, Count: 0}
|
||||
points = append(points, &point)
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
query = BuildWhereClause(f, query)
|
||||
}
|
||||
|
||||
rows, err := query.GroupBy("value").RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Fill Array at matching $Value
|
||||
for rows.Next() {
|
||||
point := model.HistoPoint{}
|
||||
if err := rows.Scan(&point.Value, &point.Count); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, e := range points {
|
||||
if e.Value == (point.Value * binSizeSeconds) {
|
||||
// Note:
|
||||
// Matching on unmodified integer value (and multiplying point.Value by binSizeSeconds after match)
|
||||
// causes frontend to loop into highest targetBinCount, due to zoom condition instantly being fullfilled (cause unknown)
|
||||
e.Count = point.Count
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
|
||||
return points, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) jobsMetricStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
metric string,
|
||||
filters []*model.JobFilter,
|
||||
bins *int,
|
||||
) (*model.MetricHistoPoints, error) {
|
||||
// Get specific Peak or largest Peak
|
||||
var metricConfig *schema.MetricConfig
|
||||
var peak float64
|
||||
var unit string
|
||||
var footprintStat string
|
||||
|
||||
for _, f := range filters {
|
||||
if f.Cluster != nil {
|
||||
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
||||
peak = metricConfig.Peak
|
||||
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
|
||||
footprintStat = metricConfig.Footprint
|
||||
log.Debugf("Cluster %s filter found with peak %f for %s", *f.Cluster.Eq, peak, metric)
|
||||
}
|
||||
}
|
||||
|
||||
if peak == 0.0 {
|
||||
for _, c := range archive.Clusters {
|
||||
for _, m := range c.MetricConfig {
|
||||
if m.Name == metric {
|
||||
if m.Peak > peak {
|
||||
peak = m.Peak
|
||||
}
|
||||
if unit == "" {
|
||||
unit = m.Unit.Prefix + m.Unit.Base
|
||||
}
|
||||
if footprintStat == "" {
|
||||
footprintStat = m.Footprint
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// log.Debugf("Metric %s, Peak %f, Unit %s, Aggregation %s", metric, peak, unit, aggreg)
|
||||
// Make bins, see https://jereze.com/code/sql-histogram/
|
||||
|
||||
start := time.Now()
|
||||
jm := fmt.Sprintf(`json_extract(footprint, "$.%s")`, (metric + "_" + footprintStat))
|
||||
|
||||
crossJoinQuery := sq.Select(
|
||||
fmt.Sprintf(`max(%s) as max`, jm),
|
||||
fmt.Sprintf(`min(%s) as min`, jm),
|
||||
).From("job").Where(
|
||||
"JSON_VALID(footprint)",
|
||||
).Where(
|
||||
fmt.Sprintf(`%s is not null`, jm),
|
||||
).Where(
|
||||
fmt.Sprintf(`%s <= %f`, jm, peak),
|
||||
)
|
||||
|
||||
crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery)
|
||||
|
||||
if cjqerr != nil {
|
||||
return nil, cjqerr
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
crossJoinQuery = BuildWhereClause(f, crossJoinQuery)
|
||||
}
|
||||
|
||||
crossJoinQuerySql, crossJoinQueryArgs, sqlerr := crossJoinQuery.ToSql()
|
||||
if sqlerr != nil {
|
||||
return nil, sqlerr
|
||||
}
|
||||
|
||||
binQuery := fmt.Sprintf(`CAST( (case when %s = value.max
|
||||
then value.max*0.999999999 else %s end - value.min) / (value.max -
|
||||
value.min) * %v as INTEGER )`, jm, jm, *bins)
|
||||
|
||||
mainQuery := sq.Select(
|
||||
fmt.Sprintf(`%s + 1 as bin`, binQuery),
|
||||
fmt.Sprintf(`count(%s) as count`, jm),
|
||||
fmt.Sprintf(`CAST(((value.max / %d) * (%v )) as INTEGER ) as min`, *bins, binQuery),
|
||||
fmt.Sprintf(`CAST(((value.max / %d) * (%v + 1 )) as INTEGER ) as max`, *bins, binQuery),
|
||||
).From("job").CrossJoin(
|
||||
fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), crossJoinQueryArgs...,
|
||||
).Where(fmt.Sprintf(`%s is not null and %s <= %f`, jm, jm, peak))
|
||||
|
||||
mainQuery, qerr := SecurityCheck(ctx, mainQuery)
|
||||
|
||||
if qerr != nil {
|
||||
return nil, qerr
|
||||
}
|
||||
|
||||
for _, f := range filters {
|
||||
mainQuery = BuildWhereClause(f, mainQuery)
|
||||
}
|
||||
|
||||
// Finalize query with Grouping and Ordering
|
||||
mainQuery = mainQuery.GroupBy("bin").OrderBy("bin")
|
||||
|
||||
rows, err := mainQuery.RunWith(r.DB).Query()
|
||||
if err != nil {
|
||||
log.Errorf("Error while running mainQuery: %s", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Setup Array
|
||||
points := make([]*model.MetricHistoPoint, 0)
|
||||
for i := 1; i <= *bins; i++ {
|
||||
binMax := ((int(peak) / *bins) * i)
|
||||
binMin := ((int(peak) / *bins) * (i - 1))
|
||||
point := model.MetricHistoPoint{Bin: &i, Count: 0, Min: &binMin, Max: &binMax}
|
||||
points = append(points, &point)
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
point := model.MetricHistoPoint{}
|
||||
if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil {
|
||||
log.Warnf("Error while scanning rows for %s", jm)
|
||||
return nil, err // Totally bricks cc-backend if returned and if all metrics requested?
|
||||
}
|
||||
|
||||
for _, e := range points {
|
||||
if e.Bin != nil && point.Bin != nil {
|
||||
if *e.Bin == *point.Bin {
|
||||
e.Count = point.Count
|
||||
if point.Min != nil {
|
||||
e.Min = point.Min
|
||||
}
|
||||
if point.Max != nil {
|
||||
e.Max = point.Max
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Stat: &footprintStat, Data: points}
|
||||
|
||||
log.Debugf("Timer jobsStatisticsHistogram %s", time.Since(start))
|
||||
return &result, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
||||
ctx context.Context,
|
||||
metrics []string,
|
||||
filters []*model.JobFilter,
|
||||
bins *int,
|
||||
) []*model.MetricHistoPoints {
|
||||
|
||||
// Get Jobs
|
||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
||||
if err != nil {
|
||||
log.Errorf("Error while querying jobs for footprint: %s", err)
|
||||
return nil
|
||||
}
|
||||
if len(jobs) > 500 {
|
||||
log.Errorf("too many jobs matched (max: %d)", 500)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get AVGs from metric repo
|
||||
avgs := make([][]schema.Float, len(metrics))
|
||||
for i := range avgs {
|
||||
avgs[i] = make([]schema.Float, 0, len(jobs))
|
||||
}
|
||||
|
||||
for _, job := range jobs {
|
||||
if job.MonitoringStatus == schema.MonitoringStatusDisabled || job.MonitoringStatus == schema.MonitoringStatusArchivingFailed {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||
log.Errorf("Error while loading averages for histogram: %s", err)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate metrics to fill endresult
|
||||
data := make([]*model.MetricHistoPoints, 0)
|
||||
for idx, metric := range metrics {
|
||||
// Get specific Peak or largest Peak
|
||||
var metricConfig *schema.MetricConfig
|
||||
var peak float64
|
||||
var unit string
|
||||
|
||||
for _, f := range filters {
|
||||
if f.Cluster != nil {
|
||||
metricConfig = archive.GetMetricConfig(*f.Cluster.Eq, metric)
|
||||
peak = metricConfig.Peak
|
||||
unit = metricConfig.Unit.Prefix + metricConfig.Unit.Base
|
||||
}
|
||||
}
|
||||
|
||||
if peak == 0.0 {
|
||||
for _, c := range archive.Clusters {
|
||||
for _, m := range c.MetricConfig {
|
||||
if m.Name == metric {
|
||||
if m.Peak > peak {
|
||||
peak = m.Peak
|
||||
}
|
||||
if unit == "" {
|
||||
unit = m.Unit.Prefix + m.Unit.Base
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make and fill bins
|
||||
peakBin := int(peak) / *bins
|
||||
|
||||
points := make([]*model.MetricHistoPoint, 0)
|
||||
for b := 0; b < *bins; b++ {
|
||||
count := 0
|
||||
bindex := b + 1
|
||||
bmin := peakBin * b
|
||||
bmax := peakBin * (b + 1)
|
||||
|
||||
// Iterate AVG values for indexed metric and count for bins
|
||||
for _, val := range avgs[idx] {
|
||||
if int(val) >= bmin && int(val) < bmax {
|
||||
count += 1
|
||||
}
|
||||
}
|
||||
|
||||
// Append Bin to Metric Result Array
|
||||
point := model.MetricHistoPoint{Bin: &bindex, Count: count, Min: &bmin, Max: &bmax}
|
||||
points = append(points, &point)
|
||||
}
|
||||
|
||||
// Append Metric Result Array to final results array
|
||||
result := model.MetricHistoPoints{Metric: metric, Unit: unit, Data: points}
|
||||
data = append(data, &result)
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package repository
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
@@ -14,53 +15,80 @@ import (
|
||||
)
|
||||
|
||||
// Add the tag with id `tagId` to the job with the database id `jobId`.
|
||||
func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
|
||||
if _, err := r.stmtCache.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES ($1, $2)`, job, tag); err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
func (r *JobRepository) AddTag(user *schema.User, job int64, tag int64) ([]*schema.Tag, error) {
|
||||
j, err := r.FindByIdWithUser(user, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(job, tag)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error adding tag with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(user, &job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, tags)
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
}
|
||||
|
||||
// Removes a tag from a job
|
||||
func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
|
||||
if _, err := r.stmtCache.Exec("DELETE FROM jobtag WHERE jobtag.job_id = $1 AND jobtag.tag_id = $2", job, tag); err != nil {
|
||||
log.Error("Error while running query")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
j, err := r.FindById(job)
|
||||
func (r *JobRepository) RemoveTag(user *schema.User, job, tag int64) ([]*schema.Tag, error) {
|
||||
j, err := r.FindByIdWithUser(user, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while finding job by id")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(&job)
|
||||
q := sq.Delete("jobtag").Where("jobtag.job_id = ?", job).Where("jobtag.tag_id = ?", tag)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error removing tag with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags, err := r.GetTags(user, &job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, tags)
|
||||
archiveTags, err := r.getArchiveTags(&job)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting tags for job")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return tags, archive.UpdateTags(j, archiveTags)
|
||||
}
|
||||
|
||||
// CreateTag creates a new tag with the specified type and name and returns its database id.
|
||||
func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64, err error) {
|
||||
res, err := r.stmtCache.Exec("INSERT INTO tag (tag_type, tag_name) VALUES ($1, $2)", tagType, tagName)
|
||||
func (r *JobRepository) CreateTag(tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||
// Default to "Global" scope if none defined
|
||||
if tagScope == "" {
|
||||
tagScope = "global"
|
||||
}
|
||||
|
||||
q := sq.Insert("tag").Columns("tag_type", "tag_name", "tag_scope").Values(tagType, tagName, tagScope)
|
||||
|
||||
res, err := q.RunWith(r.stmtCache).Exec()
|
||||
if err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error inserting tag with %s: %v", s, err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -68,8 +96,9 @@ func (r *JobRepository) CreateTag(tagType string, tagName string) (tagId int64,
|
||||
}
|
||||
|
||||
func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts map[string]int, err error) {
|
||||
// Fetch all Tags in DB for Display in Frontend Tag-View
|
||||
tags = make([]schema.Tag, 0, 100)
|
||||
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name FROM tag")
|
||||
xrows, err := r.DB.Queryx("SELECT id, tag_type, tag_name, tag_scope FROM tag")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@@ -79,22 +108,42 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
if err = xrows.StructScan(&t); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
tags = append(tags, t)
|
||||
|
||||
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||
readable, err := r.checkScopeAuth(user, "read", t.Scope)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if readable {
|
||||
tags = append(tags, t)
|
||||
}
|
||||
}
|
||||
|
||||
q := sq.Select("t.tag_name, count(jt.tag_id)").
|
||||
// Query and Count Jobs with attached Tags
|
||||
q := sq.Select("t.tag_name, t.id, count(jt.tag_id)").
|
||||
From("tag t").
|
||||
LeftJoin("jobtag jt ON t.id = jt.tag_id").
|
||||
GroupBy("t.tag_name")
|
||||
|
||||
// Handle Scope Filtering
|
||||
scopeList := "\"global\""
|
||||
if user != nil {
|
||||
scopeList += ",\"" + user.Username + "\""
|
||||
}
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
scopeList += ",\"admin\""
|
||||
}
|
||||
q = q.Where("t.tag_scope IN (" + scopeList + ")")
|
||||
|
||||
// Handle Job Ownership
|
||||
if user != nil && user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) { // ADMIN || SUPPORT: Count all jobs
|
||||
log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
|
||||
// log.Debug("CountTags: User Admin or Support -> Count all Jobs for Tags")
|
||||
// Unchanged: Needs to be own case still, due to UserRole/NoRole compatibility handling in else case
|
||||
} else if user != nil && user.HasRole(schema.RoleManager) { // MANAGER: Count own jobs plus project's jobs
|
||||
// Build ("project1", "project2", ...) list of variable length directly in SQL string
|
||||
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
|
||||
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ? OR job.project IN (\""+strings.Join(user.Projects, "\",\"")+"\"))", user.Username)
|
||||
} else if user != nil { // USER OR NO ROLE (Compatibility): Only count own jobs
|
||||
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.user = ?)", user.Username)
|
||||
q = q.Where("jt.job_id IN (SELECT id FROM job WHERE job.hpc_user = ?)", user.Username)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
@@ -105,29 +154,44 @@ func (r *JobRepository) CountTags(user *schema.User) (tags []schema.Tag, counts
|
||||
counts = make(map[string]int)
|
||||
for rows.Next() {
|
||||
var tagName string
|
||||
var tagId int
|
||||
var count int
|
||||
if err = rows.Scan(&tagName, &count); err != nil {
|
||||
if err = rows.Scan(&tagName, &tagId, &count); err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
counts[tagName] = count
|
||||
// Use tagId as second Map-Key component to differentiate tags with identical names
|
||||
counts[fmt.Sprint(tagName, tagId)] = count
|
||||
}
|
||||
err = rows.Err()
|
||||
|
||||
return
|
||||
return tags, counts, err
|
||||
}
|
||||
|
||||
// AddTagOrCreate adds the tag with the specified type and name to the job with the database id `jobId`.
|
||||
// If such a tag does not yet exist, it is created.
|
||||
func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName string) (tagId int64, err error) {
|
||||
tagId, exists := r.TagId(tagType, tagName)
|
||||
func (r *JobRepository) AddTagOrCreate(user *schema.User, jobId int64, tagType string, tagName string, tagScope string) (tagId int64, err error) {
|
||||
// Default to "Global" scope if none defined
|
||||
if tagScope == "" {
|
||||
tagScope = "global"
|
||||
}
|
||||
|
||||
writable, err := r.checkScopeAuth(user, "write", tagScope)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if !writable {
|
||||
return 0, fmt.Errorf("cannot write tag scope with current authorization")
|
||||
}
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName)
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := r.AddTag(jobId, tagId); err != nil {
|
||||
if _, err := r.AddTag(user, jobId, tagId); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -135,33 +199,68 @@ func (r *JobRepository) AddTagOrCreate(jobId int64, tagType string, tagName stri
|
||||
}
|
||||
|
||||
// TagId returns the database id of the tag with the specified type and name.
|
||||
func (r *JobRepository) TagId(tagType string, tagName string) (tagId int64, exists bool) {
|
||||
func (r *JobRepository) TagId(tagType string, tagName string, tagScope string) (tagId int64, exists bool) {
|
||||
exists = true
|
||||
if err := sq.Select("id").From("tag").
|
||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).
|
||||
Where("tag.tag_type = ?", tagType).Where("tag.tag_name = ?", tagName).Where("tag.tag_scope = ?", tagScope).
|
||||
RunWith(r.stmtCache).QueryRow().Scan(&tagId); err != nil {
|
||||
exists = false
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// GetTags returns a list of all tags if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name").From("tag")
|
||||
// GetTags returns a list of all scoped tags if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) GetTags(user *schema.User, job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||
if job != nil {
|
||||
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
log.Error("Error while running query")
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error get tags with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags := make([]*schema.Tag, 0)
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name); err != nil {
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
// Handle Scope Filtering: Tag Scope is Global, Private (== Username) or User is auth'd to view Admin Tags
|
||||
readable, err := r.checkScopeAuth(user, "read", tag.Scope)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if readable {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
// GetArchiveTags returns a list of all tags *regardless of scope* for archiving if job is nil or of the tags that the job with that database ID has.
|
||||
func (r *JobRepository) getArchiveTags(job *int64) ([]*schema.Tag, error) {
|
||||
q := sq.Select("id", "tag_type", "tag_name", "tag_scope").From("tag")
|
||||
if job != nil {
|
||||
q = q.Join("jobtag ON jobtag.tag_id = tag.id").Where("jobtag.job_id = ?", *job)
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(r.stmtCache).Query()
|
||||
if err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error get tags with %s: %v", s, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags := make([]*schema.Tag, 0)
|
||||
for rows.Next() {
|
||||
tag := &schema.Tag{}
|
||||
if err := rows.Scan(&tag.ID, &tag.Type, &tag.Name, &tag.Scope); err != nil {
|
||||
log.Warn("Error while scanning rows")
|
||||
return nil, err
|
||||
}
|
||||
@@ -170,3 +269,59 @@ func (r *JobRepository) GetTags(job *int64) ([]*schema.Tag, error) {
|
||||
|
||||
return tags, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) ImportTag(jobId int64, tagType string, tagName string, tagScope string) (err error) {
|
||||
// Import has no scope ctx, only import from metafile to DB (No recursive archive update required), only returns err
|
||||
|
||||
tagId, exists := r.TagId(tagType, tagName, tagScope)
|
||||
if !exists {
|
||||
tagId, err = r.CreateTag(tagType, tagName, tagScope)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
q := sq.Insert("jobtag").Columns("job_id", "tag_id").Values(jobId, tagId)
|
||||
|
||||
if _, err := q.RunWith(r.stmtCache).Exec(); err != nil {
|
||||
s, _, _ := q.ToSql()
|
||||
log.Errorf("Error adding tag on import with %s: %v", s, err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) checkScopeAuth(user *schema.User, operation string, scope string) (pass bool, err error) {
|
||||
if user != nil {
|
||||
switch {
|
||||
case operation == "write" && scope == "admin":
|
||||
if user.HasRole(schema.RoleAdmin) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case operation == "write" && scope == "global":
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) || (len(user.Roles) == 1 && user.HasRole(schema.RoleApi)) {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
case operation == "write" && scope == user.Username:
|
||||
return true, nil
|
||||
case operation == "read" && scope == "admin":
|
||||
return user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}), nil
|
||||
case operation == "read" && scope == "global":
|
||||
return true, nil
|
||||
case operation == "read" && scope == user.Username:
|
||||
return true, nil
|
||||
default:
|
||||
if operation == "read" || operation == "write" {
|
||||
// No acceptable scope: deny tag
|
||||
return false, nil
|
||||
} else {
|
||||
return false, fmt.Errorf("error while checking tag operation auth: unknown operation (%s)", operation)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false, fmt.Errorf("error while checking tag operation auth: no user in context")
|
||||
}
|
||||
}
|
||||
|
||||
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
BIN
internal/repository/testdata/job.db-shm
vendored
BIN
internal/repository/testdata/job.db-shm
vendored
Binary file not shown.
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -6,7 +6,6 @@ package repository
|
||||
|
||||
import (
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
@@ -18,20 +17,12 @@ type Transaction struct {
|
||||
func (r *JobRepository) TransactionInit() (*Transaction, error) {
|
||||
var err error
|
||||
t := new(Transaction)
|
||||
// Inserts are bundled into transactions because in sqlite,
|
||||
// that speeds up inserts A LOT.
|
||||
|
||||
t.tx, err = r.DB.Beginx()
|
||||
if err != nil {
|
||||
log.Warn("Error while bundling transactions")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
t.stmt, err = t.tx.PrepareNamed(NamedJobInsert)
|
||||
if err != nil {
|
||||
log.Warn("Error while preparing namedJobInsert")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return t, nil
|
||||
}
|
||||
|
||||
@@ -50,7 +41,6 @@ func (r *JobRepository) TransactionCommit(t *Transaction) error {
|
||||
return err
|
||||
}
|
||||
|
||||
t.stmt = t.tx.NamedStmt(t.stmt)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -59,14 +49,17 @@ func (r *JobRepository) TransactionEnd(t *Transaction) error {
|
||||
log.Warn("Error while committing SQL transactions")
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, error) {
|
||||
res, err := t.stmt.Exec(job)
|
||||
func (r *JobRepository) TransactionAddNamed(
|
||||
t *Transaction,
|
||||
query string,
|
||||
args ...interface{},
|
||||
) (int64, error) {
|
||||
res, err := t.tx.NamedExec(query, args)
|
||||
if err != nil {
|
||||
log.Errorf("repository initDB(): %v", err)
|
||||
log.Errorf("Named Exec failed: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
@@ -79,26 +72,19 @@ func (r *JobRepository) TransactionAdd(t *Transaction, job schema.Job) (int64, e
|
||||
return id, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionAddTag(t *Transaction, tag *schema.Tag) (int64, error) {
|
||||
res, err := t.tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
|
||||
func (r *JobRepository) TransactionAdd(t *Transaction, query string, args ...interface{}) (int64, error) {
|
||||
|
||||
res, err := t.tx.Exec(query, args...)
|
||||
if err != nil {
|
||||
log.Errorf("Error while inserting tag into tag table: %v (Type %v)", tag.Name, tag.Type)
|
||||
return 0, err
|
||||
}
|
||||
tagId, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
log.Errorf("TransactionAdd(), Exec() Error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return tagId, nil
|
||||
}
|
||||
|
||||
func (r *JobRepository) TransactionSetTag(t *Transaction, jobId int64, tagId int64) error {
|
||||
if _, err := t.tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, jobId, tagId); err != nil {
|
||||
log.Errorf("Error while inserting jobtag into jobtag table: %v (TagID %v)", jobId, tagId)
|
||||
return err
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Errorf("TransactionAdd(), LastInsertId() Error: %v", err)
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return nil
|
||||
return id, nil
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -46,8 +47,8 @@ func GetUserRepository() *UserRepository {
|
||||
func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
||||
user := &schema.User{Username: username}
|
||||
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
|
||||
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
|
||||
Where("user.username = ?", username).RunWith(r.DB).
|
||||
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("hpc_user").
|
||||
Where("hpc_user.username = ?", username).RunWith(r.DB).
|
||||
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
|
||||
log.Warnf("Error while querying user '%v' from database", username)
|
||||
return nil, err
|
||||
@@ -72,9 +73,8 @@ func (r *UserRepository) GetUser(username string) (*schema.User, error) {
|
||||
}
|
||||
|
||||
func (r *UserRepository) GetLdapUsernames() ([]string, error) {
|
||||
|
||||
var users []string
|
||||
rows, err := r.DB.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
||||
rows, err := r.DB.Query(`SELECT username FROM hpc_user WHERE hpc_user.ldap = 1`)
|
||||
if err != nil {
|
||||
log.Warn("Error while querying usernames")
|
||||
return nil, err
|
||||
@@ -122,18 +122,62 @@ func (r *UserRepository) AddUser(user *schema.User) error {
|
||||
vals = append(vals, int(user.AuthSource))
|
||||
}
|
||||
|
||||
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
|
||||
if _, err := sq.Insert("hpc_user").Columns(cols...).Values(vals...).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Infof("new user %#v created (roles: %s, auth-source: %d, projects: %s)", user.Username, rolesJson, user.AuthSource, projectsJson)
|
||||
|
||||
defaultMetricsCfg, err := config.LoadDefaultMetricsConfig()
|
||||
if err != nil {
|
||||
log.Errorf("Error loading default metrics config: %v", err)
|
||||
} else if defaultMetricsCfg != nil {
|
||||
for _, cluster := range defaultMetricsCfg.Clusters {
|
||||
metricsArray := config.ParseMetricsString(cluster.DefaultMetrics)
|
||||
metricsJSON, err := json.Marshal(metricsArray)
|
||||
if err != nil {
|
||||
log.Errorf("Error marshaling default metrics for cluster %s: %v", cluster.Name, err)
|
||||
continue
|
||||
}
|
||||
confKey := "job_view_selectedMetrics:" + cluster.Name
|
||||
if _, err := sq.Insert("configuration").
|
||||
Columns("username", "confkey", "value").
|
||||
Values(user.Username, confKey, string(metricsJSON)).
|
||||
RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error inserting default job view metrics for user %s and cluster %s: %v", user.Username, cluster.Name, err)
|
||||
} else {
|
||||
log.Infof("Default job view metrics for user %s and cluster %s set to %s", user.Username, cluster.Name, string(metricsJSON))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *UserRepository) UpdateUser(dbUser *schema.User, user *schema.User) error {
|
||||
// user contains updated info, apply to dbuser
|
||||
// TODO: Discuss updatable fields
|
||||
if dbUser.Name != user.Name {
|
||||
if _, err := sq.Update("hpc_user").Set("name", user.Name).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("error while updating name of user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Toggled until greenlit
|
||||
// if dbUser.HasRole(schema.RoleManager) && !reflect.DeepEqual(dbUser.Projects, user.Projects) {
|
||||
// projects, _ := json.Marshal(user.Projects)
|
||||
// if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", dbUser.Username).RunWith(r.DB).Exec(); err != nil {
|
||||
// return err
|
||||
// }
|
||||
// }
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *UserRepository) DelUser(username string) error {
|
||||
|
||||
_, err := r.DB.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
||||
_, err := r.DB.Exec(`DELETE FROM hpc_user WHERE hpc_user.username = ?`, username)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting user '%s' from DB", username)
|
||||
return err
|
||||
@@ -143,8 +187,7 @@ func (r *UserRepository) DelUser(username string) error {
|
||||
}
|
||||
|
||||
func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||
|
||||
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
|
||||
q := sq.Select("username", "name", "email", "roles", "projects").From("hpc_user")
|
||||
if specialsOnly {
|
||||
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
|
||||
}
|
||||
@@ -186,8 +229,8 @@ func (r *UserRepository) ListUsers(specialsOnly bool) ([]*schema.User, error) {
|
||||
func (r *UserRepository) AddRole(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
queryrole string) error {
|
||||
|
||||
queryrole string,
|
||||
) error {
|
||||
newRole := strings.ToLower(queryrole)
|
||||
user, err := r.GetUser(username)
|
||||
if err != nil {
|
||||
@@ -198,15 +241,15 @@ func (r *UserRepository) AddRole(
|
||||
exists, valid := user.HasValidRole(newRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
|
||||
return fmt.Errorf("supplied role is no valid option : %v", newRole)
|
||||
}
|
||||
if exists {
|
||||
return fmt.Errorf("User %v already has role %v", username, newRole)
|
||||
return fmt.Errorf("user %v already has role %v", username, newRole)
|
||||
}
|
||||
|
||||
roles, _ := json.Marshal(append(user.Roles, newRole))
|
||||
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while adding new role for user '%s'", user.Username)
|
||||
if _, err := sq.Update("hpc_user").Set("roles", roles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("error while adding new role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@@ -223,14 +266,14 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
exists, valid := user.HasValidRole(oldRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
|
||||
return fmt.Errorf("supplied role is no valid option : %v", oldRole)
|
||||
}
|
||||
if !exists {
|
||||
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
|
||||
return fmt.Errorf("role already deleted for user '%v': %v", username, oldRole)
|
||||
}
|
||||
|
||||
if oldRole == schema.GetRoleString(schema.RoleManager) && len(user.Projects) != 0 {
|
||||
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||
return fmt.Errorf("cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||
}
|
||||
|
||||
var newroles []string
|
||||
@@ -240,8 +283,8 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
}
|
||||
}
|
||||
|
||||
var mroles, _ = json.Marshal(newroles)
|
||||
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
mroles, _ := json.Marshal(newroles)
|
||||
if _, err := sq.Update("hpc_user").Set("roles", mroles).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
log.Errorf("Error while removing role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
@@ -251,15 +294,15 @@ func (r *UserRepository) RemoveRole(ctx context.Context, username string, queryr
|
||||
func (r *UserRepository) AddProject(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
project string) error {
|
||||
|
||||
project string,
|
||||
) error {
|
||||
user, err := r.GetUser(username)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleManager) {
|
||||
return fmt.Errorf("user '%s' is not a manager!", username)
|
||||
return fmt.Errorf("user '%s' is not a manager", username)
|
||||
}
|
||||
|
||||
if user.HasProject(project) {
|
||||
@@ -267,7 +310,7 @@ func (r *UserRepository) AddProject(
|
||||
}
|
||||
|
||||
projects, _ := json.Marshal(append(user.Projects, project))
|
||||
if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
if _, err := sq.Update("hpc_user").Set("projects", projects).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -281,11 +324,11 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
||||
}
|
||||
|
||||
if !user.HasRole(schema.RoleManager) {
|
||||
return fmt.Errorf("user '%#v' is not a manager!", username)
|
||||
return fmt.Errorf("user '%#v' is not a manager", username)
|
||||
}
|
||||
|
||||
if !user.HasProject(project) {
|
||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
|
||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match", username, project)
|
||||
}
|
||||
|
||||
var exists bool
|
||||
@@ -298,14 +341,14 @@ func (r *UserRepository) RemoveProject(ctx context.Context, username string, pro
|
||||
}
|
||||
}
|
||||
|
||||
if exists == true {
|
||||
if exists {
|
||||
var result interface{}
|
||||
if len(newprojects) == 0 {
|
||||
result = "[]"
|
||||
} else {
|
||||
result, _ = json.Marshal(newprojects)
|
||||
}
|
||||
if _, err := sq.Update("user").Set("projects", result).Where("user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
if _, err := sq.Update("hpc_user").Set("projects", result).Where("hpc_user.username = ?", username).RunWith(r.DB).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@@ -321,9 +364,10 @@ const ContextUserKey ContextKey = "user"
|
||||
func GetUserFromContext(ctx context.Context) *schema.User {
|
||||
x := ctx.Value(ContextUserKey)
|
||||
if x == nil {
|
||||
log.Warnf("no user retrieved from context")
|
||||
return nil
|
||||
}
|
||||
|
||||
// log.Infof("user retrieved from context: %v", x.(*schema.User))
|
||||
return x.(*schema.User)
|
||||
}
|
||||
|
||||
@@ -336,7 +380,7 @@ func (r *UserRepository) FetchUserInCtx(ctx context.Context, username string) (*
|
||||
|
||||
user := &model.User{Username: username}
|
||||
var name, email sql.NullString
|
||||
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
|
||||
if err := sq.Select("name", "email").From("hpc_user").Where("hpc_user.username = ?", username).
|
||||
RunWith(r.DB).QueryRow().Scan(&name, &email); err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
@@ -24,9 +24,9 @@ var (
|
||||
type UserCfgRepo struct {
|
||||
DB *sqlx.DB
|
||||
Lookup *sqlx.Stmt
|
||||
lock sync.RWMutex
|
||||
uiDefaults map[string]interface{}
|
||||
cache *lrucache.Cache
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
||||
func GetUserCfgRepo() *UserCfgRepo {
|
||||
@@ -112,8 +112,8 @@ func (uCfg *UserCfgRepo) GetUIConfig(user *schema.User) (map[string]interface{},
|
||||
// configuration.
|
||||
func (uCfg *UserCfgRepo) UpdateConfig(
|
||||
key, value string,
|
||||
user *schema.User) error {
|
||||
|
||||
user *schema.User,
|
||||
) error {
|
||||
if user == nil {
|
||||
var val interface{}
|
||||
if err := json.Unmarshal([]byte(value), &val); err != nil {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user