mirror of https://github.com/ClusterCockpit/cc-backend
synced 2026-01-15 09:11:45 +01:00

Compare commits: v1.4.2...dependabot (635 commits)
.github/dependabot.yml (new file, +15)
@@ -0,0 +1,15 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+version: 2
+updates:
+  - package-ecosystem: "gomod"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+  - package-ecosystem: "npm"
+    directory: "/web/frontend"
+    schedule:
+      interval: "weekly"
.github/workflows/Release.yml (deleted file, -331)
@@ -1,331 +0,0 @@
-# See: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions
-
-# Workflow name
-name: Release
-
-# Run on tag push
-on:
-  push:
-    tags:
-    - '**'
-
-jobs:
-
-  #
-  # Build on AlmaLinux 8.5 using golang-1.18.2
-  #
-  AlmaLinux-RPM-build:
-    runs-on: ubuntu-latest
-    # See: https://hub.docker.com/_/almalinux
-    container: almalinux:8.5
-    # The job outputs link to the outputs of the 'rpmrename' step
-    # Only job outputs can be used in child jobs
-    outputs:
-      rpm : ${{steps.rpmrename.outputs.RPM}}
-      srpm : ${{steps.rpmrename.outputs.SRPM}}
-    steps:
-
-    # Use dnf to install development packages
-    - name: Install development packages
-      run: |
-        dnf --assumeyes group install "Development Tools" "RPM Development Tools"
-        dnf --assumeyes install wget openssl-devel diffutils delve which npm
-        dnf --assumeyes install 'dnf-command(builddep)'
-
-    # Checkout git repository and submodules
-    # fetch-depth must be 0 to use git describe
-    # See: https://github.com/marketplace/actions/checkout
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-        fetch-depth: 0
-
-    # Use dnf to install build dependencies
-    - name: Install build dependencies
-      run: |
-        wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
-                http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
-                http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
-                http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
-        rpm -i go*.rpm
-        npm install --global yarn rollup svelte rollup-plugin-svelte
-        #dnf --assumeyes builddep build/package/cc-backend.spec
-
-    - name: RPM build ClusterCockpit
-      id: rpmbuild
-      run: make RPM
-
-    # AlmaLinux 8.5 is a derivate of RedHat Enterprise Linux 8 (UBI8),
-    # so the created RPM both contain the substring 'el8' in the RPM file names
-    # This step replaces the substring 'el8' to 'alma85'. It uses the move operation
-    # because it is unclear whether the default AlmaLinux 8.5 container contains the
-    # 'rename' command. This way we also get the new names for output.
-    - name: Rename RPMs (s/el8/alma85/)
-      id: rpmrename
-      run: |
-        OLD_RPM="${{steps.rpmbuild.outputs.RPM}}"
-        OLD_SRPM="${{steps.rpmbuild.outputs.SRPM}}"
-        NEW_RPM="${OLD_RPM/el8/alma85}"
-        NEW_SRPM=${OLD_SRPM/el8/alma85}
-        mv "${OLD_RPM}" "${NEW_RPM}"
-        mv "${OLD_SRPM}" "${NEW_SRPM}"
-        echo "::set-output name=SRPM::${NEW_SRPM}"
-        echo "::set-output name=RPM::${NEW_RPM}"
-
-    # See: https://github.com/actions/upload-artifact
-    - name: Save RPM as artifact
-      uses: actions/upload-artifact@v2
-      with:
-        name: cc-backend RPM for AlmaLinux 8.5
-        path: ${{ steps.rpmrename.outputs.RPM }}
-    - name: Save SRPM as artifact
-      uses: actions/upload-artifact@v2
-      with:
-        name: cc-backend SRPM for AlmaLinux 8.5
-        path: ${{ steps.rpmrename.outputs.SRPM }}
-
-  #
-  # Build on UBI 8 using golang-1.18.2
-  #
-  UBI-8-RPM-build:
-    runs-on: ubuntu-latest
-    # See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
-    container: registry.access.redhat.com/ubi8/ubi:8.5-226.1645809065
-    # The job outputs link to the outputs of the 'rpmbuild' step
-    outputs:
-      rpm : ${{steps.rpmbuild.outputs.RPM}}
-      srpm : ${{steps.rpmbuild.outputs.SRPM}}
-    steps:
-
-    # Use dnf to install development packages
-    - name: Install development packages
-      run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros rpm-build-libs rpm-libs gcc make python38 git wget openssl-devel diffutils delve which
-
-    # Checkout git repository and submodules
-    # fetch-depth must be 0 to use git describe
-    # See: https://github.com/marketplace/actions/checkout
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-        fetch-depth: 0
-
-    # Use dnf to install build dependencies
-    - name: Install build dependencies
-      run: |
-        wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
-                http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
-                http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
-                http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
-        rpm -i go*.rpm
-        dnf --assumeyes --disableplugin=subscription-manager install npm
-        npm install --global yarn rollup svelte rollup-plugin-svelte
-        #dnf --assumeyes builddep build/package/cc-backend.spec
-
-    - name: RPM build ClusterCockpit
-      id: rpmbuild
-      run: make RPM
-
-    # See: https://github.com/actions/upload-artifact
-    - name: Save RPM as artifact
-      uses: actions/upload-artifact@v2
-      with:
-        name: cc-backend RPM for UBI 8
-        path: ${{ steps.rpmbuild.outputs.RPM }}
-    - name: Save SRPM as artifact
-      uses: actions/upload-artifact@v2
-      with:
-        name: cc-backend SRPM for UBI 8
-        path: ${{ steps.rpmbuild.outputs.SRPM }}
-
-  #
-  # Build on Ubuntu 20.04 using official go 1.19.1 package
-  #
-  Ubuntu-focal-build:
-    runs-on: ubuntu-latest
-    container: ubuntu:20.04
-    # The job outputs link to the outputs of the 'debrename' step
-    # Only job outputs can be used in child jobs
-    outputs:
-      deb : ${{steps.debrename.outputs.DEB}}
-    steps:
-    # Use apt to install development packages
-    - name: Install development packages
-      run: |
-        apt update && apt --assume-yes upgrade
-        apt --assume-yes install build-essential sed git wget bash
-        apt --assume-yes install npm
-        npm install --global yarn rollup svelte rollup-plugin-svelte
-    # Checkout git repository and submodules
-    # fetch-depth must be 0 to use git describe
-    # See: https://github.com/marketplace/actions/checkout
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-        fetch-depth: 0
-    # Use official golang package
-    - name: Install Golang
-      run: |
-        wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
-        tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
-        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
-        go version
-    - name: DEB build ClusterCockpit
-      id: dpkg-build
-      run: |
-        ls -la
-        pwd
-        env
-        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
-        git config --global --add safe.directory $(pwd)
-        make DEB
-    - name: Rename DEB (add '_ubuntu20.04')
-      id: debrename
-      run: |
-        OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
-        NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb"
-        mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
-        echo "::set-output name=DEB::${NEW_DEB_FILE}"
-    # See: https://github.com/actions/upload-artifact
-    - name: Save DEB as artifact
-      uses: actions/upload-artifact@v2
-      with:
-        name: cc-backend DEB for Ubuntu 20.04
-        path: ${{ steps.debrename.outputs.DEB }}
-
-  #
-  # Build on Ubuntu 20.04 using official go 1.19.1 package
-  #
-  Ubuntu-jammy-build:
-    runs-on: ubuntu-latest
-    container: ubuntu:22.04
-    # The job outputs link to the outputs of the 'debrename' step
-    # Only job outputs can be used in child jobs
-    outputs:
-      deb : ${{steps.debrename.outputs.DEB}}
-    steps:
-    # Use apt to install development packages
-    - name: Install development packages
-      run: |
-        apt update && apt --assume-yes upgrade
-        apt --assume-yes install build-essential sed git wget bash npm
-        npm install --global yarn rollup svelte rollup-plugin-svelte
-    # Checkout git repository and submodules
-    # fetch-depth must be 0 to use git describe
-    # See: https://github.com/marketplace/actions/checkout
-    - name: Checkout
-      uses: actions/checkout@v2
-      with:
-        submodules: recursive
-        fetch-depth: 0
-    # Use official golang package
-    - name: Install Golang
-      run: |
-        wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
-        tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
-        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
-        go version
-    - name: DEB build ClusterCockpit
-      id: dpkg-build
-      run: |
-        ls -la
-        pwd
-        env
-        export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
-        git config --global --add safe.directory $(pwd)
-        make DEB
-    - name: Rename DEB (add '_ubuntu22.04')
-      id: debrename
-      run: |
-        OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
-        NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu22.04.deb"
-        mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
-        echo "::set-output name=DEB::${NEW_DEB_FILE}"
-    # See: https://github.com/actions/upload-artifact
-    - name: Save DEB as artifact
-      uses: actions/upload-artifact@v2
-      with:
-        name: cc-backend DEB for Ubuntu 22.04
-        path: ${{ steps.debrename.outputs.DEB }}
-
-  #
-  # Create release with fresh RPMs
-  #
-  Release:
-    runs-on: ubuntu-latest
-    # We need the RPMs, so add dependency
-    needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build, Ubuntu-jammy-build]
-
-    steps:
-    # See: https://github.com/actions/download-artifact
-    - name: Download AlmaLinux 8.5 RPM
-      uses: actions/download-artifact@v2
-      with:
-        name: cc-backend RPM for AlmaLinux 8.5
-    - name: Download AlmaLinux 8.5 SRPM
-      uses: actions/download-artifact@v2
-      with:
-        name: cc-backend SRPM for AlmaLinux 8.5
-
-    - name: Download UBI 8 RPM
-      uses: actions/download-artifact@v2
-      with:
-        name: cc-backend RPM for UBI 8
-    - name: Download UBI 8 SRPM
-      uses: actions/download-artifact@v2
-      with:
-        name: cc-backend SRPM for UBI 8
-
-    - name: Download Ubuntu 20.04 DEB
-      uses: actions/download-artifact@v2
-      with:
-        name: cc-backend DEB for Ubuntu 20.04
-
-    - name: Download Ubuntu 22.04 DEB
-      uses: actions/download-artifact@v2
-      with:
-        name: cc-backend DEB for Ubuntu 22.04
-
-    # The download actions do not publish the name of the downloaded file,
-    # so we re-use the job outputs of the parent jobs. The files are all
-    # downloaded to the current folder.
-    # The gh-release action afterwards does not accept file lists but all
-    # files have to be listed at 'files'. The step creates one output per
-    # RPM package (2 per distro)
-    - name: Set RPM variables
-      id: files
-      run: |
-        ALMA_85_RPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.rpm}}")
-        ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
-        UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
-        UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
-        U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}")
-        U_2204_DEB=$(basename "${{ needs.Ubuntu-jammy-build.outputs.deb}}")
-        echo "ALMA_85_RPM::${ALMA_85_RPM}"
-        echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
-        echo "UBI_8_RPM::${UBI_8_RPM}"
-        echo "UBI_8_SRPM::${UBI_8_SRPM}"
-        echo "U_2004_DEB::${U_2004_DEB}"
-        echo "U_2204_DEB::${U_2204_DEB}"
-        echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
-        echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
-        echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
-        echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
-        echo "::set-output name=U_2004_DEB::${U_2004_DEB}"
-        echo "::set-output name=U_2204_DEB::${U_2204_DEB}"
-
-    # See: https://github.com/softprops/action-gh-release
-    - name: Release
-      uses: softprops/action-gh-release@v1
-      if: startsWith(github.ref, 'refs/tags/')
-      with:
-        name: cc-backend-${{github.ref_name}}
-        files: |
-          ${{ steps.files.outputs.ALMA_85_RPM }}
-          ${{ steps.files.outputs.ALMA_85_SRPM }}
-          ${{ steps.files.outputs.UBI_8_RPM }}
-          ${{ steps.files.outputs.UBI_8_SRPM }}
-          ${{ steps.files.outputs.U_2004_DEB }}
-          ${{ steps.files.outputs.U_2204_DEB }}
.github/workflows/test.yml (-1, +1)
@@ -7,7 +7,7 @@ jobs:
     - name: Install Go
       uses: actions/setup-go@v4
       with:
-        go-version: 1.22.x
+        go-version: 1.25.x
     - name: Checkout code
       uses: actions/checkout@v3
     - name: Build, Vet & Test
.gitignore (-21, +32)
@@ -1,21 +1,32 @@
 /cc-backend

-/var/job-archive
-/var/*.db
-/var/machine-state
-
 /.env
 /config.json
 /uiConfig.json

+/var/job-archive
+/var/machine-state
+/var/*.db-shm
+/var/*.db-wal
+/var/*.db
+/var/*.txt
+
+/var/checkpoints*
+
 migrateTimestamps.pl
 test_ccms_write_api*

 /web/frontend/public/build
 /web/frontend/node_modules
 /.vscode/*

-/archive-migration
-/archive-manager
-var/job.db-shm
-var/job.db-wal
-
-/internal/repository/testdata/job.db-shm
-/internal/repository/testdata/job.db-wal
+dist/
+*.db
+internal/repository/testdata/job.db-shm
+internal/repository/testdata/job.db-wal
+.idea
+tools/archive-migration/archive-migration
+tools/archive-manager/archive-manager
AGENTS.md (new file, +26)
@@ -0,0 +1,26 @@
+# ClusterCockpit Backend - Agent Guidelines
+
+## Build/Test Commands
+
+- Build: `make` or `go build ./cmd/cc-backend`
+- Run all tests: `make test` (runs: `go clean -testcache && go build ./... && go vet ./... && go test ./...`)
+- Run single test: `go test -run TestName ./path/to/package`
+- Run single test file: `go test ./path/to/package -run TestName`
+- Frontend build: `cd web/frontend && npm install && npm run build`
+- Generate GraphQL: `make graphql` (uses gqlgen)
+- Generate Swagger: `make swagger` (uses swaggo/swag)
+
+## Code Style
+
+- **Formatting**: Use `gofumpt` for all Go files (strict requirement)
+- **Copyright header**: All files must include copyright header (see existing files)
+- **Package docs**: Document packages with comprehensive package-level comments explaining purpose, usage, configuration
+- **Imports**: Standard library first, then external packages, then internal packages (grouped with blank lines)
+- **Naming**: Use camelCase for private, PascalCase for exported; descriptive names (e.g., `JobRepository`, `handleError`)
+- **Error handling**: Return errors, don't panic; use custom error types where appropriate; log with cclog package
+- **Logging**: Use `cclog` package (e.g., `cclog.Errorf()`, `cclog.Warnf()`, `cclog.Debugf()`)
+- **Testing**: Use standard `testing` package; use `testify/assert` for assertions; name tests `TestFunctionName`
+- **Comments**: Document all exported functions/types with godoc-style comments
+- **Structs**: Document fields with inline comments, especially for complex configurations
+- **HTTP handlers**: Return proper status codes; use `handleError()` helper for consistent error responses
+- **JSON**: Use struct tags for JSON marshaling; `DisallowUnknownFields()` for strict decoding
Makefile (-, + across five hunks)
@@ -1,8 +1,6 @@
 TARGET = ./cc-backend
-VAR = ./var
-CFG = config.json .env
 FRONTEND = ./web/frontend
-VERSION = 1.4.2
+VERSION = 1.4.4
 GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
 CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
 LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
@@ -42,7 +40,7 @@ SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \

 .NOTPARALLEL:

-$(TARGET): $(VAR) $(CFG) $(SVELTE_TARGETS)
+$(TARGET): $(SVELTE_TARGETS)
 	$(info ===> BUILD cc-backend)
 	@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend

@@ -52,12 +50,12 @@ frontend:

 swagger:
 	$(info ===> GENERATE swagger)
-	@go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api
+	@go tool github.com/swaggo/swag/cmd/swag init --parseDependency -d ./internal/api -g rest.go -o ./api
 	@mv ./api/docs.go ./internal/api/docs.go

 graphql:
 	$(info ===> GENERATE graphql)
-	@go run github.com/99designs/gqlgen
+	@go tool github.com/99designs/gqlgen

 clean:
 	$(info ===> CLEAN)
@@ -68,7 +66,7 @@ distclean:
 	@$(MAKE) clean
 	$(info ===> DISTCLEAN)
 	@rm -rf $(FRONTEND)/node_modules
-	@rm -rf $(VAR)
+	@rm -rf ./var

 test:
 	$(info ===> TESTING)
@@ -82,15 +80,7 @@ tags:
 	@ctags -R

-$(VAR):
-	@mkdir $(VAR)
-
-config.json:
-	$(info ===> Initialize config.json file)
-	@cp configs/config.json config.json
-
 .env:
 	$(info ===> Initialize .env file)
 	@cp configs/env-template.txt .env
+	@mkdir -p $(VAR)

 $(SVELTE_TARGETS): $(SVELTE_SRC)
 	$(info ===> BUILD frontend)
README.md (-, + across five hunks)
@@ -1,5 +1,8 @@
 # NOTE

+While we do our best to keep the master branch in a usable state, there is no guarantee the master branch works.
+Please do not use it for production!
+
 Please have a look at the [Release
 Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
 for breaking changes!
@@ -28,7 +31,7 @@ Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using

 The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by
 default. Optionally it can use a MySQL/MariaDB database server. While there are
-metric data backends for the InfluxDB and Prometheus time series databases, the
+metric data backends for the InfluxDB and Prometheus time series databases, the
 only tested and supported setup is to use cc-metric-store as the metric data
 backend. Documentation on how to integrate ClusterCockpit with other time series
 databases will be added in the future.
@@ -69,7 +72,7 @@ You can also try the demo using the latest release binary.
 Create a folder and put the release binary `cc-backend` into this folder.
 Execute the following steps:

-``` shell
+```shell
 ./cc-backend -init
 vim config.json (Add a second cluster entry and name the clusters alex and fritz)
 wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
@@ -88,11 +91,11 @@ Analysis, Systems and Status views).
 There is a Makefile to automate the build of cc-backend. The Makefile supports
 the following targets:

-* `make`: Initialize `var` directory and build svelte frontend and backend
-  binary. Note that there is no proper prerequisite handling. Any change of
-  frontend source files will result in a complete rebuild.
-* `make clean`: Clean go build cache and remove binary.
-* `make test`: Run the tests that are also run in the GitHub workflow setup.
+- `make`: Initialize `var` directory and build svelte frontend and backend
+  binary. Note that there is no proper prerequisite handling. Any change of
+  frontend source files will result in a complete rebuild.
+- `make clean`: Clean go build cache and remove binary.
+- `make test`: Run the tests that are also run in the GitHub workflow setup.

 A common workflow for setting up cc-backend from scratch is:

@@ -128,41 +131,41 @@ ln -s <your-existing-job-archive> ./var/job-archive

 ## Project file structure

-* [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
-  contains the API schema files for the REST and GraphQL APIs. The REST API is
-  documented in the OpenAPI 3.0 format in
-  [./api/openapi.yaml](./api/openapi.yaml).
-* [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
-  contains `main.go` for the main application.
-* [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
-  contains documentation about configuration and command line options and required
-  environment variables. A sample configuration file is provided.
-* [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs)
-  contains more in-depth documentation.
-* [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
-  contains an example of setting up systemd for production use.
-* [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
-  contains library source code that is not intended for use by others.
-* [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
-  contains Go packages that can be used by other projects.
-* [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
-  Additional command line helper tools.
-  * [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
-    Commands for getting infos about and existing job archive.
-  * [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
-    Tool to convert external pubkey for use in `cc-backend`.
-  * [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
-    contains a small application to generate a compatible JWT keypair. You find
-    documentation on how to use it
-    [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
-* [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web)
-  Server-side templates and frontend-related files:
-  * [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
-    Svelte components and static assets for the frontend UI
-  * [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
-    Server-side Go templates
-* [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
-  Configures the behaviour and generation of
-  [gqlgen](https://github.com/99designs/gqlgen).
-* [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh)
-  is a shell script that sets up demo data, and builds and starts `cc-backend`.
+- [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
+  contains the API schema files for the REST and GraphQL APIs. The REST API is
+  documented in the OpenAPI 3.0 format in
+  [./api/openapi.yaml](./api/openapi.yaml).
+- [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
+  contains `main.go` for the main application.
+- [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
+  contains documentation about configuration and command line options and required
+  environment variables. A sample configuration file is provided.
+- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs)
+  contains more in-depth documentation.
+- [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
+  contains an example of setting up systemd for production use.
+- [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
+  contains library source code that is not intended for use by others.
+- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
+  contains Go packages that can be used by other projects.
+- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
+  Additional command line helper tools.
+  - [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
+    Commands for getting infos about and existing job archive.
+  - [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
+    Tool to convert external pubkey for use in `cc-backend`.
+  - [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
+    contains a small application to generate a compatible JWT keypair. You find
+    documentation on how to use it
+    [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
+- [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web)
+  Server-side templates and frontend-related files:
+  - [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
+    Svelte components and static assets for the frontend UI
+  - [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
+    Server-side Go templates
+- [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
+  Configures the behaviour and generation of
+  [gqlgen](https://github.com/99designs/gqlgen).
+- [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh)
+  is a shell script that sets up demo data, and builds and starts `cc-backend`.
ReleaseNotes.md
@@ -1,13 +1,27 @@
-# `cc-backend` version 1.4.2
+# `cc-backend` version 1.4.4

 Supports job archive version 2 and database version 8.

-This is a small bug fix release of `cc-backend`, the API backend and frontend
+This is a bug fix release of `cc-backend`, the API backend and frontend
 implementation of ClusterCockpit.
 For release specific notes visit the [ClusterCockpit Documentation](https://clusterockpit.org/docs/release/).

+## Breaking changes
+
+The option `apiAllowedIPs` is now a required configuration attribute in
+`config.json`. This option restricts access to the admin API.
+
+To retain the previous behavior that the API is per default accessible from
+everywhere set:
+
+```json
+"apiAllowedIPs": [
+  "*"
+]
+```
+
 ## Breaking changes for minor release 1.4.x

 - You need to perform a database migration. Depending on your database size the
   migration might require several hours!
 - You need to adapt the `cluster.json` configuration files in the job-archive,
@@ -22,20 +36,7 @@ For release specific notes visit the [ClusterCockpit Documentation](https://clus

 ## New features

-- Tags have a scope now. Tags created by a basic user are only visible by that
-  user. Tags created by an admin/support role can be configured to be visible by
-  all users (global scope) or only be admin/support role.
-- Re-sampling support for running (requires a recent `cc-metric-store`) and
-  archived jobs. This greatly speeds up loading of large or very long jobs. You
-  need to add the new configuration key `enable-resampling` to the `config.json`
-  file.
-- For finished jobs a total job energy is shown in the job view.
-- Continuous scrolling in job lists is default now.
-- All database queries (especially for sqlite) were optimized resulting in
-  dramatically faster load times.
-- A performance and energy footprint can be freely configured on a per
-  subcluster base. One can filter for footprint statistics for running and
-  finished jobs.
-- Enable to delete tags from the web interface
-
 ## Known issues
api/schema.graphqls
@@ -4,61 +4,88 @@ scalar Any
 scalar NullableFloat
 scalar MetricScope
 scalar JobState
+scalar SchedulerState
+scalar MonitoringState
+
+type Node {
+  id: ID!
+  hostname: String!
+  cluster: String!
+  subCluster: String!
+  jobsRunning: Int!
+  cpusAllocated: Int
+  memoryAllocated: Int
+  gpusAllocated: Int
+  schedulerState: SchedulerState!
+  healthState: MonitoringState!
+  metaData: Any
+}
+
+type NodeStates {
+  state: String!
+  count: Int!
+}
+
+type NodeStatesTimed {
+  state: String!
+  counts: [Int!]!
+  times: [Int!]!
+}

 type Job {
   id: ID!
   jobId: Int!
   user: String!
   project: String!
   cluster: String!
   subCluster: String!
   startTime: Time!
   duration: Int!
   walltime: Int!
   numNodes: Int!
   numHWThreads: Int!
   numAcc: Int!
   energy: Float!
   SMT: Int!
-  exclusive: Int!
+  shared: String!
   partition: String!
   arrayJobId: Int!
   monitoringStatus: Int!
   state: JobState!
   tags: [Tag!]!
   resources: [Resource!]!
   concurrentJobs: JobLinkResultList
   footprint: [FootprintValue]
   energyFootprint: [EnergyFootprintValue]
   metaData: Any
   userData: User
 }

 type JobLink {
   id: ID!
   jobId: Int!
 }

 type Cluster {
   name: String!
   partitions: [String!]! # Slurm partitions
   subClusters: [SubCluster!]! # Hardware partitions/subclusters
 }

 type SubCluster {
   name: String!
   nodes: String!
   numberOfNodes: Int!
   processorType: String!
   socketsPerNode: Int!
   coresPerSocket: Int!
   threadsPerCore: Int!
   flopRateScalar: MetricValue!
   flopRateSimd: MetricValue!
   memoryBandwidth: MetricValue!
   topology: Topology!
   metricConfig: [MetricConfig!]!
   footprint: [String!]!
 }

 type FootprintValue {
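The node-state types above pair with the node queries added to `Query` further down in this diff. As a usage sketch, using only names from this diff (the cluster name is borrowed from the README demo and is illustrative):

```graphql
# Count nodes per state on one cluster via the new nodeStates query.
query {
  nodeStates(filter: [{ cluster: { eq: "fritz" } }]) {
    state
    count
  }
}
```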
@@ -80,75 +107,119 @@ type MetricValue {
 }

 type Topology {
   node: [Int!]
   socket: [[Int!]!]
   memoryDomain: [[Int!]!]
   die: [[Int!]!]
   core: [[Int!]!]
   accelerators: [Accelerator!]
 }

 type Accelerator {
   id: String!
   type: String!
   model: String!
 }

 type SubClusterConfig {
   name: String!
   peak: Float
   normal: Float
   caution: Float
   alert: Float
   remove: Boolean
 }

 type MetricConfig {
   name: String!
   unit: Unit!
   scope: MetricScope!
   aggregation: String!
   timestep: Int!
   peak: Float!
   normal: Float
   caution: Float!
   alert: Float!
   lowerIsBetter: Boolean
   subClusters: [SubClusterConfig!]!
 }

 type Tag {
   id: ID!
   type: String!
   name: String!
   scope: String!
 }

 type Resource {
   hostname: String!
   hwthreads: [Int!]
   accelerators: [String!]
   configuration: String
 }

 type JobMetricWithName {
   name: String!
   scope: MetricScope!
   metric: JobMetric!
 }

+type ClusterMetricWithName {
+  name: String!
+  unit: Unit
+  timestep: Int!
+  data: [NullableFloat!]!
+}

 type JobMetric {
   unit: Unit
   timestep: Int!
   series: [Series!]
   statisticsSeries: StatsSeries
 }

 type Series {
   hostname: String!
   id: String
   statistics: MetricStatistics
   data: [NullableFloat!]!
 }

+type StatsSeries {
+  mean: [NullableFloat!]!
+  median: [NullableFloat!]!
+  min: [NullableFloat!]!
+  max: [NullableFloat!]!
+}
+
+type NamedStatsWithScope {
+  name: String!
+  scope: MetricScope!
+  stats: [ScopedStats!]!
+}
+
+type ScopedStats {
+  hostname: String!
+  id: String
+  data: MetricStatistics!
+}
+
+type JobStats {
+  id: Int!
+  jobId: String!
+  startTime: Int!
+  duration: Int!
+  cluster: String!
+  subCluster: String!
+  numNodes: Int!
+  numHWThreads: Int
+  numAccelerators: Int
+  stats: [NamedStats!]!
+}
+
+type NamedStats {
+  name: String!
+  data: MetricStatistics!
+}

 type Unit {
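A usage sketch for the new per-scope statistics types via the `scopedJobStats` query added below. Note the `avg` and `min` fields of `MetricStatistics` are assumed here, since this diff only shows its `max` field explicitly:

```graphql
# Per-scope statistics for one job. MetricScope is a scalar, so the
# scope is passed as a string here (an assumption about its encoding).
query {
  scopedJobStats(id: "1234", metrics: ["flops_any"], scopes: ["node"]) {
    name
    scope
    stats {
      hostname
      data {
        avg
        min
        max
      }
    }
  }
}
```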
@@ -162,21 +233,14 @@ type MetricStatistics {
   max: Float!
 }

-type StatsSeries {
-  mean: [NullableFloat!]!
-  median: [NullableFloat!]!
-  min: [NullableFloat!]!
-  max: [NullableFloat!]!
-}
-
 type MetricFootprints {
   metric: String!
   data: [NullableFloat!]!
 }

 type Footprints {
   timeWeights: TimeWeights!
   metrics: [MetricFootprints!]!
 }

 type TimeWeights {
@@ -185,13 +249,43 @@ type TimeWeights {
   coreHours: [NullableFloat!]!
 }

-enum Aggregate { USER, PROJECT, CLUSTER }
-enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS }
+enum Aggregate {
+  USER
+  PROJECT
+  CLUSTER
+  SUBCLUSTER
+}
+enum SortByAggregate {
+  TOTALWALLTIME
+  TOTALJOBS
+  TOTALUSERS
+  TOTALNODES
+  TOTALNODEHOURS
+  TOTALCORES
+  TOTALCOREHOURS
+  TOTALACCS
+  TOTALACCHOURS
+}

 type NodeMetrics {
   host: String!
+  state: String!
   subCluster: String!
   metrics: [JobMetricWithName!]!
 }

+type ClusterMetrics {
+  nodeCount: Int!
+  metrics: [ClusterMetricWithName!]!
+}
+
 type NodesResultList {
   items: [NodeMetrics!]!
   offset: Int
   limit: Int
   count: Int
+  totalNodes: Int
+  hasNextPage: Boolean
 }

 type ClusterSupport {
@@ -208,14 +302,14 @@ type GlobalMetricListItem {
 }

 type Count {
   name: String!
   count: Int!
 }

 type User {
   username: String!
   name: String!
   email: String!
 }

 input MetricStatItem {
@@ -224,23 +318,92 @@ input MetricStatItem {
 }

 type Query {
   clusters: [Cluster!]! # List of all clusters
   tags: [Tag!]! # List of all tags
   globalMetrics: [GlobalMetricListItem!]!

   user(username: String!): User
   allocatedNodes(cluster: String!): [Count!]!

+  ## Node Queries New
+  node(id: ID!): Node
+  nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
+  nodeStates(filter: [NodeFilter!]): [NodeStates!]!
+  nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!
+
   job(id: ID!): Job
-  jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
+  jobMetrics(
+    id: ID!
+    metrics: [String!]
+    scopes: [MetricScope!]
+    resolution: Int
+  ): [JobMetricWithName!]!
+
+  jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
+
+  scopedJobStats(
+    id: ID!
+    metrics: [String!]
+    scopes: [MetricScope!]
+  ): [NamedStatsWithScope!]!
+
+  jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]!
   jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints

-  jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
-  jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
+  jobs(
+    filter: [JobFilter!]
+    page: PageRequest
+    order: OrderByInput
+  ): JobResultList!
+
+  jobsStatistics(
+    filter: [JobFilter!]
+    metrics: [String!]
+    page: PageRequest
+    sortBy: SortByAggregate
+    groupBy: Aggregate
+    numDurationBins: String
+    numMetricBins: Int
+  ): [JobsStatistics!]!

-  rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
+  rooflineHeatmap(
+    filter: [JobFilter!]!
+    rows: Int!
+    cols: Int!
+    minX: Float!
+    minY: Float!
+    maxX: Float!
+    maxY: Float!
+  ): [[Float!]!]!

-  nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
+  nodeMetrics(
+    cluster: String!
+    nodes: [String!]
+    scopes: [MetricScope!]
+    metrics: [String!]
+    from: Time!
+    to: Time!
+  ): [NodeMetrics!]!
   nodeMetricsList(
     cluster: String!
     subCluster: String!
+    stateFilter: String!
     nodeFilter: String!
     scopes: [MetricScope!]
     metrics: [String!]
     from: Time!
     to: Time!
     page: PageRequest
     resolution: Int
   ): NodesResultList!
+
+  clusterMetrics(
+    cluster: String!
+    metrics: [String!]
+    from: Time!
+    to: Time!
+  ): ClusterMetrics!
 }

 type Mutation {
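The extended `jobsStatistics` signature can now group by subcluster and control histogram binning. A sketch using only names from this diff (the `numDurationBins` value is illustrative, and `HistoPoint`'s `count`/`value` fields are taken from the comments in the `JobsStatistics` hunk below):

```graphql
# Per-subcluster statistics with explicit duration binning.
query {
  jobsStatistics(groupBy: SUBCLUSTER, sortBy: TOTALJOBS, numDurationBins: "1h") {
    id
    name
    totalJobs
    totalUsers
    totalCoreHours
    histDuration {
      value
      count
    }
  }
}
```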
@@ -248,41 +411,59 @@ type Mutation {
   deleteTag(id: ID!): ID!
   addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
   removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
+  removeTagFromList(tagIds: [ID!]!): [Int!]!

   updateConfiguration(name: String!, value: String!): String
 }

-type IntRangeOutput { from: Int!, to: Int! }
-type TimeRangeOutput { range: String, from: Time!, to: Time! }
+type IntRangeOutput {
+  from: Int!
+  to: Int!
+}
+type TimeRangeOutput {
+  range: String
+  from: Time!
+  to: Time!
+}

+input NodeFilter {
+  hostname: StringInput
+  cluster: StringInput
+  subcluster: StringInput
+  schedulerState: SchedulerState
+  healthState: MonitoringState
+  timeStart: Int
+}
+
 input JobFilter {
   tags: [ID!]
+  dbId: [ID!]
   jobId: StringInput
   arrayJobId: Int
   user: StringInput
   project: StringInput
   jobName: StringInput
   cluster: StringInput
   partition: StringInput
   duration: IntRange
   energy: FloatRange

   minRunningFor: Int

   numNodes: IntRange
   numAccelerators: IntRange
   numHWThreads: IntRange

   startTime: TimeRange
   state: [JobState!]
   metricStats: [MetricStatItem!]
-  exclusive: Int
+  shared: String
   node: StringInput
 }

 input OrderByInput {
   field: String!
-  type: String!,
+  type: String!
   order: SortDirectionEnum! = ASC
 }
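For the tag mutations, a minimal sketch (the IDs are illustrative; the selection set uses the `Tag` fields defined earlier in this diff):

```graphql
# Attach an existing tag to a job and read back the job's tag list.
mutation {
  addTagsToJob(job: "1234", tagIds: ["42"]) {
    id
    type
    name
    scope
  }
}
```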
@@ -292,34 +473,46 @@ enum SortDirectionEnum {
 }

 input StringInput {
   eq: String
   neq: String
   contains: String
   startsWith: String
   endsWith: String
   in: [String!]
 }

-input IntRange { from: Int!, to: Int! }
-input TimeRange { range: String, from: Time, to: Time }
+input IntRange {
+  from: Int!
+  to: Int!
+}
+input TimeRange {
+  range: String
+  from: Time
+  to: Time
+}

 input FloatRange {
   from: Float!
   to: Float!
 }

+type NodeStateResultList {
+  items: [Node!]!
+  count: Int
+}
+
 type JobResultList {
   items: [Job!]!
   offset: Int
   limit: Int
   count: Int
   hasNextPage: Boolean
 }

 type JobLinkResultList {
   listQuery: String
   items: [JobLink!]!
   count: Int
 }

 type HistoPoint {
@@ -341,27 +534,28 @@ type MetricHistoPoint {
|
||||
max: Int
|
||||
}
|
||||
|
||||
type JobsStatistics {
|
||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||
totalJobs: Int! # Number of jobs
|
||||
runningJobs: Int! # Number of running jobs
|
||||
shortJobs: Int! # Number of jobs with a duration of less than duration
|
||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||
totalNodes: Int! # Sum of the nodes of all matched jobs
|
||||
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
||||
totalCores: Int! # Sum of the cores of all matched jobs
|
||||
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
||||
totalAccs: Int! # Sum of the accs of all matched jobs
|
||||
totalAccHours: Int! # Sum of the gpu hours of all matched jobs
|
||||
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
||||
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
||||
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
||||
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
|
||||
type JobsStatistics {
|
||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster/subcluster
|
||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||
totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs)
|
||||
totalJobs: Int! # Number of jobs
|
||||
runningJobs: Int! # Number of running jobs
|
||||
shortJobs: Int! # Number of jobs with a duration of less than config'd ShortRunningJobsDuration
|
||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||
totalNodes: Int! # Sum of the nodes of all matched jobs
|
||||
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
||||
totalCores: Int! # Sum of the cores of all matched jobs
|
||||
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
||||
totalAccs: Int! # Sum of the accs of all matched jobs
|
||||
totalAccHours: Int! # Sum of the gpu hours of all matched jobs
|
||||
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
||||
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
||||
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
||||
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
|
||||
}
|
||||
|
||||
input PageRequest {
|
||||
itemsPerPage: Int!
|
||||
page: Int!
|
||||
page: Int!
|
||||
}
|
||||
|
||||
645 api/swagger.json
@@ -15,9 +15,8 @@
        "version": "1.0.0"
    },
    "host": "localhost:8080",
    "basePath": "/api",
    "paths": {
        "/clusters/": {
        "/api/clusters/": {
            "get": {
                "security": [
                    {
@@ -74,7 +73,7 @@
                }
            }
        },
        "/jobs/": {
        "/api/jobs/": {
            "get": {
                "security": [
                    {
@@ -169,7 +168,7 @@
                }
            }
        },
        "/jobs/delete_job/": {
        "/api/jobs/delete_job/": {
            "delete": {
                "security": [
                    {
@@ -202,7 +201,7 @@
                    "200": {
                        "description": "Success message",
                        "schema": {
                            "$ref": "#/definitions/api.DeleteJobApiResponse"
                            "$ref": "#/definitions/api.DefaultApiResponse"
                        }
                    },
                    "400": {
@@ -244,7 +243,7 @@
                }
            }
        },
        "/jobs/delete_job/{id}": {
        "/api/jobs/delete_job/{id}": {
            "delete": {
                "security": [
                    {
@@ -272,7 +271,7 @@
                    "200": {
                        "description": "Success message",
                        "schema": {
                            "$ref": "#/definitions/api.DeleteJobApiResponse"
                            "$ref": "#/definitions/api.DefaultApiResponse"
                        }
                    },
                    "400": {
@@ -314,7 +313,7 @@
                }
            }
        },
        "/jobs/delete_job_before/{ts}": {
        "/api/jobs/delete_job_before/{ts}": {
            "delete": {
                "security": [
                    {
@@ -342,7 +341,7 @@
                    "200": {
                        "description": "Success message",
                        "schema": {
                            "$ref": "#/definitions/api.DeleteJobApiResponse"
                            "$ref": "#/definitions/api.DefaultApiResponse"
                        }
                    },
                    "400": {
@@ -384,7 +383,7 @@
                }
            }
        },
        "/jobs/edit_meta/{id}": {
        "/api/jobs/edit_meta/{id}": {
            "post": {
                "security": [
                    {
@@ -454,14 +453,14 @@
                }
            }
        },
        "/jobs/start_job/": {
        "/api/jobs/start_job/": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.",
                "description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'Job' scheme, API will fail to execute if requirements are not met.",
                "consumes": [
                    "application/json"
                ],
@@ -479,7 +478,7 @@
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/schema.JobMeta"
                            "$ref": "#/definitions/schema.Job"
                        }
                    }
                ],
@@ -487,7 +486,7 @@
                    "201": {
                        "description": "Job added successfully",
                        "schema": {
                            "$ref": "#/definitions/api.StartJobApiResponse"
                            "$ref": "#/definitions/api.DefaultApiResponse"
                        }
                    },
                    "400": {
@@ -523,14 +522,14 @@
                }
            }
        },
        "/jobs/stop_job/": {
        "/api/jobs/stop_job/": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'JobMeta' scheme.",
                "description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'Job' scheme.",
                "produces": [
                    "application/json"
                ],
@@ -553,7 +552,7 @@
                    "200": {
                        "description": "Success message",
                        "schema": {
                            "$ref": "#/definitions/schema.JobMeta"
                            "$ref": "#/definitions/schema.Job"
                        }
                    },
                    "400": {
@@ -581,7 +580,7 @@
                        }
                    },
                    "422": {
                        "description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
                        "description": "Unprocessable Entity: job has already been stopped",
                        "schema": {
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
@@ -595,7 +594,7 @@
                }
            }
        },
        "/jobs/tag_job/{id}": {
        "/api/jobs/tag_job/{id}": {
            "post": {
                "security": [
                    {
@@ -668,14 +667,14 @@
                }
            }
        },
        "/jobs/{id}": {
        "/api/jobs/{id}": {
            "get": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
                "description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
                "produces": [
                    "application/json"
                ],
@@ -749,7 +748,7 @@
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
                "description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
                "consumes": [
                    "application/json"
                ],
@@ -827,119 +826,74 @@
                }
            }
        },
        "/user/{id}": {
        "/api/nodestats/": {
            "post": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Modifies user defined by username (id) in one of four possible ways.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
                "consumes": [
                    "multipart/form-data"
                ],
                "description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.",
                "produces": [
                    "text/plain"
                    "application/json"
                ],
                "tags": [
                    "User"
                    "Nodestates"
                ],
                "summary": "Updates an existing user",
                "summary": "Deliver updated Slurm node states",
                "parameters": [
                    {
                        "type": "string",
                        "description": "Database ID of User",
                        "name": "id",
                        "in": "path",
                        "required": true
                    },
                    {
                        "enum": [
                            "admin",
                            "support",
                            "manager",
                            "user",
                            "api"
                        ],
                        "type": "string",
                        "description": "Priority 1: Role to add",
                        "name": "add-role",
                        "in": "formData"
                    },
                    {
                        "enum": [
                            "admin",
                            "support",
                            "manager",
                            "user",
                            "api"
                        ],
                        "type": "string",
                        "description": "Priority 2: Role to remove",
                        "name": "remove-role",
                        "in": "formData"
                    },
                    {
                        "type": "string",
                        "description": "Priority 3: Project to add",
                        "name": "add-project",
                        "in": "formData"
                    },
                    {
                        "type": "string",
                        "description": "Priority 4: Project to remove",
                        "name": "remove-project",
                        "in": "formData"
                        "description": "Request body containing nodes and their states",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "$ref": "#/definitions/api.UpdateNodeStatesRequest"
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Success Response Message",
                        "description": "Success message",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.DefaultApiResponse"
                        }
                    },
                    "400": {
                        "description": "Bad Request",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "401": {
                        "description": "Unauthorized",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "403": {
                        "description": "Forbidden",
                        "schema": {
                            "type": "string"
                        }
                    },
                    "422": {
                        "description": "Unprocessable Entity: The user could not be updated",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    }
                }
            }
        },
        "/users/": {
        "/api/users/": {
            "get": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
                "description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.",
                "produces": [
                    "application/json"
                ],
@@ -991,70 +945,111 @@
                    }
                }
            }
            },
            "post": {
            }
        },
        "/jobs/tag_job/{id}": {
            "delete": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "User specified in form data will be saved to database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
                "description": "Removes tag(s) from a job specified by DB ID. Name and Type of Tag(s) must match.\nTag Scope is required for matching, options: \"global\", \"admin\". Private tags can not be deleted via API.\nIf tagged job is already finished: Tag will be removed from respective archive files.",
                "consumes": [
                    "multipart/form-data"
                    "application/json"
                ],
                "produces": [
                    "application/json"
                ],
                "tags": [
                    "Job add and modify"
                ],
                "summary": "Removes one or more tags from a job",
                "parameters": [
                    {
                        "type": "integer",
                        "description": "Job Database ID",
                        "name": "id",
                        "in": "path",
                        "required": true
                    },
                    {
                        "description": "Array of tag-objects to remove",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "type": "array",
                            "items": {
                                "$ref": "#/definitions/api.ApiTag"
                            }
                        }
                    }
                ],
                "responses": {
                    "200": {
                        "description": "Updated job resource",
                        "schema": {
                            "$ref": "#/definitions/schema.Job"
                        }
                    },
                    "400": {
                        "description": "Bad Request",
                        "schema": {
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "401": {
                        "description": "Unauthorized",
                        "schema": {
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "404": {
                        "description": "Job or tag does not exist",
                        "schema": {
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    }
                }
            }
        },
        "/tags/": {
            "delete": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "Removes tags by type and name. Name and Type of Tag(s) must match.\nTag Scope is required for matching, options: \"global\", \"admin\". Private tags can not be deleted via API.\nTag wills be removed from respective archive files.",
                "consumes": [
                    "application/json"
                ],
                "produces": [
                    "text/plain"
                ],
                "tags": [
                    "User"
                    "Tag remove"
                ],
                "summary": "Adds a new user",
                "summary": "Removes all tags and job-relations for type:name tuple",
                "parameters": [
                    {
                        "type": "string",
                        "description": "Unique user ID",
                        "name": "username",
                        "in": "formData",
                        "required": true
                    },
                    {
                        "type": "string",
                        "description": "User password",
                        "name": "password",
                        "in": "formData",
                        "required": true
                    },
                    {
                        "enum": [
                            "admin",
                            "support",
                            "manager",
                            "user",
                            "api"
                        ],
                        "type": "string",
                        "description": "User role",
                        "name": "role",
                        "in": "formData",
                        "required": true
                    },
                    {
                        "type": "string",
                        "description": "Managed project, required for new manager role user",
                        "name": "project",
                        "in": "formData"
                    },
                    {
                        "type": "string",
                        "description": "Users name",
                        "name": "name",
                        "in": "formData"
                    },
                    {
                        "type": "string",
                        "description": "Users email",
                        "name": "email",
                        "in": "formData"
                        "description": "Array of tag-objects to remove",
                        "name": "request",
                        "in": "body",
                        "required": true,
                        "schema": {
                            "type": "array",
                            "items": {
                                "$ref": "#/definitions/api.ApiTag"
                            }
                        }
                    }
                ],
                "responses": {
@@ -1067,93 +1062,25 @@
                    "400": {
                        "description": "Bad Request",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "401": {
                        "description": "Unauthorized",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "403": {
                        "description": "Forbidden",
                    "404": {
                        "description": "Job or tag does not exist",
                        "schema": {
                            "type": "string"
                        }
                    },
                    "422": {
                        "description": "Unprocessable Entity: creating user failed",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "type": "string"
                        }
                    }
                }
            },
            "delete": {
                "security": [
                    {
                        "ApiKeyAuth": []
                    }
                ],
                "description": "User defined by username in form data will be deleted from database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
                "consumes": [
                    "multipart/form-data"
                ],
                "produces": [
                    "text/plain"
                ],
                "tags": [
                    "User"
                ],
                "summary": "Deletes a user",
                "parameters": [
                    {
                        "type": "string",
                        "description": "User ID to delete",
                        "name": "username",
                        "in": "formData",
                        "required": true
                    }
                ],
                "responses": {
                    "200": {
                        "description": "User deleted successfully"
                    },
                    "400": {
                        "description": "Bad Request",
                        "schema": {
                            "type": "string"
                        }
                    },
                    "401": {
                        "description": "Unauthorized",
                        "schema": {
                            "type": "string"
                        }
                    },
                    "403": {
                        "description": "Forbidden",
                        "schema": {
                            "type": "string"
                        }
                    },
                    "422": {
                        "description": "Unprocessable Entity: deleting user failed",
                        "schema": {
                            "type": "string"
                        }
                    },
                    "500": {
                        "description": "Internal Server Error",
                        "schema": {
                            "type": "string"
                            "$ref": "#/definitions/api.ErrorResponse"
                        }
                    }
                }
@@ -1207,6 +1134,14 @@
            }
        }
    },
    "api.DefaultApiResponse": {
        "type": "object",
        "properties": {
            "msg": {
                "type": "string"
            }
        }
    },
    "api.DeleteJobApiRequest": {
        "type": "object",
        "required": [
@@ -1230,14 +1165,6 @@
            }
        }
    },
    "api.DeleteJobApiResponse": {
        "type": "object",
        "properties": {
            "msg": {
                "type": "string"
            }
        }
    },
    "api.EditMetaRequest": {
        "type": "object",
        "properties": {
@@ -1301,7 +1228,7 @@
                "description": "Array of jobs",
                "type": "array",
                "items": {
                    "$ref": "#/definitions/schema.JobMeta"
                    "$ref": "#/definitions/schema.Job"
                }
            },
            "page": {
@@ -1324,11 +1251,35 @@
            }
        }
    },
    "api.StartJobApiResponse": {
    "api.Node": {
        "type": "object",
        "properties": {
            "msg": {
            "cpusAllocated": {
                "type": "integer"
            },
            "cpusTotal": {
                "type": "integer"
            },
            "gpusAllocated": {
                "type": "integer"
            },
            "gpusTotal": {
                "type": "integer"
            },
            "hostname": {
                "type": "string"
            },
            "memoryAllocated": {
                "type": "integer"
            },
            "memoryTotal": {
                "type": "integer"
            },
            "states": {
                "type": "array",
                "items": {
                    "type": "string"
                }
            }
        }
    },
@@ -1365,6 +1316,21 @@
            }
        }
    },
    "api.UpdateNodeStatesRequest": {
        "type": "object",
        "properties": {
            "cluster": {
                "type": "string",
                "example": "fritz"
            },
            "nodes": {
                "type": "array",
                "items": {
                    "$ref": "#/definitions/api.Node"
                }
            }
        }
    },
    "schema.Accelerator": {
        "type": "object",
        "properties": {
@@ -1400,7 +1366,6 @@
        }
    },
    "schema.Job": {
        "description": "Information of a HPC job.",
        "type": "object",
        "properties": {
            "arrayJobId": {
@@ -1425,19 +1390,15 @@
            "energyFootprint": {
                "type": "object",
                "additionalProperties": {
                    "type": "number"
                    "type": "number",
                    "format": "float64"
                }
            },
            "exclusive": {
                "type": "integer",
                "maximum": 2,
                "minimum": 0,
                "example": 1
            },
            "footprint": {
                "type": "object",
                "additionalProperties": {
                    "type": "number"
                    "type": "number",
                    "format": "float64"
                }
            },
            "id": {
@@ -1449,12 +1410,18 @@
            },
            "jobState": {
                "enum": [
                    "completed",
                    "failed",
                    "boot_fail",
                    "cancelled",
                    "stopped",
                    "timeout",
                    "out_of_memory"
                    "completed",
                    "deadline",
                    "failed",
                    "node_fail",
                    "out_of_memory",
                    "pending",
                    "preempted",
                    "running",
                    "suspended",
                    "timeout"
                ],
                "allOf": [
                    {
@@ -1498,23 +1465,48 @@
                "type": "string",
                "example": "abcd200"
            },
            "requestedMemory": {
                "description": "in MB",
                "type": "integer",
                "minimum": 1,
                "example": 128000
            },
            "resources": {
                "type": "array",
                "items": {
                    "$ref": "#/definitions/schema.Resource"
                }
            },
            "shared": {
                "type": "string",
                "enum": [
                    "none",
                    "single_user",
                    "multi_user"
                ]
            },
            "smt": {
                "type": "integer",
                "example": 4
            },
            "startTime": {
                "type": "string"
                "type": "integer",
                "example": 1649723812
            },
            "statistics": {
                "type": "object",
                "additionalProperties": {
                    "$ref": "#/definitions/schema.JobStatistics"
                }
            },
            "subCluster": {
                "type": "string",
                "example": "main"
            },
            "submitTime": {
                "type": "integer",
                "example": 1649723812
            },
            "tags": {
                "type": "array",
                "items": {
@@ -1557,147 +1549,6 @@
            }
        }
    },
    "schema.JobMeta": {
        "description": "Meta data information of a HPC job.",
        "type": "object",
        "properties": {
            "arrayJobId": {
                "type": "integer",
                "example": 123000
            },
            "cluster": {
                "type": "string",
                "example": "fritz"
            },
            "concurrentJobs": {
                "$ref": "#/definitions/schema.JobLinkResultList"
            },
            "duration": {
                "type": "integer",
                "minimum": 1,
                "example": 43200
            },
            "energy": {
                "type": "number"
            },
            "energyFootprint": {
                "type": "object",
                "additionalProperties": {
                    "type": "number"
                }
            },
            "exclusive": {
                "type": "integer",
                "maximum": 2,
                "minimum": 0,
                "example": 1
            },
            "footprint": {
                "type": "object",
                "additionalProperties": {
                    "type": "number"
                }
            },
            "id": {
                "type": "integer"
            },
            "jobId": {
                "type": "integer",
                "example": 123000
            },
            "jobState": {
                "enum": [
                    "completed",
                    "failed",
                    "cancelled",
                    "stopped",
                    "timeout",
                    "out_of_memory"
                ],
                "allOf": [
                    {
                        "$ref": "#/definitions/schema.JobState"
                    }
                ],
                "example": "completed"
            },
            "metaData": {
                "type": "object",
                "additionalProperties": {
                    "type": "string"
                }
            },
            "monitoringStatus": {
                "type": "integer",
                "maximum": 3,
                "minimum": 0,
                "example": 1
            },
            "numAcc": {
                "type": "integer",
                "minimum": 1,
                "example": 2
            },
            "numHwthreads": {
                "type": "integer",
                "minimum": 1,
                "example": 20
            },
            "numNodes": {
                "type": "integer",
                "minimum": 1,
                "example": 2
            },
            "partition": {
                "type": "string",
                "example": "main"
            },
            "project": {
                "type": "string",
                "example": "abcd200"
            },
            "resources": {
                "type": "array",
                "items": {
                    "$ref": "#/definitions/schema.Resource"
                }
            },
            "smt": {
                "type": "integer",
                "example": 4
            },
            "startTime": {
                "type": "integer",
                "minimum": 1,
                "example": 1649723812
            },
            "statistics": {
                "type": "object",
                "additionalProperties": {
                    "$ref": "#/definitions/schema.JobStatistics"
                }
            },
            "subCluster": {
                "type": "string",
                "example": "main"
            },
            "tags": {
                "type": "array",
                "items": {
                    "$ref": "#/definitions/schema.Tag"
                }
            },
            "user": {
                "type": "string",
                "example": "abcd100h"
            },
            "walltime": {
                "type": "integer",
                "minimum": 1,
                "example": 86400
            }
        }
    },
    "schema.JobMetric": {
        "type": "object",
        "properties": {
@@ -1721,24 +1572,32 @@
    "schema.JobState": {
        "type": "string",
        "enum": [
            "running",
            "completed",
            "failed",
            "boot_fail",
            "cancelled",
            "stopped",
            "timeout",
            "completed",
            "deadline",
            "failed",
            "node_fail",
            "out_of_memory",
            "pending",
            "preempted",
            "out_of_memory"
            "running",
            "suspended",
            "timeout"
        ],
        "x-enum-varnames": [
            "JobStateRunning",
            "JobStateCompleted",
            "JobStateFailed",
            "JobStateBootFail",
            "JobStateCancelled",
            "JobStateStopped",
            "JobStateTimeout",
            "JobStateCompleted",
            "JobStateDeadline",
            "JobStateFailed",
            "JobStateNodeFail",
            "JobStateOutOfMemory",
            "JobStatePending",
            "JobStatePreempted",
            "JobStateOutOfMemory"
            "JobStateRunning",
            "JobStateSuspended",
            "JobStateTimeout"
        ]
    },
    "schema.JobStatistics": {
@@ -1937,7 +1796,8 @@
                "additionalProperties": {
                    "type": "array",
                    "items": {
                        "type": "number"
                        "type": "number",
                        "format": "float64"
                    }
                }
            }
@@ -2025,6 +1885,9 @@
            },
            "remove": {
                "type": "boolean"
            },
            "unit": {
                "$ref": "#/definitions/schema.Unit"
            }
        }
    },

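The new /api/nodestats/ endpoint above accepts an api.UpdateNodeStatesRequest. A minimal client sketch follows; the struct shapes mirror the api.Node and api.UpdateNodeStatesRequest definitions in the swagger diff, while the JWT placeholder and sample values are assumptions.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// Node mirrors api.Node as defined above.
type Node struct {
	Hostname        string   `json:"hostname"`
	States          []string `json:"states"`
	CpusAllocated   int      `json:"cpusAllocated"`
	CpusTotal       int      `json:"cpusTotal"`
	GpusAllocated   int      `json:"gpusAllocated"`
	GpusTotal       int      `json:"gpusTotal"`
	MemoryAllocated int      `json:"memoryAllocated"`
	MemoryTotal     int      `json:"memoryTotal"`
}

// UpdateNodeStatesRequest mirrors api.UpdateNodeStatesRequest as defined above.
type UpdateNodeStatesRequest struct {
	Cluster string `json:"cluster"`
	Nodes   []Node `json:"nodes"`
}

func main() {
	payload := UpdateNodeStatesRequest{
		Cluster: "fritz", // example value taken from the spec
		Nodes: []Node{{Hostname: "f0101", States: []string{"allocated"},
			CpusAllocated: 72, CpusTotal: 72}},
	}
	body, _ := json.Marshal(payload)
	req, _ := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/nodestats/", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <JWT>") // ApiKeyAuth per the spec
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // 200 returns api.DefaultApiResponse
}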
495 api/swagger.yaml
@@ -1,4 +1,3 @@
basePath: /api
definitions:
  api.ApiReturnedUser:
    properties:
@@ -32,6 +31,11 @@ definitions:
        example: Debug
        type: string
    type: object
  api.DefaultApiResponse:
    properties:
      msg:
        type: string
    type: object
  api.DeleteJobApiRequest:
    properties:
      cluster:
@@ -49,11 +53,6 @@ definitions:
    required:
    - jobId
    type: object
  api.DeleteJobApiResponse:
    properties:
      msg:
        type: string
    type: object
  api.EditMetaRequest:
    properties:
      key:
@@ -97,7 +96,7 @@ definitions:
      jobs:
        description: Array of jobs
        items:
          $ref: '#/definitions/schema.JobMeta'
          $ref: '#/definitions/schema.Job'
        type: array
      page:
        description: Page id returned
@@ -112,10 +111,26 @@ definitions:
      scope:
        $ref: '#/definitions/schema.MetricScope'
    type: object
  api.StartJobApiResponse:
  api.Node:
    properties:
      msg:
      cpusAllocated:
        type: integer
      cpusTotal:
        type: integer
      gpusAllocated:
        type: integer
      gpusTotal:
        type: integer
      hostname:
        type: string
      memoryAllocated:
        type: integer
      memoryTotal:
        type: integer
      states:
        items:
          type: string
        type: array
    type: object
  api.StopJobApiRequest:
    properties:
@@ -139,6 +154,16 @@ definitions:
    - jobState
    - stopTime
    type: object
  api.UpdateNodeStatesRequest:
    properties:
      cluster:
        example: fritz
        type: string
      nodes:
        items:
          $ref: '#/definitions/api.Node'
        type: array
    type: object
  schema.Accelerator:
    properties:
      id:
@@ -162,7 +187,6 @@ definitions:
        type: array
    type: object
  schema.Job:
    description: Information of a HPC job.
    properties:
      arrayJobId:
        example: 123000
@@ -180,15 +204,12 @@ definitions:
        type: number
      energyFootprint:
        additionalProperties:
          format: float64
          type: number
        type: object
      exclusive:
        example: 1
        maximum: 2
        minimum: 0
        type: integer
      footprint:
        additionalProperties:
          format: float64
          type: number
        type: object
      id:
@@ -200,12 +221,18 @@ definitions:
        allOf:
        - $ref: '#/definitions/schema.JobState'
        enum:
        - completed
        - failed
        - boot_fail
        - cancelled
        - stopped
        - timeout
        - completed
        - deadline
        - failed
        - node_fail
        - out_of_memory
        - pending
        - preempted
        - running
        - suspended
        - timeout
        example: completed
      metaData:
        additionalProperties:
@@ -234,18 +261,37 @@ definitions:
      project:
        example: abcd200
        type: string
      requestedMemory:
        description: in MB
        example: 128000
        minimum: 1
        type: integer
      resources:
        items:
          $ref: '#/definitions/schema.Resource'
        type: array
      shared:
        enum:
        - none
        - single_user
        - multi_user
        type: string
      smt:
        example: 4
        type: integer
      startTime:
        type: string
        example: 1649723812
        type: integer
      statistics:
        additionalProperties:
          $ref: '#/definitions/schema.JobStatistics'
        type: object
      subCluster:
        example: main
        type: string
      submitTime:
        example: 1649723812
        type: integer
      tags:
        items:
          $ref: '#/definitions/schema.Tag'
@@ -274,109 +320,6 @@ definitions:
          $ref: '#/definitions/schema.JobLink'
        type: array
    type: object
  schema.JobMeta:
    description: Meta data information of a HPC job.
    properties:
      arrayJobId:
        example: 123000
        type: integer
      cluster:
        example: fritz
        type: string
      concurrentJobs:
        $ref: '#/definitions/schema.JobLinkResultList'
      duration:
        example: 43200
        minimum: 1
        type: integer
      energy:
        type: number
      energyFootprint:
        additionalProperties:
          type: number
        type: object
      exclusive:
        example: 1
        maximum: 2
        minimum: 0
        type: integer
      footprint:
        additionalProperties:
          type: number
        type: object
      id:
        type: integer
      jobId:
        example: 123000
        type: integer
      jobState:
        allOf:
        - $ref: '#/definitions/schema.JobState'
        enum:
        - completed
        - failed
        - cancelled
        - stopped
        - timeout
        - out_of_memory
        example: completed
      metaData:
        additionalProperties:
          type: string
        type: object
      monitoringStatus:
        example: 1
        maximum: 3
        minimum: 0
        type: integer
      numAcc:
        example: 2
        minimum: 1
        type: integer
      numHwthreads:
        example: 20
        minimum: 1
        type: integer
      numNodes:
        example: 2
        minimum: 1
        type: integer
      partition:
        example: main
        type: string
      project:
        example: abcd200
        type: string
      resources:
        items:
          $ref: '#/definitions/schema.Resource'
        type: array
      smt:
        example: 4
        type: integer
      startTime:
        example: 1649723812
        minimum: 1
        type: integer
      statistics:
        additionalProperties:
          $ref: '#/definitions/schema.JobStatistics'
        type: object
      subCluster:
        example: main
        type: string
      tags:
        items:
          $ref: '#/definitions/schema.Tag'
        type: array
      user:
        example: abcd100h
        type: string
      walltime:
        example: 86400
        minimum: 1
        type: integer
    type: object
  schema.JobMetric:
    properties:
      series:
@@ -392,24 +335,32 @@ definitions:
    type: object
  schema.JobState:
    enum:
    - running
    - completed
    - failed
    - boot_fail
    - cancelled
    - stopped
    - timeout
    - preempted
    - completed
    - deadline
    - failed
    - node_fail
    - out_of_memory
    - pending
    - preempted
    - running
    - suspended
    - timeout
    type: string
    x-enum-varnames:
    - JobStateRunning
    - JobStateCompleted
    - JobStateFailed
    - JobStateBootFail
    - JobStateCancelled
    - JobStateStopped
    - JobStateTimeout
    - JobStatePreempted
    - JobStateCompleted
    - JobStateDeadline
    - JobStateFailed
    - JobStateNodeFail
    - JobStateOutOfMemory
    - JobStatePending
    - JobStatePreempted
    - JobStateRunning
    - JobStateSuspended
    - JobStateTimeout
  schema.JobStatistics:
    description: Specification for job metric statistics.
    properties:
@@ -546,6 +497,7 @@ definitions:
      percentiles:
        additionalProperties:
          items:
            format: float64
            type: number
          type: array
        type: object
@@ -605,6 +557,8 @@ definitions:
        type: number
      remove:
        type: boolean
      unit:
        $ref: '#/definitions/schema.Unit'
    type: object
  schema.Tag:
    description: Defines a tag using name and type.
@@ -676,7 +630,7 @@ info:
  title: ClusterCockpit REST API
  version: 1.0.0
paths:
  /clusters/:
  /api/clusters/:
    get:
      description: Get a list of all cluster configs. Specific cluster can be requested using query parameter.
@@ -713,7 +667,7 @@ paths:
      summary: Lists all cluster configs
      tags:
      - Cluster query
  /jobs/:
  /api/jobs/:
    get:
      description: |-
        Get a list of all jobs. Filters can be applied using query parameters.
@@ -778,11 +732,11 @@ paths:
      summary: Lists all jobs
      tags:
      - Job query
  /jobs/{id}:
  /api/jobs/{id}:
    get:
      description: |-
        Job to get is specified by database ID
        Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
        Returns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.
      parameters:
      - description: Database ID of Job
        in: path
@@ -835,7 +789,7 @@ paths:
      - application/json
      description: |-
        Job to get is specified by database ID
        Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
        Returns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.
      parameters:
      - description: Database ID of Job
        in: path
@@ -887,7 +841,7 @@ paths:
      summary: Get job meta and configurable metric data
      tags:
      - Job query
  /jobs/delete_job/:
  /api/jobs/delete_job/:
    delete:
      consumes:
      - application/json
@@ -906,7 +860,7 @@ paths:
        "200":
          description: Success message
          schema:
            $ref: '#/definitions/api.DeleteJobApiResponse'
            $ref: '#/definitions/api.DefaultApiResponse'
        "400":
          description: Bad Request
          schema:
@@ -937,7 +891,7 @@ paths:
      summary: Remove a job from the sql database
      tags:
      - Job remove
  /jobs/delete_job/{id}:
  /api/jobs/delete_job/{id}:
    delete:
      description: Job to remove is specified by database ID. This will not remove the job from the job archive.
@@ -953,7 +907,7 @@ paths:
        "200":
          description: Success message
          schema:
            $ref: '#/definitions/api.DeleteJobApiResponse'
            $ref: '#/definitions/api.DefaultApiResponse'
        "400":
          description: Bad Request
          schema:
@@ -984,7 +938,7 @@ paths:
      summary: Remove a job from the sql database
      tags:
      - Job remove
  /jobs/delete_job_before/{ts}:
  /api/jobs/delete_job_before/{ts}:
    delete:
      description: Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
@@ -1000,7 +954,7 @@ paths:
        "200":
          description: Success message
          schema:
            $ref: '#/definitions/api.DeleteJobApiResponse'
            $ref: '#/definitions/api.DefaultApiResponse'
        "400":
          description: Bad Request
          schema:
@@ -1031,7 +985,7 @@ paths:
      summary: Remove a job from the sql database
      tags:
      - Job remove
  /jobs/edit_meta/{id}:
  /api/jobs/edit_meta/{id}:
    post:
      consumes:
      - application/json
@@ -1078,27 +1032,27 @@ paths:
      summary: Edit meta-data json
      tags:
      - Job add and modify
  /jobs/start_job/:
  /api/jobs/start_job/:
    post:
      consumes:
      - application/json
      description: |-
        Job specified in request body will be saved to database as "running" with new DB ID.
        Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
        Job specifications follow the 'Job' scheme, API will fail to execute if requirements are not met.
      parameters:
      - description: Job to add
        in: body
        name: request
        required: true
        schema:
          $ref: '#/definitions/schema.JobMeta'
          $ref: '#/definitions/schema.Job'
      produces:
      - application/json
      responses:
        "201":
          description: Job added successfully
          schema:
            $ref: '#/definitions/api.StartJobApiResponse'
            $ref: '#/definitions/api.DefaultApiResponse'
        "400":
          description: Bad Request
          schema:
@@ -1125,11 +1079,11 @@ paths:
      summary: Adds a new job as "running"
      tags:
      - Job add and modify
  /jobs/stop_job/:
  /api/jobs/stop_job/:
    post:
      description: |-
        Job to stop is specified by request body. All fields are required in this case.
        Returns full job resource information according to 'JobMeta' scheme.
        Returns full job resource information according to 'Job' scheme.
      parameters:
      - description: All fields required
        in: body
@@ -1143,7 +1097,7 @@ paths:
        "200":
          description: Success message
          schema:
            $ref: '#/definitions/schema.JobMeta'
            $ref: '#/definitions/schema.Job'
        "400":
          description: Bad Request
          schema:
@@ -1161,8 +1115,7 @@ paths:
          schema:
            $ref: '#/definitions/api.ErrorResponse'
        "422":
          description: 'Unprocessable Entity: finding job failed: sql: no rows in result set'
          description: 'Unprocessable Entity: job has already been stopped'
          schema:
            $ref: '#/definitions/api.ErrorResponse'
        "500":
@@ -1174,7 +1127,7 @@ paths:
      summary: Marks job as completed and triggers archiving
      tags:
      - Job add and modify
  /jobs/tag_job/{id}:
  /api/jobs/tag_job/{id}:
    post:
      consumes:
      - application/json
@@ -1224,128 +1177,51 @@ paths:
      summary: Adds one or more tags to a job
      tags:
      - Job add and modify
  /user/{id}:
  /api/nodestats/:
    post:
      consumes:
      - multipart/form-data
      description: |-
        Modifies user defined by username (id) in one of four possible ways.
        If more than one formValue is set then only the highest priority field is used.
        Only accessible from IPs registered with apiAllowedIPs configuration option.
        Returns a JSON-encoded list of users.
        Required query-parameter defines if all users or only users with additional special roles are returned.
      parameters:
      - description: Database ID of User
        in: path
        name: id
      - description: Request body containing nodes and their states
        in: body
        name: request
        required: true
        type: string
      - description: 'Priority 1: Role to add'
        enum:
        - admin
        - support
        - manager
        - user
        - api
        in: formData
        name: add-role
        type: string
      - description: 'Priority 2: Role to remove'
        enum:
        - admin
        - support
        - manager
        - user
        - api
        in: formData
        name: remove-role
        type: string
      - description: 'Priority 3: Project to add'
        in: formData
        name: add-project
        type: string
      - description: 'Priority 4: Project to remove'
        in: formData
        name: remove-project
        type: string
        schema:
          $ref: '#/definitions/api.UpdateNodeStatesRequest'
      produces:
      - text/plain
      - application/json
      responses:
        "200":
          description: Success Response Message
          description: Success message
          schema:
            type: string
            $ref: '#/definitions/api.DefaultApiResponse'
        "400":
          description: Bad Request
          schema:
            type: string
            $ref: '#/definitions/api.ErrorResponse'
        "401":
          description: Unauthorized
          schema:
            type: string
            $ref: '#/definitions/api.ErrorResponse'
        "403":
          description: Forbidden
          schema:
            type: string
        "422":
          description: 'Unprocessable Entity: The user could not be updated'
          schema:
            type: string
            $ref: '#/definitions/api.ErrorResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
            $ref: '#/definitions/api.ErrorResponse'
      security:
      - ApiKeyAuth: []
      summary: Updates an existing user
      summary: Deliver updated Slurm node states
      tags:
      - User
  /users/:
    delete:
      consumes:
      - multipart/form-data
      description: |-
        User defined by username in form data will be deleted from database.
        Only accessible from IPs registered with apiAllowedIPs configuration option.
      parameters:
      - description: User ID to delete
        in: formData
        name: username
        required: true
        type: string
      produces:
      - text/plain
      responses:
        "200":
          description: User deleted successfully
        "400":
          description: Bad Request
          schema:
            type: string
        "401":
          description: Unauthorized
          schema:
            type: string
        "403":
          description: Forbidden
          schema:
            type: string
        "422":
          description: 'Unprocessable Entity: deleting user failed'
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Deletes a user
      tags:
      - User
      - Nodestates
  /api/users/:
    get:
      description: |-
        Returns a JSON-encoded list of users.
        Required query-parameter defines if all users or only users with additional special roles are returned.
        Only accessible from IPs registered with apiAllowedIPs configuration option.
      parameters:
      - description: If returned list should contain all users or only users with additional special roles
@@ -1383,46 +1259,73 @@ paths:
      summary: Returns a list of users
      tags:
      - User
    post:
  /jobs/tag_job/{id}:
    delete:
      consumes:
      - multipart/form-data
      - application/json
      description: |-
        User specified in form data will be saved to database.
        Only accessible from IPs registered with apiAllowedIPs configuration option.
        Removes tag(s) from a job specified by DB ID. Name and Type of Tag(s) must match.
        Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
        If tagged job is already finished: Tag will be removed from respective archive files.
      parameters:
      - description: Unique user ID
        in: formData
        name: username
      - description: Job Database ID
        in: path
        name: id
        required: true
        type: string
      - description: User password
        in: formData
        name: password
        type: integer
      - description: Array of tag-objects to remove
        in: body
        name: request
        required: true
        type: string
      - description: User role
        enum:
        - admin
        - support
        - manager
        - user
        - api
        in: formData
        name: role
        schema:
          items:
            $ref: '#/definitions/api.ApiTag'
          type: array
      produces:
      - application/json
      responses:
        "200":
          description: Updated job resource
          schema:
            $ref: '#/definitions/schema.Job'
        "400":
          description: Bad Request
          schema:
            $ref: '#/definitions/api.ErrorResponse'
        "401":
          description: Unauthorized
          schema:
            $ref: '#/definitions/api.ErrorResponse'
        "404":
          description: Job or tag does not exist
          schema:
            $ref: '#/definitions/api.ErrorResponse'
        "500":
          description: Internal Server Error
          schema:
            $ref: '#/definitions/api.ErrorResponse'
      security:
      - ApiKeyAuth: []
      summary: Removes one or more tags from a job
      tags:
      - Job add and modify
  /tags/:
    delete:
      consumes:
      - application/json
      description: |-
        Removes tags by type and name. Name and Type of Tag(s) must match.
        Tag Scope is required for matching, options: "global", "admin". Private tags can not be deleted via API.
        Tag wills be removed from respective archive files.
      parameters:
      - description: Array of tag-objects to remove
        in: body
        name: request
        required: true
        type: string
      - description: Managed project, required for new manager role user
        in: formData
        name: project
        type: string
      - description: Users name
        in: formData
        name: name
        type: string
      - description: Users email
        in: formData
        name: email
        type: string
        schema:
          items:
            $ref: '#/definitions/api.ApiTag'
          type: array
      produces:
      - text/plain
      responses:
@@ -1433,28 +1336,24 @@ paths:
        "400":
          description: Bad Request
          schema:
            type: string
            $ref: '#/definitions/api.ErrorResponse'
        "401":
          description: Unauthorized
          schema:
            type: string
        "403":
          description: Forbidden
            $ref: '#/definitions/api.ErrorResponse'
        "404":
          description: Job or tag does not exist
          schema:
            type: string
        "422":
          description: 'Unprocessable Entity: creating user failed'
          schema:
            type: string
            $ref: '#/definitions/api.ErrorResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
            $ref: '#/definitions/api.ErrorResponse'
      security:
      - ApiKeyAuth: []
      summary: Adds a new user
      summary: Removes all tags and job-relations for type:name tuple
      tags:
      - User
      - Tag remove
securityDefinitions:
  ApiKeyAuth:
    in: header

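A sketch of the reworked tag-removal endpoint. api.ApiTag is only referenced, never expanded, in this excerpt; the three fields below (type, name, scope) are inferred from the descriptions above ("Name and Type of Tag(s) must match. Tag Scope is required for matching") and should be checked against the actual definition.

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// ApiTag is a hypothetical mirror of api.ApiTag; field names are inferred.
type ApiTag struct {
	Type  string `json:"type"`
	Name  string `json:"name"`
	Scope string `json:"scope"`
}

func main() {
	tags := []ApiTag{{Type: "testType", Name: "testName", Scope: "global"}}
	body, _ := json.Marshal(tags)
	// Remove the listed tags from the job with database ID 42.
	req, _ := http.NewRequest(http.MethodDelete,
		"http://localhost:8080/api/jobs/tag_job/42", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <JWT>") // ApiKeyAuth per the spec
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // 200 returns the updated schema.Job resource
}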
@@ -1,18 +1,22 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package main provides the entry point for the ClusterCockpit backend server.
// This file defines all command-line flags and their default values.
package main

import "flag"

var (
	flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
	flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
	flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB,
	flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags bool
	flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
)

func cliInit() {
	flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
	flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize sqlite database file, config.json and .env")
	flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
	flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
	flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
@@ -21,13 +25,14 @@ func cliInit() {
	flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
	flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
	flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
	flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
	flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
	flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
	flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
	flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
	flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
	flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: <username>:[admin,support,manager,api,user]:<password>")
	flag.StringVar(&flagDelUser, "del-user", "", "Remove a existing user. Argument format: <username>")
	flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
	flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
	flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
	flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug, info , warn (default), err, crit]`")
	flag.Parse()
}

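A standalone sketch of the flag pattern used in cliInit() above, reduced to three flags including the newly added -apply-tags switch. Flag names and help strings mirror the diff; this is not the actual main-package wiring.

package main

import (
	"flag"
	"fmt"
)

// Boolean mode switches plus a string option, parsed once, as in cliInit().
var (
	flagServer    bool
	flagApplyTags bool
	flagLogLevel  string
)

func main() {
	flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
	flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
	flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level")
	flag.Parse()
	fmt.Printf("server=%v apply-tags=%v loglevel=%s\n",
		flagServer, flagApplyTags, flagLogLevel)
}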
@@ -1,16 +1,22 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package main provides the entry point for the ClusterCockpit backend server.
// This file contains bootstrap logic for initializing the environment,
// creating default configuration files, and setting up the database.
package main

import (
	"fmt"
	"encoding/json"
	"os"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/internal/util"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/util"
)

const envString = `
@@ -25,61 +31,89 @@ SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"

const configString = `
{
  "main": {
    "addr": "127.0.0.1:8080",
    "archive": {
      "kind": "file",
      "path": "./var/job-archive"
    "short-running-jobs-duration": 300,
    "resampling": {
      "minimumPoints": 600,
      "trigger": 180,
      "resolutions": [
        240,
        60
      ]
    },
    "apiAllowedIPs": [
      "*"
    ],
    "emission-constant": 317
  },
  "cron": {
    "commit-job-worker": "2m",
    "duration-worker": "5m",
    "footprint-worker": "10m"
  },
  "archive": {
    "kind": "file",
    "path": "./var/job-archive"
  },
  "auth": {
    "jwts": {
      "max-age": "2000h"
    },
    "clusters": [
      {
        "name": "name",
        "metricDataRepository": {
          "kind": "cc-metric-store",
          "url": "http://localhost:8082",
          "token": ""
        },
        "filterRanges": {
          "numNodes": {
            "from": 1,
            "to": 64
          },
          "duration": {
            "from": 0,
            "to": 86400
          },
          "startTime": {
            "from": "2023-01-01T00:00:00Z",
            "to": null
          }
        }
      }
    ]
      "max-age": "2000h"
    }
  },
  "clusters": [
    {
      "name": "name",
      "metricDataRepository": {
        "kind": "cc-metric-store",
        "url": "http://localhost:8082",
        "token": ""
      },
      "filterRanges": {
        "numNodes": {
          "from": 1,
          "to": 64
        },
        "duration": {
          "from": 0,
          "to": 86400
        },
        "startTime": {
          "from": "2023-01-01T00:00:00Z",
          "to": null
        }
      }
    }
  ]
}
`

func initEnv() {
	if util.CheckFileExists("var") {
		fmt.Print("Directory ./var already exists. Exiting!\n")
		os.Exit(0)
		cclog.Exit("Directory ./var already exists. Cautiously exiting application initialization.")
	}

	if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
		log.Fatalf("Writing config.json failed: %s", err.Error())
		cclog.Abortf("Could not write default ./config.json with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}

	if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
		log.Fatalf("Writing .env failed: %s", err.Error())
		cclog.Abortf("Could not write default ./.env file with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}

	if err := os.Mkdir("var", 0o777); err != nil {
		log.Fatalf("Mkdir var failed: %s", err.Error())
		cclog.Abortf("Could not create default ./var folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}

	err := repository.MigrateDB("sqlite3", "./var/job.db")
	if err != nil {
		log.Fatalf("Initialize job.db failed: %s", err.Error())
		cclog.Abortf("Could not initialize default sqlite3 database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}
	if err := os.Mkdir("var/job-archive", 0o777); err != nil {
		cclog.Abortf("Could not create default ./var/job-archive folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
	}
	archiveCfg := "{\"kind\": \"file\",\"path\": \"./var/job-archive\"}"
	if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
		cclog.Abortf("Could not initialize job-archive, exited.\nError: %s\n", err.Error())
	}
}

@@ -1,10 +1,16 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package main provides the entry point for the ClusterCockpit backend server.
// It orchestrates initialization of all subsystems including configuration,
// database, authentication, and the HTTP server.
package main

import (
    "context"
    "encoding/json"
    "fmt"
    "os"
    "os/signal"
@@ -12,19 +18,27 @@ import (
    "strings"
    "sync"
    "syscall"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/archiver"
    "github.com/ClusterCockpit/cc-backend/internal/auth"
    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/internal/importer"
    "github.com/ClusterCockpit/cc-backend/internal/memorystore"
    "github.com/ClusterCockpit/cc-backend/internal/metricdata"
    "github.com/ClusterCockpit/cc-backend/internal/repository"
    "github.com/ClusterCockpit/cc-backend/internal/taskManager"
    "github.com/ClusterCockpit/cc-backend/internal/tagger"
    "github.com/ClusterCockpit/cc-backend/internal/taskmanager"
    "github.com/ClusterCockpit/cc-backend/pkg/archive"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
    "github.com/ClusterCockpit/cc-backend/pkg/nats"
    "github.com/ClusterCockpit/cc-backend/web"
    ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    "github.com/ClusterCockpit/cc-lib/runtimeEnv"
    "github.com/ClusterCockpit/cc-lib/schema"
    "github.com/ClusterCockpit/cc-lib/util"
    "github.com/google/gops/agent"
    "github.com/joho/godotenv"

    _ "github.com/go-sql-driver/mysql"
    _ "github.com/mattn/go-sqlite3"
@@ -39,198 +53,436 @@ const logoString = `
 |_|
`

// Environment variable names
const (
    envGOGC = "GOGC"
)

// Default configurations
const (
    defaultArchiveConfig = `{"kind":"file","path":"./var/job-archive"}`
)

var (
    date    string
    commit  string
    version string
)

func main() {
    cliInit()
func printVersion() {
    fmt.Print(logoString)
    fmt.Printf("Version:\t%s\n", version)
    fmt.Printf("Git hash:\t%s\n", commit)
    fmt.Printf("Build time:\t%s\n", date)
    fmt.Printf("SQL db version:\t%d\n", repository.Version)
    fmt.Printf("Job archive version:\t%d\n", archive.Version)
}

    if flagVersion {
        fmt.Print(logoString)
        fmt.Printf("Version:\t%s\n", version)
        fmt.Printf("Git hash:\t%s\n", commit)
        fmt.Printf("Build time:\t%s\n", date)
        fmt.Printf("SQL db version:\t%d\n", repository.Version)
        fmt.Printf("Job archive version:\t%d\n", archive.Version)
        os.Exit(0)
func initGops() error {
    if !flagGops {
        return nil
    }

    // Apply config flags for pkg/log
    log.Init(flagLogLevel, flagLogDateTime)
    if err := agent.Listen(agent.Options{}); err != nil {
        return fmt.Errorf("starting gops agent: %w", err)
    }
    return nil
}

    // See https://github.com/google/gops (Runtime overhead is almost zero)
    if flagGops {
        if err := agent.Listen(agent.Options{}); err != nil {
            log.Fatalf("gops/agent.Listen failed: %s", err.Error())
        }
func loadEnvironment() error {
    if err := godotenv.Load(); err != nil {
        return fmt.Errorf("loading .env file: %w", err)
    }
    return nil
}

func initConfiguration() error {
    ccconf.Init(flagConfigFile)

    cfg := ccconf.GetPackageConfig("main")
    if cfg == nil {
        return fmt.Errorf("main configuration must be present")
    }

    if err := runtimeEnv.LoadEnv("./.env"); err != nil && !os.IsNotExist(err) {
        log.Fatalf("parsing './.env' file failed: %s", err.Error())
    clustercfg := ccconf.GetPackageConfig("clusters")
    if clustercfg == nil {
        return fmt.Errorf("cluster configuration must be present")
    }

    // Initialize sub-modules and handle command line flags.
    // The order here is important!
    config.Init(flagConfigFile)
    config.Init(cfg, clustercfg)
    return nil
}

    // As a special case for `db`, allow using an environment variable instead of the value
    // stored in the config. This can be done for people having security concerns about storing
    // the password for their mysql database in config.json.
    if strings.HasPrefix(config.Keys.DB, "env:") {
        envvar := strings.TrimPrefix(config.Keys.DB, "env:")
        config.Keys.DB = os.Getenv(envvar)
    }
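
The `env:` special case above is easy to miss in the diff, so here is a minimal, self-contained sketch of the same indirection. The helper name `resolveSecret` and the variable name `CC_DB_DSN` are made up for the example; cc-backend inlines the logic exactly as shown above:

package main

import (
    "fmt"
    "os"
    "strings"
)

// resolveSecret returns the configured value unchanged unless it starts
// with "env:", in which case the named environment variable is read
// instead. This keeps secrets like the MySQL password out of config.json.
func resolveSecret(configured string) string {
    if name, ok := strings.CutPrefix(configured, "env:"); ok {
        return os.Getenv(name)
    }
    return configured
}

func main() {
    // config.json would then contain: "db": "env:CC_DB_DSN"
    os.Setenv("CC_DB_DSN", "clustercockpit:secret@tcp(127.0.0.1:3306)/clustercockpit")
    fmt.Println(resolveSecret("env:CC_DB_DSN"))
}

The same pattern generalizes to any config key that should not be committed to disk in plain text.
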
func initDatabase() error {
    repository.Connect(config.Keys.DBDriver, config.Keys.DB)
    return nil
}

func handleDatabaseCommands() error {
    if flagMigrateDB {
        err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
        if err != nil {
            log.Fatal(err)
            return fmt.Errorf("migrating database to version %d: %w", repository.Version, err)
        }
        os.Exit(0)
        cclog.Exitf("MigrateDB Success: Migrated '%s' database at location '%s' to version %d.\n",
            config.Keys.DBDriver, config.Keys.DB, repository.Version)
    }

    if flagRevertDB {
        err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB)
        if err != nil {
            log.Fatal(err)
            return fmt.Errorf("reverting database to version %d: %w", repository.Version-1, err)
        }
        os.Exit(0)
        cclog.Exitf("RevertDB Success: Reverted '%s' database at location '%s' to version %d.\n",
            config.Keys.DBDriver, config.Keys.DB, repository.Version-1)
    }

    if flagForceDB {
        err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB)
        if err != nil {
            log.Fatal(err)
            return fmt.Errorf("forcing database to version %d: %w", repository.Version, err)
        }
        os.Exit(0)
        cclog.Exitf("ForceDB Success: Forced '%s' database at location '%s' to version %d.\n",
            config.Keys.DBDriver, config.Keys.DB, repository.Version)
    }

    repository.Connect(config.Keys.DBDriver, config.Keys.DB)
    return nil
}

    if flagInit {
        initEnv()
        fmt.Print("Successfully setup environment!\n")
        fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
        fmt.Print("Add your job-archive at ./var/job-archive.\n")
        os.Exit(0)
func handleUserCommands() error {
    if config.Keys.DisableAuthentication && (flagNewUser != "" || flagDelUser != "") {
        return fmt.Errorf("--add-user and --del-user can only be used if authentication is enabled")
    }

    if !config.Keys.DisableAuthentication {
        if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
            auth.Init(&cfg)
        } else {
            cclog.Warn("Authentication disabled due to missing configuration")
            auth.Init(nil)
        }

        auth.Init()
        // Check for default security keys
        checkDefaultSecurityKeys()

        if flagNewUser != "" {
            parts := strings.SplitN(flagNewUser, ":", 3)
            if len(parts) != 3 || len(parts[0]) == 0 {
                log.Fatal("invalid argument format for user creation")
            }

            ur := repository.GetUserRepository()
            if err := ur.AddUser(&schema.User{
                Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","),
            }); err != nil {
                log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err)
            if err := addUser(flagNewUser); err != nil {
                return err
            }
        }

        if flagDelUser != "" {
            ur := repository.GetUserRepository()
            if err := ur.DelUser(flagDelUser); err != nil {
                log.Fatalf("deleting user failed: %v", err)
            if err := delUser(flagDelUser); err != nil {
                return err
            }
        }

        authHandle := auth.GetAuthInstance()

        if flagSyncLDAP {
            if authHandle.LdapAuth == nil {
                log.Fatal("cannot sync: LDAP authentication is not configured")
            if err := syncLDAP(authHandle); err != nil {
                return err
            }

            if err := authHandle.LdapAuth.Sync(); err != nil {
                log.Fatalf("LDAP sync failed: %v", err)
            }
            log.Info("LDAP sync successful")
        }

        if flagGenJWT != "" {
            ur := repository.GetUserRepository()
            user, err := ur.GetUser(flagGenJWT)
            if err != nil {
                log.Fatalf("could not get user from JWT: %v", err)
            if err := generateJWT(authHandle, flagGenJWT); err != nil {
                return err
            }

            if !user.HasRole(schema.RoleApi) {
                log.Warnf("user '%s' does not have the API role", user.Username)
            }

            jwt, err := authHandle.JwtAuth.ProvideJWT(user)
            if err != nil {
                log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
            }

            fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
        }

    } else if flagNewUser != "" || flagDelUser != "" {
        log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
    }

    if err := archive.Init(config.Keys.Archive, config.Keys.DisableArchive); err != nil {
        log.Fatalf("failed to initialize archive: %s", err.Error())
    return nil
}

// checkDefaultSecurityKeys warns if default JWT keys are detected
func checkDefaultSecurityKeys() {
    // Default JWT public key from init.go
    defaultJWTPublic := "kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="

    if os.Getenv("JWT_PUBLIC_KEY") == defaultJWTPublic {
        cclog.Warn("Using default JWT keys - not recommended for production environments")
    }
}

func addUser(userSpec string) error {
    parts := strings.SplitN(userSpec, ":", 3)
    if len(parts) != 3 || len(parts[0]) == 0 {
        return fmt.Errorf("invalid user format, want: <username>:[admin,support,manager,api,user]:<password>, have: %s", userSpec)
    }

    ur := repository.GetUserRepository()
    if err := ur.AddUser(&schema.User{
        Username: parts[0],
        Projects: make([]string, 0),
        Password: parts[2],
        Roles:    strings.Split(parts[1], ","),
    }); err != nil {
        return fmt.Errorf("adding user '%s' with roles '%s': %w", parts[0], parts[1], err)
    }

    cclog.Infof("Add User: Added new user '%s' with roles '%s'", parts[0], parts[1])
    return nil
}

func delUser(username string) error {
    ur := repository.GetUserRepository()
    if err := ur.DelUser(username); err != nil {
        return fmt.Errorf("deleting user '%s': %w", username, err)
    }
    cclog.Infof("Delete User: Deleted user '%s' from DB", username)
    return nil
}

func syncLDAP(authHandle *auth.Authentication) error {
    if authHandle.LdapAuth == nil {
        return fmt.Errorf("LDAP authentication is not configured")
    }

    if err := authHandle.LdapAuth.Sync(); err != nil {
        return fmt.Errorf("synchronizing LDAP: %w", err)
    }

    cclog.Print("Sync LDAP: LDAP synchronization successful.")
    return nil
}

func generateJWT(authHandle *auth.Authentication, username string) error {
    ur := repository.GetUserRepository()
    user, err := ur.GetUser(username)
    if err != nil {
        return fmt.Errorf("getting user '%s': %w", username, err)
    }

    if !user.HasRole(schema.RoleApi) {
        cclog.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
    }

    jwt, err := authHandle.JwtAuth.ProvideJWT(user)
    if err != nil {
        return fmt.Errorf("generating JWT for user '%s': %w", user.Username, err)
    }

    cclog.Printf("JWT: Successfully generated JWT for user '%s': %s\n", user.Username, jwt)
    return nil
}

func initSubsystems() error {
    // Initialize nats client
    natsConfig := ccconf.GetPackageConfig("nats")
    if err := nats.Init(natsConfig); err != nil {
        cclog.Warnf("initializing (optional) nats client: %s", err.Error())
    }
    nats.Connect()

    // Initialize job archive
    archiveCfg := ccconf.GetPackageConfig("archive")
    if archiveCfg == nil {
        archiveCfg = json.RawMessage(defaultArchiveConfig)
    }
    if err := archive.Init(archiveCfg, config.Keys.DisableArchive); err != nil {
        return fmt.Errorf("initializing archive: %w", err)
    }

    // Initialize metricdata
    if err := metricdata.Init(); err != nil {
        log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
        return fmt.Errorf("initializing metricdata repository: %w", err)
    }

    // Handle database re-initialization
    if flagReinitDB {
        if err := importer.InitDB(); err != nil {
            log.Fatalf("failed to re-initialize repository DB: %s", err.Error())
            return fmt.Errorf("re-initializing repository DB: %w", err)
        }
        cclog.Print("Init DB: Successfully re-initialized repository DB.")
    }

    // Handle job import
    if flagImportJob != "" {
        if err := importer.HandleImportFlag(flagImportJob); err != nil {
            log.Fatalf("job import failed: %s", err.Error())
            return fmt.Errorf("importing job: %w", err)
        }
        cclog.Infof("Import Job: Imported Job '%s' into DB", flagImportJob)
    }

    // Initialize taggers
    if config.Keys.EnableJobTaggers {
        tagger.Init()
    }

    // Apply tags if requested
    if flagApplyTags {
        if err := tagger.RunTaggers(); err != nil {
            return fmt.Errorf("running job taggers: %w", err)
        }
    }

    if !flagServer {
        return
    }

    archiver.Start(repository.GetJobRepository())
    taskManager.Start()
    serverInit()
    return nil
}

func runServer(ctx context.Context) error {
    var wg sync.WaitGroup

    // Start metric store if enabled
    if memorystore.InternalCCMSFlag {
        mscfg := ccconf.GetPackageConfig("metric-store")
        if mscfg == nil {
            return fmt.Errorf("metric store configuration must be present")
        }
        memorystore.Init(mscfg, &wg)
    }

    // Start archiver and task manager
    archiver.Start(repository.GetJobRepository(), ctx)
    taskmanager.Start(ccconf.GetPackageConfig("cron"), ccconf.GetPackageConfig("archive"))

    // Initialize web UI
    cfg := ccconf.GetPackageConfig("ui")
    if err := web.Init(cfg); err != nil {
        return fmt.Errorf("initializing web UI: %w", err)
    }

    // Initialize HTTP server
    srv, err := NewServer(version, commit, date)
    if err != nil {
        return fmt.Errorf("creating server: %w", err)
    }

    // Channel to collect errors from server
    errChan := make(chan error, 1)

    // Start HTTP server
    wg.Add(1)
    go func() {
        defer wg.Done()
        serverStart()
        if err := srv.Start(ctx); err != nil {
            errChan <- err
        }
    }()

    // Handle shutdown signals
    wg.Add(1)
    sigs := make(chan os.Signal, 1)
    signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
    go func() {
        defer wg.Done()
        <-sigs
        select {
        case <-sigs:
            cclog.Info("Shutdown signal received")
        case <-ctx.Done():
        }

        runtimeEnv.SystemdNotifiy(false, "Shutting down ...")

        serverShutdown()

        taskManager.Shutdown()
        srv.Shutdown(ctx)
        util.FsWatcherShutdown()
        taskmanager.Shutdown()
    }()

    if os.Getenv("GOGC") == "" {
    // Set GC percent if not configured
    if os.Getenv(envGOGC) == "" {
        debug.SetGCPercent(25)
    }
    runtimeEnv.SystemdNotifiy(true, "running")
    wg.Wait()
    log.Print("Graceful shutdown completed!")

    // Wait for completion or error
    go func() {
        wg.Wait()
        close(errChan)
    }()

    // Check for server startup errors
    select {
    case err := <-errChan:
        if err != nil {
            return err
        }
    case <-time.After(100 * time.Millisecond):
        // Server started successfully, wait for completion
        if err := <-errChan; err != nil {
            return err
        }
    }

    cclog.Print("Graceful shutdown completed!")
    return nil
}
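
For readers skimming the diff, the new runServer above combines three concurrency pieces: a WaitGroup over the server and signal goroutines, a buffered error channel, and a short grace period to surface immediate startup failures. Below is a stripped-down, self-contained sketch of that shutdown choreography under stated assumptions (plain net/http stand-ins only; none of the cc-backend types or subsystems appear):

package main

import (
    "context"
    "fmt"
    "net/http"
    "os"
    "os/signal"
    "sync"
    "syscall"
    "time"
)

func runSketch(ctx context.Context, srv *http.Server) error {
    var wg sync.WaitGroup
    errChan := make(chan error, 1) // buffered: the server goroutine never blocks

    wg.Add(1)
    go func() {
        defer wg.Done()
        if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
            errChan <- err
        }
    }()

    wg.Add(1)
    sigs := make(chan os.Signal, 1)
    signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
    go func() {
        defer wg.Done()
        select {
        case <-sigs: // operator asked us to stop
        case <-ctx.Done(): // or the surrounding context was cancelled
        }
        srv.Shutdown(context.Background())
    }()

    // Closing errChan after both goroutines finish signals "no error".
    go func() { wg.Wait(); close(errChan) }()

    select {
    case err := <-errChan:
        return err // a bind failure shows up almost immediately
    case <-time.After(100 * time.Millisecond):
        // Server came up; block until shutdown completes.
        if err := <-errChan; err != nil {
            return err
        }
    }
    return nil
}

func main() {
    srv := &http.Server{Addr: "127.0.0.1:8080"}
    if err := runSketch(context.Background(), srv); err != nil {
        fmt.Fprintln(os.Stderr, err)
    }
}

The 100 ms window mirrors the select in runServer: a listener error arrives on errChan almost instantly, while a clean start simply waits for the goroutines to drain.
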

func run() error {
    cliInit()

    if flagVersion {
        printVersion()
        return nil
    }

    // Initialize logger
    cclog.Init(flagLogLevel, flagLogDateTime)

    // Handle init flag
    if flagInit {
        initEnv()
        cclog.Exit("Successfully setup environment!\n" +
            "Please review config.json and .env and adjust it to your needs.\n" +
            "Add your job-archive at ./var/job-archive.")
    }

    // Initialize gops agent
    if err := initGops(); err != nil {
        return err
    }

    // Initialize subsystems in dependency order:
    // 1. Load environment variables from .env file (contains sensitive configuration)
    // 2. Load configuration from config.json (may reference environment variables)
    // 3. Handle database migration commands if requested
    // 4. Initialize database connection (requires config for connection string)
    // 5. Handle user commands if requested (requires database and authentication config)
    // 6. Initialize subsystems like archive and metrics (require config and database)

    // Load environment and configuration
    if err := loadEnvironment(); err != nil {
        return err
    }

    if err := initConfiguration(); err != nil {
        return err
    }

    // Handle database migration (migrate, revert, force)
    if err := handleDatabaseCommands(); err != nil {
        return err
    }

    // Initialize database
    if err := initDatabase(); err != nil {
        return err
    }

    // Handle user commands (add, delete, sync, JWT)
    if err := handleUserCommands(); err != nil {
        return err
    }

    // Initialize subsystems (archive, metrics, taggers)
    if err := initSubsystems(); err != nil {
        return err
    }

    // Exit if start server is not requested
    if !flagServer {
        cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
    }

    // Run server with context
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    return runServer(ctx)
}

func main() {
    if err := run(); err != nil {
        cclog.Error(err.Error())
        os.Exit(1)
    }
}

@@ -1,7 +1,10 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package main provides the entry point for the ClusterCockpit backend server.
// This file contains HTTP server setup, routing configuration, and
// authentication middleware integration.
package main

import (
@@ -18,6 +21,7 @@ import (
    "time"

    "github.com/99designs/gqlgen/graphql/handler"
    "github.com/99designs/gqlgen/graphql/handler/transport"
    "github.com/99designs/gqlgen/graphql/playground"
    "github.com/ClusterCockpit/cc-backend/internal/api"
    "github.com/ClusterCockpit/cc-backend/internal/archiver"
@@ -25,20 +29,30 @@ import (
    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/internal/graph"
    "github.com/ClusterCockpit/cc-backend/internal/graph/generated"
    "github.com/ClusterCockpit/cc-backend/internal/memorystore"
    "github.com/ClusterCockpit/cc-backend/internal/routerConfig"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
    "github.com/ClusterCockpit/cc-backend/pkg/nats"
    "github.com/ClusterCockpit/cc-backend/web"
    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    "github.com/ClusterCockpit/cc-lib/runtimeEnv"
    "github.com/gorilla/handlers"
    "github.com/gorilla/mux"
    httpSwagger "github.com/swaggo/http-swagger"
)

var (
var buildInfo web.Build

// Environment variable names
const (
    envDebug = "DEBUG"
)

// Server encapsulates the HTTP server state and dependencies
type Server struct {
    router    *mux.Router
    server    *http.Server
    apiHandle *api.RestApi
)
    apiHandle *api.RestAPI
}

func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
    rw.Header().Add("Content-Type", "application/json")
@@ -49,22 +63,39 @@ func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
    })
}

func serverInit() {
// NewServer creates and initializes a new Server instance
func NewServer(version, commit, buildDate string) (*Server, error) {
    buildInfo = web.Build{Version: version, Hash: commit, Buildtime: buildDate}

    s := &Server{
        router: mux.NewRouter(),
    }

    if err := s.init(); err != nil {
        return nil, err
    }

    return s, nil
}

func (s *Server) init() error {
    // Setup the http.Handler/Router used by the server
    graph.Init()
    resolver := graph.GetResolverInstance()
    graphQLEndpoint := handler.NewDefaultServer(
    graphQLServer := handler.New(
        generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))

    if os.Getenv("DEBUG") != "1" {
    graphQLServer.AddTransport(transport.POST{})

    if os.Getenv(envDebug) != "1" {
        // Having this handler means that an error message is returned via GraphQL instead of the connection simply being closed.
        // The problem with this is that then, no more stacktrace is printed to stderr.
        graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
        graphQLServer.SetRecoverFunc(func(ctx context.Context, err any) error {
            switch e := err.(type) {
            case string:
                return fmt.Errorf("MAIN > Panic: %s", e)
            case error:
                return fmt.Errorf("MAIN > Panic caused by: %w", e)
                return fmt.Errorf("MAIN > Panic caused by: %s", e.Error())
            }

            return errors.New("MAIN > Internal server error (panic)")
@@ -73,72 +104,56 @@ func serverInit() {

    authHandle := auth.GetAuthInstance()

    apiHandle = api.New()
    s.apiHandle = api.New()

    router = mux.NewRouter()
    buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}

    info := map[string]interface{}{}
    info := map[string]any{}
    info["hasOpenIDConnect"] = false

    if config.Keys.OpenIDConfig != nil {
    if auth.Keys.OpenIDConfig != nil {
        openIDConnect := auth.NewOIDC(authHandle)
        openIDConnect.RegisterEndpoints(router)
        openIDConnect.RegisterEndpoints(s.router)
        info["hasOpenIDConnect"] = true
    }

    router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
    s.router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        log.Debugf("##%v##", info)
        cclog.Debugf("##%v##", info)
        web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
    }).Methods(http.MethodGet)
    router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
    s.router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
    })
    router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
    s.router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
        rw.Header().Add("Content-Type", "text/html; charset=utf-8")
        web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
    })

    secured := router.PathPrefix("/").Subrouter()
    securedapi := router.PathPrefix("/api").Subrouter()
    userapi := router.PathPrefix("/userapi").Subrouter()
    configapi := router.PathPrefix("/config").Subrouter()
    frontendapi := router.PathPrefix("/frontend").Subrouter()
    secured := s.router.PathPrefix("/").Subrouter()
    securedapi := s.router.PathPrefix("/api").Subrouter()
    userapi := s.router.PathPrefix("/userapi").Subrouter()
    configapi := s.router.PathPrefix("/config").Subrouter()
    frontendapi := s.router.PathPrefix("/frontend").Subrouter()
    metricstoreapi := s.router.PathPrefix("/metricstore").Subrouter()

    if !config.Keys.DisableAuthentication {
        router.Handle("/login", authHandle.Login(
            // On success: Handled within Login()
            // On failure:
            func(rw http.ResponseWriter, r *http.Request, err error) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusUnauthorized)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Login failed - ClusterCockpit",
                    MsgType: "alert-warning",
                    Message: err.Error(),
                    Build:   buildInfo,
                    Infos:   info,
                })
            })).Methods(http.MethodPost)
        // Create login failure handler (used by both /login and /jwt-login)
        loginFailureHandler := func(rw http.ResponseWriter, r *http.Request, err error) {
            rw.Header().Add("Content-Type", "text/html; charset=utf-8")
            rw.WriteHeader(http.StatusUnauthorized)
            web.RenderTemplate(rw, "login.tmpl", &web.Page{
                Title:   "Login failed - ClusterCockpit",
                MsgType: "alert-warning",
                Message: err.Error(),
                Build:   buildInfo,
                Infos:   info,
            })
        }

        router.Handle("/jwt-login", authHandle.Login(
            // On success: Handled within Login()
            // On failure:
            func(rw http.ResponseWriter, r *http.Request, err error) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusUnauthorized)
                web.RenderTemplate(rw, "login.tmpl", &web.Page{
                    Title:   "Login failed - ClusterCockpit",
                    MsgType: "alert-warning",
                    Message: err.Error(),
                    Build:   buildInfo,
                    Infos:   info,
                })
            }))
        s.router.Handle("/login", authHandle.Login(loginFailureHandler)).Methods(http.MethodPost)
        s.router.Handle("/jwt-login", authHandle.Login(loginFailureHandler))

        router.Handle("/logout", authHandle.Logout(
        s.router.Handle("/logout", authHandle.Logout(
            http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
                rw.Header().Add("Content-Type", "text/html; charset=utf-8")
                rw.WriteHeader(http.StatusOK)
@@ -171,7 +186,7 @@ func serverInit() {
    })

    securedapi.Use(func(next http.Handler) http.Handler {
        return authHandle.AuthApi(
        return authHandle.AuthAPI(
            // On success:
            next,
            // On failure: JSON Response
@@ -179,7 +194,15 @@ func serverInit() {
    })

    userapi.Use(func(next http.Handler) http.Handler {
        return authHandle.AuthUserApi(
        return authHandle.AuthUserAPI(
            // On success:
            next,
            // On failure: JSON Response
            onFailureResponse)
    })

    metricstoreapi.Use(func(next http.Handler) http.Handler {
        return authHandle.AuthMetricStoreAPI(
            // On success:
            next,
            // On failure: JSON Response
@@ -187,7 +210,7 @@ func serverInit() {
    })

    configapi.Use(func(next http.Handler) http.Handler {
        return authHandle.AuthConfigApi(
        return authHandle.AuthConfigAPI(
            // On success:
            next,
            // On failure: JSON Response
@@ -195,7 +218,7 @@ func serverInit() {
    })

    frontendapi.Use(func(next http.Handler) http.Handler {
        return authHandle.AuthFrontendApi(
        return authHandle.AuthFrontendAPI(
            // On success:
            next,
            // On failure: JSON Response
@@ -204,11 +227,11 @@ func serverInit() {
    }

    if flagDev {
        router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
        router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
        s.router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
        s.router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
            httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
    }
    secured.Handle("/query", graphQLEndpoint)
    secured.Handle("/query", graphQLServer)

    // Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
    secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
@@ -217,50 +240,66 @@ func serverInit() {

    // Mount all /monitoring/... and /api/... routes.
    routerConfig.SetupRoutes(secured, buildInfo)
    apiHandle.MountApiRoutes(securedapi)
    apiHandle.MountUserApiRoutes(userapi)
    apiHandle.MountConfigApiRoutes(configapi)
    apiHandle.MountFrontendApiRoutes(frontendapi)
    s.apiHandle.MountAPIRoutes(securedapi)
    s.apiHandle.MountUserAPIRoutes(userapi)
    s.apiHandle.MountConfigAPIRoutes(configapi)
    s.apiHandle.MountFrontendAPIRoutes(frontendapi)

    if memorystore.InternalCCMSFlag {
        s.apiHandle.MountMetricStoreAPIRoutes(metricstoreapi)
    }

    if config.Keys.EmbedStaticFiles {
        if i, err := os.Stat("./var/img"); err == nil {
            if i.IsDir() {
                log.Info("Use local directory for static images")
                router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
                cclog.Info("Use local directory for static images")
                s.router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
            }
        }
        router.PathPrefix("/").Handler(web.ServeFiles())
        s.router.PathPrefix("/").Handler(http.StripPrefix("/", web.ServeFiles()))
    } else {
        router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
        s.router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
    }

    router.Use(handlers.CompressHandler)
    router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
    router.Use(handlers.CORS(
    s.router.Use(handlers.CompressHandler)
    s.router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
    s.router.Use(handlers.CORS(
        handlers.AllowCredentials(),
        handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
        handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
        handlers.AllowedOrigins([]string{"*"})))

    return nil
}

func serverStart() {
    handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
// Server timeout defaults (in seconds)
const (
    defaultReadTimeout  = 20
    defaultWriteTimeout = 20
)

func (s *Server) Start(ctx context.Context) error {
    handler := handlers.CustomLoggingHandler(io.Discard, s.router, func(_ io.Writer, params handlers.LogFormatterParams) {
        if strings.HasPrefix(params.Request.RequestURI, "/api/") {
            log.Debugf("%s %s (%d, %.02fkb, %dms)",
            cclog.Debugf("%s %s (%d, %.02fkb, %dms)",
                params.Request.Method, params.URL.RequestURI(),
                params.StatusCode, float32(params.Size)/1024,
                time.Since(params.TimeStamp).Milliseconds())
        } else {
            log.Debugf("%s %s (%d, %.02fkb, %dms)",
            cclog.Debugf("%s %s (%d, %.02fkb, %dms)",
                params.Request.Method, params.URL.RequestURI(),
                params.StatusCode, float32(params.Size)/1024,
                time.Since(params.TimeStamp).Milliseconds())
        }
    })

    server = &http.Server{
        ReadTimeout:  20 * time.Second,
        WriteTimeout: 20 * time.Second,
    // Use configurable timeouts with defaults
    readTimeout := time.Duration(defaultReadTimeout) * time.Second
    writeTimeout := time.Duration(defaultWriteTimeout) * time.Second

    s.server = &http.Server{
        ReadTimeout:  readTimeout,
        WriteTimeout: writeTimeout,
        Handler:      handler,
        Addr:         config.Keys.Addr,
    }
@@ -268,20 +307,20 @@ func serverStart() {
    // Start http or https server
    listener, err := net.Listen("tcp", config.Keys.Addr)
    if err != nil {
        log.Fatalf("starting http listener failed: %v", err)
        return fmt.Errorf("starting listener on '%s': %w", config.Keys.Addr, err)
    }

    if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
    if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHTTPTo != "" {
        go func() {
            http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
            http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHTTPTo, http.StatusMovedPermanently))
        }()
    }

    if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
    if config.Keys.HTTPSCertFile != "" && config.Keys.HTTPSKeyFile != "" {
        cert, err := tls.LoadX509KeyPair(
            config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
            config.Keys.HTTPSCertFile, config.Keys.HTTPSKeyFile)
        if err != nil {
            log.Fatalf("loading X509 keypair failed: %v", err)
            return fmt.Errorf("loading X509 keypair (check 'https-cert-file' and 'https-key-file' in config.json): %w", err)
        }
        listener = tls.NewListener(listener, &tls.Config{
            Certificates: []tls.Certificate{cert},
@@ -292,27 +331,56 @@ func serverStart() {
            MinVersion:               tls.VersionTLS12,
            PreferServerCipherSuites: true,
        })
        fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
        cclog.Infof("HTTPS server listening at %s...", config.Keys.Addr)
    } else {
        fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
        cclog.Infof("HTTP server listening at %s...", config.Keys.Addr)
    }
    //
    // Because this program will want to bind to a privileged port (like 80), the listener must
    // be established first, then the user can be changed, and after that,
    // the actual http server can be started.
    if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
        log.Fatalf("error while preparing server start: %s", err.Error())
        return fmt.Errorf("dropping privileges: %w", err)
    }

    if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
        log.Fatalf("starting server failed: %v", err)
    // Handle context cancellation for graceful shutdown
    go func() {
        <-ctx.Done()
        shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
        defer cancel()
        if err := s.server.Shutdown(shutdownCtx); err != nil {
            cclog.Errorf("Server shutdown error: %v", err)
        }
    }()

    if err = s.server.Serve(listener); err != nil && err != http.ErrServerClosed {
        return fmt.Errorf("server failed: %w", err)
    }
    return nil
}
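
The bind-then-drop order that the comment in Start describes is a classic Unix pattern and worth a standalone sketch. A minimal illustration under stated assumptions follows; dropPrivileges is a hypothetical stand-in for runtimeEnv.DropPrivileges, not the actual implementation:

package main

import (
    "fmt"
    "net"
    "net/http"
    "os"
)

// dropPrivileges is a stand-in for runtimeEnv.DropPrivileges: on Linux it
// would call setgid/setuid to switch to the named unprivileged user.
func dropPrivileges(group, user string) error {
    // ... setgid(group), then setuid(user) ...
    return nil
}

func main() {
    // 1. Bind while still root: only root may open a port below 1024.
    listener, err := net.Listen("tcp", ":80")
    if err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    // 2. Drop privileges: the already-open listener survives the UID change.
    if err := dropPrivileges("clustercockpit", "clustercockpit"); err != nil {
        fmt.Fprintln(os.Stderr, err)
        os.Exit(1)
    }
    // 3. Serve requests as the unprivileged user.
    http.Serve(listener, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        fmt.Fprintln(w, "hello")
    }))
}
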

func serverShutdown() {
func (s *Server) Shutdown(ctx context.Context) {
    // Create a shutdown context with timeout
    shutdownCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
    defer cancel()

    nc := nats.GetClient()
    if nc != nil {
        nc.Close()
    }

    // First shut down the server gracefully (waiting for all ongoing requests)
    server.Shutdown(context.Background())
    if err := s.server.Shutdown(shutdownCtx); err != nil {
        cclog.Errorf("Server shutdown error: %v", err)
    }

    // Then, wait for any async archivings still pending...
    archiver.WaitForArchiving()
    // Archive all the metric store data
    if memorystore.InternalCCMSFlag {
        memorystore.Shutdown()
    }

    // Shutdown archiver with 10 second timeout for fast shutdown
    if err := archiver.Shutdown(10 * time.Second); err != nil {
        cclog.Warnf("Archiver shutdown: %v", err)
    }
}

@@ -1,30 +1,46 @@
{
    "addr": "127.0.0.1:8080",
    "short-running-jobs-duration": 300,
    "main": {
        "addr": "127.0.0.1:8080",
        "short-running-jobs-duration": 300,
        "resampling": {
            "minimumPoints": 600,
            "trigger": 180,
            "resolutions": [
                240,
                60
            ]
        },
        "apiAllowedIPs": [
            "*"
        ],
        "emission-constant": 317
    },
    "cron": {
        "commit-job-worker": "2m",
        "duration-worker": "5m",
        "footprint-worker": "10m"
    },
    "archive": {
        "kind": "file",
        "path": "./var/job-archive"
    },
    "jwts": {
        "max-age": "2000h"
    "auth": {
        "jwts": {
            "max-age": "2000h"
        }
    },
    "enable-resampling": {
        "trigger": 30,
        "resolutions": [
            600,
            300,
            120,
            60
        ]
    "nats": {
        "address": "nats://0.0.0.0:4222",
        "username": "root",
        "password": "root"
    },
    "emission-constant": 317,
    "clusters": [
        {
            "name": "fritz",
            "metricDataRepository": {
                "kind": "cc-metric-store",
                "kind": "cc-metric-store-internal",
                "url": "http://localhost:8082",
                "token": ""
                "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw"
            },
            "filterRanges": {
                "numNodes": {
@@ -44,9 +60,9 @@
        {
            "name": "alex",
            "metricDataRepository": {
                "kind": "cc-metric-store",
                "kind": "cc-metric-store-internal",
                "url": "http://localhost:8082",
                "token": ""
                "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw"
            },
            "filterRanges": {
                "numNodes": {
@@ -63,5 +79,28 @@
                }
            }
        }
    ]
}
    ],
    "metric-store": {
        "checkpoints": {
            "file-format": "avro",
            "interval": "1h",
            "directory": "./var/checkpoints",
            "restore": "48h"
        },
        "archive": {
            "interval": "1h",
            "directory": "./var/archive"
        },
        "retention-in-memory": "48h",
        "subscriptions": [
            {
                "subscribe-to": "hpc-nats",
                "cluster-tag": "fritz"
            },
            {
                "subscribe-to": "hpc-nats",
                "cluster-tag": "alex"
            }
        ]
    }
}
@@ -12,12 +12,7 @@
    "db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit",
    "enable-resampling": {
        "trigger": 30,
        "resolutions": [
            600,
            300,
            120,
            60
        ]
        "resolutions": [600, 300, 120, 60]
    },
    "emission-constant": 317,
    "clusters": [

@@ -1,50 +1,54 @@
{
    "main": {
        "addr": "0.0.0.0:443",
        "ldap": {
            "url": "ldaps://test",
            "user_base": "ou=people,ou=hpc,dc=test,dc=de",
            "search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
            "user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
            "user_filter": "(&(objectclass=posixAccount))"
        },
        "https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
        "https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
        "user": "clustercockpit",
        "group": "clustercockpit",
        "archive": {
            "kind": "file",
            "path": "./var/job-archive"
        },
        "validate": true,
        "clusters": [
            {
                "name": "test",
                "metricDataRepository": {
                    "kind": "cc-metric-store",
                    "url": "http://localhost:8082",
                    "token": "eyJhbGciOiJF-E-pQBQ"
                },
                "filterRanges": {
                    "numNodes": {
                        "from": 1,
                        "to": 64
                    },
                    "duration": {
                        "from": 0,
                        "to": 86400
                    },
                    "startTime": {
                        "from": "2022-01-01T00:00:00Z",
                        "to": null
                    }
                }
            }
        "validate": false,
        "apiAllowedIPs": ["*"],
        "short-running-jobs-duration": 300,
        "resampling": {
            "minimumPoints": 600,
            "trigger": 180,
            "resolutions": [
                240,
                60
            ]
        }
    },
    "cron": {
        "commit-job-worker": "2m",
        "duration-worker": "5m",
        "footprint-worker": "10m"
    },
    "archive": {
        "kind": "file",
        "path": "./var/job-archive"
    },
    "clusters": [
        {
            "name": "test",
            "metricDataRepository": {
                "kind": "cc-metric-store",
                "url": "http://localhost:8082",
                "token": "eyJhbGciOiJF-E-pQBQ"
            },
            "filterRanges": {
                "numNodes": {
                    "from": 1,
                    "to": 64
                },
                "duration": {
                    "from": 0,
                    "to": 86400
                },
                "startTime": {
                    "from": "2022-01-01T00:00:00Z",
                    "to": null
                }
        ],
        "jwts": {
            "cookieName": "",
            "validateUser": false,
            "max-age": "2000h",
            "trustedIssuer": ""
        },
        "short-running-jobs-duration": 300
            }
        }
    ]
}

configs/uiConfig.json (new file, 45 lines)
@@ -0,0 +1,45 @@
{
    "jobList": {
        "usePaging": false,
        "showFootprint": false
    },
    "jobView": {
        "showPolarPlot": true,
        "showFootprint": true,
        "showRoofline": true,
        "showStatTable": true
    },
    "metricConfig": {
        "jobListMetrics": ["mem_bw", "flops_dp"],
        "jobViewPlotMetrics": ["mem_bw", "flops_dp"],
        "jobViewTableMetrics": ["mem_bw", "flops_dp"],
        "clusters": [
            {
                "name": "test",
                "subClusters": [
                    {
                        "name": "one",
                        "jobListMetrics": ["mem_used", "flops_sp"]
                    }
                ]
            }
        ]
    },
    "nodeList": {
        "usePaging": true
    },
    "plotConfiguration": {
        "plotsPerRow": 3,
        "colorBackground": true,
        "lineWidth": 3,
        "colorScheme": [
            "#00bfff",
            "#0000ff",
            "#ff00ff",
            "#ff0000",
            "#ff8000",
            "#ffff00",
            "#80ff00"
        ]
    }
}
go.mod (139 lines changed)
@@ -1,90 +1,129 @@
module github.com/ClusterCockpit/cc-backend

go 1.23
go 1.24.0

toolchain go1.24.1

tool (
    github.com/99designs/gqlgen
    github.com/swaggo/swag/cmd/swag
)

require (
    github.com/99designs/gqlgen v0.17.57
    github.com/ClusterCockpit/cc-units v0.4.0
    github.com/99designs/gqlgen v0.17.85
    github.com/ClusterCockpit/cc-lib v1.0.2
    github.com/Masterminds/squirrel v1.5.4
    github.com/coreos/go-oidc/v3 v3.11.0
    github.com/go-co-op/gocron/v2 v2.9.0
    github.com/go-ldap/ldap/v3 v3.4.8
    github.com/go-sql-driver/mysql v1.8.1
    github.com/golang-jwt/jwt/v5 v5.2.1
    github.com/golang-migrate/migrate/v4 v4.17.1
    github.com/aws/aws-sdk-go-v2 v1.41.1
    github.com/aws/aws-sdk-go-v2/config v1.32.7
    github.com/aws/aws-sdk-go-v2/credentials v1.19.7
    github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0
    github.com/coreos/go-oidc/v3 v3.17.0
    github.com/expr-lang/expr v1.17.7
    github.com/go-co-op/gocron/v2 v2.19.0
    github.com/go-ldap/ldap/v3 v3.4.12
    github.com/go-sql-driver/mysql v1.9.3
    github.com/golang-jwt/jwt/v5 v5.3.0
    github.com/golang-migrate/migrate/v4 v4.19.1
    github.com/google/gops v0.3.28
    github.com/gorilla/handlers v1.5.2
    github.com/gorilla/mux v1.8.1
    github.com/gorilla/sessions v1.4.0
    github.com/influxdata/influxdb-client-go/v2 v2.13.0
    github.com/influxdata/line-protocol/v2 v2.2.1
    github.com/jmoiron/sqlx v1.4.0
    github.com/mattn/go-sqlite3 v1.14.22
    github.com/prometheus/client_golang v1.19.1
    github.com/prometheus/common v0.55.0
    github.com/joho/godotenv v1.5.1
    github.com/linkedin/goavro/v2 v2.14.1
    github.com/mattn/go-sqlite3 v1.14.33
    github.com/nats-io/nats.go v1.47.0
    github.com/prometheus/client_golang v1.23.2
    github.com/prometheus/common v0.67.4
    github.com/qustavo/sqlhooks/v2 v2.1.0
    github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
    github.com/stretchr/testify v1.11.1
    github.com/swaggo/http-swagger v1.3.4
    github.com/swaggo/swag v1.16.4
    github.com/vektah/gqlparser/v2 v2.5.20
    golang.org/x/crypto v0.31.0
    golang.org/x/exp v0.0.0-20240707233637-46b078467d37
    golang.org/x/oauth2 v0.21.0
    github.com/swaggo/swag v1.16.6
    github.com/vektah/gqlparser/v2 v2.5.31
    golang.org/x/crypto v0.46.0
    golang.org/x/oauth2 v0.34.0
    golang.org/x/time v0.14.0
)

require (
    filippo.io/edwards25519 v1.1.0 // indirect
    github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
    github.com/KyleBanks/depth v1.2.1 // indirect
    github.com/agnivade/levenshtein v1.2.0 // indirect
    github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
    github.com/agnivade/levenshtein v1.2.1 // indirect
    github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect
    github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect
    github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 // indirect
    github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 // indirect
    github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
    github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 // indirect
    github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect
    github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 // indirect
    github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 // indirect
    github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 // indirect
    github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect
    github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 // indirect
    github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 // indirect
    github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect
    github.com/aws/smithy-go v1.24.0 // indirect
    github.com/beorn7/perks v1.0.1 // indirect
    github.com/cespare/xxhash/v2 v2.3.0 // indirect
    github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
    github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
    github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
    github.com/felixge/httpsnoop v1.0.4 // indirect
    github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
    github.com/go-jose/go-jose/v4 v4.0.3 // indirect
    github.com/go-openapi/jsonpointer v0.21.0 // indirect
    github.com/go-openapi/jsonreference v0.21.0 // indirect
    github.com/go-openapi/spec v0.21.0 // indirect
    github.com/go-openapi/swag v0.23.0 // indirect
    github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
    github.com/fsnotify/fsnotify v1.9.0 // indirect
    github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
    github.com/go-jose/go-jose/v4 v4.1.3 // indirect
    github.com/go-openapi/jsonpointer v0.22.3 // indirect
    github.com/go-openapi/jsonreference v0.21.3 // indirect
    github.com/go-openapi/spec v0.22.1 // indirect
    github.com/go-openapi/swag/conv v0.25.4 // indirect
    github.com/go-openapi/swag/jsonname v0.25.4 // indirect
    github.com/go-openapi/swag/jsonutils v0.25.4 // indirect
    github.com/go-openapi/swag/loading v0.25.4 // indirect
    github.com/go-openapi/swag/stringutils v0.25.4 // indirect
    github.com/go-openapi/swag/typeutils v0.25.4 // indirect
    github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
    github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
    github.com/goccy/go-yaml v1.19.0 // indirect
    github.com/golang/snappy v0.0.4 // indirect
    github.com/google/uuid v1.6.0 // indirect
    github.com/gorilla/securecookie v1.1.2 // indirect
    github.com/gorilla/websocket v1.5.3 // indirect
    github.com/hashicorp/errwrap v1.1.0 // indirect
    github.com/hashicorp/go-multierror v1.1.1 // indirect
    github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
    github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
    github.com/jonboulle/clockwork v0.4.0 // indirect
    github.com/josharian/intern v1.0.0 // indirect
    github.com/jonboulle/clockwork v0.5.0 // indirect
    github.com/jpillora/backoff v1.0.0 // indirect
    github.com/json-iterator/go v1.1.12 // indirect
    github.com/klauspost/compress v1.18.1 // indirect
    github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
    github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
    github.com/mailru/easyjson v0.7.7 // indirect
    github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
    github.com/modern-go/reflect2 v1.0.2 // indirect
    github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
    github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
    github.com/oapi-codegen/runtime v1.1.1 // indirect
    github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
    github.com/prometheus/client_model v0.6.1 // indirect
    github.com/prometheus/procfs v0.15.1 // indirect
    github.com/nats-io/nkeys v0.4.11 // indirect
    github.com/nats-io/nuid v1.0.1 // indirect
    github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
    github.com/prometheus/client_model v0.6.2 // indirect
    github.com/prometheus/procfs v0.16.1 // indirect
    github.com/robfig/cron/v3 v3.0.1 // indirect
    github.com/russross/blackfriday/v2 v2.1.0 // indirect
    github.com/sosodev/duration v1.3.1 // indirect
    github.com/stretchr/objx v0.5.2 // indirect
    github.com/swaggo/files v1.0.1 // indirect
    github.com/urfave/cli/v2 v2.27.5 // indirect
    github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
    go.uber.org/atomic v1.11.0 // indirect
    golang.org/x/mod v0.22.0 // indirect
    golang.org/x/net v0.31.0 // indirect
    golang.org/x/sync v0.10.0 // indirect
    golang.org/x/sys v0.28.0 // indirect
    golang.org/x/text v0.21.0 // indirect
    golang.org/x/tools v0.27.0 // indirect
    google.golang.org/protobuf v1.35.2 // indirect
    gopkg.in/yaml.v2 v2.4.0 // indirect
    github.com/urfave/cli/v2 v2.27.7 // indirect
    github.com/urfave/cli/v3 v3.6.1 // indirect
    github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
    go.yaml.in/yaml/v2 v2.4.3 // indirect
    go.yaml.in/yaml/v3 v3.0.4 // indirect
    golang.org/x/mod v0.31.0 // indirect
    golang.org/x/net v0.48.0 // indirect
    golang.org/x/sync v0.19.0 // indirect
    golang.org/x/sys v0.39.0 // indirect
    golang.org/x/text v0.32.0 // indirect
    golang.org/x/tools v0.40.0 // indirect
    google.golang.org/protobuf v1.36.11 // indirect
    gopkg.in/yaml.v3 v3.0.1 // indirect
    sigs.k8s.io/yaml v1.4.0 // indirect
    sigs.k8s.io/yaml v1.6.0 // indirect
)

go.sum (384 lines changed)
@@ -1,91 +1,172 @@
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
github.com/99designs/gqlgen v0.17.57 h1:Ak4p60BRq6QibxY0lEc0JnQhDurfhxA67sp02lMjmPc=
github.com/99designs/gqlgen v0.17.57/go.mod h1:Jx61hzOSTcR4VJy/HFIgXiQ5rJ0Ypw8DxWLjbYDAUw0=
github.com/99designs/gqlgen v0.17.85 h1:EkGx3U2FDcxQm8YDLQSpXIAVmpDyZ3IcBMOJi2nH1S0=
github.com/99designs/gqlgen v0.17.85/go.mod h1:yvs8s0bkQlRfqg03YXr3eR4OQUowVhODT/tHzCXnbOU=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/ClusterCockpit/cc-units v0.4.0 h1:zP5DOu99GmErW0tCDf0gcLrlWt42RQ9dpoONEOh4cI0=
github.com/ClusterCockpit/cc-units v0.4.0/go.mod h1:3S3PAhAayS3pbgcT4q9Vn9VJw22Op51X0YimtG77zBw=
github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg=
github.com/ClusterCockpit/cc-lib v1.0.2/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow=
github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM=
github.com/PuerkitoBio/goquery v1.9.3 h1:mpJr/ikUA9/GNJB/DBZcGeFDXUtosHRyRrwh7KGdTG0=
github.com/PuerkitoBio/goquery v1.9.3/go.mod h1:1ndLHPdTz+DyQPICCWYlYQMPl0oXZj0G6D4LCYA6u4U=
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/agnivade/levenshtein v1.2.0 h1:U9L4IOT0Y3i0TIlUIDJ7rVUziKi/zPbrJGaFrtYH3SY=
github.com/agnivade/levenshtein v1.2.0/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI=
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/NVIDIA/go-nvml v0.13.0-1 h1:OLX8Jq3dONuPOQPC7rndB6+iDmDakw0XTYgzMxObkEw=
github.com/NVIDIA/go-nvml v0.13.0-1/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw=
github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ=
github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM=
github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU=
github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI=
github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ=
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/aws/aws-sdk-go-v2 v1.41.1 h1:ABlyEARCDLN034NhxlRUSZr4l71mh+T5KAeGh6cerhU=
github.com/aws/aws-sdk-go-v2 v1.41.1/go.mod h1:MayyLB8y+buD9hZqkCW3kX1AKq07Y5pXxtgB+rRFhz0=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 h1:489krEF9xIGkOaaX3CE/Be2uWjiXrkCH6gUX+bZA/BU=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4/go.mod h1:IOAPF6oT9KCsceNTvvYMNHy0+kMF8akOjeDvPENWxp4=
github.com/aws/aws-sdk-go-v2/config v1.32.7 h1:vxUyWGUwmkQ2g19n7JY/9YL8MfAIl7bTesIUykECXmY=
github.com/aws/aws-sdk-go-v2/config v1.32.7/go.mod h1:2/Qm5vKUU/r7Y+zUk/Ptt2MDAEKAfUtKc1+3U1Mo3oY=
github.com/aws/aws-sdk-go-v2/credentials v1.19.7 h1:tHK47VqqtJxOymRrNtUXN5SP/zUTvZKeLx4tH6PGQc8=
github.com/aws/aws-sdk-go-v2/credentials v1.19.7/go.mod h1:qOZk8sPDrxhf+4Wf4oT2urYJrYt3RejHSzgAquYeppw=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 h1:I0GyV8wiYrP8XpA70g1HBcQO1JlQxCMTW9npl5UbDHY=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17/go.mod h1:tyw7BOl5bBe/oqvoIeECFJjMdzXoa/dfVz3QQ5lgHGA=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 h1:xOLELNKGp2vsiteLsvLPwxC+mYmO6OZ8PYgiuPJzF8U=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17/go.mod h1:5M5CI3D12dNOtH3/mk6minaRwI2/37ifCURZISxA/IQ=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 h1:WWLqlh79iO48yLkj1v3ISRNiv+3KdQoZ6JWyfcsyQik=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17/go.mod h1:EhG22vHRrvF8oXSTYStZhJc1aUgKtnJe+aOiFEV90cM=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16 h1:CjMzUs78RDDv4ROu3JnJn/Ig1r6ZD7/T2DXLLRpejic=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.16/go.mod h1:uVW4OLBqbJXSHJYA9svT9BluSvvwbzLQ2Crf6UPzR3c=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 h1:0ryTNEdJbzUCEWkVXEXoqlXV72J5keC1GvILMOuD00E=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4/go.mod h1:HQ4qwNZh32C3CBeO6iJLQlgtMzqeG17ziAA/3KDJFow=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7 h1:DIBqIrJ7hv+e4CmIk2z3pyKT+3B6qVMgRsawHiR3qso=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.7/go.mod h1:vLm00xmBke75UmpNvOcZQ/Q30ZFjbczeLFqGx5urmGo=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 h1:RuNSMoozM8oXlgLG/n6WLaFGoea7/CddrCfIiSA+xdY=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17/go.mod h1:F2xxQ9TZz5gDWsclCtPQscGpP0VUOc8RqgFM3vDENmU=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16 h1:NSbvS17MlI2lurYgXnCOLvCFX38sBW4eiVER7+kkgsU=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.16/go.mod h1:SwT8Tmqd4sA6G1qaGdzWCJN99bUmPGHfRwwq3G5Qb+A=
github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0 h1:MIWra+MSq53CFaXXAywB2qg9YvVZifkk6vEGl/1Qor0=
github.com/aws/aws-sdk-go-v2/service/s3 v1.95.0/go.mod h1:79S2BdqCJpScXZA2y+cpZuocWsjGjJINyXnOsf5DTz8=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 h1:VrhDvQib/i0lxvr3zqlUwLwJP4fpmpyD9wYG1vfSu+Y=
github.com/aws/aws-sdk-go-v2/service/signin v1.0.5/go.mod h1:k029+U8SY30/3/ras4G/Fnv/b88N4mAfliNn08Dem4M=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 h1:v6EiMvhEYBoHABfbGB4alOYmCIrcgyPPiBE1wZAEbqk=
github.com/aws/aws-sdk-go-v2/service/sso v1.30.9/go.mod h1:yifAsgBxgJWn3ggx70A3urX2AN49Y5sJTD1UQFlfqBw=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13 h1:gd84Omyu9JLriJVCbGApcLzVR3XtmC4ZDPcAI6Ftvds=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.13/go.mod h1:sTGThjphYE4Ohw8vJiRStAcu3rbjtXRsdNB0TvZ5wwo=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 h1:5fFjR/ToSOzB2OQ/XqWpZBmNvmP/pJ1jOWYlFDJTjRQ=
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6/go.mod h1:qgFDZQSD/Kys7nJnVqYlWKnh0SSdMjAi0uSwON4wgYQ=
github.com/aws/smithy-go v1.24.0 h1:LpilSUItNPFr1eY85RYgTIg5eIEPtvFbskaFcmmIUnk=
github.com/aws/smithy-go v1.24.0/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coreos/go-oidc/v3 v3.11.0 h1:Ia3MxdwpSw702YW0xgfmP1GVCMA9aEFWu12XUZ3/OtI=
github.com/coreos/go-oidc/v3 v3.11.0/go.mod h1:gE3LgjOgFoHi9a4ce4/tJczr0Ai2/BoDhf0r5lltWI0=
github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/coreos/go-oidc/v3 v3.17.0 h1:hWBGaQfbi0iVviX4ibC7bk8OKT5qNr4klBaCHVNvehc=
github.com/coreos/go-oidc/v3 v3.17.0/go.mod h1:wqPbKFrVnE90vty060SB40FCJ8fTHTxSwyXJqZH+sI8=
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo=
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/dhui/dktest v0.4.1 h1:/w+IWuDXVymg3IrRJCHHOkMK10m9aNVMOyD0X12YVTg=
github.com/dhui/dktest v0.4.1/go.mod h1:DdOqcUpL7vgyP4GlF3X3w7HbSlz8cEQzwewPveYEQbA=
github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8=
github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
github.com/docker/docker v24.0.9+incompatible h1:HPGzNmwfLZWdxHqK9/II92pyi1EpYKsAqcl4G0Of9v0=
github.com/docker/docker v24.0.9+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ=
github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
github.com/dhui/dktest v0.4.6 h1:+DPKyScKSEp3VLtbMDHcUq6V5Lm5zfZZVb0Sk7Ahom4=
github.com/dhui/dktest v0.4.6/go.mod h1:JHTSYDtKkvFNFHJKqCzVzqXecyv+tKt8EzceOmQOgbU=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI=
github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/expr-lang/expr v1.17.7 h1:Q0xY/e/2aCIp8g9s/LGvMDCC5PxYlvHgDZRQ4y16JX8=
github.com/expr-lang/expr v1.17.7/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-asn1-ber/asn1-ber v1.5.5/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-asn1-ber/asn1-ber v1.5.7 h1:DTX+lbVTWaTw1hQ+PbZPlnDZPEIs0SS/GCZAl535dDk=
github.com/go-asn1-ber/asn1-ber v1.5.7/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-co-op/gocron/v2 v2.9.0 h1:+0nTyI3mjc2FGIClBdDWpaLPCNrJ+62o9xbS0ZklEKQ=
github.com/go-co-op/gocron/v2 v2.9.0/go.mod h1:xY7bJxGazKam1cz04EebrlP4S9q4iWdiAylMGP3jY9w=
github.com/go-jose/go-jose/v4 v4.0.3 h1:o8aphO8Hv6RPmH+GfzVuyf7YXSBibp+8YyHdOoDESGo=
github.com/go-jose/go-jose/v4 v4.0.3/go.mod h1:NKb5HO1EZccyMpiZNbdUw/14tiXNyUJh188dfnMCAfc=
github.com/go-ldap/ldap/v3 v3.4.8 h1:loKJyspcRezt2Q3ZRMq2p/0v8iOurlmeXDPw6fikSvQ=
github.com/go-ldap/ldap/v3 v3.4.8/go.mod h1:qS3Sjlu76eHfHGpUdWkAXQTw4beih+cHsco2jXlIXrk=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY=
github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk=
github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo=
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-co-op/gocron/v2 v2.19.0 h1:OKf2y6LXPs/BgBI2fl8PxUpNAI1DA9Mg+hSeGOS38OU=
github.com/go-co-op/gocron/v2 v2.19.0/go.mod h1:5lEiCKk1oVJV39Zg7/YG10OnaVrDAV5GGR6O0663k6U=
github.com/go-jose/go-jose/v4 v4.1.3 h1:CVLmWDhDVRa6Mi/IgCgaopNosCaHz7zrMeF9MlZRkrs=
github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
github.com/go-ldap/ldap/v3 v3.4.12 h1:1b81mv7MagXZ7+1r7cLTWmyuTqVqdwbtJSjC0DAp9s4=
github.com/go-ldap/ldap/v3 v3.4.12/go.mod h1:+SPAGcTtOfmGsCb3h1RFiq4xpp4N636G75OEace8lNo=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-openapi/jsonpointer v0.22.3 h1:dKMwfV4fmt6Ah90zloTbUKWMD+0he+12XYAsPotrkn8=
github.com/go-openapi/jsonpointer v0.22.3/go.mod h1:0lBbqeRsQ5lIanv3LHZBrmRGHLHcQoOXQnf88fHlGWo=
github.com/go-openapi/jsonreference v0.21.3 h1:96Dn+MRPa0nYAR8DR1E03SblB5FJvh7W6krPI0Z7qMc=
github.com/go-openapi/jsonreference v0.21.3/go.mod h1:RqkUP0MrLf37HqxZxrIAtTWW4ZJIK1VzduhXYBEeGc4=
github.com/go-openapi/spec v0.22.1 h1:beZMa5AVQzRspNjvhe5aG1/XyBSMeX1eEOs7dMoXh/k=
github.com/go-openapi/spec v0.22.1/go.mod h1:c7aeIQT175dVowfp7FeCvXXnjN/MrpaONStibD2WtDA=
github.com/go-openapi/swag v0.19.15 h1:D2NRCBzS9/pEY3gP9Nl8aDqGUcPFrwG2p+CNFrLyrCM=
github.com/go-openapi/swag/conv v0.25.4 h1:/Dd7p0LZXczgUcC/Ikm1+YqVzkEeCc9LnOWjfkpkfe4=
github.com/go-openapi/swag/conv v0.25.4/go.mod h1:3LXfie/lwoAv0NHoEuY1hjoFAYkvlqI/Bn5EQDD3PPU=
github.com/go-openapi/swag/jsonname v0.25.4 h1:bZH0+MsS03MbnwBXYhuTttMOqk+5KcQ9869Vye1bNHI=
github.com/go-openapi/swag/jsonname v0.25.4/go.mod h1:GPVEk9CWVhNvWhZgrnvRA6utbAltopbKwDu8mXNUMag=
github.com/go-openapi/swag/jsonutils v0.25.4 h1:VSchfbGhD4UTf4vCdR2F4TLBdLwHyUDTd1/q4i+jGZA=
github.com/go-openapi/swag/jsonutils v0.25.4/go.mod h1:7OYGXpvVFPn4PpaSdPHJBtF0iGnbEaTk8AvBkoWnaAY=
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4 h1:IACsSvBhiNJwlDix7wq39SS2Fh7lUOCJRmx/4SN4sVo=
github.com/go-openapi/swag/jsonutils/fixtures_test v0.25.4/go.mod h1:Mt0Ost9l3cUzVv4OEZG+WSeoHwjWLnarzMePNDAOBiM=
github.com/go-openapi/swag/loading v0.25.4 h1:jN4MvLj0X6yhCDduRsxDDw1aHe+ZWoLjW+9ZQWIKn2s=
github.com/go-openapi/swag/loading v0.25.4/go.mod h1:rpUM1ZiyEP9+mNLIQUdMiD7dCETXvkkC30z53i+ftTE=
github.com/go-openapi/swag/stringutils v0.25.4 h1:O6dU1Rd8bej4HPA3/CLPciNBBDwZj9HiEpdVsb8B5A8=
github.com/go-openapi/swag/stringutils v0.25.4/go.mod h1:GTsRvhJW5xM5gkgiFe0fV3PUlFm0dr8vki6/VSRaZK0=
github.com/go-openapi/swag/typeutils v0.25.4 h1:1/fbZOUN472NTc39zpa+YGHn3jzHWhv42wAJSN91wRw=
github.com/go-openapi/swag/typeutils v0.25.4/go.mod h1:Ou7g//Wx8tTLS9vG0UmzfCsjZjKhpjxayRKTHXf2pTE=
github.com/go-openapi/swag/yamlutils v0.25.4 h1:6jdaeSItEUb7ioS9lFoCZ65Cne1/RZtPBZ9A56h92Sw=
github.com/go-openapi/swag/yamlutils v0.25.4/go.mod h1:MNzq1ulQu+yd8Kl7wPOut/YHAAU/H6hL91fF+E2RFwc=
github.com/go-openapi/testify/enable/yaml/v2 v2.0.2 h1:0+Y41Pz1NkbTHz8NngxTuAXxEodtNSI1WG1c/m5Akw4=
github.com/go-openapi/testify/enable/yaml/v2 v2.0.2/go.mod h1:kme83333GCtJQHXQ8UKX3IBZu6z8T5Dvy5+CW3NLUUg=
github.com/go-openapi/testify/v2 v2.0.2 h1:X999g3jeLcoY8qctY/c/Z8iBHTbwLz7R2WXd6Ub6wls=
github.com/go-openapi/testify/v2 v2.0.2/go.mod h1:HCPmvFFnheKK2BuwSA0TbbdxJ3I16pjwMkYkP4Ywn54=
github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-sql-driver/mysql v1.8.1 h1:LedoTUt/eveggdHS9qUFC1EFSa8bU2+1pZjSRpvNJ1Y=
github.com/go-sql-driver/mysql v1.8.1/go.mod h1:wEBSXgmK//2ZFJyE+qWnIsVGmvmEKlqwuVSjsCm7DZg=
github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss=
github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo=
github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU=
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/goccy/go-yaml v1.19.0 h1:EmkZ9RIsX+Uq4DYFowegAuJo8+xdX3T/2dwNPXbxEYE=
github.com/goccy/go-yaml v1.19.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk=
github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk=
github.com/golang-migrate/migrate/v4 v4.17.1 h1:4zQ6iqL6t6AiItphxJctQb3cFqWiSpMnX7wLTPnnYO4=
github.com/golang-migrate/migrate/v4 v4.17.1/go.mod h1:m8hinFyWBn0SA4QKHuKh175Pm9wjmxj3S2Mia7dbXzM=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo=
github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
@@ -97,28 +178,27 @@ github.com/gorilla/handlers v1.5.2 h1:cLTUSsNkgcwhgRqvCNmdbRWG0A3N4F+M2nWKdScwyE
github.com/gorilla/handlers v1.5.2/go.mod h1:dX+xVpaxdSw+q0Qek8SSsl3dfMk3jNddUkMzo0GtH0w=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
github.com/gorilla/securecookie v1.1.2 h1:YCIWL56dvtr73r6715mJs5ZvhtnY73hBvEF8kXD8ePA=
github.com/gorilla/securecookie v1.1.2/go.mod h1:NfCASbcHqRSY+3a8tlWJwsQap2VX5pwzwo4h3eOamfo=
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
github.com/gorilla/sessions v1.4.0 h1:kpIYOp/oi6MG/p5PgxApU8srsSw9tuFbt46Lt7auzqQ=
github.com/gorilla/sessions v1.4.0/go.mod h1:FLWm50oby91+hl7p/wRxDth9bWSuk0qVL2emc7lT5ik=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8=
github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/influxdata/influxdb-client-go/v2 v2.13.0 h1:ioBbLmR5NMbAjP4UVA5r9b5xGjpABD7j65pI8kFphDM=
github.com/influxdata/influxdb-client-go/v2 v2.13.0/go.mod h1:k+spCbt9hcvqvUiz0sr5D8LolXHqAAOfPw9v/RIRHl4=
github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjwJdUHnwvfjMF71M1iI4=
github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
@@ -133,17 +213,21 @@ github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZ
github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc=
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
github.com/jmoiron/sqlx v1.4.0/go.mod h1:ZrZ7UsYB/weZdl2Bxg6jCRO9c3YHl8r3ahlKmRT4JLY=
github.com/jonboulle/clockwork v0.4.0 h1:p4Cf1aMWXnXAUh8lVfewRBx1zaTSYKrKMF2g3ST4RZ4=
github.com/jonboulle/clockwork v0.4.0/go.mod h1:xgRqUGwRcjKCO1vbZUEtSLrqKoPSsUpK7fnezOII0kc=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I=
github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
@@ -153,11 +237,14 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/linkedin/goavro/v2 v2.14.1 h1:/8VjDpd38PRsy02JS0jflAu7JZPfJcGTwqWgMkFS2iI=
github.com/linkedin/goavro/v2 v2.14.1/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mattn/go-sqlite3 v1.14.33 h1:A5blZ5ulQo2AtayQ9/limgHEkFreKj1Dv226a1K73s0=
github.com/mattn/go-sqlite3 v1.14.33/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -171,31 +258,39 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/nats-io/nats.go v1.47.0 h1:YQdADw6J/UfGUd2Oy6tn4Hq6YHxCaJrVKayxxFqYrgM=
github.com/nats-io/nats.go v1.47.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
github.com/nats-io/nkeys v0.4.11 h1:q44qGV008kYd9W1b1nEBkNzvnWxtRSQ7A8BoqRrcfa0=
github.com/nats-io/nkeys v0.4.11/go.mod h1:szDimtgmfOi9n25JpfIdGw12tZFYXqhGxjhVxsatHVE=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b h1:YWuSjZCQAPM8UUBLkYUk1e+rZcvWHJmFb6i6rM44Xs8=
github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b/go.mod h1:3OVijpioIKYWTqjiG0zfF6wvoJ4fAXGbjdZuI2NgsRQ=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.67.4 h1:yR3NqWO1/UyO1w2PhUvXlGQs/PtFmoveVO0KZ4+Lvsc=
github.com/prometheus/common v0.67.4/go.mod h1:gP0fq6YjjNCLssJCQp0yk4M8W6ikLURwkdd/YKtTbyI=
github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0=
github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4=
@@ -204,108 +299,105 @@ github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8=
github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I=
github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg=
github.com/swaggo/http-swagger v1.3.4 h1:q7t/XLx0n15H1Q9/tk3Y9L4n210XzJF5WtnDX64a5ww=
github.com/swaggo/http-swagger v1.3.4/go.mod h1:9dAh0unqMBAlbp1uE2Uc2mQTxNMU/ha4UbucIg1MFkQ=
github.com/swaggo/swag v1.16.4 h1:clWJtd9LStiG3VeijiCfOVODP6VpHtKdQy9ELFG3s1A=
github.com/swaggo/swag v1.16.4/go.mod h1:VBsHJRsDvfYvqoiMKnsdwhNV9LEMHgEDZcyVYX0sxPg=
github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w=
github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
github.com/vektah/gqlparser/v2 v2.5.20 h1:kPaWbhBntxoZPaNdBaIPT1Kh0i1b/onb5kXgEdP5JCo=
github.com/vektah/gqlparser/v2 v2.5.20/go.mod h1:xMl+ta8a5M1Yo1A1Iwt/k7gSpscwSnHZdw7tfhEGfTM=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI=
github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg=
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
github.com/urfave/cli/v3 v3.6.1 h1:j8Qq8NyUawj/7rTYdBGrxcH7A/j7/G8Q5LhWEW4G3Mo=
github.com/urfave/cli/v3 v3.6.1/go.mod h1:ysVLtOEmg2tOy6PknnYVhDoouyC/6N42TMeoMzskhso=
github.com/vektah/gqlparser/v2 v2.5.31 h1:YhWGA1mfTjID7qJhd1+Vxhpk5HTgydrGU9IgkWBTJ7k=
github.com/vektah/gqlparser/v2 v2.5.31/go.mod h1:c1I28gSOVNzlfc4WuDlqU7voQnsqI6OG2amkBAFmgts=
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg=
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE=
go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q=
go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0=
go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8=
go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/exp v0.0.0-20240707233637-46b078467d37 h1:uLDX+AfeFCct3a2C7uIWBKMJIR3CJMhcgfrUAqjRK6w=
golang.org/x/exp v0.0.0-20240707233637-46b078467d37/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o=
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4=
golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI=
golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo=
golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM=
golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs=
golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw=
golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=
golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o=
golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q=
golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA=
golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io=
google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
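Every module in go.sum carries two h1: digests, one over the module's file tree and one over its go.mod file alone, which is why the entries above travel in pairs. A sketch of how to re-check them locally:

    go mod verify   # recompute hashes from the module cache and compare with go.sum
    go mod tidy     # drop pairs that no longer back any import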
76 gqlgen.yml
@@ -30,7 +30,9 @@ resolver:
# gqlgen will search for any type names in the schema in these go packages
# if they match it will use them, otherwise it will generate them.
autobind:
- "github.com/99designs/gqlgen/graphql/introspection"
- "github.com/ClusterCockpit/cc-backend/internal/graph/model"
- "github.com/ClusterCockpit/cc-backend/internal/config"

# This section declares type mapping between the GraphQL and go type systems
#
@@ -50,61 +52,51 @@ models:
- github.com/99designs/gqlgen/graphql.Int64
- github.com/99designs/gqlgen/graphql.Int32
Job:
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Job"
model: "github.com/ClusterCockpit/cc-lib/schema.Job"
fields:
tags:
resolver: true
metaData:
resolver: true
Cluster:
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Cluster"
model: "github.com/ClusterCockpit/cc-lib/schema.Cluster"
fields:
partitions:
resolver: true
NullableFloat:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
MetricScope:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
MetricValue:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
# Node:
#   model: "github.com/ClusterCockpit/cc-lib/schema.Node"
#   fields:
#     metaData:
#       resolver: true
NullableFloat: { model: "github.com/ClusterCockpit/cc-lib/schema.Float" }
MetricScope: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricScope" }
MetricValue: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricValue" }
JobStatistics:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
{ model: "github.com/ClusterCockpit/cc-lib/schema.JobStatistics" }
GlobalMetricListItem:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
}
{ model: "github.com/ClusterCockpit/cc-lib/schema.GlobalMetricListItem" }
ClusterSupport:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
Resource:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
JobState:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
TimeRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
IntRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
JobMetric:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
{ model: "github.com/ClusterCockpit/cc-lib/schema.ClusterSupport" }
Tag: { model: "github.com/ClusterCockpit/cc-lib/schema.Tag" }
Resource: { model: "github.com/ClusterCockpit/cc-lib/schema.Resource" }
JobState: { model: "github.com/ClusterCockpit/cc-lib/schema.JobState" }
Node: { model: "github.com/ClusterCockpit/cc-lib/schema.Node" }
SchedulerState:
{ model: "github.com/ClusterCockpit/cc-lib/schema.SchedulerState" }
HealthState:
{ model: "github.com/ClusterCockpit/cc-lib/schema.MonitoringState" }
JobMetric: { model: "github.com/ClusterCockpit/cc-lib/schema.JobMetric" }
Series: { model: "github.com/ClusterCockpit/cc-lib/schema.Series" }
MetricStatistics:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
}
{ model: "github.com/ClusterCockpit/cc-lib/schema.MetricStatistics" }
MetricConfig:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
{ model: "github.com/ClusterCockpit/cc-lib/schema.MetricConfig" }
SubClusterConfig:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
}
Accelerator:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
Topology:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
{ model: "github.com/ClusterCockpit/cc-lib/schema.SubClusterConfig" }
Accelerator: { model: "github.com/ClusterCockpit/cc-lib/schema.Accelerator" }
Topology: { model: "github.com/ClusterCockpit/cc-lib/schema.Topology" }
FilterRanges:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
SubCluster:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
StatsSeries:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
{ model: "github.com/ClusterCockpit/cc-lib/schema.FilterRanges" }
SubCluster: { model: "github.com/ClusterCockpit/cc-lib/schema.SubCluster" }
StatsSeries: { model: "github.com/ClusterCockpit/cc-lib/schema.StatsSeries" }
Unit: { model: "github.com/ClusterCockpit/cc-lib/schema.Unit" }
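A model: line binds a GraphQL type to an existing Go struct, here moved from cc-backend's pkg/schema to the shared cc-lib/schema package; resolver: true (as on Job.tags and Cluster.partitions above) takes that one field out of the struct binding and routes it through a generated resolver method instead. A sketch of the stub gqlgen emits for the tags field, with an assumed repository call filled in (the receiver and signature shape follow gqlgen's conventions; Repo, GetTags, and the ID field are hypothetical):

    func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
        // hypothetical body: load tags lazily instead of embedding them in schema.Job
        return r.Repo.GetTags(ctx, obj.ID)
    }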
@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package api_test
@@ -27,24 +27,32 @@ import (
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/gorilla/mux"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func setup(t *testing.T) *api.RestApi {
|
||||
func setup(t *testing.T) *api.RestAPI {
|
||||
const testconfig = `{
|
||||
"main": {
|
||||
"addr": "0.0.0.0:8080",
|
||||
"validate": false,
|
||||
"apiAllowedIPs": [
|
||||
"*"
|
||||
]
|
||||
},
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"jwts": {
|
||||
"max-age": "2m"
|
||||
},
|
||||
"auth": {
|
||||
"jwts": {
|
||||
"max-age": "2m"
|
||||
}
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "testcluster",
|
||||
@@ -57,7 +65,7 @@ func setup(t *testing.T) *api.RestApi {
|
||||
}
|
||||
]
|
||||
}`
|
||||
const testclusterJson = `{
|
||||
const testclusterJSON = `{
|
||||
"name": "testcluster",
|
||||
"subClusters": [
|
||||
{
|
||||
@@ -113,22 +121,22 @@ func setup(t *testing.T) *api.RestApi {
|
||||
]
|
||||
}`
|
||||
|
||||
log.Init("info", true)
|
||||
cclog.Init("info", true)
|
||||
tmpdir := t.TempDir()
|
||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
||||
if err := os.Mkdir(jobarchive, 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0777); err != nil {
|
||||
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJson), 0666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -139,11 +147,22 @@ func setup(t *testing.T) *api.RestApi {
|
||||
}
|
||||
|
||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
config.Init(cfgFilePath)
|
||||
ccconf.Init(cfgFilePath)
|
||||
|
||||
// Load and check main configuration
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||
config.Init(cfg, clustercfg)
|
||||
} else {
|
||||
cclog.Abort("Cluster configuration must be present")
|
||||
}
|
||||
} else {
|
||||
cclog.Abort("Main configuration must be present")
|
||||
}
|
||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||
|
||||
repository.Connect("sqlite3", dbfilepath)
|
||||
@@ -156,14 +175,25 @@ func setup(t *testing.T) *api.RestApi {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
archiver.Start(repository.GetJobRepository())
|
||||
auth.Init()
|
||||
archiver.Start(repository.GetJobRepository(), context.Background())
|
||||
|
||||
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
|
||||
auth.Init(&cfg)
|
||||
} else {
|
||||
cclog.Warn("Authentication disabled due to missing configuration")
|
||||
auth.Init(nil)
|
||||
}
|
||||
|
||||
graph.Init()
|
||||
|
||||
return api.New()
|
||||
}
|
||||
|
||||
func cleanup() {
|
||||
// Gracefully shutdown archiver with timeout
|
||||
if err := archiver.Shutdown(5 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
|
||||
}
|
||||
// TODO: Clear all caches, reset all modules, etc...
|
||||
}
|
||||
|
||||
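The new cleanup() above bounds test teardown by waiting at most five seconds for the archiver to drain. As a rough illustration of that pattern only, here is a minimal, hypothetical sketch of a timeout-bounded Shutdown helper; names and internals are assumptions, not the actual archiver implementation from this diff:

package archiver // hypothetical sketch, not the real package contents

import (
	"errors"
	"time"
)

// done is assumed to be closed by the worker goroutine once the
// archiving queue has been fully drained.
var done = make(chan struct{})

// Shutdown waits until the archiver drained its queue or the timeout expires.
func Shutdown(timeout time.Duration) error {
	select {
	case <-done:
		return nil
	case <-time.After(timeout):
		return errors.New("archiver: shutdown timed out")
	}
}
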
@@ -198,14 +228,14 @@ func TestRestApi(t *testing.T) {
r := mux.NewRouter()
r.PathPrefix("/api").Subrouter()
r.StrictSlash(true)
restapi.MountApiRoutes(r)
restapi.MountAPIRoutes(r)

var TestJobId int64 = 123
var TestClusterName string = "testcluster"
TestClusterName := "testcluster"
var TestStartTime int64 = 123456789

const startJobBody string = `{
"jobId": 123,
"jobId": 123,
"user": "testuser",
"project": "testproj",
"cluster": "testcluster",
@@ -215,10 +245,9 @@ func TestRestApi(t *testing.T) {
"numNodes": 1,
"numHwthreads": 8,
"numAcc": 0,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
"resources": [
{
"hostname": "host123",
@@ -249,16 +278,17 @@ func TestRestApi(t *testing.T) {
if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String())
}
resolver := graph.GetResolverInstance()
// resolver := graph.GetResolverInstance()
restapi.JobRepository.SyncJobs()
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
}

job.Tags, err = resolver.Job().Tags(ctx, job)
if err != nil {
t.Fatal(err)
}
// job.Tags, err = resolver.Job().Tags(ctx, job)
// if err != nil {
// t.Fatal(err)
// }

if job.JobID != 123 ||
job.User != "testuser" ||
@@ -267,21 +297,20 @@ func TestRestApi(t *testing.T) {
job.SubCluster != "sc1" ||
job.Partition != "default" ||
job.Walltime != 3600 ||
job.ArrayJobId != 0 ||
job.ArrayJobID != 0 ||
job.NumNodes != 1 ||
job.NumHWThreads != 8 ||
job.NumAcc != 0 ||
job.Exclusive != 1 ||
job.MonitoringStatus != 1 ||
job.SMT != 1 ||
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
job.StartTime.Unix() != 123456789 {
job.StartTime != 123456789 {
t.Fatalf("unexpected job properties: %#v", job)
}

if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
t.Fatalf("unexpected tags: %#v", job.Tags)
}
// if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
// t.Fatalf("unexpected tags: %#v", job.Tags)
// }
}); !ok {
return
}
@@ -308,7 +337,7 @@ func TestRestApi(t *testing.T) {
t.Fatal(response.Status, recorder.Body.String())
}

archiver.WaitForArchiving()
// Archiving happens asynchronously, will be completed in cleanup
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil {
t.Fatal(err)
@@ -349,7 +378,7 @@ func TestRestApi(t *testing.T) {

t.Run("CheckDoubleStart", func(t *testing.T) {
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far apart!
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
body := strings.ReplaceAll(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`)

req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
recorder := httptest.NewRecorder()
@@ -371,7 +400,7 @@ func TestRestApi(t *testing.T) {
"partition": "default",
"walltime": 3600,
"numNodes": 1,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"resources": [
@@ -399,6 +428,7 @@ func TestRestApi(t *testing.T) {
}

time.Sleep(1 * time.Second)
restapi.JobRepository.SyncJobs()

const stopJobBodyFailed string = `{
"jobId": 12345,
@@ -420,7 +450,7 @@ func TestRestApi(t *testing.T) {
t.Fatal(response.Status, recorder.Body.String())
}

archiver.WaitForArchiving()
// Archiving happens asynchronously, will be completed in cleanup
jobid, cluster := int64(12345), "testcluster"
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
if err != nil {

71  internal/api/cluster.go  Normal file
@@ -0,0 +1,71 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package api

import (
	"bufio"
	"encoding/json"
	"fmt"
	"net/http"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-lib/schema"
)

// GetClustersAPIResponse model
type GetClustersAPIResponse struct {
	Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
}

// getClusters godoc
// @summary Lists all cluster configs
// @tags Cluster query
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
// @produce json
// @param cluster query string false "Job Cluster"
// @success 200 {object} api.GetClustersAPIResponse "Array of clusters"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /api/clusters/ [get]
func (api *RestAPI) getClusters(rw http.ResponseWriter, r *http.Request) {
	if user := repository.GetUserFromContext(r.Context()); user != nil &&
		!user.HasRole(schema.RoleApi) {

		handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
		return
	}

	rw.Header().Add("Content-Type", "application/json")
	bw := bufio.NewWriter(rw)
	defer bw.Flush()

	var clusters []*schema.Cluster

	if r.URL.Query().Has("cluster") {
		name := r.URL.Query().Get("cluster")
		cluster := archive.GetCluster(name)
		if cluster == nil {
			handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
			return
		}
		clusters = append(clusters, cluster)
	} else {
		clusters = archive.Clusters
	}

	payload := GetClustersAPIResponse{
		Clusters: clusters,
	}

	if err := json.NewEncoder(bw).Encode(payload); err != nil {
		handleError(err, http.StatusInternalServerError, rw)
		return
	}
}
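A minimal client sketch for this endpoint, not part of the diff itself: the host, the CCB_TOKEN environment variable, and the testcluster name are placeholders, and the response struct declares only the field used here (it mirrors api.GetClustersAPIResponse).

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

func main() {
	req, _ := http.NewRequest(http.MethodGet,
		"http://localhost:8080/api/clusters/?cluster=testcluster", nil)
	// The handler requires the api role, conveyed here via a JWT.
	req.Header.Set("Authorization", "Bearer "+os.Getenv("CCB_TOKEN"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var payload struct {
		Clusters []json.RawMessage `json:"clusters"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		panic(err)
	}
	fmt.Printf("got %d cluster config(s)\n", len(payload.Clusters))
}
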
@@ -23,7 +23,7 @@ const docTemplate = `{
"host": "{{.Host}}",
"basePath": "{{.BasePath}}",
"paths": {
"/clusters/": {
"/api/clusters/": {
"get": {
"security": [
{
@@ -80,7 +80,7 @@ const docTemplate = `{
}
}
},
"/jobs/": {
"/api/jobs/": {
"get": {
"security": [
{
@@ -175,7 +175,7 @@ const docTemplate = `{
}
}
},
"/jobs/delete_job/": {
"/api/jobs/delete_job/": {
"delete": {
"security": [
{
@@ -208,7 +208,7 @@ const docTemplate = `{
"200": {
"description": "Success message",
"schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse"
"$ref": "#/definitions/api.DefaultApiResponse"
}
},
"400": {
@@ -250,7 +250,7 @@ const docTemplate = `{
}
}
},
"/jobs/delete_job/{id}": {
"/api/jobs/delete_job/{id}": {
"delete": {
"security": [
{
@@ -278,7 +278,7 @@ const docTemplate = `{
"200": {
"description": "Success message",
"schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse"
"$ref": "#/definitions/api.DefaultApiResponse"
}
},
"400": {
@@ -320,7 +320,7 @@ const docTemplate = `{
}
}
},
"/jobs/delete_job_before/{ts}": {
"/api/jobs/delete_job_before/{ts}": {
"delete": {
"security": [
{
@@ -348,7 +348,7 @@ const docTemplate = `{
"200": {
"description": "Success message",
"schema": {
"$ref": "#/definitions/api.DeleteJobApiResponse"
"$ref": "#/definitions/api.DefaultApiResponse"
}
},
"400": {
@@ -390,7 +390,7 @@ const docTemplate = `{
}
}
},
"/jobs/edit_meta/{id}": {
"/api/jobs/edit_meta/{id}": {
"post": {
"security": [
{
@@ -460,14 +460,14 @@ const docTemplate = `{
}
}
},
"/jobs/start_job/": {
"/api/jobs/start_job/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.",
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'Job' scheme, API will fail to execute if requirements are not met.",
"consumes": [
"application/json"
],
@@ -485,7 +485,7 @@ const docTemplate = `{
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.JobMeta"
"$ref": "#/definitions/schema.Job"
}
}
],
@@ -493,7 +493,7 @@ const docTemplate = `{
"201": {
"description": "Job added successfully",
"schema": {
"$ref": "#/definitions/api.StartJobApiResponse"
"$ref": "#/definitions/api.DefaultApiResponse"
}
},
"400": {
@@ -529,14 +529,14 @@ const docTemplate = `{
}
}
},
"/jobs/stop_job/": {
"/api/jobs/stop_job/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'JobMeta' scheme.",
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'Job' scheme.",
"produces": [
"application/json"
],
@@ -559,7 +559,7 @@ const docTemplate = `{
"200": {
"description": "Success message",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
"$ref": "#/definitions/schema.Job"
}
},
"400": {
@@ -587,7 +587,7 @@ const docTemplate = `{
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"description": "Unprocessable Entity: job has already been stopped",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
@@ -601,7 +601,7 @@ const docTemplate = `{
}
}
},
"/jobs/tag_job/{id}": {
"/api/jobs/tag_job/{id}": {
"post": {
"security": [
{
@@ -674,14 +674,14 @@ const docTemplate = `{
}
}
},
"/jobs/{id}": {
"/api/jobs/{id}": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
"produces": [
"application/json"
],
@@ -755,7 +755,7 @@ const docTemplate = `{
"ApiKeyAuth": []
}
],
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'Job' scheme and all metrics according to 'JobData'.",
"consumes": [
"application/json"
],
@@ -833,119 +833,74 @@ const docTemplate = `{
}
}
},
"/user/{id}": {
"/api/nodestats/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Modifies user defined by username (id) in one of four possible ways.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
"description": "Updates node states for the given cluster.\nRequest body contains nodes and their current Slurm states.",
"produces": [
"text/plain"
"application/json"
],
"tags": [
"User"
"Nodestates"
],
"summary": "Updates an existing user",
"summary": "Deliver updated Slurm node states",
"parameters": [
{
"type": "string",
"description": "Database ID of User",
"name": "id",
"in": "path",
"required": true
},
{
"enum": [
"admin",
"support",
"manager",
"user",
"api"
],
"type": "string",
"description": "Priority 1: Role to add",
"name": "add-role",
"in": "formData"
},
{
"enum": [
"admin",
"support",
"manager",
"user",
"api"
],
"type": "string",
"description": "Priority 2: Role to remove",
"name": "remove-role",
"in": "formData"
},
{
"type": "string",
"description": "Priority 3: Project to add",
"name": "add-project",
"in": "formData"
},
{
"type": "string",
"description": "Priority 4: Project to remove",
"name": "remove-project",
"in": "formData"
"description": "Request body containing nodes and their states",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.UpdateNodeStatesRequest"
}
}
],
"responses": {
"200": {
"description": "Success Response Message",
"description": "Success message",
"schema": {
"type": "string"
"$ref": "#/definitions/api.DefaultApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"type": "string"
}
},
"422": {
"description": "Unprocessable Entity: The user could not be updated",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/users/": {
"/api/users/": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.",
"produces": [
"application/json"
],
@@ -997,70 +952,111 @@ const docTemplate = `{
}
}
}
},
"post": {
}
},
"/jobs/tag_job/{id}": {
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "User specified in form data will be saved to database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"description": "Removes tag(s) from a job specified by DB ID. Name and Type of Tag(s) must match.\nTag Scope is required for matching, options: \"global\", \"admin\". Private tags can not be deleted via API.\nIf tagged job is already finished: Tag will be removed from respective archive files.",
"consumes": [
"multipart/form-data"
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Job add and modify"
],
"summary": "Removes one or more tags from a job",
"parameters": [
{
"type": "integer",
"description": "Job Database ID",
"name": "id",
"in": "path",
"required": true
},
{
"description": "Array of tag-objects to remove",
"name": "request",
"in": "body",
"required": true,
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/api.ApiTag"
}
}
}
],
"responses": {
"200": {
"description": "Updated job resource",
"schema": {
"$ref": "#/definitions/schema.Job"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Job or tag does not exist",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/tags/": {
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Removes tags by type and name. Name and Type of Tag(s) must match.\nTag Scope is required for matching, options: \"global\", \"admin\". Private tags can not be deleted via API.\nTags will be removed from respective archive files.",
"consumes": [
"application/json"
],
"produces": [
"text/plain"
],
"tags": [
"User"
"Tag remove"
],
"summary": "Adds a new user",
"summary": "Removes all tags and job-relations for type:name tuple",
"parameters": [
{
"type": "string",
"description": "Unique user ID",
"name": "username",
"in": "formData",
"required": true
},
{
"type": "string",
"description": "User password",
"name": "password",
"in": "formData",
"required": true
},
{
"enum": [
"admin",
"support",
"manager",
"user",
"api"
],
"type": "string",
"description": "User role",
"name": "role",
"in": "formData",
"required": true
},
{
"type": "string",
"description": "Managed project, required for new manager role user",
"name": "project",
"in": "formData"
},
{
"type": "string",
"description": "Users name",
"name": "name",
"in": "formData"
},
{
"type": "string",
"description": "Users email",
"name": "email",
"in": "formData"
"description": "Array of tag-objects to remove",
"name": "request",
"in": "body",
"required": true,
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/api.ApiTag"
}
}
}
],
"responses": {
@@ -1073,93 +1069,25 @@ const docTemplate = `{
"400": {
"description": "Bad Request",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"404": {
"description": "Job or tag does not exist",
"schema": {
"type": "string"
}
},
"422": {
"description": "Unprocessable Entity: creating user failed",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"delete": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "User defined by username in form data will be deleted from database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
"produces": [
"text/plain"
],
"tags": [
"User"
],
"summary": "Deletes a user",
"parameters": [
{
"type": "string",
"description": "User ID to delete",
"name": "username",
"in": "formData",
"required": true
}
],
"responses": {
"200": {
"description": "User deleted successfully"
},
"400": {
"description": "Bad Request",
"schema": {
"type": "string"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"type": "string"
}
},
"403": {
"description": "Forbidden",
"schema": {
"type": "string"
}
},
"422": {
"description": "Unprocessable Entity: deleting user failed",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
@@ -1213,6 +1141,14 @@ const docTemplate = `{
}
}
},
"api.DefaultApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.DeleteJobApiRequest": {
"type": "object",
"required": [
@@ -1236,14 +1172,6 @@ const docTemplate = `{
}
}
},
"api.DeleteJobApiResponse": {
"type": "object",
"properties": {
"msg": {
"type": "string"
}
}
},
"api.EditMetaRequest": {
"type": "object",
"properties": {
@@ -1307,7 +1235,7 @@ const docTemplate = `{
"description": "Array of jobs",
"type": "array",
"items": {
"$ref": "#/definitions/schema.JobMeta"
"$ref": "#/definitions/schema.Job"
}
},
"page": {
@@ -1330,11 +1258,35 @@ const docTemplate = `{
}
}
},
"api.StartJobApiResponse": {
"api.Node": {
"type": "object",
"properties": {
"msg": {
"cpusAllocated": {
"type": "integer"
},
"cpusTotal": {
"type": "integer"
},
"gpusAllocated": {
"type": "integer"
},
"gpusTotal": {
"type": "integer"
},
"hostname": {
"type": "string"
},
"memoryAllocated": {
"type": "integer"
},
"memoryTotal": {
"type": "integer"
},
"states": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
@@ -1371,6 +1323,21 @@ const docTemplate = `{
}
}
},
"api.UpdateNodeStatesRequest": {
"type": "object",
"properties": {
"cluster": {
"type": "string",
"example": "fritz"
},
"nodes": {
"type": "array",
"items": {
"$ref": "#/definitions/api.Node"
}
}
}
},
"schema.Accelerator": {
"type": "object",
"properties": {
@@ -1406,7 +1373,6 @@ const docTemplate = `{
}
},
"schema.Job": {
"description": "Information of a HPC job.",
"type": "object",
"properties": {
"arrayJobId": {
@@ -1431,19 +1397,15 @@ const docTemplate = `{
"energyFootprint": {
"type": "object",
"additionalProperties": {
"type": "number"
"type": "number",
"format": "float64"
}
},
"exclusive": {
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {
"type": "number"
"type": "number",
"format": "float64"
}
},
"id": {
@@ -1455,12 +1417,18 @@ const docTemplate = `{
},
"jobState": {
"enum": [
"completed",
"failed",
"boot_fail",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
"completed",
"deadline",
"failed",
"node_fail",
"out_of_memory",
"pending",
"preempted",
"running",
"suspended",
"timeout"
],
"allOf": [
{
@@ -1504,23 +1472,48 @@ const docTemplate = `{
"type": "string",
"example": "abcd200"
},
"requestedMemory": {
"description": "in MB",
"type": "integer",
"minimum": 1,
"example": 128000
},
"resources": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"shared": {
"type": "string",
"enum": [
"none",
"single_user",
"multi_user"
]
},
"smt": {
"type": "integer",
"example": 4
},
"startTime": {
"type": "string"
"type": "integer",
"example": 1649723812
},
"statistics": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/schema.JobStatistics"
}
},
"subCluster": {
"type": "string",
"example": "main"
},
"submitTime": {
"type": "integer",
"example": 1649723812
},
"tags": {
"type": "array",
"items": {
@@ -1563,147 +1556,6 @@ const docTemplate = `{
}
}
},
"schema.JobMeta": {
"description": "Meta data information of a HPC job.",
"type": "object",
"properties": {
"arrayJobId": {
"type": "integer",
"example": 123000
},
"cluster": {
"type": "string",
"example": "fritz"
},
"concurrentJobs": {
"$ref": "#/definitions/schema.JobLinkResultList"
},
"duration": {
"type": "integer",
"minimum": 1,
"example": 43200
},
"energy": {
"type": "number"
},
"energyFootprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"exclusive": {
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"id": {
"type": "integer"
},
"jobId": {
"type": "integer",
"example": 123000
},
"jobState": {
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"allOf": [
{
"$ref": "#/definitions/schema.JobState"
}
],
"example": "completed"
},
"metaData": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"type": "string",
"example": "main"
},
"project": {
"type": "string",
"example": "abcd200"
},
"resources": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"type": "integer",
"example": 4
},
"startTime": {
"type": "integer",
"minimum": 1,
"example": 1649723812
},
"statistics": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/schema.JobStatistics"
}
},
"subCluster": {
"type": "string",
"example": "main"
},
"tags": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"type": "string",
"example": "abcd100h"
},
"walltime": {
"type": "integer",
"minimum": 1,
"example": 86400
}
}
},
"schema.JobMetric": {
"type": "object",
"properties": {
@@ -1727,24 +1579,32 @@ const docTemplate = `{
"schema.JobState": {
"type": "string",
"enum": [
"running",
"completed",
"failed",
"boot_fail",
"cancelled",
"stopped",
"timeout",
"completed",
"deadline",
"failed",
"node_fail",
"out_of_memory",
"pending",
"preempted",
"out_of_memory"
"running",
"suspended",
"timeout"
],
"x-enum-varnames": [
"JobStateRunning",
"JobStateCompleted",
"JobStateFailed",
"JobStateBootFail",
"JobStateCancelled",
"JobStateStopped",
"JobStateTimeout",
"JobStateCompleted",
"JobStateDeadline",
"JobStateFailed",
"JobStateNodeFail",
"JobStateOutOfMemory",
"JobStatePending",
"JobStatePreempted",
"JobStateOutOfMemory"
"JobStateRunning",
"JobStateSuspended",
"JobStateTimeout"
]
},
"schema.JobStatistics": {
@@ -1943,7 +1803,8 @@ const docTemplate = `{
"additionalProperties": {
"type": "array",
"items": {
"type": "number"
"type": "number",
"format": "float64"
}
}
}
@@ -2031,6 +1892,9 @@ const docTemplate = `{
},
"remove": {
"type": "boolean"
},
"unit": {
"$ref": "#/definitions/schema.Unit"
}
}
},
@@ -2133,7 +1997,7 @@ const docTemplate = `{
var SwaggerInfo = &swag.Spec{
Version: "1.0.0",
Host: "localhost:8080",
BasePath: "/api",
BasePath: "",
Schemes: []string{},
Title: "ClusterCockpit REST API",
Description: "API for batch job control.",

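The hunks above replace the 'JobMeta' scheme with the consolidated 'Job' scheme: the integer "exclusive" field gives way to the "shared" enum, and "startTime" becomes an integer epoch instead of a string. A sketch of posting a start_job request matching the updated definitions; host and token are placeholders, and the field values are taken from the examples in the schema definitions above:

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
)

func main() {
	// Field names follow the schema.Job definitions above; note "shared"
	// instead of the removed "exclusive", and the integer startTime.
	body, _ := json.Marshal(map[string]any{
		"jobId":            123000,
		"user":             "abcd100h",
		"project":          "abcd200",
		"cluster":          "fritz",
		"subCluster":       "main",
		"partition":        "main",
		"walltime":         86400,
		"numNodes":         2,
		"shared":           "none",
		"monitoringStatus": 1,
		"smt":              4,
		"startTime":        1649723812,
		"resources":        []map[string]any{{"hostname": "node001"}},
	})

	req, _ := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/jobs/start_job/", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+os.Getenv("CCB_TOKEN"))

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // 201 Created on success
}
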
1024  internal/api/job.go  Normal file
File diff suppressed because it is too large.

170  internal/api/memorystore.go  Normal file
@@ -0,0 +1,170 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package api

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"strings"

	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"

	"github.com/influxdata/line-protocol/v2/lineprotocol"
)

// handleFree godoc
// @summary Frees buffers in the metric store
// @tags free
// @description This endpoint allows the users to free the buffers from the
// metric store. This endpoint offers the users to remove them systematically
// and also allows them to prune the data under a node, if they do not want to
// remove the whole node.
// @produce json
// @param to query string false "up to timestamp"
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /free/ [post]
func freeMetrics(rw http.ResponseWriter, r *http.Request) {
	rawTo := r.URL.Query().Get("to")
	if rawTo == "" {
		handleError(errors.New("'to' is a required query parameter"), http.StatusBadRequest, rw)
		return
	}

	to, err := strconv.ParseInt(rawTo, 10, 64)
	if err != nil {
		handleError(err, http.StatusInternalServerError, rw)
		return
	}

	bodyDec := json.NewDecoder(r.Body)
	var selectors [][]string
	err = bodyDec.Decode(&selectors)
	if err != nil {
		http.Error(rw, err.Error(), http.StatusBadRequest)
		return
	}

	ms := memorystore.GetMemoryStore()
	n := 0
	for _, sel := range selectors {
		bn, err := ms.Free(sel, to)
		if err != nil {
			handleError(err, http.StatusInternalServerError, rw)
			return
		}

		n += bn
	}

	rw.WriteHeader(http.StatusOK)
	fmt.Fprintf(rw, "buffers freed: %d\n", n)
}

// handleWrite godoc
// @summary Receive metrics in InfluxDB line-protocol
// @tags write
// @description Write data to the in-memory store in the InfluxDB line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)

// @accept plain
// @produce json
// @param cluster query string false "If the lines in the body do not have a cluster tag, use this value instead."
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /write/ [post]
func writeMetrics(rw http.ResponseWriter, r *http.Request) {
	bytes, err := io.ReadAll(r.Body)
	rw.Header().Add("Content-Type", "application/json")
	if err != nil {
		handleError(err, http.StatusInternalServerError, rw)
		return
	}

	ms := memorystore.GetMemoryStore()
	dec := lineprotocol.NewDecoderWithBytes(bytes)
	if err := memorystore.DecodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
		cclog.Errorf("/api/write error: %s", err.Error())
		handleError(err, http.StatusBadRequest, rw)
		return
	}
	rw.WriteHeader(http.StatusOK)
}

// handleDebug godoc
// @summary Debug endpoint
// @tags debug
// @description This endpoint allows the users to print the content of
// nodes/clusters/metrics to review the state of the data.
// @produce json
// @param selector query string false "Selector"
// @success 200 {string} string "Debug dump"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /debug/ [post]
func debugMetrics(rw http.ResponseWriter, r *http.Request) {
	raw := r.URL.Query().Get("selector")
	rw.Header().Add("Content-Type", "application/json")
	selector := []string{}
	if len(raw) != 0 {
		selector = strings.Split(raw, ":")
	}

	ms := memorystore.GetMemoryStore()
	if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil {
		handleError(err, http.StatusBadRequest, rw)
		return
	}
}

// handleHealthCheck godoc
// @summary HealthCheck endpoint
// @tags healthcheck
// @description This endpoint allows the users to check if a node is healthy
// @produce json
// @param selector query string false "Selector"
// @success 200 {string} string "Debug dump"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /healthcheck/ [get]
func metricsHealth(rw http.ResponseWriter, r *http.Request) {
	rawCluster := r.URL.Query().Get("cluster")
	rawNode := r.URL.Query().Get("node")

	if rawCluster == "" || rawNode == "" {
		handleError(errors.New("'cluster' and 'node' are required query parameters"), http.StatusBadRequest, rw)
		return
	}

	rw.Header().Add("Content-Type", "application/json")

	selector := []string{rawCluster, rawNode}

	ms := memorystore.GetMemoryStore()
	if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil {
		handleError(err, http.StatusBadRequest, rw)
		return
	}
}
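A sketch of feeding writeMetrics with InfluxDB line-protocol data, not part of the diff: the host and the exact mount point of the metric-store routes are assumptions, and the measurement line is only an illustration of the format linked in the doc comment.

package main

import (
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// One measurement per line; the cluster tag may alternatively be
	// supplied via the ?cluster= query parameter, as the handler does.
	body := "flops,cluster=testcluster,hostname=host123,type=node value=42.0\n"

	resp, err := http.Post("http://localhost:8080/api/write/?cluster=testcluster",
		"text/plain", strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
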
231  internal/api/nats.go  Normal file
@@ -0,0 +1,231 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package api

import (
	"bytes"
	"database/sql"
	"encoding/json"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/archiver"
	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/importer"
	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/pkg/nats"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
)

// NatsAPI provides NATS subscription-based handlers for Job and Node operations.
// It mirrors the functionality of the REST API but uses NATS messaging.
type NatsAPI struct {
	JobRepository *repository.JobRepository
	// RepositoryMutex protects job creation operations from race conditions
	// when checking for duplicate jobs during startJob calls.
	RepositoryMutex sync.Mutex
}

// NewNatsAPI creates a new NatsAPI instance with default dependencies.
func NewNatsAPI() *NatsAPI {
	return &NatsAPI{
		JobRepository: repository.GetJobRepository(),
	}
}

// StartSubscriptions registers all NATS subscriptions for Job and Node APIs.
// Returns an error if the NATS client is not available or subscription fails.
func (api *NatsAPI) StartSubscriptions() error {
	client := nats.GetClient()
	if client == nil {
		cclog.Warn("NATS client not available, skipping API subscriptions")
		return nil
	}

	if config.Keys.APISubjects != nil {

		s := config.Keys.APISubjects

		if err := client.Subscribe(s.SubjectJobStart, api.handleStartJob); err != nil {
			return err
		}

		if err := client.Subscribe(s.SubjectJobStop, api.handleStopJob); err != nil {
			return err
		}

		if err := client.Subscribe(s.SubjectNodeState, api.handleNodeState); err != nil {
			return err
		}

		cclog.Info("NATS API subscriptions started")
	}
	return nil
}

// handleStartJob processes job start messages received via NATS.
// Expected JSON payload follows the schema.Job structure.
func (api *NatsAPI) handleStartJob(subject string, data []byte) {
	req := schema.Job{
		Shared:           "none",
		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
	}

	dec := json.NewDecoder(bytes.NewReader(data))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&req); err != nil {
		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
		return
	}

	cclog.Debugf("NATS %s: %s", subject, req.GoString())
	req.State = schema.JobStateRunning

	if err := importer.SanityChecks(&req); err != nil {
		cclog.Errorf("NATS %s: sanity check failed: %v", subject, err)
		return
	}

	var unlockOnce sync.Once
	api.RepositoryMutex.Lock()
	defer unlockOnce.Do(api.RepositoryMutex.Unlock)

	jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
	if err != nil && err != sql.ErrNoRows {
		cclog.Errorf("NATS %s: checking for duplicate failed: %v", subject, err)
		return
	}
	if err == nil {
		for _, job := range jobs {
			if (req.StartTime - job.StartTime) < secondsPerDay {
				cclog.Errorf("NATS %s: job with jobId %d, cluster %s already exists (dbid: %d)",
					subject, req.JobID, req.Cluster, job.ID)
				return
			}
		}
	}

	id, err := api.JobRepository.Start(&req)
	if err != nil {
		cclog.Errorf("NATS %s: insert into database failed: %v", subject, err)
		return
	}
	unlockOnce.Do(api.RepositoryMutex.Unlock)

	for _, tag := range req.Tags {
		if _, err := api.JobRepository.AddTagOrCreate(nil, id, tag.Type, tag.Name, tag.Scope); err != nil {
			cclog.Errorf("NATS %s: adding tag to new job %d failed: %v", subject, id, err)
			return
		}
	}

	cclog.Infof("NATS: new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d",
		id, req.Cluster, req.JobID, req.User, req.StartTime)
}

// handleStopJob processes job stop messages received via NATS.
// Expected JSON payload follows the StopJobAPIRequest structure.
func (api *NatsAPI) handleStopJob(subject string, data []byte) {
	var req StopJobAPIRequest

	dec := json.NewDecoder(bytes.NewReader(data))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&req); err != nil {
		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
		return
	}

	if req.JobID == nil {
		cclog.Errorf("NATS %s: the field 'jobId' is required", subject)
		return
	}

	job, err := api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
	if err != nil {
		cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime)
		if cachedErr != nil {
			cclog.Errorf("NATS %s: finding job failed: %v (cached lookup also failed: %v)",
				subject, err, cachedErr)
			return
		}
		job = cachedJob
	}

	if job.State != schema.JobStateRunning {
		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: job has already been stopped (state is: %s)",
			subject, job.JobID, job.ID, job.Cluster, job.State)
		return
	}

	if job.StartTime > req.StopTime {
		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: stopTime %d must be >= startTime %d",
			subject, job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime)
		return
	}

	if req.State != "" && !req.State.Valid() {
		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: invalid job state: %#v",
			subject, job.JobID, job.ID, job.Cluster, req.State)
		return
	} else if req.State == "" {
		req.State = schema.JobStateCompleted
	}

	job.Duration = int32(req.StopTime - job.StartTime)
	job.State = req.State
	api.JobRepository.Mutex.Lock()
	defer api.JobRepository.Mutex.Unlock()

	if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
		if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
			cclog.Errorf("NATS %s: jobId %d (id %d) on %s: marking job as '%s' failed: %v",
				subject, job.JobID, job.ID, job.Cluster, job.State, err)
			return
		}
	}

	cclog.Infof("NATS: archiving job (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s",
		job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)

	if job.MonitoringStatus == schema.MonitoringStatusDisabled {
		return
	}

	archiver.TriggerArchiving(job)
}

// handleNodeState processes node state update messages received via NATS.
// Expected JSON payload follows the UpdateNodeStatesRequest structure.
func (api *NatsAPI) handleNodeState(subject string, data []byte) {
	var req UpdateNodeStatesRequest

	dec := json.NewDecoder(bytes.NewReader(data))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&req); err != nil {
		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
		return
	}

	repo := repository.GetNodeRepository()

	for _, node := range req.Nodes {
		state := determineState(node.States)
		nodeState := schema.NodeStateDB{
			TimeStamp:       time.Now().Unix(),
			NodeState:       state,
			CpusAllocated:   node.CpusAllocated,
			MemoryAllocated: node.MemoryAllocated,
			GpusAllocated:   node.GpusAllocated,
			HealthState:     schema.MonitoringStateFull,
			JobsRunning:     node.JobsRunning,
		}

		repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState)
	}

	cclog.Debugf("NATS %s: updated %d node states for cluster %s", subject, len(req.Nodes), req.Cluster)
}
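A sketch of what a scheduler adapter publishing to handleStartJob could look like, not part of the diff: it uses the upstream nats.go client directly rather than the pkg/nats wrapper, the subject name "cc.job.start" is a placeholder for whatever config.Keys.APISubjects.SubjectJobStart is set to, and the payload values are illustrative.

package main

import (
	"encoding/json"

	"github.com/nats-io/nats.go"
)

func main() {
	nc, err := nats.Connect(nats.DefaultURL)
	if err != nil {
		panic(err)
	}
	defer nc.Close()

	// Fields follow the schema.Job payload that handleStartJob decodes;
	// unknown fields would be rejected by DisallowUnknownFields.
	payload, _ := json.Marshal(map[string]any{
		"jobId":     123000,
		"user":      "abcd100h",
		"project":   "abcd200",
		"cluster":   "fritz",
		"startTime": 1649723812,
		"numNodes":  2,
		"resources": []map[string]any{{"hostname": "node001"}},
	})

	if err := nc.Publish("cc.job.start", payload); err != nil {
		panic(err)
	}
}
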
80  internal/api/node.go  Normal file
@@ -0,0 +1,80 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package api

import (
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-lib/schema"
)

type UpdateNodeStatesRequest struct {
	Nodes   []schema.NodePayload `json:"nodes"`
	Cluster string               `json:"cluster" example:"fritz"`
}

// This routine assumes that only one of these states exists per node
func determineState(states []string) schema.SchedulerState {
	for _, state := range states {
		switch strings.ToLower(state) {
		case "allocated":
			return schema.NodeStateAllocated
		case "reserved":
			return schema.NodeStateReserved
		case "idle":
			return schema.NodeStateIdle
		case "down":
			return schema.NodeStateDown
		case "mixed":
			return schema.NodeStateMixed
		}
	}

	return schema.NodeStateUnknown
}

// updateNodeStates godoc
// @summary Deliver updated Slurm node states
// @tags Nodestates
// @description Accepts a JSON-encoded list of nodes with their scheduler states
// @description and updates the node state database for the given cluster.
// @produce json
// @param request body UpdateNodeStatesRequest true "Request body containing nodes and their states"
// @success 200 {object} api.DefaultApiResponse "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /api/nodestats/ [post]
func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
	// Parse request body
	req := UpdateNodeStatesRequest{}
	if err := decode(r.Body, &req); err != nil {
		handleError(fmt.Errorf("parsing request body failed: %w", err),
			http.StatusBadRequest, rw)
		return
	}
	repo := repository.GetNodeRepository()

	for _, node := range req.Nodes {
		state := determineState(node.States)
		nodeState := schema.NodeStateDB{
			TimeStamp:       time.Now().Unix(),
			NodeState:       state,
			CpusAllocated:   node.CpusAllocated,
			MemoryAllocated: node.MemoryAllocated,
			GpusAllocated:   node.GpusAllocated,
			HealthState:     schema.MonitoringStateFull,
			JobsRunning:     node.JobsRunning,
		}

		repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState)
	}
}
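determineState matches case-insensitively and returns on the first recognized state, falling back to unknown for anything else. A test-style sketch of that behavior, not part of the diff; it assumes it lives next to node.go in package api:

package api

import (
	"testing"

	"github.com/ClusterCockpit/cc-lib/schema"
)

func TestDetermineState(t *testing.T) {
	// Matching is case-insensitive and the first recognized state wins.
	if s := determineState([]string{"ALLOCATED", "COMPLETING"}); s != schema.NodeStateAllocated {
		t.Fatalf("unexpected state: %v", s)
	}
	// States outside the switch fall through to unknown.
	if s := determineState([]string{"draining"}); s != schema.NodeStateUnknown {
		t.Fatalf("unexpected state: %v", s)
	}
}
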
1403  internal/api/rest.go
File diff suppressed because it is too large.

221  internal/api/user.go  Normal file
@@ -0,0 +1,221 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
type APIReturnedUser struct {
|
||||
Username string `json:"username"`
|
||||
Name string `json:"name"`
|
||||
Roles []string `json:"roles"`
|
||||
Email string `json:"email"`
|
||||
Projects []string `json:"projects"`
|
||||
}
|
||||
|
||||
// getUsers godoc
|
||||
// @summary Returns a list of users
|
||||
// @tags User
|
||||
// @description Returns a JSON-encoded list of users.
|
||||
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
|
||||
// @produce json
|
||||
// @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
|
||||
// @success 200 {array} api.ApiReturnedUser "List of users returned successfully"
|
||||
// @failure 400 {string} string "Bad Request"
|
||||
// @failure 401 {string} string "Unauthorized"
|
||||
// @failure 403 {string} string "Forbidden"
|
||||
// @failure 500 {string} string "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/users/ [get]
|
||||
func (api *RestAPI) getUsers(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
handleError(fmt.Errorf("only admins are allowed to fetch a list of users"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
users, err := repository.GetUserRepository().ListUsers(r.URL.Query().Get("not-just-user") == "true")
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("listing users failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(rw).Encode(users); err != nil {
|
||||
cclog.Errorf("Failed to encode users response: %v", err)
|
||||
}
|
||||
}
|
||||
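The response is documented above as an array of `APIReturnedUser`; a client sketch under that assumption follows. Note the handler encodes the repository's user objects directly, so the exact response shape ultimately depends on that type; the URL and auth header are placeholders.

```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// returnedUser mirrors the APIReturnedUser shape declared above.
type returnedUser struct {
	Username string   `json:"username"`
	Name     string   `json:"name"`
	Roles    []string `json:"roles"`
	Email    string   `json:"email"`
	Projects []string `json:"projects"`
}

func main() {
	req, err := http.NewRequest(http.MethodGet,
		"http://localhost:8080/api/users/?not-just-user=true", nil)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer <admin-token>") // assumed auth scheme

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var users []returnedUser
	if err := json.NewDecoder(resp.Body).Decode(&users); err != nil {
		panic(err)
	}
	for _, u := range users {
		fmt.Printf("%s: roles=%v projects=%v\n", u.Username, u.Roles, u.Projects)
	}
}
```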

// updateUser godoc
// @summary Update user roles and projects
// @tags User
// @description Allows admins to add/remove roles and projects for a user
// @produce plain
// @param id path string true "Username"
// @param add-role formData string false "Role to add"
// @param remove-role formData string false "Role to remove"
// @param add-project formData string false "Project to add"
// @param remove-project formData string false "Project to remove"
// @success 200 {string} string "Success message"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
// @security ApiKeyAuth
// @router /api/user/{id} [post]
func (api *RestAPI) updateUser(rw http.ResponseWriter, r *http.Request) {
	// SecuredCheck() only worked with TokenAuth: Removed

	if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
		handleError(fmt.Errorf("only admins are allowed to update a user"), http.StatusForbidden, rw)
		return
	}

	// Get Values
	newrole := r.FormValue("add-role")
	delrole := r.FormValue("remove-role")
	newproj := r.FormValue("add-project")
	delproj := r.FormValue("remove-project")

	rw.Header().Set("Content-Type", "application/json")

	// Handle role updates
	if newrole != "" {
		if err := repository.GetUserRepository().AddRole(r.Context(), mux.Vars(r)["id"], newrole); err != nil {
			handleError(fmt.Errorf("adding role failed: %w", err), http.StatusUnprocessableEntity, rw)
			return
		}
		if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Add Role Success"}); err != nil {
			cclog.Errorf("Failed to encode response: %v", err)
		}
	} else if delrole != "" {
		if err := repository.GetUserRepository().RemoveRole(r.Context(), mux.Vars(r)["id"], delrole); err != nil {
			handleError(fmt.Errorf("removing role failed: %w", err), http.StatusUnprocessableEntity, rw)
			return
		}
		if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Remove Role Success"}); err != nil {
			cclog.Errorf("Failed to encode response: %v", err)
		}
	} else if newproj != "" {
		if err := repository.GetUserRepository().AddProject(r.Context(), mux.Vars(r)["id"], newproj); err != nil {
			handleError(fmt.Errorf("adding project failed: %w", err), http.StatusUnprocessableEntity, rw)
			return
		}
		if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Add Project Success"}); err != nil {
			cclog.Errorf("Failed to encode response: %v", err)
		}
	} else if delproj != "" {
		if err := repository.GetUserRepository().RemoveProject(r.Context(), mux.Vars(r)["id"], delproj); err != nil {
			handleError(fmt.Errorf("removing project failed: %w", err), http.StatusUnprocessableEntity, rw)
			return
		}
		if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Remove Project Success"}); err != nil {
			cclog.Errorf("Failed to encode response: %v", err)
		}
	} else {
		handleError(fmt.Errorf("no operation specified: must provide add-role, remove-role, add-project, or remove-project"), http.StatusBadRequest, rw)
	}
}
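Because of the if/else chain above, the handler applies exactly one operation per request, so separate requests are needed to change both a role and a project. A minimal client sketch (host, username, role string, and token are placeholders; the form field names are the documented ones):

```go
package main

import (
	"fmt"
	"net/http"
	"net/url"
	"strings"
)

func main() {
	form := url.Values{}
	form.Set("add-role", "support") // only the first matching operation is applied

	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/user/alice", // {id} is the target username
		strings.NewReader(form.Encode()))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Authorization", "Bearer <admin-token>") // assumed auth scheme

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```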

// createUser godoc
// @summary Create a new user
// @tags User
// @description Creates a new user with specified credentials and role
// @produce plain
// @param username formData string true "Username"
// @param password formData string false "Password (not required for API users)"
// @param role formData string true "User role"
// @param name formData string false "Full name"
// @param email formData string false "Email address"
// @param project formData string false "Project (required for managers)"
// @success 200 {string} string "Success message"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
// @security ApiKeyAuth
// @router /api/users/ [post]
func (api *RestAPI) createUser(rw http.ResponseWriter, r *http.Request) {
	// SecuredCheck() only worked with TokenAuth: Removed

	rw.Header().Set("Content-Type", "text/plain")
	me := repository.GetUserFromContext(r.Context())
	if !me.HasRole(schema.RoleAdmin) {
		handleError(fmt.Errorf("only admins are allowed to create new users"), http.StatusForbidden, rw)
		return
	}

	username, password, role, name, email, project := r.FormValue("username"),
		r.FormValue("password"), r.FormValue("role"), r.FormValue("name"),
		r.FormValue("email"), r.FormValue("project")

	// Validate username length
	if len(username) == 0 || len(username) > 100 {
		handleError(fmt.Errorf("username must be between 1 and 100 characters"), http.StatusBadRequest, rw)
		return
	}

	if len(password) == 0 && role != schema.GetRoleString(schema.RoleApi) {
		handleError(fmt.Errorf("only API users are allowed to have a blank password (login will be impossible)"), http.StatusBadRequest, rw)
		return
	}

	if len(project) != 0 && role != schema.GetRoleString(schema.RoleManager) {
		handleError(fmt.Errorf("only managers require a project (can be changed later)"), http.StatusBadRequest, rw)
		return
	} else if len(project) == 0 && role == schema.GetRoleString(schema.RoleManager) {
		handleError(fmt.Errorf("managers require a project to manage (can be changed later)"), http.StatusBadRequest, rw)
		return
	}

	if err := repository.GetUserRepository().AddUser(&schema.User{
		Username: username,
		Name:     name,
		Password: password,
		Email:    email,
		Projects: []string{project},
		Roles:    []string{role},
	}); err != nil {
		handleError(fmt.Errorf("adding user failed: %w", err), http.StatusUnprocessableEntity, rw)
		return
	}

	fmt.Fprintf(rw, "User %v successfully created!\n", username)
}
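The validation above means a manager must be created with a project, while any other role must omit it. A sketch of a conforming request (host, values, exact role string, and token are placeholders; role strings come from `schema.GetRoleString`):

```go
package main

import (
	"fmt"
	"net/http"
	"net/url"
	"strings"
)

func main() {
	form := url.Values{}
	form.Set("username", "bob")
	form.Set("password", "changeme")
	form.Set("role", "manager")   // assumed role string
	form.Set("project", "proj42") // required because the role is manager

	req, err := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/users/", strings.NewReader(form.Encode()))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("Authorization", "Bearer <admin-token>") // assumed auth scheme

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```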

// deleteUser godoc
// @summary Delete a user
// @tags User
// @description Deletes a user from the system
// @produce plain
// @param username formData string true "Username to delete"
// @success 200 {string} string "Success"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
// @security ApiKeyAuth
// @router /api/users/ [delete]
func (api *RestAPI) deleteUser(rw http.ResponseWriter, r *http.Request) {
	// SecuredCheck() only worked with TokenAuth: Removed

	if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
		handleError(fmt.Errorf("only admins are allowed to delete a user"), http.StatusForbidden, rw)
		return
	}

	username := r.FormValue("username")
	if err := repository.GetUserRepository().DelUser(username); err != nil {
		handleError(fmt.Errorf("deleting user failed: %w", err), http.StatusUnprocessableEntity, rw)
		return
	}

	rw.WriteHeader(http.StatusOK)
}
internal/archiver/README.md (new file, 190 lines)
@@ -0,0 +1,190 @@
# Archiver Package

The `archiver` package provides asynchronous job archiving functionality for ClusterCockpit. When jobs complete, their metric data is archived from the metric store to a persistent archive backend (filesystem, S3, SQLite, etc.).

## Architecture

### Producer-Consumer Pattern

```
┌──────────────┐   TriggerArchiving()    ┌───────────────┐
│  API Handler │ ───────────────────────▶│ archiveChannel│
│  (Job Stop)  │                         │ (buffer: 128) │
└──────────────┘                         └───────┬───────┘
                                                 │
                        ┌────────────────────────┘
                        │
                        ▼
             ┌──────────────────────┐
             │  archivingWorker()   │
             │     (goroutine)      │
             └──────────┬───────────┘
                        │
                        ▼
             1. Fetch job metadata
             2. Load metric data
             3. Calculate statistics
             4. Archive to backend
             5. Update database
             6. Call hooks
```

### Components

- **archiveChannel**: Buffered channel (128 jobs) for async communication
- **archivePending**: WaitGroup tracking in-flight archiving operations
- **archivingWorker**: Background goroutine processing archiving requests
- **shutdownCtx**: Context for graceful cancellation during shutdown

## Usage

### Initialization

```go
// Start archiver with context for shutdown control
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

archiver.Start(jobRepository, ctx)
```

### Archiving a Job

```go
// Called automatically when a job completes
archiver.TriggerArchiving(job)
```

The function returns immediately. Actual archiving happens in the background.

### Graceful Shutdown

```go
// Shutdown with 10 second timeout
if err := archiver.Shutdown(10 * time.Second); err != nil {
	log.Printf("Archiver shutdown timeout: %v", err)
}
```

**Shutdown process:**

1. Closes channel (rejects new jobs)
2. Waits for pending jobs (up to timeout)
3. Cancels context if timeout exceeded
4. Waits for worker to exit cleanly

## Configuration

### Channel Buffer Size

The archiving channel has a buffer of 128 jobs. If more than 128 jobs are queued simultaneously, `TriggerArchiving()` will block until space is available.

To adjust:

```go
// In archiveWorker.go Start() function
archiveChannel = make(chan *schema.Job, 256) // Increase buffer
```

### Scope Selection

Archive data scopes are automatically selected based on job size:

- **Node scope**: Always included
- **Core scope**: Included only for jobs with ≤8 nodes (omitting it for larger jobs limits data volume)
- **Accelerator scope**: Included if the job used accelerators (`NumAcc > 0`)

To adjust the node threshold:

```go
// In archiver.go ArchiveJob() function
if job.NumNodes <= 16 { // Change from 8 to 16
	scopes = append(scopes, schema.MetricScopeCore)
}
```

### Resolution

Data is archived at the highest available resolution (typically 60s intervals). To change:

```go
// In archiver.go ArchiveJob() function
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 300)
// 0   = highest resolution
// 300 = 5-minute resolution
```

## Error Handling

### Automatic Retry

The archiver does **not** automatically retry failed archiving operations. If archiving fails:

1. The error is logged
2. The job is marked as `MonitoringStatusArchivingFailed` in the database
3. The worker continues processing other jobs

### Manual Retry

To re-archive failed jobs, query for jobs with `MonitoringStatusArchivingFailed` and call `TriggerArchiving()` again.
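A minimal sketch of such a retry loop follows. Note that `FindJobsByMonitoringStatus` is a hypothetical helper; substitute whatever query mechanism your repository layer actually provides for the monitoring status column.

```go
// RetryFailedArchives re-queues jobs whose archiving previously failed.
func RetryFailedArchives(jobRepo *repository.JobRepository) error {
	// Hypothetical query helper; not part of the repository API shown here.
	failed, err := jobRepo.FindJobsByMonitoringStatus(schema.MonitoringStatusArchivingFailed)
	if err != nil {
		return err
	}
	for _, job := range failed {
		// The worker re-runs the full archive pipeline for each job.
		archiver.TriggerArchiving(job)
	}
	return nil
}
```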

## Performance Considerations

### Single Worker Thread

The archiver uses a single worker goroutine. For high-throughput systems:

- Large channel buffer (128) prevents blocking
- Archiving is typically I/O bound (writing to storage)
- Single worker prevents overwhelming storage backend

### Shutdown Timeout

Recommended timeout values:

- **Development**: 5-10 seconds
- **Production**: 10-30 seconds
- **High-load**: 30-60 seconds

Choose based on:

- Average archiving time per job
- Storage backend latency
- Acceptable shutdown delay

## Monitoring

### Logging

The archiver logs:

- **Info**: Startup, shutdown, successful completions
- **Debug**: Individual job archiving times
- **Error**: Archiving failures with job ID and reason
- **Warn**: Shutdown timeout exceeded

### Metrics

Monitor these signals for archiver health:

- Jobs with `MonitoringStatusArchivingFailed`
- Time from job stop to successful archive
- Shutdown timeout occurrences

## Thread Safety

All exported functions are safe for concurrent use:

- `Start()` - Safe to call once
- `TriggerArchiving()` - Safe from multiple goroutines
- `Shutdown()` - Safe to call once
- `WaitForArchiving()` - Deprecated, but safe

Internal state is protected by:

- Channel synchronization (`archiveChannel`)
- WaitGroup for pending count (`archivePending`)
- Context for cancellation (`shutdownCtx`)

## Files

- **archiveWorker.go**: Worker lifecycle, channel management, shutdown logic
- **archiver.go**: Core archiving logic, metric loading, statistics calculation

## Dependencies

- `internal/repository`: Database operations for job metadata
- `internal/metricDataDispatcher`: Loading metric data from various backends
- `pkg/archive`: Archive backend abstraction (filesystem, S3, SQLite)
- `cc-lib/schema`: Job and metric data structures
@@ -1,17 +1,61 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package archiver provides asynchronous job archiving functionality for ClusterCockpit.
//
// The archiver runs a background worker goroutine that processes job archiving requests
// from a buffered channel. When jobs complete, their metric data is archived from the
// metric store to the configured archive backend (filesystem, S3, etc.).
//
// # Architecture
//
// The archiver uses a producer-consumer pattern:
//   - Producer: TriggerArchiving() sends jobs to archiveChannel
//   - Consumer: archivingWorker() processes jobs from the channel
//   - Coordination: sync.WaitGroup tracks pending archive operations
//
// # Lifecycle
//
//  1. Start(repo, ctx) - Initialize worker with context for cancellation
//  2. TriggerArchiving(job) - Queue job for archiving (called when job stops)
//  3. archivingWorker() - Background goroutine processes jobs
//  4. Shutdown(timeout) - Graceful shutdown with timeout
//
// # Graceful Shutdown
//
// The archiver supports graceful shutdown with configurable timeout:
//   - Closes channel to reject new jobs
//   - Waits for pending jobs to complete (up to timeout)
//   - Cancels context if timeout exceeded
//   - Ensures worker goroutine exits cleanly
//
// # Example Usage
//
//	// Initialize archiver
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	archiver.Start(jobRepository, ctx)
//
//	// Trigger archiving when job completes
//	archiver.TriggerArchiving(job)
//
//	// Graceful shutdown with 10 second timeout
//	if err := archiver.Shutdown(10 * time.Second); err != nil {
//		log.Printf("Archiver shutdown timeout: %v", err)
//	}
package archiver

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
	sq "github.com/Masterminds/squirrel"
)

@@ -19,76 +63,188 @@ var (
	archivePending sync.WaitGroup
	archiveChannel chan *schema.Job
	jobRepo        *repository.JobRepository
	shutdownCtx    context.Context
	shutdownCancel context.CancelFunc
	workerDone     chan struct{}
)

func Start(r *repository.JobRepository) {
// Start initializes the archiver and starts the background worker goroutine.
//
// The archiver processes job archiving requests asynchronously via a buffered channel.
// Jobs are sent to the channel using TriggerArchiving() and processed by the worker.
//
// Parameters:
//   - r: JobRepository instance for database operations
//   - ctx: Context for cancellation (shutdown signal propagation)
//
// The worker goroutine will run until:
//   - ctx is cancelled (via parent shutdown)
//   - archiveChannel is closed (via Shutdown())
//
// Must be called before TriggerArchiving(). Safe to call only once.
func Start(r *repository.JobRepository, ctx context.Context) {
	shutdownCtx, shutdownCancel = context.WithCancel(ctx)
	archiveChannel = make(chan *schema.Job, 128)
	workerDone = make(chan struct{})
	jobRepo = r

	go archivingWorker()
}

// Archiving worker thread
// archivingWorker is the background goroutine that processes job archiving requests.
//
// The worker loop:
//  1. Blocks waiting for jobs on archiveChannel or shutdown signal
//  2. Fetches job metadata from repository
//  3. Archives job data to configured backend (calls ArchiveJob)
//  4. Updates job footprint and energy metrics in database
//  5. Marks job as successfully archived
//  6. Calls job stop hooks
//
// The worker exits when:
//   - shutdownCtx is cancelled (timeout during shutdown)
//   - archiveChannel is closed (normal shutdown)
//
// Errors during archiving are logged and the job is marked as failed,
// but the worker continues processing other jobs.
func archivingWorker() {
	defer close(workerDone)

	for {
		select {
		case <-shutdownCtx.Done():
			cclog.Info("Archive worker received shutdown signal")
			return

		case job, ok := <-archiveChannel:
			if !ok {
				break
				cclog.Info("Archive channel closed, worker exiting")
				return
			}

			start := time.Now()
			// not using meta data, called to load JobMeta into Cache?
			// will fail if job meta not in repository
			if _, err := jobRepo.FetchMetadata(job); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
				jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
				cclog.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
				jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
				archivePending.Done()
				continue
			}

			// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
			// TODO: Maybe use context with cancel/timeout here
			jobMeta, err := ArchiveJob(job, context.Background())
			// Use shutdown context to allow cancellation
			jobMeta, err := ArchiveJob(job, shutdownCtx)
			if err != nil {
				log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
				jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
				cclog.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
				jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
				archivePending.Done()
				continue
			}

			stmt := sq.Update("job").Where("job.id = ?", job.ID)

			if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
				cclog.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
				archivePending.Done()
				continue
			}
			if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
				cclog.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
				archivePending.Done()
				continue
			}
			// Update the jobs database entry one last time:
			stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
			if err := jobRepo.Execute(stmt); err != nil {
				log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
				cclog.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
				archivePending.Done()
				continue
			}
			log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
			log.Printf("archiving job (dbid: %d) successful", job.ID)
			cclog.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
			cclog.Infof("archiving job (dbid: %d) successful", job.ID)

			repository.CallJobStopHooks(job)
			archivePending.Done()
		}
	}
}

// Trigger async archiving
// TriggerArchiving queues a job for asynchronous archiving.
//
// This function should be called when a job completes (stops) to archive its
// metric data from the metric store to the configured archive backend.
//
// The function:
//  1. Increments the pending job counter (WaitGroup)
//  2. Sends the job to the archiving channel (buffered, capacity 128)
//  3. Returns immediately (non-blocking unless channel is full)
//
// The actual archiving is performed asynchronously by the worker goroutine.
// Upon completion, the worker will decrement the pending counter.
//
// Panics if Start() has not been called first.
func TriggerArchiving(job *schema.Job) {
	if archiveChannel == nil {
		log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
		cclog.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
	}

	archivePending.Add(1)
	archiveChannel <- job
}

// Wait for background thread to finish pending archiving operations
func WaitForArchiving() {
	// close channel and wait for worker to process remaining jobs
	archivePending.Wait()
// Shutdown performs a graceful shutdown of the archiver with a configurable timeout.
//
// The shutdown process:
//  1. Closes archiveChannel - no new jobs will be accepted
//  2. Waits for pending jobs to complete (up to timeout duration)
//  3. If timeout is exceeded:
//     - Cancels shutdownCtx to interrupt ongoing ArchiveJob operations
//     - Returns error indicating timeout
//  4. Waits for worker goroutine to exit cleanly
//
// Parameters:
//   - timeout: Maximum duration to wait for pending jobs to complete
//     (recommended: 10-30 seconds for production)
//
// Returns:
//   - nil if all jobs completed within timeout
//   - error if timeout was exceeded (some jobs may not have been archived)
//
// Jobs that don't complete within the timeout will be marked as failed.
// The function always ensures the worker goroutine exits before returning.
//
// Example:
//
//	if err := archiver.Shutdown(10 * time.Second); err != nil {
//		log.Printf("Some jobs did not complete: %v", err)
//	}
func Shutdown(timeout time.Duration) error {
	cclog.Info("Initiating archiver shutdown...")

	// Close channel to signal no more jobs will be accepted
	close(archiveChannel)

	// Create a channel to signal when all jobs are done
	done := make(chan struct{})
	go func() {
		archivePending.Wait()
		close(done)
	}()

	// Wait for jobs to complete or timeout
	select {
	case <-done:
		cclog.Info("All archive jobs completed successfully")
		// Wait for worker to exit
		<-workerDone
		return nil
	case <-time.After(timeout):
		cclog.Warn("Archiver shutdown timeout exceeded, cancelling remaining operations")
		// Cancel any ongoing operations
		shutdownCancel()
		// Wait for worker to exit
		<-workerDone
		return fmt.Errorf("archiver shutdown timeout after %v", timeout)
	}
}
@@ -1,7 +1,8 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package archiver

import (
@@ -11,12 +12,37 @@ import (
	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
)

// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
// ArchiveJob archives a completed job's metric data to the configured archive backend.
//
// This function performs the following operations:
//  1. Loads all metric data for the job from the metric data repository
//  2. Calculates job-level statistics (avg, min, max) for each metric
//  3. Stores the job metadata and metric data to the archive backend
//
// Metric data is retrieved at the highest available resolution (typically 60s)
// for the following scopes:
//   - Node scope (always)
//   - Core scope (for jobs with ≤8 nodes, to reduce data volume)
//   - Accelerator scope (if job used accelerators)
//
// The function respects context cancellation. If ctx is cancelled (e.g., during
// shutdown timeout), the operation will be interrupted and return an error.
//
// Parameters:
//   - job: The job to archive (must be a completed job)
//   - ctx: Context for cancellation and timeout control
//
// Returns:
//   - *schema.Job with populated Statistics field
//   - error if data loading or archiving fails
//
// If config.Keys.DisableArchive is true, only job statistics are calculated
// and returned (no data is written to archive backend).
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
	allMetrics := make([]string, 0)
	metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
	for _, mc := range metricConfigs {
@@ -36,15 +62,11 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {

	jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 resolution value retrieves highest res (60s)
	if err != nil {
		log.Error("Error while loading job data for archiving")
		cclog.Error("Error while loading job data for archiving")
		return nil, err
	}

	jobMeta := &schema.JobMeta{
		BaseJob:    job.BaseJob,
		StartTime:  job.StartTime.Unix(),
		Statistics: make(map[string]schema.JobStatistics),
	}
	job.Statistics = make(map[string]schema.JobStatistics)

	for metric, data := range jobData {
		avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
@@ -60,12 +82,13 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
			max = math.Max(max, series.Statistics.Max)
		}

		jobMeta.Statistics[metric] = schema.JobStatistics{
		// Round AVG Result to 2 Digits
		job.Statistics[metric] = schema.JobStatistics{
			Unit: schema.Unit{
				Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
				Base:   archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
			},
			Avg: avg / float64(job.NumNodes),
			Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
			Min: min,
			Max: max,
		}
@@ -75,8 +98,8 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
	// only return the JobMeta structure as the
	// statistics in there are needed.
	if config.Keys.DisableArchive {
		return jobMeta, nil
		return job, nil
	}

	return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
	return job, archive.GetHandle().ImportJob(job, &jobData)
}
@@ -1,29 +1,49 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package auth implements various authentication methods
package auth

import (
	"bytes"
	"context"
	"crypto/rand"
	"database/sql"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"net"
	"net/http"
	"os"
	"sync"
	"time"

	"golang.org/x/time/rate"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/repository"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/ClusterCockpit/cc-lib/util"
	"github.com/gorilla/sessions"
)

// Authenticator is the interface for all authentication methods.
// Each authenticator determines if it can handle a login request (CanLogin)
// and performs the actual authentication (Login).
type Authenticator interface {
	// CanLogin determines if this authenticator can handle the login request.
	// It returns the user object if available and a boolean indicating if this
	// authenticator should attempt the login. This method should not perform
	// expensive operations or actual authentication.
	CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)

	// Login performs the actual authentication for the user.
	// It returns the authenticated user or an error if authentication fails.
	// The user parameter may be nil if the user doesn't exist in the database yet.
	Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
}
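To make the interface contract concrete, here is a hypothetical, minimal implementation; it is not part of cc-backend, and the surrounding imports (errors, net/http, the schema package) are assumed. Registration would then append the instance to the authenticator list, as Init() does for the built-in authenticators.

```go
// demoAuthenticator is a hypothetical, minimal Authenticator shown only to
// illustrate the interface; it is NOT part of cc-backend.
type demoAuthenticator struct{}

// CanLogin stays cheap: claim the request only when a username was supplied
// and the user already exists in the database.
func (a *demoAuthenticator) CanLogin(
	user *schema.User, username string,
	rw http.ResponseWriter, r *http.Request,
) (*schema.User, bool) {
	return user, user != nil && username != ""
}

// Login is where real credential verification would happen
// (LDAP bind, password hash comparison, token validation, ...).
func (a *demoAuthenticator) Login(
	user *schema.User, rw http.ResponseWriter, r *http.Request,
) (*schema.User, error) {
	if r.FormValue("password") == "" {
		return nil, errors.New("empty password")
	}
	return user, nil
}
```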

@@ -32,6 +52,70 @@ var (
	authInstance *Authentication
)

// rateLimiterEntry tracks a rate limiter and its last use time for cleanup
type rateLimiterEntry struct {
	limiter  *rate.Limiter
	lastUsed time.Time
}

var ipUserLimiters sync.Map

// getIPUserLimiter returns a rate limiter for the given IP and username combination.
// Rate limiters are created on demand and track 5 attempts per 15 minutes.
func getIPUserLimiter(ip, username string) *rate.Limiter {
	key := ip + ":" + username
	now := time.Now()

	if entry, ok := ipUserLimiters.Load(key); ok {
		rle := entry.(*rateLimiterEntry)
		rle.lastUsed = now
		return rle.limiter
	}

	// More aggressive rate limiting: 5 attempts per 15 minutes
	newLimiter := rate.NewLimiter(rate.Every(15*time.Minute/5), 5)
	ipUserLimiters.Store(key, &rateLimiterEntry{
		limiter:  newLimiter,
		lastUsed: now,
	})
	return newLimiter
}

// cleanupOldRateLimiters removes rate limiters that haven't been used recently
func cleanupOldRateLimiters(olderThan time.Time) {
	ipUserLimiters.Range(func(key, value any) bool {
		entry := value.(*rateLimiterEntry)
		if entry.lastUsed.Before(olderThan) {
			ipUserLimiters.Delete(key)
			cclog.Debugf("Cleaned up rate limiter for %v", key)
		}
		return true
	})
}

// startRateLimiterCleanup starts a background goroutine to clean up old rate limiters
func startRateLimiterCleanup() {
	go func() {
		ticker := time.NewTicker(1 * time.Hour)
		defer ticker.Stop()
		for range ticker.C {
			// Clean up limiters not used in the last 24 hours
			cleanupOldRateLimiters(time.Now().Add(-24 * time.Hour))
		}
	}()
}

// AuthConfig contains configuration for all authentication methods
type AuthConfig struct {
	LdapConfig   *LdapConfig    `json:"ldap"`
	JwtConfig    *JWTAuthConfig `json:"jwts"`
	OpenIDConfig *OpenIDConfig  `json:"oidc"`
}

// Keys holds the global authentication configuration
var Keys AuthConfig

// Authentication manages all authentication methods and session handling
type Authentication struct {
	sessionStore *sessions.CookieStore
	LdapAuth     *LdapAuthenticator
@@ -47,7 +131,7 @@ func (auth *Authentication) AuthViaSession(
) (*schema.User, error) {
	session, err := auth.sessionStore.Get(r, "session")
	if err != nil {
		log.Error("Error while getting session store")
		cclog.Error("Error while getting session store")
		return nil, err
	}

@@ -55,10 +139,31 @@ func (auth *Authentication) AuthViaSession(
		return nil, nil
	}

	// TODO: Check if session keys exist
	username, _ := session.Values["username"].(string)
	projects, _ := session.Values["projects"].([]string)
	roles, _ := session.Values["roles"].([]string)
	// Validate session data with proper type checking
	username, ok := session.Values["username"].(string)
	if !ok || username == "" {
		cclog.Warn("Invalid session: missing or invalid username")
		// Invalidate the corrupted session
		session.Options.MaxAge = -1
		_ = auth.sessionStore.Save(r, rw, session)
		return nil, errors.New("invalid session data")
	}

	projects, ok := session.Values["projects"].([]string)
	if !ok {
		cclog.Warn("Invalid session: projects not found or invalid type, using empty list")
		projects = []string{}
	}

	roles, ok := session.Values["roles"].([]string)
	if !ok || len(roles) == 0 {
		cclog.Warn("Invalid session: missing or invalid roles")
		// Invalidate the corrupted session
		session.Options.MaxAge = -1
		_ = auth.sessionStore.Save(r, rw, session)
		return nil, errors.New("invalid session data")
	}

	return &schema.User{
		Username: username,
		Projects: projects,
@@ -68,68 +173,82 @@ func (auth *Authentication) AuthViaSession(
	}, nil
}

func Init() {
func Init(authCfg *json.RawMessage) {
	initOnce.Do(func() {
		authInstance = &Authentication{}

		// Start background cleanup of rate limiters
		startRateLimiterCleanup()

		sessKey := os.Getenv("SESSION_KEY")
		if sessKey == "" {
			log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
			cclog.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
			bytes := make([]byte, 32)
			if _, err := rand.Read(bytes); err != nil {
				log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
				cclog.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
			}
			authInstance.sessionStore = sessions.NewCookieStore(bytes)
		} else {
			bytes, err := base64.StdEncoding.DecodeString(sessKey)
			if err != nil {
				log.Fatal("Error while initializing authentication -> decoding session key failed")
				cclog.Fatal("Error while initializing authentication -> decoding session key failed")
			}
			authInstance.sessionStore = sessions.NewCookieStore(bytes)
		}

		if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
		if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err == nil {
			authInstance.SessionMaxAge = d
		}

		if config.Keys.LdapConfig != nil {
		if authCfg == nil {
			return
		}

		config.Validate(configSchema, *authCfg)
		dec := json.NewDecoder(bytes.NewReader(*authCfg))
		dec.DisallowUnknownFields()
		if err := dec.Decode(&Keys); err != nil {
			cclog.Errorf("error while decoding ldap config: %v", err)
		}

		if Keys.LdapConfig != nil {
			ldapAuth := &LdapAuthenticator{}
			if err := ldapAuth.Init(); err != nil {
				log.Warn("Error while initializing authentication -> ldapAuth init failed")
				cclog.Warn("Error while initializing authentication -> ldapAuth init failed")
			} else {
				authInstance.LdapAuth = ldapAuth
				authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
			}
		} else {
			log.Info("Missing LDAP configuration: No LDAP support!")
			cclog.Info("Missing LDAP configuration: No LDAP support!")
		}

		if config.Keys.JwtConfig != nil {
		if Keys.JwtConfig != nil {
			authInstance.JwtAuth = &JWTAuthenticator{}
			if err := authInstance.JwtAuth.Init(); err != nil {
				log.Fatal("Error while initializing authentication -> jwtAuth init failed")
				cclog.Fatal("Error while initializing authentication -> jwtAuth init failed")
			}

			jwtSessionAuth := &JWTSessionAuthenticator{}
			if err := jwtSessionAuth.Init(); err != nil {
				log.Info("jwtSessionAuth init failed: No JWT login support!")
				cclog.Info("jwtSessionAuth init failed: No JWT login support!")
			} else {
				authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
			}

			jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
			if err := jwtCookieSessionAuth.Init(); err != nil {
				log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
				cclog.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
			} else {
				authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
			}
		} else {
			log.Info("Missing JWT configuration: No JWT token support!")
			cclog.Info("Missing JWT configuration: No JWT token support!")
		}

		authInstance.LocalAuth = &LocalAuthenticator{}
		if err := authInstance.LocalAuth.Init(); err != nil {
			log.Fatal("Error while initializing authentication -> localAuth init failed")
			cclog.Fatal("Error while initializing authentication -> localAuth init failed")
		}
		authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
	})
@@ -137,50 +256,48 @@ func Init() {

func GetAuthInstance() *Authentication {
	if authInstance == nil {
		log.Fatal("Authentication module not initialized!")
		cclog.Fatal("Authentication module not initialized!")
	}

	return authInstance
}

func handleTokenUser(tokenUser *schema.User) {
// handleUserSync syncs or updates a user in the database based on configuration.
// This is used for both JWT and OIDC authentication when syncUserOnLogin or updateUserOnLogin is enabled.
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
	r := repository.GetUserRepository()
	dbUser, err := r.GetUser(tokenUser.Username)
	dbUser, err := r.GetUser(user.Username)

	if err != nil && err != sql.ErrNoRows {
		log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
	} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
		if err := r.AddUser(tokenUser); err != nil {
			log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
		cclog.Errorf("Error while loading user '%s': %v", user.Username, err)
		return
	}

	if err == sql.ErrNoRows && syncUserOnLogin { // Add new user
		if err := r.AddUser(user); err != nil {
			cclog.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
		}
	} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
		if err := r.UpdateUser(dbUser, tokenUser); err != nil {
			log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
	} else if err == nil && updateUserOnLogin { // Update existing user
		if err := r.UpdateUser(dbUser, user); err != nil {
			cclog.Errorf("Error while updating user '%s' in DB: %v", dbUser.Username, err)
		}
	}
}

func handleOIDCUser(OIDCUser *schema.User) {
	r := repository.GetUserRepository()
	dbUser, err := r.GetUser(OIDCUser.Username)
// handleTokenUser syncs JWT token user with database
func handleTokenUser(tokenUser *schema.User) {
	handleUserSync(tokenUser, Keys.JwtConfig.SyncUserOnLogin, Keys.JwtConfig.UpdateUserOnLogin)
}

	if err != nil && err != sql.ErrNoRows {
		log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
	} else if err == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
		if err := r.AddUser(OIDCUser); err != nil {
			log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
		}
	} else if err == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
		if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
			log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
		}
	}
// handleOIDCUser syncs OIDC user with database
func handleOIDCUser(OIDCUser *schema.User) {
	handleUserSync(OIDCUser, Keys.OpenIDConfig.SyncUserOnLogin, Keys.OpenIDConfig.UpdateUserOnLogin)
}

func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
	session, err := auth.sessionStore.New(r, "session")
	if err != nil {
		log.Errorf("session creation failed: %s", err.Error())
		cclog.Errorf("session creation failed: %s", err.Error())
		http.Error(rw, err.Error(), http.StatusInternalServerError)
		return err
	}
@@ -188,7 +305,8 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
	if auth.SessionMaxAge != 0 {
		session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
	}
	if config.Keys.HttpsCertFile == "" && config.Keys.HttpsKeyFile == "" {
	if config.Keys.HTTPSCertFile == "" && config.Keys.HTTPSKeyFile == "" {
		cclog.Warn("HTTPS not configured - session cookies will not have Secure flag set (insecure for production)")
		session.Options.Secure = false
	}
	session.Options.SameSite = http.SameSiteStrictMode
@@ -196,7 +314,7 @@ func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request,
	session.Values["projects"] = user.Projects
	session.Values["roles"] = user.Roles
	if err := auth.sessionStore.Save(r, rw, session); err != nil {
		log.Warnf("session save failed: %s", err.Error())
		cclog.Warnf("session save failed: %s", err.Error())
		http.Error(rw, err.Error(), http.StatusInternalServerError)
		return err
	}
@@ -208,14 +326,26 @@ func (auth *Authentication) Login(
	onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		username := r.FormValue("username")
		var dbUser *schema.User
		ip, _, err := net.SplitHostPort(r.RemoteAddr)
		if err != nil {
			ip = r.RemoteAddr
		}

		username := r.FormValue("username")

		limiter := getIPUserLimiter(ip, username)
		if !limiter.Allow() {
			cclog.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
			onfailure(rw, r, errors.New("too many login attempts, try again in a few minutes"))
			return
		}

		var dbUser *schema.User
		if username != "" {
			var err error
			dbUser, err = repository.GetUserRepository().GetUser(username)
			if err != nil && err != sql.ErrNoRows {
				log.Errorf("Error while loading user '%v'", username)
				cclog.Errorf("Error while loading user '%v'", username)
			}
		}

@@ -225,12 +355,12 @@ func (auth *Authentication) Login(
			if user, ok = authenticator.CanLogin(dbUser, username, rw, r); !ok {
				continue
			} else {
				log.Debugf("Can login with user %v", user)
				cclog.Debugf("Can login with user %v", user)
			}

			user, err := authenticator.Login(user, rw, r)
			if err != nil {
				log.Warnf("user login failed: %s", err.Error())
				cclog.Warnf("user login failed: %s", err.Error())
				onfailure(rw, r, err)
				return
			}
@@ -239,7 +369,7 @@ func (auth *Authentication) Login(
				return
			}

			log.Infof("login successful: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
			cclog.Infof("login successful: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
			ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)

			if r.FormValue("redirect") != "" {
@@ -251,7 +381,7 @@ func (auth *Authentication) Login(
			return
		}

		log.Debugf("login failed: no authenticator applied")
		cclog.Debugf("login failed: no authenticator applied")
		onfailure(rw, r, errors.New("no authenticator applied"))
	})
}
@@ -263,14 +393,14 @@ func (auth *Authentication) Auth(
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.JwtAuth.AuthViaJWT(rw, r)
		if err != nil {
			log.Infof("auth -> authentication failed: %s", err.Error())
			cclog.Infof("auth -> authentication failed: %s", err.Error())
			http.Error(rw, err.Error(), http.StatusUnauthorized)
			return
		}
		if user == nil {
			user, err = auth.AuthViaSession(rw, r)
			if err != nil {
				log.Infof("auth -> authentication failed: %s", err.Error())
				cclog.Infof("auth -> authentication failed: %s", err.Error())
				http.Error(rw, err.Error(), http.StatusUnauthorized)
				return
			}
@@ -281,22 +411,30 @@ func (auth *Authentication) Auth(
			return
		}

		log.Info("auth -> authentication failed")
		cclog.Info("auth -> authentication failed")
		onfailure(rw, r, errors.New("unauthorized (please login first)"))
	})
}

func (auth *Authentication) AuthApi(
func (auth *Authentication) AuthAPI(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.JwtAuth.AuthViaJWT(rw, r)
		if err != nil {
			log.Infof("auth api -> authentication failed: %s", err.Error())
			cclog.Infof("auth api -> authentication failed: %s", err.Error())
			onfailure(rw, r, err)
			return
		}

		ipErr := securedCheck(user, r)
		if ipErr != nil {
			cclog.Infof("auth api -> secured check failed: %s", ipErr.Error())
			onfailure(rw, r, ipErr)
			return
		}

		if user != nil {
			switch {
			case len(user.Roles) == 1:
@@ -312,26 +450,63 @@ func (auth *Authentication) AuthApi(
				return
			}
			default:
				log.Info("auth api -> authentication failed: missing role")
				cclog.Info("auth api -> authentication failed: missing role")
				onfailure(rw, r, errors.New("unauthorized"))
			}
		}
		log.Info("auth api -> authentication failed: no auth")
		cclog.Info("auth api -> authentication failed: no auth")
		onfailure(rw, r, errors.New("unauthorized"))
	})
}

func (auth *Authentication) AuthUserApi(
func (auth *Authentication) AuthUserAPI(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.JwtAuth.AuthViaJWT(rw, r)
		if err != nil {
			log.Infof("auth user api -> authentication failed: %s", err.Error())
			cclog.Infof("auth user api -> authentication failed: %s", err.Error())
			onfailure(rw, r, err)
			return
		}

		if user != nil {
			switch {
			case len(user.Roles) == 1:
				if user.HasRole(schema.RoleApi) {
					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
					return
				}
			case len(user.Roles) >= 2:
				if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleSupport, schema.RoleAdmin}) {
					ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
					onsuccess.ServeHTTP(rw, r.WithContext(ctx))
					return
				}
			default:
				cclog.Info("auth user api -> authentication failed: missing role")
				onfailure(rw, r, errors.New("unauthorized"))
			}
		}
		cclog.Info("auth user api -> authentication failed: no auth")
		onfailure(rw, r, errors.New("unauthorized"))
	})
}

func (auth *Authentication) AuthMetricStoreAPI(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.JwtAuth.AuthViaJWT(rw, r)
		if err != nil {
			cclog.Infof("auth metricstore api -> authentication failed: %s", err.Error())
			onfailure(rw, r, err)
			return
		}

		if user != nil {
			switch {
			case len(user.Roles) == 1:
@@ -347,23 +522,23 @@ func (auth *Authentication) AuthUserApi(
				return
			}
			default:
				log.Info("auth user api -> authentication failed: missing role")
				cclog.Info("auth metricstore api -> authentication failed: missing role")
				onfailure(rw, r, errors.New("unauthorized"))
			}
		}
		log.Info("auth user api -> authentication failed: no auth")
		cclog.Info("auth metricstore api -> authentication failed: no auth")
		onfailure(rw, r, errors.New("unauthorized"))
	})
}

func (auth *Authentication) AuthConfigApi(
func (auth *Authentication) AuthConfigAPI(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.AuthViaSession(rw, r)
		if err != nil {
			log.Infof("auth config api -> authentication failed: %s", err.Error())
			cclog.Infof("auth config api -> authentication failed: %s", err.Error())
			onfailure(rw, r, err)
			return
		}
@@ -372,19 +547,19 @@ func (auth *Authentication) AuthConfigApi(
			onsuccess.ServeHTTP(rw, r.WithContext(ctx))
			return
		}
		log.Info("auth config api -> authentication failed: no auth")
		cclog.Info("auth config api -> authentication failed: no auth")
		onfailure(rw, r, errors.New("unauthorized"))
	})
}

func (auth *Authentication) AuthFrontendApi(
func (auth *Authentication) AuthFrontendAPI(
	onsuccess http.Handler,
	onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		user, err := auth.AuthViaSession(rw, r)
		if err != nil {
			log.Infof("auth frontend api -> authentication failed: %s", err.Error())
			cclog.Infof("auth frontend api -> authentication failed: %s", err.Error())
			onfailure(rw, r, err)
			return
		}
@@ -393,7 +568,7 @@ func (auth *Authentication) AuthFrontendApi(
			onsuccess.ServeHTTP(rw, r.WithContext(ctx))
			return
		}
		log.Info("auth frontend api -> authentication failed: no auth")
		cclog.Info("auth frontend api -> authentication failed: no auth")
		onfailure(rw, r, errors.New("unauthorized"))
	})
}
@@ -417,3 +592,42 @@ func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
		onsuccess.ServeHTTP(rw, r)
	})
}

// Helper moved to middleware auth handlers
func securedCheck(user *schema.User, r *http.Request) error {
	if user == nil {
		return fmt.Errorf("no user for secured check")
	}

	// extract IP address for checking
	IPAddress := r.Header.Get("X-Real-Ip")
	if IPAddress == "" {
		IPAddress = r.Header.Get("X-Forwarded-For")
	}
	if IPAddress == "" {
		IPAddress = r.RemoteAddr
	}

	// Handle both IPv4 and IPv6 addresses properly
	// For IPv6, this will strip the port and brackets
	// For IPv4, this will strip the port
	if host, _, err := net.SplitHostPort(IPAddress); err == nil {
		IPAddress = host
	}
	// If SplitHostPort fails, IPAddress is already just a host (no port)

	// If nothing is declared in the config: deny all requests to this API endpoint
	if len(config.Keys.APIAllowedIPs) == 0 {
		return fmt.Errorf("missing configuration key ApiAllowedIPs")
	}
	// If a wildcard is declared in the config: continue
	if config.Keys.APIAllowedIPs[0] == "*" {
		return nil
	}
	// check if IP is allowed
	if !util.Contains(config.Keys.APIAllowedIPs, IPAddress) {
		return fmt.Errorf("unknown ip: %v", IPAddress)
	}

	return nil
}
|
||||
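For context, `securedCheck` is meant to gate privileged REST endpoints by client IP. A hypothetical caller might look like the sketch below; the handler itself and the way the user is obtained are assumptions, not part of this diff.

```go
package auth

import (
	"net/http"

	"github.com/ClusterCockpit/cc-lib/schema"
)

// exampleSecuredHandler is hypothetical: it shows where securedCheck slots
// into a handler chain, rejecting callers whose IP is not in ApiAllowedIPs.
func exampleSecuredHandler(user *schema.User) http.HandlerFunc {
	return func(rw http.ResponseWriter, r *http.Request) {
		if err := securedCheck(user, r); err != nil {
			http.Error(rw, err.Error(), http.StatusForbidden)
			return
		}
		rw.WriteHeader(http.StatusOK)
	}
}
```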
internal/auth/auth_test.go (new file, 176 lines)
@@ -0,0 +1,176 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package auth

import (
	"net"
	"testing"
	"time"
)

// TestGetIPUserLimiter tests the rate limiter creation and retrieval
func TestGetIPUserLimiter(t *testing.T) {
	ip := "192.168.1.1"
	username := "testuser"

	// Get limiter for the first time
	limiter1 := getIPUserLimiter(ip, username)
	if limiter1 == nil {
		t.Fatal("Expected limiter to be created")
	}

	// Get the same limiter again
	limiter2 := getIPUserLimiter(ip, username)
	if limiter1 != limiter2 {
		t.Error("Expected to get the same limiter instance")
	}

	// Get a different limiter for different user
	limiter3 := getIPUserLimiter(ip, "otheruser")
	if limiter1 == limiter3 {
		t.Error("Expected different limiter for different user")
	}

	// Get a different limiter for different IP
	limiter4 := getIPUserLimiter("192.168.1.2", username)
	if limiter1 == limiter4 {
		t.Error("Expected different limiter for different IP")
	}
}

// TestRateLimiterBehavior tests that rate limiting works correctly
func TestRateLimiterBehavior(t *testing.T) {
	ip := "10.0.0.1"
	username := "ratelimituser"

	limiter := getIPUserLimiter(ip, username)

	// Should allow first 5 attempts
	for i := 0; i < 5; i++ {
		if !limiter.Allow() {
			t.Errorf("Request %d should be allowed within rate limit", i+1)
		}
	}

	// 6th attempt should be blocked
	if limiter.Allow() {
		t.Error("Request 6 should be blocked by rate limiter")
	}
}

// TestCleanupOldRateLimiters tests the cleanup function
func TestCleanupOldRateLimiters(t *testing.T) {
	// Clear all existing limiters first to avoid interference from other tests
	cleanupOldRateLimiters(time.Now().Add(24 * time.Hour))

	// Create some new rate limiters
	limiter1 := getIPUserLimiter("1.1.1.1", "user1")
	limiter2 := getIPUserLimiter("2.2.2.2", "user2")

	if limiter1 == nil || limiter2 == nil {
		t.Fatal("Failed to create test limiters")
	}

	// Cleanup limiters older than 1 second from now (should keep both)
	time.Sleep(10 * time.Millisecond) // Small delay to ensure timestamp difference
	cleanupOldRateLimiters(time.Now().Add(-1 * time.Second))

	// Verify they still exist (should get same instance)
	if getIPUserLimiter("1.1.1.1", "user1") != limiter1 {
		t.Error("Limiter 1 was incorrectly cleaned up")
	}
	if getIPUserLimiter("2.2.2.2", "user2") != limiter2 {
		t.Error("Limiter 2 was incorrectly cleaned up")
	}

	// Cleanup limiters older than 1 hour from now (should remove both)
	cleanupOldRateLimiters(time.Now().Add(2 * time.Hour))

	// Getting them again should create new instances
	newLimiter1 := getIPUserLimiter("1.1.1.1", "user1")
	if newLimiter1 == limiter1 {
		t.Error("Old limiter should have been cleaned up")
	}
}

// TestIPv4Extraction tests extracting IPv4 addresses
func TestIPv4Extraction(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{"IPv4 with port", "192.168.1.1:8080", "192.168.1.1"},
		{"IPv4 without port", "192.168.1.1", "192.168.1.1"},
		{"Localhost with port", "127.0.0.1:3000", "127.0.0.1"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := tt.input
			if host, _, err := net.SplitHostPort(result); err == nil {
				result = host
			}

			if result != tt.expected {
				t.Errorf("Expected %s, got %s", tt.expected, result)
			}
		})
	}
}

// TestIPv6Extraction tests extracting IPv6 addresses
func TestIPv6Extraction(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{"IPv6 with port", "[2001:db8::1]:8080", "2001:db8::1"},
		{"IPv6 localhost with port", "[::1]:3000", "::1"},
		{"IPv6 without port", "2001:db8::1", "2001:db8::1"},
		{"IPv6 localhost", "::1", "::1"},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := tt.input
			if host, _, err := net.SplitHostPort(result); err == nil {
				result = host
			}

			if result != tt.expected {
				t.Errorf("Expected %s, got %s", tt.expected, result)
			}
		})
	}
}

// TestIPExtractionEdgeCases tests edge cases for IP extraction
func TestIPExtractionEdgeCases(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected string
	}{
		{"Hostname without port", "example.com", "example.com"},
		{"Empty string", "", ""},
		{"Just port", ":8080", ""},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := tt.input
			if host, _, err := net.SplitHostPort(result); err == nil {
				result = host
			}

			if result != tt.expected {
				t.Errorf("Expected %s, got %s", tt.expected, result)
			}
		})
	}
}
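The tests above exercise `getIPUserLimiter` and `cleanupOldRateLimiters`, whose implementations are not part of this excerpt. A plausible sketch of what they imply follows, assuming golang.org/x/time/rate and a burst of 5, which would match the fifth-allowed/sixth-blocked behavior the tests check; the real code in internal/auth may well differ.

```go
// Plausible counterpart to the tests above; NOT the repository's actual code.
// Assumes golang.org/x/time/rate; key is "ip:username", burst is 5.
package auth

import (
	"sync"
	"time"

	"golang.org/x/time/rate"
)

type limiterEntry struct {
	limiter  *rate.Limiter
	lastSeen time.Time
}

var (
	limiterMu sync.Mutex
	limiters  = map[string]*limiterEntry{}
)

func getIPUserLimiter(ip, username string) *rate.Limiter {
	key := ip + ":" + username
	limiterMu.Lock()
	defer limiterMu.Unlock()
	if e, ok := limiters[key]; ok {
		e.lastSeen = time.Now()
		return e.limiter
	}
	// Burst of 5: five immediate attempts pass, the sixth is blocked,
	// matching TestRateLimiterBehavior above.
	e := &limiterEntry{
		limiter:  rate.NewLimiter(rate.Every(time.Minute), 5),
		lastSeen: time.Now(),
	}
	limiters[key] = e
	return e.limiter
}

// cleanupOldRateLimiters drops every limiter last seen before the cutoff.
func cleanupOldRateLimiters(cutoff time.Time) {
	limiterMu.Lock()
	defer limiterMu.Unlock()
	for k, e := range limiters {
		if e.lastSeen.Before(cutoff) {
			delete(limiters, k)
		}
	}
}
```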
@@ -1,7 +1,8 @@
 // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved.
+// All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

 package auth

 import (
@@ -13,13 +14,33 @@ import (
 	"strings"
 	"time"

-	"github.com/ClusterCockpit/cc-backend/internal/config"
-	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	"github.com/ClusterCockpit/cc-backend/pkg/log"
-	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/schema"
 	"github.com/golang-jwt/jwt/v5"
 )

+type JWTAuthConfig struct {
+	// Specifies for how long a JWT token shall be valid
+	// as a string parsable by time.ParseDuration().
+	MaxAge string `json:"max-age"`
+
+	// Specifies which cookie should be checked for a JWT token (if no authorization header is present)
+	CookieName string `json:"cookieName"`
+
+	// Deny login for users not in database (but defined in JWT).
+	// Ignore user roles defined in JWTs ('roles' claim), get them from db.
+	ValidateUser bool `json:"validateUser"`
+
+	// Specifies which issuer should be accepted when validating external JWTs ('iss' claim)
+	TrustedIssuer string `json:"trustedIssuer"`
+
+	// Should an non-existent user be added to the DB based on the information in the token
+	SyncUserOnLogin bool `json:"syncUserOnLogin"`
+
+	// Should an existent user be updated in the DB based on the information in the token
+	UpdateUserOnLogin bool `json:"updateUserOnLogin"`
+}
+
 type JWTAuthenticator struct {
 	publicKey  ed25519.PublicKey
 	privateKey ed25519.PrivateKey
@@ -28,17 +49,17 @@ type JWTAuthenticator struct {
 func (ja *JWTAuthenticator) Init() error {
 	pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
 	if pubKey == "" || privKey == "" {
-		log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
+		cclog.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
 	} else {
 		bytes, err := base64.StdEncoding.DecodeString(pubKey)
 		if err != nil {
-			log.Warn("Could not decode JWT public key")
+			cclog.Warn("Could not decode JWT public key")
 			return err
 		}
 		ja.publicKey = ed25519.PublicKey(bytes)
 		bytes, err = base64.StdEncoding.DecodeString(privKey)
 		if err != nil {
-			log.Warn("Could not decode JWT private key")
+			cclog.Warn("Could not decode JWT private key")
 			return err
 		}
 		ja.privateKey = ed25519.PrivateKey(bytes)
@@ -62,7 +83,7 @@ func (ja *JWTAuthenticator) AuthViaJWT(
 		return nil, nil
 	}

-	token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
+	token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
 		if t.Method != jwt.SigningMethodEdDSA {
 			return nil, errors.New("only Ed25519/EdDSA supported")
 		}
@@ -70,51 +91,34 @@ func (ja *JWTAuthenticator) AuthViaJWT(
 		return ja.publicKey, nil
 	})
 	if err != nil {
-		log.Warn("Error while parsing JWT token")
+		cclog.Warn("Error while parsing JWT token")
 		return nil, err
 	}
 	if !token.Valid {
-		log.Warn("jwt token claims are not valid")
+		cclog.Warn("jwt token claims are not valid")
 		return nil, errors.New("jwt token claims are not valid")
 	}

 	// Token is valid, extract payload
 	claims := token.Claims.(jwt.MapClaims)
-	sub, _ := claims["sub"].(string)
-
-	var roles []string
-
-	// Validate user + roles from JWT against database?
-	if config.Keys.JwtConfig.ValidateUser {
-		ur := repository.GetUserRepository()
-		user, err := ur.GetUser(sub)
-		// Deny any logins for unknown usernames
-		if err != nil {
-			log.Warn("Could not find user from JWT in internal database.")
-			return nil, errors.New("unknown user")
-		}
-		// Take user roles from database instead of trusting the JWT
-		roles = user.Roles
-	} else {
-		// Extract roles from JWT (if present)
-		if rawroles, ok := claims["roles"].([]interface{}); ok {
-			for _, rr := range rawroles {
-				if r, ok := rr.(string); ok {
-					roles = append(roles, r)
-				}
-			}
-		}
-	}

-	return &schema.User{
-		Username:   sub,
-		Roles:      roles,
-		AuthType:   schema.AuthToken,
-		AuthSource: -1,
-	}, nil
+	// Use shared helper to get user from JWT claims
+	var user *schema.User
+	user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthToken, -1)
+	if err != nil {
+		return nil, err
+	}
+
+	// If not validating user, we only get roles from JWT (no projects for this auth method)
+	if !Keys.JwtConfig.ValidateUser {
+		user.Roles = extractRolesFromClaims(claims, false)
+		user.Projects = nil // Standard JWT auth doesn't include projects
+	}
+
+	return user, nil
 }

-// Generate a new JWT that can be used for authentication
+// ProvideJWT generates a new JWT that can be used for authentication
 func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
 	if ja.privateKey == nil {
 		return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
@@ -126,8 +130,8 @@ func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
 		"roles": user.Roles,
 		"iat":   now.Unix(),
 	}
-	if config.Keys.JwtConfig.MaxAge != "" {
-		d, err := time.ParseDuration(config.Keys.JwtConfig.MaxAge)
+	if Keys.JwtConfig.MaxAge != "" {
+		d, err := time.ParseDuration(Keys.JwtConfig.MaxAge)
 		if err != nil {
 			return "", errors.New("cannot parse max-age config key")
 		}
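`ProvideJWT` signs with the Ed25519 key pair that `Init` above decodes from the `JWT_PUBLIC_KEY` / `JWT_PRIVATE_KEY` environment variables (base64, standard encoding). A small standalone helper to generate a matching pair, for illustration only and not code from the repository:

```go
// Generates a base64-encoded Ed25519 key pair suitable for the
// JWT_PUBLIC_KEY / JWT_PRIVATE_KEY environment variables decoded in Init().
// Standalone illustration, not part of cc-backend.
package main

import (
	"crypto/ed25519"
	"crypto/rand"
	"encoding/base64"
	"fmt"
)

func main() {
	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		panic(err)
	}
	fmt.Println("JWT_PUBLIC_KEY=" + base64.StdEncoding.EncodeToString(pub))
	fmt.Println("JWT_PRIVATE_KEY=" + base64.StdEncoding.EncodeToString(priv))
}
```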
@@ -1,22 +1,19 @@
 // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved.
+// All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

 package auth

 import (
 	"crypto/ed25519"
-	"database/sql"
 	"encoding/base64"
 	"errors"
-	"fmt"
 	"net/http"
 	"os"

-	"github.com/ClusterCockpit/cc-backend/internal/config"
-	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	"github.com/ClusterCockpit/cc-backend/pkg/log"
-	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/schema"
 	"github.com/golang-jwt/jwt/v5"
 )

@@ -31,18 +28,18 @@ var _ Authenticator = (*JWTCookieSessionAuthenticator)(nil)
 func (ja *JWTCookieSessionAuthenticator) Init() error {
 	pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
 	if pubKey == "" || privKey == "" {
-		log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
+		cclog.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
 		return errors.New("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
 	} else {
 		bytes, err := base64.StdEncoding.DecodeString(pubKey)
 		if err != nil {
-			log.Warn("Could not decode JWT public key")
+			cclog.Warn("Could not decode JWT public key")
 			return err
 		}
 		ja.publicKey = ed25519.PublicKey(bytes)
 		bytes, err = base64.StdEncoding.DecodeString(privKey)
 		if err != nil {
-			log.Warn("Could not decode JWT private key")
+			cclog.Warn("Could not decode JWT private key")
 			return err
 		}
 		ja.privateKey = ed25519.PrivateKey(bytes)
@@ -53,36 +50,35 @@ func (ja *JWTCookieSessionAuthenticator) Init() error {
 	if keyFound && pubKeyCrossLogin != "" {
 		bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
 		if err != nil {
-			log.Warn("Could not decode cross login JWT public key")
+			cclog.Warn("Could not decode cross login JWT public key")
 			return err
 		}
 		ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
 	} else {
 		ja.publicKeyCrossLogin = nil
-		log.Debug("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
+		cclog.Debug("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
 		return errors.New("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
 	}

-	jc := config.Keys.JwtConfig
 	// Warn if other necessary settings are not configured
-	if jc != nil {
-		if jc.CookieName == "" {
-			log.Info("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
+	if Keys.JwtConfig != nil {
+		if Keys.JwtConfig.CookieName == "" {
+			cclog.Info("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
 			return errors.New("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
 		}
-		if !jc.ValidateUser {
-			log.Info("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
+		if !Keys.JwtConfig.ValidateUser {
+			cclog.Info("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
 		}
-		if jc.TrustedIssuer == "" {
-			log.Info("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
+		if Keys.JwtConfig.TrustedIssuer == "" {
+			cclog.Info("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
 			return errors.New("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
 		}
 	} else {
-		log.Warn("config for JWTs not configured (cross login via JWT cookie will fail)")
+		cclog.Warn("config for JWTs not configured (cross login via JWT cookie will fail)")
 		return errors.New("config for JWTs not configured (cross login via JWT cookie will fail)")
 	}

-	log.Info("JWT Cookie Session authenticator successfully registered")
+	cclog.Info("JWT Cookie Session authenticator successfully registered")
 	return nil
 }

@@ -92,7 +88,7 @@ func (ja *JWTCookieSessionAuthenticator) CanLogin(
 	rw http.ResponseWriter,
 	r *http.Request,
 ) (*schema.User, bool) {
-	jc := config.Keys.JwtConfig
+	jc := Keys.JwtConfig
 	cookieName := ""
 	if jc.CookieName != "" {
 		cookieName = jc.CookieName
@@ -115,7 +111,7 @@ func (ja *JWTCookieSessionAuthenticator) Login(
 	rw http.ResponseWriter,
 	r *http.Request,
 ) (*schema.User, error) {
-	jc := config.Keys.JwtConfig
+	jc := Keys.JwtConfig
 	jwtCookie, err := r.Cookie(jc.CookieName)
 	var rawtoken string

@@ -123,7 +119,7 @@ func (ja *JWTCookieSessionAuthenticator) Login(
 		rawtoken = jwtCookie.Value
 	}

-	token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
+	token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
 		if t.Method != jwt.SigningMethodEdDSA {
 			return nil, errors.New("only Ed25519/EdDSA supported")
 		}
@@ -140,67 +136,26 @@ func (ja *JWTCookieSessionAuthenticator) Login(
 		return ja.publicKey, nil
 	})
 	if err != nil {
-		log.Warn("JWT cookie session: error while parsing token")
+		cclog.Warn("JWT cookie session: error while parsing token")
 		return nil, err
 	}

 	if !token.Valid {
-		log.Warn("jwt token claims are not valid")
+		cclog.Warn("jwt token claims are not valid")
 		return nil, errors.New("jwt token claims are not valid")
 	}

 	claims := token.Claims.(jwt.MapClaims)
-	sub, _ := claims["sub"].(string)
-
-	var roles []string
-	projects := make([]string, 0)
-
-	if jc.ValidateUser {
-		var err error
-		user, err = repository.GetUserRepository().GetUser(sub)
-		if err != nil && err != sql.ErrNoRows {
-			log.Errorf("Error while loading user '%v'", sub)
-		}
-
-		// Deny any logins for unknown usernames
-		if user == nil {
-			log.Warn("Could not find user from JWT in internal database.")
-			return nil, errors.New("unknown user")
-		}
-	} else {
-		var name string
-		if wrap, ok := claims["name"].(map[string]interface{}); ok {
-			if vals, ok := wrap["values"].([]interface{}); ok {
-				if len(vals) != 0 {
-					name = fmt.Sprintf("%v", vals[0])
-
-					for i := 1; i < len(vals); i++ {
-						name += fmt.Sprintf(" %v", vals[i])
-					}
-				}
-			}
-		}
-
-		// Extract roles from JWT (if present)
-		if rawroles, ok := claims["roles"].([]interface{}); ok {
-			for _, rr := range rawroles {
-				if r, ok := rr.(string); ok {
-					roles = append(roles, r)
-				}
-			}
-		}
-		user = &schema.User{
-			Username:   sub,
-			Name:       name,
-			Roles:      roles,
-			Projects:   projects,
-			AuthType:   schema.AuthSession,
-			AuthSource: schema.AuthViaToken,
-		}
-
-	if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
-		handleTokenUser(user)
-	}
+	// Use shared helper to get user from JWT claims
+	user, err = getUserFromJWT(claims, jc.ValidateUser, schema.AuthSession, schema.AuthViaToken)
+	if err != nil {
+		return nil, err
+	}
+
+	// Sync or update user if configured
+	if !jc.ValidateUser && (jc.SyncUserOnLogin || jc.UpdateUserOnLogin) {
+		handleTokenUser(user)
+	}

 	// (Ask browser to) Delete JWT cookie
internal/auth/jwtHelpers.go (new file, 136 lines)
@@ -0,0 +1,136 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package auth

import (
	"database/sql"
	"errors"
	"fmt"

	"github.com/ClusterCockpit/cc-backend/internal/repository"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/golang-jwt/jwt/v5"
)

// extractStringFromClaims extracts a string value from JWT claims
func extractStringFromClaims(claims jwt.MapClaims, key string) string {
	if val, ok := claims[key].(string); ok {
		return val
	}
	return ""
}

// extractRolesFromClaims extracts roles from JWT claims
// If validateRoles is true, only valid roles are returned
func extractRolesFromClaims(claims jwt.MapClaims, validateRoles bool) []string {
	var roles []string

	if rawroles, ok := claims["roles"].([]any); ok {
		for _, rr := range rawroles {
			if r, ok := rr.(string); ok {
				if validateRoles {
					if schema.IsValidRole(r) {
						roles = append(roles, r)
					}
				} else {
					roles = append(roles, r)
				}
			}
		}
	}

	return roles
}

// extractProjectsFromClaims extracts projects from JWT claims
func extractProjectsFromClaims(claims jwt.MapClaims) []string {
	projects := make([]string, 0)

	if rawprojs, ok := claims["projects"].([]any); ok {
		for _, pp := range rawprojs {
			if p, ok := pp.(string); ok {
				projects = append(projects, p)
			}
		}
	} else if rawprojs, ok := claims["projects"]; ok {
		if projSlice, ok := rawprojs.([]string); ok {
			projects = append(projects, projSlice...)
		}
	}

	return projects
}

// extractNameFromClaims extracts name from JWT claims
// Handles both simple string and complex nested structure
func extractNameFromClaims(claims jwt.MapClaims) string {
	// Try simple string first
	if name, ok := claims["name"].(string); ok {
		return name
	}

	// Try nested structure: {name: {values: [...]}}
	if wrap, ok := claims["name"].(map[string]any); ok {
		if vals, ok := wrap["values"].([]any); ok {
			if len(vals) == 0 {
				return ""
			}

			name := fmt.Sprintf("%v", vals[0])
			for i := 1; i < len(vals); i++ {
				name += fmt.Sprintf(" %v", vals[i])
			}
			return name
		}
	}

	return ""
}

// getUserFromJWT creates or retrieves a user based on JWT claims
// If validateUser is true, the user must exist in the database
// Otherwise, a new user object is created from claims
// authSource should be a schema.AuthSource constant (like schema.AuthViaToken)
func getUserFromJWT(claims jwt.MapClaims, validateUser bool, authType schema.AuthType, authSource schema.AuthSource) (*schema.User, error) {
	sub := extractStringFromClaims(claims, "sub")
	if sub == "" {
		return nil, errors.New("missing 'sub' claim in JWT")
	}

	if validateUser {
		// Validate user against database
		ur := repository.GetUserRepository()
		user, err := ur.GetUser(sub)
		if err != nil && err != sql.ErrNoRows {
			cclog.Errorf("Error while loading user '%v': %v", sub, err)
			return nil, fmt.Errorf("database error: %w", err)
		}

		// Deny any logins for unknown usernames
		if user == nil || err == sql.ErrNoRows {
			cclog.Warn("Could not find user from JWT in internal database.")
			return nil, errors.New("unknown user")
		}

		// Return database user (with database roles)
		return user, nil
	}

	// Create user from JWT claims
	name := extractNameFromClaims(claims)
	roles := extractRolesFromClaims(claims, true) // Validate roles
	projects := extractProjectsFromClaims(claims)

	return &schema.User{
		Username:   sub,
		Name:       name,
		Roles:      roles,
		Projects:   projects,
		AuthType:   authType,
		AuthSource: authSource,
	}, nil
}
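For reference, a token payload exercising every branch of these helpers, the nested `name` structure, `roles`, and `projects`, might decode to something like the following (all values illustrative):

```json
{
  "sub": "alice",
  "name": { "values": ["Alice", "Smith"] },
  "roles": ["user", "admin"],
  "projects": ["project1", "project2"],
  "iat": 1700000000
}
```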
internal/auth/jwtHelpers_test.go (new file, 281 lines)
@@ -0,0 +1,281 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package auth

import (
	"testing"

	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/golang-jwt/jwt/v5"
)

// TestExtractStringFromClaims tests extracting string values from JWT claims
func TestExtractStringFromClaims(t *testing.T) {
	claims := jwt.MapClaims{
		"sub":   "testuser",
		"email": "test@example.com",
		"age":   25, // not a string
	}

	tests := []struct {
		name     string
		key      string
		expected string
	}{
		{"Existing string", "sub", "testuser"},
		{"Another string", "email", "test@example.com"},
		{"Non-existent key", "missing", ""},
		{"Non-string value", "age", ""},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := extractStringFromClaims(claims, tt.key)
			if result != tt.expected {
				t.Errorf("Expected %s, got %s", tt.expected, result)
			}
		})
	}
}

// TestExtractRolesFromClaims tests role extraction and validation
func TestExtractRolesFromClaims(t *testing.T) {
	tests := []struct {
		name          string
		claims        jwt.MapClaims
		validateRoles bool
		expected      []string
	}{
		{
			name: "Valid roles without validation",
			claims: jwt.MapClaims{
				"roles": []any{"admin", "user", "invalid_role"},
			},
			validateRoles: false,
			expected:      []string{"admin", "user", "invalid_role"},
		},
		{
			name: "Valid roles with validation",
			claims: jwt.MapClaims{
				"roles": []any{"admin", "user", "api"},
			},
			validateRoles: true,
			expected:      []string{"admin", "user", "api"},
		},
		{
			name: "Invalid roles with validation",
			claims: jwt.MapClaims{
				"roles": []any{"invalid_role", "fake_role"},
			},
			validateRoles: true,
			expected:      []string{}, // Should filter out invalid roles
		},
		{
			name:          "No roles claim",
			claims:        jwt.MapClaims{},
			validateRoles: false,
			expected:      []string{},
		},
		{
			name: "Non-array roles",
			claims: jwt.MapClaims{
				"roles": "admin",
			},
			validateRoles: false,
			expected:      []string{},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := extractRolesFromClaims(tt.claims, tt.validateRoles)

			if len(result) != len(tt.expected) {
				t.Errorf("Expected %d roles, got %d", len(tt.expected), len(result))
				return
			}

			for i, role := range result {
				if i >= len(tt.expected) || role != tt.expected[i] {
					t.Errorf("Expected role %s at position %d, got %s", tt.expected[i], i, role)
				}
			}
		})
	}
}

// TestExtractProjectsFromClaims tests project extraction from claims
func TestExtractProjectsFromClaims(t *testing.T) {
	tests := []struct {
		name     string
		claims   jwt.MapClaims
		expected []string
	}{
		{
			name: "Projects as array of interfaces",
			claims: jwt.MapClaims{
				"projects": []any{"project1", "project2", "project3"},
			},
			expected: []string{"project1", "project2", "project3"},
		},
		{
			name: "Projects as string array",
			claims: jwt.MapClaims{
				"projects": []string{"projectA", "projectB"},
			},
			expected: []string{"projectA", "projectB"},
		},
		{
			name:     "No projects claim",
			claims:   jwt.MapClaims{},
			expected: []string{},
		},
		{
			name: "Mixed types in projects array",
			claims: jwt.MapClaims{
				"projects": []any{"project1", 123, "project2"},
			},
			expected: []string{"project1", "project2"}, // Should skip non-strings
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := extractProjectsFromClaims(tt.claims)

			if len(result) != len(tt.expected) {
				t.Errorf("Expected %d projects, got %d", len(tt.expected), len(result))
				return
			}

			for i, project := range result {
				if i >= len(tt.expected) || project != tt.expected[i] {
					t.Errorf("Expected project %s at position %d, got %s", tt.expected[i], i, project)
				}
			}
		})
	}
}

// TestExtractNameFromClaims tests name extraction from various formats
func TestExtractNameFromClaims(t *testing.T) {
	tests := []struct {
		name     string
		claims   jwt.MapClaims
		expected string
	}{
		{
			name: "Simple string name",
			claims: jwt.MapClaims{
				"name": "John Doe",
			},
			expected: "John Doe",
		},
		{
			name: "Nested name structure",
			claims: jwt.MapClaims{
				"name": map[string]any{
					"values": []any{"John", "Doe"},
				},
			},
			expected: "John Doe",
		},
		{
			name: "Nested name with single value",
			claims: jwt.MapClaims{
				"name": map[string]any{
					"values": []any{"Alice"},
				},
			},
			expected: "Alice",
		},
		{
			name:     "No name claim",
			claims:   jwt.MapClaims{},
			expected: "",
		},
		{
			name: "Empty nested values",
			claims: jwt.MapClaims{
				"name": map[string]any{
					"values": []any{},
				},
			},
			expected: "",
		},
		{
			name: "Nested with non-string values",
			claims: jwt.MapClaims{
				"name": map[string]any{
					"values": []any{123, "Smith"},
				},
			},
			expected: "123 Smith", // Should convert to string
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := extractNameFromClaims(tt.claims)
			if result != tt.expected {
				t.Errorf("Expected '%s', got '%s'", tt.expected, result)
			}
		})
	}
}

// TestGetUserFromJWT_NoValidation tests getUserFromJWT without database validation
func TestGetUserFromJWT_NoValidation(t *testing.T) {
	claims := jwt.MapClaims{
		"sub":      "testuser",
		"name":     "Test User",
		"roles":    []any{"user", "admin"},
		"projects": []any{"project1", "project2"},
	}

	user, err := getUserFromJWT(claims, false, schema.AuthToken, -1)

	if err != nil {
		t.Fatalf("Unexpected error: %v", err)
	}

	if user.Username != "testuser" {
		t.Errorf("Expected username 'testuser', got '%s'", user.Username)
	}

	if user.Name != "Test User" {
		t.Errorf("Expected name 'Test User', got '%s'", user.Name)
	}

	if len(user.Roles) != 2 {
		t.Errorf("Expected 2 roles, got %d", len(user.Roles))
	}

	if len(user.Projects) != 2 {
		t.Errorf("Expected 2 projects, got %d", len(user.Projects))
	}

	if user.AuthType != schema.AuthToken {
		t.Errorf("Expected AuthType %v, got %v", schema.AuthToken, user.AuthType)
	}
}

// TestGetUserFromJWT_MissingSub tests error when sub claim is missing
func TestGetUserFromJWT_MissingSub(t *testing.T) {
	claims := jwt.MapClaims{
		"name": "Test User",
	}

	_, err := getUserFromJWT(claims, false, schema.AuthToken, -1)

	if err == nil {
		t.Error("Expected error for missing sub claim")
	}

	if err.Error() != "missing 'sub' claim in JWT" {
		t.Errorf("Expected specific error message, got: %v", err)
	}
}
@@ -1,11 +1,11 @@
 // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved.
+// All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

 package auth

 import (
 	"database/sql"
 	"encoding/base64"
 	"errors"
 	"fmt"
@@ -13,10 +13,8 @@ import (
 	"os"
 	"strings"

-	"github.com/ClusterCockpit/cc-backend/internal/config"
-	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	"github.com/ClusterCockpit/cc-backend/pkg/log"
-	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/schema"
 	"github.com/golang-jwt/jwt/v5"
 )

@@ -30,13 +28,13 @@ func (ja *JWTSessionAuthenticator) Init() error {
 	if pubKey := os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
 		bytes, err := base64.StdEncoding.DecodeString(pubKey)
 		if err != nil {
-			log.Warn("Could not decode cross login JWT HS512 key")
+			cclog.Warn("Could not decode cross login JWT HS512 key")
 			return err
 		}
 		ja.loginTokenKey = bytes
 	}

-	log.Info("JWT Session authenticator successfully registered")
+	cclog.Info("JWT Session authenticator successfully registered")
 	return nil
 }

@@ -60,87 +58,33 @@ func (ja *JWTSessionAuthenticator) Login(
 		rawtoken = r.URL.Query().Get("login-token")
 	}

-	token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
+	token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
 		if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
 			return ja.loginTokenKey, nil
 		}
 		return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
 	})
 	if err != nil {
-		log.Warn("Error while parsing jwt token")
+		cclog.Warn("Error while parsing jwt token")
 		return nil, err
 	}

 	if !token.Valid {
-		log.Warn("jwt token claims are not valid")
+		cclog.Warn("jwt token claims are not valid")
 		return nil, errors.New("jwt token claims are not valid")
 	}

 	claims := token.Claims.(jwt.MapClaims)
-	sub, _ := claims["sub"].(string)
-
-	var roles []string
-	projects := make([]string, 0)
-
-	if config.Keys.JwtConfig.ValidateUser {
-		var err error
-		user, err = repository.GetUserRepository().GetUser(sub)
-		if err != nil && err != sql.ErrNoRows {
-			log.Errorf("Error while loading user '%v'", sub)
-		}
-
-		// Deny any logins for unknown usernames
-		if user == nil {
-			log.Warn("Could not find user from JWT in internal database.")
-			return nil, errors.New("unknown user")
-		}
-	} else {
-		var name string
-		if wrap, ok := claims["name"].(map[string]interface{}); ok {
-			if vals, ok := wrap["values"].([]interface{}); ok {
-				if len(vals) != 0 {
-					name = fmt.Sprintf("%v", vals[0])
-
-					for i := 1; i < len(vals); i++ {
-						name += fmt.Sprintf(" %v", vals[i])
-					}
-				}
-			}
-		}
-
-		// Extract roles from JWT (if present)
-		if rawroles, ok := claims["roles"].([]interface{}); ok {
-			for _, rr := range rawroles {
-				if r, ok := rr.(string); ok {
-					if schema.IsValidRole(r) {
-						roles = append(roles, r)
-					}
-				}
-			}
-		}
-
-		if rawprojs, ok := claims["projects"].([]interface{}); ok {
-			for _, pp := range rawprojs {
-				if p, ok := pp.(string); ok {
-					projects = append(projects, p)
-				}
-			}
-		} else if rawprojs, ok := claims["projects"]; ok {
-			projects = append(projects, rawprojs.([]string)...)
-		}
-
-		user = &schema.User{
-			Username:   sub,
-			Name:       name,
-			Roles:      roles,
-			Projects:   projects,
-			AuthType:   schema.AuthSession,
-			AuthSource: schema.AuthViaToken,
-		}
-
-	if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
-		handleTokenUser(user)
-	}
+	// Use shared helper to get user from JWT claims
+	user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthSession, schema.AuthViaToken)
+	if err != nil {
+		return nil, err
+	}
+
+	// Sync or update user if configured
+	if !Keys.JwtConfig.ValidateUser && (Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin) {
+		handleTokenUser(user)
+	}

 	return user, nil
@@ -1,7 +1,8 @@
 // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved.
+// All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

 package auth

 import (
@@ -11,13 +12,26 @@ import (
 	"os"
 	"strings"

-	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	"github.com/ClusterCockpit/cc-backend/pkg/log"
-	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/schema"
 	"github.com/go-ldap/ldap/v3"
 )

+type LdapConfig struct {
+	URL             string `json:"url"`
+	UserBase        string `json:"user_base"`
+	SearchDN        string `json:"search_dn"`
+	UserBind        string `json:"user_bind"`
+	UserFilter      string `json:"user_filter"`
+	UserAttr        string `json:"username_attr"`
+	SyncInterval    string `json:"sync_interval"` // Parsed using time.ParseDuration.
+	SyncDelOldUsers bool   `json:"sync_del_old_users"`
+
+	// Should an non-existent user be added to the DB if user exists in ldap directory
+	SyncUserOnLogin bool `json:"syncUserOnLogin"`
+}
+
 type LdapAuthenticator struct {
 	syncPassword string
 	UserAttr     string
@@ -28,13 +42,11 @@ var _ Authenticator = (*LdapAuthenticator)(nil)
 func (la *LdapAuthenticator) Init() error {
 	la.syncPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
 	if la.syncPassword == "" {
-		log.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)")
+		cclog.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)")
 	}

-	lc := config.Keys.LdapConfig
-
-	if lc.UserAttr != "" {
-		la.UserAttr = lc.UserAttr
+	if Keys.LdapConfig.UserAttr != "" {
+		la.UserAttr = Keys.LdapConfig.UserAttr
 	} else {
 		la.UserAttr = "gecos"
 	}
@@ -48,7 +60,7 @@ func (la *LdapAuthenticator) CanLogin(
 	rw http.ResponseWriter,
 	r *http.Request,
 ) (*schema.User, bool) {
-	lc := config.Keys.LdapConfig
+	lc := Keys.LdapConfig

 	if user != nil {
 		if user.AuthSource == schema.AuthViaLDAP {
@@ -58,7 +70,8 @@ func (la *LdapAuthenticator) CanLogin(
 		if lc.SyncUserOnLogin {
 			l, err := la.getLdapConnection(true)
 			if err != nil {
-				log.Error("LDAP connection error")
+				cclog.Error("LDAP connection error")
 				return nil, false
 			}
 			defer l.Close()

@@ -71,12 +84,12 @@ func (la *LdapAuthenticator) CanLogin(

 			sr, err := l.Search(searchRequest)
 			if err != nil {
-				log.Warn(err)
+				cclog.Warn(err)
 				return nil, false
 			}

 			if len(sr.Entries) != 1 {
-				log.Warn("LDAP: User does not exist or too many entries returned")
+				cclog.Warn("LDAP: User does not exist or too many entries returned")
 				return nil, false
 			}

@@ -96,7 +109,7 @@ func (la *LdapAuthenticator) CanLogin(
 			}

 			if err := repository.GetUserRepository().AddUser(user); err != nil {
-				log.Errorf("User '%s' LDAP: Insert into DB failed", username)
+				cclog.Errorf("User '%s' LDAP: Insert into DB failed", username)
 				return nil, false
 			}

@@ -114,14 +127,14 @@ func (la *LdapAuthenticator) Login(
 ) (*schema.User, error) {
 	l, err := la.getLdapConnection(false)
 	if err != nil {
-		log.Warn("Error while getting ldap connection")
+		cclog.Warn("Error while getting ldap connection")
 		return nil, err
 	}
 	defer l.Close()

-	userDn := strings.Replace(config.Keys.LdapConfig.UserBind, "{username}", user.Username, -1)
+	userDn := strings.ReplaceAll(Keys.LdapConfig.UserBind, "{username}", user.Username)
 	if err := l.Bind(userDn, r.FormValue("password")); err != nil {
-		log.Errorf("AUTH/LDAP > Authentication for user %s failed: %v",
+		cclog.Errorf("AUTH/LDAP > Authentication for user %s failed: %v",
 			user.Username, err)
 		return nil, fmt.Errorf("Authentication failed")
 	}
@@ -130,11 +143,11 @@ func (la *LdapAuthenticator) Login(
 }

 func (la *LdapAuthenticator) Sync() error {
-	const IN_DB int = 1
-	const IN_LDAP int = 2
-	const IN_BOTH int = 3
+	const InDB int = 1
+	const InLdap int = 2
+	const InBoth int = 3
 	ur := repository.GetUserRepository()
-	lc := config.Keys.LdapConfig
+	lc := Keys.LdapConfig

 	users := map[string]int{}
 	usernames, err := ur.GetLdapUsernames()
@@ -143,12 +156,12 @@ func (la *LdapAuthenticator) Sync() error {
 	}

 	for _, username := range usernames {
-		users[username] = IN_DB
+		users[username] = InDB
 	}

 	l, err := la.getLdapConnection(true)
 	if err != nil {
-		log.Error("LDAP connection error")
+		cclog.Error("LDAP connection error")
 		return err
 	}
 	defer l.Close()
@@ -159,7 +172,7 @@ func (la *LdapAuthenticator) Sync() error {
 		lc.UserFilter,
 		[]string{"dn", "uid", la.UserAttr}, nil))
 	if err != nil {
-		log.Warn("LDAP search error")
+		cclog.Warn("LDAP search error")
 		return err
 	}

@@ -172,18 +185,18 @@ func (la *LdapAuthenticator) Sync() error {

 		_, ok := users[username]
 		if !ok {
-			users[username] = IN_LDAP
+			users[username] = InLdap
 			newnames[username] = entry.GetAttributeValue(la.UserAttr)
 		} else {
-			users[username] = IN_BOTH
+			users[username] = InBoth
 		}
 	}

 	for username, where := range users {
-		if where == IN_DB && lc.SyncDelOldUsers {
+		if where == InDB && lc.SyncDelOldUsers {
 			ur.DelUser(username)
-			log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
-		} else if where == IN_LDAP {
+			cclog.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
+		} else if where == InLdap {
 			name := newnames[username]

 			var roles []string
@@ -198,9 +211,9 @@ func (la *LdapAuthenticator) Sync() error {
 				AuthSource: schema.AuthViaLDAP,
 			}

-			log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
+			cclog.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
 			if err := ur.AddUser(user); err != nil {
-				log.Errorf("User '%s' LDAP: Insert into DB failed", username)
+				cclog.Errorf("User '%s' LDAP: Insert into DB failed", username)
 				return err
 			}
 		}
@@ -210,17 +223,17 @@ func (la *LdapAuthenticator) Sync() error {
 }

 func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
-	lc := config.Keys.LdapConfig
-	conn, err := ldap.DialURL(lc.Url)
+	lc := Keys.LdapConfig
+	conn, err := ldap.DialURL(lc.URL)
 	if err != nil {
-		log.Warn("LDAP URL dial failed")
+		cclog.Warn("LDAP URL dial failed")
 		return nil, err
 	}

 	if admin {
 		if err := conn.Bind(lc.SearchDN, la.syncPassword); err != nil {
 			conn.Close()
-			log.Warn("LDAP connection bind failed")
+			cclog.Warn("LDAP connection bind failed")
 			return nil, err
 		}
 	}
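The `LdapConfig` struct introduced above maps, via its struct tags, to a JSON block in the main configuration. A fragment would look roughly like the sketch below; all values are placeholders, and only the key names come from the struct tags and the `configSchema` later in this changeset (which also states that `user_bind` must contain `uid={username}`).

```json
"ldap": {
  "url": "ldaps://ldap.example.com",
  "user_base": "ou=people,dc=example,dc=com",
  "search_dn": "cn=admin,dc=example,dc=com",
  "user_bind": "uid={username},ou=people,dc=example,dc=com",
  "user_filter": "(&(objectclass=posixAccount))",
  "username_attr": "gecos",
  "sync_interval": "24h",
  "sync_del_old_users": false,
  "syncUserOnLogin": true
}
```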
@@ -1,15 +1,16 @@
 // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved.
+// All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

 package auth

 import (
 	"fmt"
 	"net/http"

-	"github.com/ClusterCockpit/cc-backend/pkg/log"
-	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/schema"
 	"golang.org/x/crypto/bcrypt"
 )

@@ -27,19 +28,19 @@ func (la *LocalAuthenticator) CanLogin(
 	user *schema.User,
 	username string,
 	rw http.ResponseWriter,
-	r *http.Request) (*schema.User, bool) {
-
+	r *http.Request,
+) (*schema.User, bool) {
 	return user, user != nil && user.AuthSource == schema.AuthViaLocalPassword
 }

 func (la *LocalAuthenticator) Login(
 	user *schema.User,
 	rw http.ResponseWriter,
-	r *http.Request) (*schema.User, error) {
-
+	r *http.Request,
+) (*schema.User, error) {
 	if e := bcrypt.CompareHashAndPassword([]byte(user.Password),
 		[]byte(r.FormValue("password"))); e != nil {
-		log.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
+		cclog.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
 		return nil, fmt.Errorf("Authentication failed")
 	}
@@ -1,7 +1,8 @@
 // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
-// All rights reserved.
+// All rights reserved. This file is part of cc-backend.
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

 package auth

 import (
@@ -13,15 +14,20 @@ import (
 	"os"
 	"time"

-	"github.com/ClusterCockpit/cc-backend/internal/config"
 	"github.com/ClusterCockpit/cc-backend/internal/repository"
-	"github.com/ClusterCockpit/cc-backend/pkg/log"
-	"github.com/ClusterCockpit/cc-backend/pkg/schema"
+	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
+	"github.com/ClusterCockpit/cc-lib/schema"
 	"github.com/coreos/go-oidc/v3/oidc"
 	"github.com/gorilla/mux"
 	"golang.org/x/oauth2"
 )

+type OpenIDConfig struct {
+	Provider          string `json:"provider"`
+	SyncUserOnLogin   bool   `json:"syncUserOnLogin"`
+	UpdateUserOnLogin bool   `json:"updateUserOnLogin"`
+}
+
 type OIDC struct {
 	client   *oauth2.Config
 	provider *oidc.Provider
@@ -48,18 +54,23 @@ func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value strin
 	http.SetCookie(w, c)
 }

+// NewOIDC creates a new OIDC authenticator with the configured provider
 func NewOIDC(a *Authentication) *OIDC {
-	provider, err := oidc.NewProvider(context.Background(), config.Keys.OpenIDConfig.Provider)
+	// Use context with timeout for provider initialization
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	provider, err := oidc.NewProvider(ctx, Keys.OpenIDConfig.Provider)
 	if err != nil {
-		log.Fatal(err)
+		cclog.Fatal(err)
 	}
 	clientID := os.Getenv("OID_CLIENT_ID")
 	if clientID == "" {
-		log.Warn("environment variable 'OID_CLIENT_ID' not set (Open ID connect auth will not work)")
+		cclog.Warn("environment variable 'OID_CLIENT_ID' not set (Open ID connect auth will not work)")
 	}
 	clientSecret := os.Getenv("OID_CLIENT_SECRET")
 	if clientSecret == "" {
-		log.Warn("environment variable 'OID_CLIENT_SECRET' not set (Open ID connect auth will not work)")
+		cclog.Warn("environment variable 'OID_CLIENT_SECRET' not set (Open ID connect auth will not work)")
 	}

 	client := &oauth2.Config{
@@ -105,13 +116,18 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
 		http.Error(rw, "Code not found", http.StatusBadRequest)
 		return
 	}
-	token, err := oa.client.Exchange(context.Background(), code, oauth2.VerifierOption(codeVerifier))
+	// Exchange authorization code for token with timeout
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+
+	token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier))
 	if err != nil {
 		http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
 		return
 	}

-	userInfo, err := oa.provider.UserInfo(context.Background(), oauth2.StaticTokenSource(token))
+	// Get user info from OIDC provider with same timeout
+	userInfo, err := oa.provider.UserInfo(ctx, oauth2.StaticTokenSource(token))
 	if err != nil {
 		http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
 		return
@@ -168,14 +184,14 @@ func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
 		AuthSource: schema.AuthViaOIDC,
 	}

-	if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
+	if Keys.OpenIDConfig.SyncUserOnLogin || Keys.OpenIDConfig.UpdateUserOnLogin {
 		handleOIDCUser(user)
 	}

 	oa.authentication.SaveSession(rw, r, user)
-	log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
-	ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
-	http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(ctx))
+	cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
+	userCtx := context.WithValue(r.Context(), repository.ContextUserKey, user)
+	http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(userCtx))
 }

 func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
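`OpenIDConfig` above, plus the `OID_CLIENT_ID` and `OID_CLIENT_SECRET` environment variables checked in `NewOIDC`, are all the OIDC setup this code reads. A configuration fragment matching the struct tags might look like this (placeholder values):

```json
"oidc": {
  "provider": "https://auth.example.com/realms/hpc",
  "syncUserOnLogin": true,
  "updateUserOnLogin": false
}
```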
internal/auth/schema.go (new file, 96 lines)
@@ -0,0 +1,96 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package auth

var configSchema = `
{
	"jwts": {
		"description": "For JWT token authentication.",
		"type": "object",
		"properties": {
			"max-age": {
				"description": "Configure how long a token is valid. As string parsable by time.ParseDuration()",
				"type": "string"
			},
			"cookieName": {
				"description": "Cookie that should be checked for a JWT token.",
				"type": "string"
			},
			"validateUser": {
				"description": "Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.",
				"type": "boolean"
			},
			"trustedIssuer": {
				"description": "Issuer that should be accepted when validating external JWTs ",
				"type": "string"
			},
			"syncUserOnLogin": {
				"description": "Add non-existent user to DB at login attempt with values provided in JWT.",
				"type": "boolean"
			}
		},
		"required": ["max-age"]
	},
	"oidc": {
		"provider": {
			"description": "",
			"type": "string"
		},
		"syncUserOnLogin": {
			"description": "",
			"type": "boolean"
		},
		"updateUserOnLogin": {
			"description": "",
			"type": "boolean"
		},
		"required": ["provider"]
	},
	"ldap": {
		"description": "For LDAP Authentication and user synchronisation.",
		"type": "object",
		"properties": {
			"url": {
				"description": "URL of LDAP directory server.",
				"type": "string"
			},
			"user_base": {
				"description": "Base DN of user tree root.",
				"type": "string"
			},
			"search_dn": {
				"description": "DN for authenticating LDAP admin account with general read rights.",
				"type": "string"
			},
			"user_bind": {
				"description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.",
				"type": "string"
			},
			"user_filter": {
				"description": "Filter to extract users for syncing.",
				"type": "string"
			},
			"username_attr": {
				"description": "Attribute with full username. Default: gecos",
				"type": "string"
			},
			"sync_interval": {
				"description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.",
				"type": "string"
			},
			"sync_del_old_users": {
				"description": "Delete obsolete users in database.",
				"type": "boolean"
			},
			"syncUserOnLogin": {
				"description": "Add non-existent user to DB at login attempt if user exists in Ldap directory",
				"type": "boolean"
			}
		},
		"required": ["url", "user_base", "search_dn", "user_bind", "user_filter"]
	},
	"required": ["jwts"]
}`
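A minimal auth configuration satisfying this schema: only the `jwts` block with `max-age` is required, everything else is optional. Values below are illustrative.

```json
{
  "jwts": {
    "max-age": "2h",
    "validateUser": true
  }
}
```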
@@ -1,72 +1,158 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package config implements the program configuration data structures, validation and parsing
package config

import (
"bytes"
"encoding/json"
"log"
"os"
"time"

"github.com/ClusterCockpit/cc-backend/pkg/schema"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/resampler"
)

var Keys schema.ProgramConfig = schema.ProgramConfig{
type ProgramConfig struct {
// Address where the http (or https) server will listen on (for example: 'localhost:80').
Addr string `json:"addr"`

// Addresses from which secured admin API endpoints can be reached, can be wildcard "*"
APIAllowedIPs []string `json:"apiAllowedIPs"`

APISubjects *NATSConfig `json:"apiSubjects"`

// Drop root permissions once .env was read and the port was taken.
User string `json:"user"`
Group string `json:"group"`

// Disable authentication (for everything: API, Web-UI, ...)
DisableAuthentication bool `json:"disable-authentication"`

// If `embed-static-files` is true (default), the frontend files are directly
// embedded into the go binary and expected to be in web/frontend. Only if
// it is false the files in `static-files` are served instead.
EmbedStaticFiles bool `json:"embed-static-files"`
StaticFiles string `json:"static-files"`

// 'sqlite3' or 'mysql' (mysql will work for mariadb as well)
DBDriver string `json:"db-driver"`

// For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).
DB string `json:"db"`

// Keep all metric data in the metric data repositories,
// do not write to the job-archive.
DisableArchive bool `json:"disable-archive"`

EnableJobTaggers bool `json:"enable-job-taggers"`

// Validate json input against schema
Validate bool `json:"validate"`

// If 0 or empty, the session does not expire!
SessionMaxAge string `json:"session-max-age"`

// If both those options are not empty, use HTTPS using those certificates.
HTTPSCertFile string `json:"https-cert-file"`
HTTPSKeyFile string `json:"https-key-file"`

// If not the empty string and `addr` does not end in ":80",
// redirect every request incoming at port 80 to that url.
RedirectHTTPTo string `json:"redirect-http-to"`

// Where to store MachineState files
MachineStateDir string `json:"machine-state-dir"`

// If not zero, automatically mark jobs as stopped running X seconds longer than their walltime.
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`

// Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views.
ShortRunningJobsDuration int `json:"short-running-jobs-duration"`

// Energy Mix CO2 Emission Constant [g/kWh]
// If entered, displays estimated CO2 emission for job based on jobs totalEnergy
EmissionConstant int `json:"emission-constant"`

// If exists, will enable dynamic zoom in frontend metric plots using the configured values
EnableResampling *ResampleConfig `json:"resampling"`
}

type ResampleConfig struct {
// Minimum number of points to trigger resampling of data
MinimumPoints int `json:"minimumPoints"`
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
Resolutions []int `json:"resolutions"`
// Trigger next zoom level at less than this many visible datapoints
Trigger int `json:"trigger"`
}

type NATSConfig struct {
SubjectJobStart string `json:"subjectJobStart"`
SubjectJobStop string `json:"subjectJobStop"`
SubjectNodeState string `json:"subjectNodeState"`
}

type IntRange struct {
From int `json:"from"`
To int `json:"to"`
}

type TimeRange struct {
From *time.Time `json:"from"`
To *time.Time `json:"to"`
Range string `json:"range,omitempty"`
}

type FilterRanges struct {
Duration *IntRange `json:"duration"`
NumNodes *IntRange `json:"numNodes"`
StartTime *TimeRange `json:"startTime"`
}

type ClusterConfig struct {
Name string `json:"name"`
FilterRanges *FilterRanges `json:"filterRanges"`
MetricDataRepository json.RawMessage `json:"metricDataRepository"`
}

var Clusters []*ClusterConfig

var Keys ProgramConfig = ProgramConfig{
Addr: "localhost:8080",
DisableAuthentication: false,
EmbedStaticFiles: true,
DBDriver: "sqlite3",
DB: "./var/job.db",
Archive: json.RawMessage(`{"kind": "file", "path": "./var/job-archive"}`),
DisableArchive: false,
Validate: false,
SessionMaxAge: "168h",
StopJobsExceedingWalltime: 0,
ShortRunningJobsDuration: 5 * 60,
UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_showFootprint": true,
"job_list_usePaging": false,
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
"plot_list_jobsPerPage": 50,
"plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw"},
"plot_view_plotsPerRow": 3,
"plot_view_showPolarplot": true,
"plot_view_showRoofline": true,
"plot_view_showStatTable": true,
"system_view_selectedMetric": "cpu_load",
"analysis_view_selectedTopEntity": "user",
"analysis_view_selectedTopCategory": "totalWalltime",
"status_view_selectedTopUserCategory": "totalJobs",
"status_view_selectedTopProjectCategory": "totalJobs",
},
}

func Init(flagConfigFile string) {
raw, err := os.ReadFile(flagConfigFile)
if err != nil {
if !os.IsNotExist(err) {
log.Fatalf("CONFIG ERROR: %v", err)
}
} else {
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
log.Fatalf("Validate config: %v\n", err)
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
log.Fatalf("could not decode: %v", err)
}
func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) {
Validate(configSchema, mainConfig)
dec := json.NewDecoder(bytes.NewReader(mainConfig))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
}

if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
log.Fatal("At least one cluster required in config!")
}
Validate(clustersSchema, clusterConfig)
dec = json.NewDecoder(bytes.NewReader(clusterConfig))
dec.DisallowUnknownFields()
if err := dec.Decode(&Clusters); err != nil {
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
}

if len(Clusters) < 1 {
cclog.Abort("Config Init: At least one cluster required in config. Exited with error.")
}

if Keys.EnableResampling != nil && Keys.EnableResampling.MinimumPoints > 0 {
resampler.SetMinimumRequiredPoints(Keys.EnableResampling.MinimumPoints)
}
}
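As an illustration, a resampling section matching the ResampleConfig struct above could look like this in the main config (values are assumptions, not shipped defaults):

"resampling": {
"minimumPoints": 600,
"trigger": 30,
"resolutions": [600, 300, 120, 60]
}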
@@ -1,16 +1,30 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package config

import (
"testing"

ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

func TestInit(t *testing.T) {
fp := "../../configs/config.json"
Init(fp)
ccconf.Init(fp)
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
Init(cfg, clustercfg)
} else {
cclog.Abort("Cluster configuration must be present")
}
} else {
cclog.Abort("Main configuration must be present")
}

if Keys.Addr != "0.0.0.0:443" {
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:443", Keys.Addr)
}
@@ -18,7 +32,17 @@ func TestInit(t *testing.T) {

func TestInitMinimal(t *testing.T) {
fp := "../../configs/config-demo.json"
Init(fp)
ccconf.Init(fp)
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
Init(cfg, clustercfg)
} else {
cclog.Abort("Cluster configuration must be present")
}
} else {
cclog.Abort("Main configuration must be present")
}

if Keys.Addr != "127.0.0.1:8080" {
t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
}
internal/config/default_metrics.go (new file, 51 lines)
@@ -0,0 +1,51 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package config

import (
"encoding/json"
"os"
"strings"
)

// DEPRECATED: SUPERSEDED BY NEW USER CONFIG - userConfig.go / web.go

type DefaultMetricsCluster struct {
Name string `json:"name"`
DefaultMetrics string `json:"default_metrics"`
}

type DefaultMetricsConfig struct {
Clusters []DefaultMetricsCluster `json:"clusters"`
}

func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
filePath := "default_metrics.json"
if _, err := os.Stat(filePath); os.IsNotExist(err) {
return nil, nil
}
data, err := os.ReadFile(filePath)
if err != nil {
return nil, err
}
var cfg DefaultMetricsConfig
if err := json.Unmarshal(data, &cfg); err != nil {
return nil, err
}
return &cfg, nil
}

func ParseMetricsString(s string) []string {
parts := strings.Split(s, ",")
var metrics []string
for _, p := range parts {
trimmed := strings.TrimSpace(p)
if trimmed != "" {
metrics = append(metrics, trimmed)
}
}
return metrics
}
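A quick usage sketch for ParseMetricsString above (hypothetical input; whitespace is trimmed and empty fields are dropped):

metrics := ParseMetricsString("cpu_load, mem_used, ,flops_any")
// metrics == []string{"cpu_load", "mem_used", "flops_any"}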
internal/config/schema.go (new file, 205 lines)
@@ -0,0 +1,205 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package config

var configSchema = `
{
"type": "object",
"properties": {
"addr": {
"description": "Address where the http (or https) server will listen on (for example: 'localhost:80').",
"type": "string"
},
"apiAllowedIPs": {
"description": "Addresses from which secured API endpoints can be reached",
"type": "array",
"items": {
"type": "string"
}
},
"user": {
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
"type": "string"
},
"group": {
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
"type": "string"
},
"disable-authentication": {
"description": "Disable authentication (for everything: API, Web-UI, ...).",
"type": "boolean"
},
"embed-static-files": {
"description": "If all files in web/frontend/public should be served from within the binary itself (they are embedded) or not.",
"type": "boolean"
},
"static-files": {
"description": "Folder where static assets can be found, if embed-static-files is false.",
"type": "string"
},
"db": {
"description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).",
"type": "string"
},
"disable-archive": {
"description": "Keep all metric data in the metric data repositories, do not write to the job-archive.",
"type": "boolean"
},
"enable-job-taggers": {
"description": "Turn on automatic application and jobclass taggers",
"type": "boolean"
},
"validate": {
"description": "Validate all input json documents against json schema.",
"type": "boolean"
},
"session-max-age": {
"description": "Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!",
"type": "string"
},
"https-cert-file": {
"description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.",
"type": "string"
},
"https-key-file": {
"description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.",
"type": "string"
},
"redirect-http-to": {
"description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.",
"type": "string"
},
"stop-jobs-exceeding-walltime": {
"description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.",
"type": "integer"
},
"short-running-jobs-duration": {
"description": "Do not show running jobs shorter than X seconds.",
"type": "integer"
},
"emission-constant": {
"description": "Energy mix CO2 emission constant [g/kWh] used to estimate a job's emission from its total energy.",
"type": "integer"
},
"cron-frequency": {
"description": "Frequency of cron job workers.",
"type": "object",
"properties": {
"duration-worker": {
"description": "Duration Update Worker [Defaults to '5m']",
"type": "string"
},
"footprint-worker": {
"description": "Metric-Footprint Update Worker [Defaults to '10m']",
"type": "string"
}
}
},
"enable-resampling": {
"description": "Enable dynamic zoom in frontend metric plots.",
"type": "object",
"properties": {
"minimumPoints": {
"description": "Minimum points to trigger resampling of time-series data.",
"type": "integer"
},
"trigger": {
"description": "Trigger next zoom level at less than this many visible datapoints.",
"type": "integer"
},
"resolutions": {
"description": "Array of resampling target resolutions, in seconds.",
"type": "array",
"items": {
"type": "integer"
}
}
},
"required": ["trigger", "resolutions"]
}
},
"required": ["apiAllowedIPs"]
}`
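A minimal main config that validates against configSchema (only apiAllowedIPs is required; the other keys are optional and illustrative):

{
"addr": "127.0.0.1:8080",
"apiAllowedIPs": ["*"],
"short-running-jobs-duration": 300
}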
|
var clustersSchema = `
{
"type": "array",
"minItems": 1,
"items": {
"type": "object",
"properties": {
"name": {
"description": "The name of the cluster.",
"type": "string"
},
"metricDataRepository": {
"description": "Type of the metric data repository for this cluster",
"type": "object",
"properties": {
"kind": {
"type": "string",
"enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"]
},
"url": {
"type": "string"
},
"token": {
"type": "string"
}
},
"required": ["kind"]
},
"filterRanges": {
"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
"type": "object",
"properties": {
"numNodes": {
"description": "UI slider range for number of nodes",
"type": "object",
"properties": {
"from": {
"type": "integer"
},
"to": {
"type": "integer"
}
},
"required": ["from", "to"]
},
"duration": {
"description": "UI slider range for duration",
"type": "object",
"properties": {
"from": {
"type": "integer"
},
"to": {
"type": "integer"
}
},
"required": ["from", "to"]
},
"startTime": {
"description": "UI slider range for start time",
"type": "object",
"properties": {
"from": {
"type": "string",
"format": "date-time"
},
"to": {
"type": "null"
}
},
"required": ["from", "to"]
}
},
"required": ["numNodes", "duration", "startTime"]
}
},
"required": ["name", "metricDataRepository", "filterRanges"]
}
}`
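And a matching minimal clusters section accepted by clustersSchema (one cluster; names and values are illustrative assumptions):

[
{
"name": "fritz",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082"
},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2023-01-01T00:00:00Z", "to": null }
}
}
]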
internal/config/validate.go (new file, 29 lines)
@@ -0,0 +1,29 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package config

import (
"encoding/json"

cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/santhosh-tekuri/jsonschema/v5"
)

func Validate(schema string, instance json.RawMessage) {
sch, err := jsonschema.CompileString("schema.json", schema)
if err != nil {
cclog.Fatalf("%#v", err)
}

var v any
if err := json.Unmarshal([]byte(instance), &v); err != nil {
cclog.Fatal(err)
}

if err = sch.Validate(v); err != nil {
cclog.Fatalf("%#v", err)
}
}
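Validate compiles the schema string and aborts via cclog on compile, parse, or validation errors; a typical call mirrors what Init does above (a sketch):

Validate(configSchema, json.RawMessage(`{"apiAllowedIPs": ["*"]}`))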
File diff suppressed because it is too large
@@ -1,5 +1,6 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package model

@@ -3,14 +3,28 @@

package model

import (
"bytes"
"fmt"
"io"
"strconv"
"time"

"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
)

type ClusterMetricWithName struct {
Name string `json:"name"`
Unit *schema.Unit `json:"unit,omitempty"`
Timestep int `json:"timestep"`
Data []schema.Float `json:"data"`
}

type ClusterMetrics struct {
NodeCount int `json:"nodeCount"`
Metrics []*ClusterMetricWithName `json:"metrics"`
}

type Count struct {
Name string `json:"name"`
Count int `json:"count"`
@@ -50,6 +64,7 @@ type IntRangeOutput struct {

type JobFilter struct {
Tags []string `json:"tags,omitempty"`
DbID []string `json:"dbId,omitempty"`
JobID *StringInput `json:"jobId,omitempty"`
ArrayJobID *int `json:"arrayJobId,omitempty"`
User *StringInput `json:"user,omitempty"`
@@ -57,16 +72,16 @@ type JobFilter struct {
JobName *StringInput `json:"jobName,omitempty"`
Cluster *StringInput `json:"cluster,omitempty"`
Partition *StringInput `json:"partition,omitempty"`
Duration *schema.IntRange `json:"duration,omitempty"`
Duration *config.IntRange `json:"duration,omitempty"`
Energy *FloatRange `json:"energy,omitempty"`
MinRunningFor *int `json:"minRunningFor,omitempty"`
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
StartTime *schema.TimeRange `json:"startTime,omitempty"`
NumNodes *config.IntRange `json:"numNodes,omitempty"`
NumAccelerators *config.IntRange `json:"numAccelerators,omitempty"`
NumHWThreads *config.IntRange `json:"numHWThreads,omitempty"`
StartTime *config.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Exclusive *int `json:"exclusive,omitempty"`
Shared *string `json:"shared,omitempty"`
Node *StringInput `json:"node,omitempty"`
}

@@ -95,9 +110,23 @@ type JobResultList struct {
HasNextPage *bool `json:"hasNextPage,omitempty"`
}

type JobStats struct {
ID int `json:"id"`
JobID string `json:"jobId"`
StartTime int `json:"startTime"`
Duration int `json:"duration"`
Cluster string `json:"cluster"`
SubCluster string `json:"subCluster"`
NumNodes int `json:"numNodes"`
NumHWThreads *int `json:"numHWThreads,omitempty"`
NumAccelerators *int `json:"numAccelerators,omitempty"`
Stats []*NamedStats `json:"stats"`
}

type JobsStatistics struct {
ID string `json:"id"`
Name string `json:"name"`
TotalUsers int `json:"totalUsers"`
TotalJobs int `json:"totalJobs"`
RunningJobs int `json:"runningJobs"`
ShortJobs int `json:"shortJobs"`
@@ -142,12 +171,58 @@ type MetricStatItem struct {
type Mutation struct {
}

type NamedStats struct {
Name string `json:"name"`
Data *schema.MetricStatistics `json:"data"`
}

type NamedStatsWithScope struct {
Name string `json:"name"`
Scope schema.MetricScope `json:"scope"`
Stats []*ScopedStats `json:"stats"`
}

type NodeFilter struct {
Hostname *StringInput `json:"hostname,omitempty"`
Cluster *StringInput `json:"cluster,omitempty"`
Subcluster *StringInput `json:"subcluster,omitempty"`
SchedulerState *schema.SchedulerState `json:"schedulerState,omitempty"`
HealthState *string `json:"healthState,omitempty"`
TimeStart *int `json:"timeStart,omitempty"`
}

type NodeMetrics struct {
Host string `json:"host"`
State string `json:"state"`
SubCluster string `json:"subCluster"`
Metrics []*JobMetricWithName `json:"metrics"`
}

type NodeStateResultList struct {
Items []*schema.Node `json:"items"`
Count *int `json:"count,omitempty"`
}

type NodeStates struct {
State string `json:"state"`
Count int `json:"count"`
}

type NodeStatesTimed struct {
State string `json:"state"`
Counts []int `json:"counts"`
Times []int `json:"times"`
}

type NodesResultList struct {
Items []*NodeMetrics `json:"items"`
Offset *int `json:"offset,omitempty"`
Limit *int `json:"limit,omitempty"`
Count *int `json:"count,omitempty"`
TotalNodes *int `json:"totalNodes,omitempty"`
HasNextPage *bool `json:"hasNextPage,omitempty"`
}

type OrderByInput struct {
Field string `json:"field"`
Type string `json:"type"`
@@ -159,7 +234,10 @@ type PageRequest struct {
Page int `json:"page"`
}

type Query struct {
type ScopedStats struct {
Hostname string `json:"hostname"`
ID *string `json:"id,omitempty"`
Data *schema.MetricStatistics `json:"data"`
}

type StringInput struct {
@@ -192,20 +270,22 @@ type User struct {
type Aggregate string

const (
AggregateUser Aggregate = "USER"
AggregateProject Aggregate = "PROJECT"
AggregateCluster Aggregate = "CLUSTER"
AggregateUser Aggregate = "USER"
AggregateProject Aggregate = "PROJECT"
AggregateCluster Aggregate = "CLUSTER"
AggregateSubcluster Aggregate = "SUBCLUSTER"
)

var AllAggregate = []Aggregate{
AggregateUser,
AggregateProject,
AggregateCluster,
AggregateSubcluster,
}

func (e Aggregate) IsValid() bool {
switch e {
case AggregateUser, AggregateProject, AggregateCluster:
case AggregateUser, AggregateProject, AggregateCluster, AggregateSubcluster:
return true
}
return false
@@ -215,7 +295,7 @@ func (e Aggregate) String() string {
return string(e)
}

func (e *Aggregate) UnmarshalGQL(v interface{}) error {
func (e *Aggregate) UnmarshalGQL(v any) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
@@ -232,11 +312,26 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}

func (e *Aggregate) UnmarshalJSON(b []byte) error {
s, err := strconv.Unquote(string(b))
if err != nil {
return err
}
return e.UnmarshalGQL(s)
}

func (e Aggregate) MarshalJSON() ([]byte, error) {
var buf bytes.Buffer
e.MarshalGQL(&buf)
return buf.Bytes(), nil
}

type SortByAggregate string

const (
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
SortByAggregateTotalusers SortByAggregate = "TOTALUSERS"
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
@@ -248,6 +343,7 @@ const (
var AllSortByAggregate = []SortByAggregate{
SortByAggregateTotalwalltime,
SortByAggregateTotaljobs,
SortByAggregateTotalusers,
SortByAggregateTotalnodes,
SortByAggregateTotalnodehours,
SortByAggregateTotalcores,
@@ -258,7 +354,7 @@ var AllSortByAggregate = []SortByAggregate{

func (e SortByAggregate) IsValid() bool {
switch e {
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
return true
}
return false
@@ -268,7 +364,7 @@ func (e SortByAggregate) String() string {
return string(e)
}

func (e *SortByAggregate) UnmarshalGQL(v interface{}) error {
func (e *SortByAggregate) UnmarshalGQL(v any) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
@@ -285,6 +381,20 @@ func (e SortByAggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}

func (e *SortByAggregate) UnmarshalJSON(b []byte) error {
s, err := strconv.Unquote(string(b))
if err != nil {
return err
}
return e.UnmarshalGQL(s)
}

func (e SortByAggregate) MarshalJSON() ([]byte, error) {
var buf bytes.Buffer
e.MarshalGQL(&buf)
return buf.Bytes(), nil
}

type SortDirectionEnum string

const (
@@ -309,7 +419,7 @@ func (e SortDirectionEnum) String() string {
return string(e)
}

func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
func (e *SortDirectionEnum) UnmarshalGQL(v any) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
@@ -325,3 +435,17 @@ func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
func (e SortDirectionEnum) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}

func (e *SortDirectionEnum) UnmarshalJSON(b []byte) error {
s, err := strconv.Unquote(string(b))
if err != nil {
return err
}
return e.UnmarshalGQL(s)
}

func (e SortDirectionEnum) MarshalJSON() ([]byte, error) {
var buf bytes.Buffer
e.MarshalGQL(&buf)
return buf.Bytes(), nil
}
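The UnmarshalJSON/MarshalJSON pairs above simply bridge to the GQL marshalers, so a JSON round trip behaves like this (hypothetical snippet, assuming UnmarshalGQL assigns the parsed string):

var a Aggregate
_ = a.UnmarshalJSON([]byte(`"USER"`)) // a == AggregateUser
b, _ := a.MarshalJSON()               // string(b) == `"USER"`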
@@ -4,7 +4,7 @@ import (
"sync"

"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/jmoiron/sqlx"
)

@@ -32,7 +32,7 @@ func Init() {

func GetResolverInstance() *Resolver {
if resolverInstance == nil {
log.Fatal("Authentication module not initialized!")
cclog.Fatal("Authentication module not initialized!")
}

return resolverInstance
@@ -1,13 +1,15 @@

package graph

// This file will be automatically regenerated based on the schema, any resolver implementations
// This file will be automatically regenerated based on the schema, any resolver
// implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.49
// Code generated by github.com/99designs/gqlgen version v0.17.84

import (
"context"
"errors"
"fmt"
"math"
"regexp"
"slices"
"strconv"
@@ -20,8 +22,8 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
)

// Partitions is the resolver for the partitions field.
@@ -29,15 +31,21 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
return r.Repo.Partitions(obj.Name)
}

// StartTime is the resolver for the startTime field.
func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error) {
timestamp := time.Unix(obj.StartTime, 0)
return &timestamp, nil
}

// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
}

// ConcurrentJobs is the resolver for the concurrentJobs field.
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
// FIXME: Make the hardcoded duration configurable
if obj.Exclusive != 1 && obj.Duration > 600 {
if obj.Shared != "none" && obj.Duration > 600 {
return r.Repo.FindConcurrentJobs(ctx, obj)
}
@@ -48,7 +56,7 @@ func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*mod
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
rawFootprint, err := r.Repo.FetchFootprint(obj)
if err != nil {
log.Warn("Error while fetching job footprint data")
cclog.Warn("Error while fetching job footprint data")
return nil, err
}

@@ -73,21 +81,21 @@ func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
if err != nil {
log.Warn("Error while fetching job energy footprint data")
cclog.Warn("Error while fetching job energy footprint data")
return nil, err
}

res := []*model.EnergyFootprintValue{}
for name, value := range rawEnergyFootprint {
// Suboptimal: Nearly hardcoded metric name expectations
matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
matchCPU := regexp.MustCompile(`cpu|Cpu|CPU`)
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
matchCore := regexp.MustCompile(`core|Core|CORE`)

hwType := ""
switch test := name; { // Notice ';' for var declaration
case matchCpu.MatchString(test):
case matchCPU.MatchString(test):
hwType = "CPU"
case matchAcc.MatchString(test):
hwType = "Accelerator"
@@ -125,40 +133,75 @@ func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue)
// CreateTag is the resolver for the createTag field.
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name, scope)
if err != nil {
log.Warn("Error while creating tag")
return nil, err
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
}

return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && scope == "admin" ||
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && scope == "global" ||
user.Username == scope {
// Create in DB
id, err := r.Repo.CreateTag(typeArg, name, scope)
if err != nil {
cclog.Warn("Error while creating tag")
return nil, err
}
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
} else {
cclog.Warnf("Not authorized to create tag with scope: %s", scope)
return nil, fmt.Errorf("not authorized to create tag with scope: %s", scope)
}
}

// DeleteTag is the resolver for the deleteTag field.
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
// This Uses ID string <-> ID string, removeTagFromList uses []string <-> []int
panic(fmt.Errorf("not implemented: DeleteTag - deleteTag"))
}

// AddTagsToJob is the resolver for the addTagsToJob field.
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
// Selectable Tags Pre-Filtered by Scope in Frontend: No backend check required
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
}

jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while adding tag to job")
cclog.Warn("Error while adding tag to job")
return nil, err
}

tags := []*schema.Tag{}
for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64)
for _, tagID := range tagIds {
// Get ID
tid, err := strconv.ParseInt(tagID, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
cclog.Warn("Error while parsing tag id")
return nil, err
}

if tags, err = r.Repo.AddTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
log.Warn("Error while adding tag")
return nil, err
// Test Exists
_, _, tscope, exists := r.Repo.TagInfo(tid)
if !exists {
cclog.Warnf("Tag does not exist (ID): %d", tid)
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
}

// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
user.Username == tscope {
// Add to Job
if tags, err = r.Repo.AddTag(user, jid, tid); err != nil {
cclog.Warn("Error while adding tag")
return nil, err
}
} else {
cclog.Warnf("Not authorized to add tag: %d", tid)
return nil, fmt.Errorf("not authorized to add tag: %d", tid)
}
}
@@ -167,40 +210,127 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds

// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
// Removable Tags Pre-Filtered by Scope in Frontend: No backend check required
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
}

jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
cclog.Warn("Error while parsing job id")
return nil, err
}

tags := []*schema.Tag{}
for _, tagId := range tagIds {
tid, err := strconv.ParseInt(tagId, 10, 64)
for _, tagID := range tagIds {
// Get ID
tid, err := strconv.ParseInt(tagID, 10, 64)
if err != nil {
log.Warn("Error while parsing tag id")
cclog.Warn("Error while parsing tag id")
return nil, err
}

if tags, err = r.Repo.RemoveTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
log.Warn("Error while removing tag")
return nil, err
// Test Exists
_, _, tscope, exists := r.Repo.TagInfo(tid)
if !exists {
cclog.Warnf("Tag does not exist (ID): %d", tid)
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
}

// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
user.Username == tscope {
// Remove from Job
if tags, err = r.Repo.RemoveTag(user, jid, tid); err != nil {
cclog.Warn("Error while removing tag")
return nil, err
}
} else {
cclog.Warnf("Not authorized to remove tag: %d", tid)
return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
}

}

return tags, nil
}

// RemoveTagFromList is the resolver for the removeTagFromList field.
func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []string) ([]int, error) {
// Needs context user
user := repository.GetUserFromContext(ctx)
if user == nil {
return nil, fmt.Errorf("no user in context")
}

tags := []int{}
for _, tagID := range tagIds {
// Get ID
tid, err := strconv.ParseInt(tagID, 10, 64)
if err != nil {
cclog.Warn("Error while parsing tag id for removal")
return nil, err
}

// Test Exists
_, _, tscope, exists := r.Repo.TagInfo(tid)
if !exists {
cclog.Warnf("Tag does not exist (ID): %d", tid)
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
}

// Test Access: Admins && Admin Tag OR Everyone && Private Tag
if user.HasRole(schema.RoleAdmin) && (tscope == "global" || tscope == "admin") || user.Username == tscope {
// Remove from DB
if err = r.Repo.RemoveTagById(tid); err != nil {
cclog.Warn("Error while removing tag")
return nil, err
} else {
tags = append(tags, int(tid))
}
} else {
cclog.Warnf("Not authorized to remove tag: %d", tid)
return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
}
}
return tags, nil
}
// UpdateConfiguration is the resolver for the updateConfiguration field.
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, repository.GetUserFromContext(ctx)); err != nil {
log.Warn("Error while updating user config")
cclog.Warn("Error while updating user config")
return nil, err
}

return nil, nil
}

// ID is the resolver for the id field.
func (r *nodeResolver) ID(ctx context.Context, obj *schema.Node) (string, error) {
panic(fmt.Errorf("not implemented: ID - id"))
}

// SchedulerState is the resolver for the schedulerState field.
func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error) {
if obj.NodeState != "" {
return obj.NodeState, nil
} else {
return "", fmt.Errorf("no SchedulerState (NodeState) on Object")
}
}

// HealthState is the resolver for the healthState field.
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (string, error) {
panic(fmt.Errorf("not implemented: HealthState - healthState"))
}

// MetaData is the resolver for the metaData field.
func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
panic(fmt.Errorf("not implemented: MetaData - metaData"))
}

// Clusters is the resolver for the clusters field.
func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) {
return archive.Clusters, nil
@@ -213,6 +343,14 @@ func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {

// GlobalMetrics is the resolver for the globalMetrics field.
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
user := repository.GetUserFromContext(ctx)

if user != nil {
if user.HasRole(schema.RoleUser) || user.HasRole(schema.RoleManager) {
return archive.GlobalUserMetricList, nil
}
}

return archive.GlobalMetricList, nil
}
@@ -225,7 +363,7 @@ func (r *queryResolver) User(ctx context.Context, username string) (*model.User,
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
data, err := r.Repo.AllocatedNodes(cluster)
if err != nil {
log.Warn("Error while fetching allocated nodes")
cclog.Warn("Error while fetching allocated nodes")
return nil, err
}

@@ -240,17 +378,82 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
return counts, nil
}

// Node is the resolver for the node field.
func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) {
repo := repository.GetNodeRepository()
numericID, err := strconv.ParseInt(id, 10, 64)
if err != nil {
cclog.Warn("Error while parsing job id")
return nil, err
}
return repo.GetNodeByID(numericID, false)
}

// Nodes is the resolver for the nodes field.
func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) {
repo := repository.GetNodeRepository()
nodes, err := repo.QueryNodes(ctx, filter, nil, order) // Ignore Paging, Order Unused
count := len(nodes)
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
}

// NodeStates is the resolver for the nodeStates field.
func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) {
repo := repository.GetNodeRepository()

stateCounts, serr := repo.CountStates(ctx, filter, "node_state")
if serr != nil {
cclog.Warnf("Error while counting nodeStates: %s", serr.Error())
return nil, serr
}

healthCounts, herr := repo.CountStates(ctx, filter, "health_state")
if herr != nil {
cclog.Warnf("Error while counting healthStates: %s", herr.Error())
return nil, herr
}

allCounts := append(stateCounts, healthCounts...)

return allCounts, nil
}

// NodeStatesTimed is the resolver for the nodeStatesTimed field.
func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error) {
repo := repository.GetNodeRepository()

if typeArg == "node" {
stateCounts, serr := repo.CountStatesTimed(ctx, filter, "node_state")
if serr != nil {
cclog.Warnf("Error while counting nodeStates in time: %s", serr.Error())
return nil, serr
}
return stateCounts, nil
}

if typeArg == "health" {
healthCounts, herr := repo.CountStatesTimed(ctx, filter, "health_state")
if herr != nil {
cclog.Warnf("Error while counting healthStates in time: %s", herr.Error())
return nil, herr
}
return healthCounts, nil
}

return nil, errors.New("unknown Node State Query Type")
}
// Job is the resolver for the job field.
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
numericId, err := strconv.ParseInt(id, 10, 64)
numericID, err := strconv.ParseInt(id, 10, 64)
if err != nil {
log.Warn("Error while parsing job id")
cclog.Warn("Error while parsing job id")
return nil, err
}

job, err := r.Repo.FindById(ctx, numericId)
job, err := r.Repo.FindByID(ctx, numericID)
if err != nil {
log.Warn("Error while finding job by id")
cclog.Warn("Error while finding job by id")
return nil, err
}

@@ -277,13 +480,13 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str

job, err := r.Query().Job(ctx, id)
if err != nil {
log.Warn("Error while querying job for metrics")
cclog.Warn("Error while querying job for metrics")
return nil, err
}

data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
if err != nil {
log.Warn("Error while loading job data")
cclog.Warn("Error while loading job data")
return nil, err
}
@@ -301,9 +504,67 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
return res, err
}

// JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
return r.jobsFootprints(ctx, filter, metrics)
// JobStats is the resolver for the jobStats field.
func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) {
job, err := r.Query().Job(ctx, id)
if err != nil {
cclog.Warnf("Error while querying job %s for metadata", id)
return nil, err
}

data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx)
if err != nil {
cclog.Warnf("Error while loading jobStats data for job id %s", id)
return nil, err
}

res := []*model.NamedStats{}
for name, md := range data {
res = append(res, &model.NamedStats{
Name: name,
Data: &md,
})
}

return res, err
}

// ScopedJobStats is the resolver for the scopedJobStats field.
func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.NamedStatsWithScope, error) {
job, err := r.Query().Job(ctx, id)
if err != nil {
cclog.Warnf("Error while querying job %s for metadata", id)
return nil, err
}

data, err := metricDataDispatcher.LoadScopedJobStats(job, metrics, scopes, ctx)
if err != nil {
cclog.Warnf("Error while loading scopedJobStats data for job id %s", id)
return nil, err
}

res := make([]*model.NamedStatsWithScope, 0)
for name, scoped := range data {
for scope, stats := range scoped {

mdlStats := make([]*model.ScopedStats, 0)
for _, stat := range stats {
mdlStats = append(mdlStats, &model.ScopedStats{
Hostname: stat.Hostname,
ID: stat.Id,
Data: stat.Data,
})
}

res = append(res, &model.NamedStatsWithScope{
Name: name,
Scope: scope,
Stats: mdlStats,
})
}
}

return res, nil
}

// Jobs is the resolver for the jobs field.
@@ -317,48 +578,47 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag

jobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
if err != nil {
log.Warn("Error while querying jobs")
cclog.Warn("Error while querying jobs")
return nil, err
}

count, err := r.Repo.CountJobs(ctx, filter)
if err != nil {
log.Warn("Error while counting jobs")
cclog.Warn("Error while counting jobs")
return nil, err
}

if !config.Keys.UiDefaults["job_list_usePaging"].(bool) {
hasNextPage := false
// page.Page += 1 : Simple, but expensive
// Example Page 4 @ 10 IpP : Does item 41 exist?
// Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 @ 10 IpP exists.
nextPage := &model.PageRequest{
ItemsPerPage: 1,
Page: ((page.Page * page.ItemsPerPage) + 1),
}

nextJobs, err := r.Repo.QueryJobs(ctx, filter, nextPage, order)
if err != nil {
log.Warn("Error while querying next jobs")
return nil, err
}

if len(nextJobs) == 1 {
hasNextPage = true
}

return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
} else {
return &model.JobResultList{Items: jobs, Count: &count}, nil
// Note: Even if App-Default 'config.Keys.UiDefaults["job_list_usePaging"]' is set, always return hasNextPage boolean.
// Users can decide in frontend to use continuous scroll, even if app-default is paging!
/*
Example Page 4 @ 10 IpP : Does item 41 exist?
Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 @ 10 IpP exists.
*/
nextPage := &model.PageRequest{
ItemsPerPage: 1,
Page: ((page.Page * page.ItemsPerPage) + 1),
}
nextJobs, err := r.Repo.QueryJobs(ctx, filter, nextPage, order)
if err != nil {
cclog.Warn("Error while querying next jobs")
return nil, err
}

hasNextPage := len(nextJobs) == 1

return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
}
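The probe trick above costs one extra single-row query instead of refetching a full page; worked through with hypothetical numbers:

// page = 4, itemsPerPage = 10
// probe = PageRequest{ItemsPerPage: 1, Page: 4*10 + 1} // global item 41
// len(result) == 1  =>  page 5 (at 10 items per page) exists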
// JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) ([]*model.JobsStatistics, error) {
var err error
var stats []*model.JobsStatistics

if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
// Top Level Defaults
defaultDurationBins := "1h"
defaultMetricBins := 10

if requireField(ctx, "totalJobs") || requireField(ctx, "totalUsers") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
if groupBy == nil {
stats, err = r.Repo.JobsStats(ctx, filter)
@@ -391,8 +651,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
}

if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {

if numDurationBins == nil {
numDurationBins = &defaultDurationBins
}

if groupBy == nil {
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0], numDurationBins)
if err != nil {
return nil, err
}
@@ -402,8 +667,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
}

if requireField(ctx, "histMetrics") {

if numMetricBins == nil {
numMetricBins = &defaultMetricBins
}

if groupBy == nil {
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0])
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0], numMetricBins)
if err != nil {
return nil, err
}
@@ -415,6 +685,62 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
return stats, nil
}
// JobsMetricStats is the resolver for the jobsMetricStats field.
func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.JobStats, error) {
// No Paging, Fixed Order by StartTime ASC
order := &model.OrderByInput{
Field: "startTime",
Type: "col",
Order: "ASC",
}

jobs, err := r.Repo.QueryJobs(ctx, filter, nil, order)
if err != nil {
cclog.Warn("Error while querying jobs for comparison")
return nil, err
}

res := []*model.JobStats{}
for _, job := range jobs {
data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx)
if err != nil {
cclog.Warnf("Error while loading comparison jobStats data for job id %d", job.JobID)
continue
// return nil, err
}

sres := []*model.NamedStats{}
for name, md := range data {
sres = append(sres, &model.NamedStats{
Name: name,
Data: &md,
})
}

numThreadsInt := int(job.NumHWThreads)
numAccsInt := int(job.NumAcc)
res = append(res, &model.JobStats{
ID: int(*job.ID),
JobID: strconv.Itoa(int(job.JobID)),
StartTime: int(job.StartTime),
Duration: int(job.Duration),
Cluster: job.Cluster,
SubCluster: job.SubCluster,
NumNodes: int(job.NumNodes),
NumHWThreads: &numThreadsInt,
NumAccelerators: &numAccsInt,
Stats: sres,
})
}
return res, err
}

// JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!
return r.jobsFootprints(ctx, filter, metrics)
}

// RooflineHeatmap is the resolver for the rooflineHeatmap field.
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
@@ -423,8 +749,8 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
// NodeMetrics is the resolver for the nodeMetrics field.
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
user := repository.GetUserFromContext(ctx)
if user != nil && !user.HasRole(schema.RoleAdmin) {
return nil, errors.New("you need to be an administrator for this query")
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
return nil, errors.New("you need to be administrator or support staff for this query")
}

if metrics == nil {
@@ -435,17 +761,24 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [

data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
log.Warn("Error while loading node data")
cclog.Warn("error while loading node data")
return nil, err
}

nodeRepo := repository.GetNodeRepository()
stateMap, _ := nodeRepo.MapNodes(cluster)

nodeMetrics := make([]*model.NodeMetrics, 0, len(data))
for hostname, metrics := range data {
host := &model.NodeMetrics{
Host: hostname,
State: stateMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
if err != nil {
cclog.Warnf("error in nodeMetrics resolver: %s", err)
}

for metric, scopedMetrics := range metrics {
for _, scopedMetric := range scopedMetrics {
@@ -463,6 +796,152 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
return nodeMetrics, nil
}

// NodeMetricsList is the resolver for the nodeMetricsList field.
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
if resolution == nil { // Load from Config
if config.Keys.EnableResampling != nil {
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
resolution = &defaultRes
} else { // Set 0 (Loads configured metric timestep)
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||
}
|
||||
|
||||
nodeRepo := repository.GetNodeRepository()
|
||||
nodes, stateMap, countNodes, hasNextPage, nerr := nodeRepo.GetNodesForList(ctx, cluster, subCluster, stateFilter, nodeFilter, page)
|
||||
if nerr != nil {
|
||||
return nil, errors.New("could not retrieve node list required for resolving NodeMetricsList")
|
||||
}
|
||||
|
||||
if metrics == nil {
|
||||
for _, mc := range archive.GetCluster(cluster).MetricConfig {
|
||||
metrics = append(metrics, mc.Name)
|
||||
}
|
||||
}
|
||||
|
||||
data, err := metricDataDispatcher.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx)
|
||||
if err != nil {
|
||||
cclog.Warn("error while loading node data (Resolver.NodeMetricsList")
|
||||
return nil, err
}

nodeMetricsList := make([]*model.NodeMetrics, 0, len(data))
for hostname, metrics := range data {
host := &model.NodeMetrics{
Host: hostname,
State: stateMap[hostname],
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
if err != nil {
cclog.Warnf("error in nodeMetrics resolver: %s", err)
}

for metric, scopedMetrics := range metrics {
for scope, scopedMetric := range scopedMetrics {
host.Metrics = append(host.Metrics, &model.JobMetricWithName{
Name: metric,
Scope: scope,
Metric: scopedMetric,
})
}
}

nodeMetricsList = append(nodeMetricsList, host)
}

nodeMetricsListResult := &model.NodesResultList{
Items: nodeMetricsList,
TotalNodes: &countNodes,
HasNextPage: &hasNextPage,
}

return nodeMetricsListResult, nil
}

// ClusterMetrics is the resolver for the clusterMetrics field.
func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metrics []string, from time.Time, to time.Time) (*model.ClusterMetrics, error) {
user := repository.GetUserFromContext(ctx)
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
return nil, errors.New("you need to be administrator or support staff for this query")
}

if metrics == nil {
for _, mc := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, mc.Name)
}
}

// 'nodes' == nil -> Defaults to all nodes of cluster for existing query workflow
scopes := []schema.MetricScope{"node"}
data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nil, scopes, from, to, ctx)
if err != nil {
cclog.Warn("error while loading node data")
return nil, err
}

clusterMetricData := make([]*model.ClusterMetricWithName, 0)
clusterMetrics := model.ClusterMetrics{NodeCount: 0, Metrics: clusterMetricData}

collectorTimestep := make(map[string]int)
collectorUnit := make(map[string]schema.Unit)
collectorData := make(map[string][]schema.Float)

for _, metrics := range data {
clusterMetrics.NodeCount += 1
for metric, scopedMetrics := range metrics {
_, ok := collectorData[metric]
if !ok {
collectorData[metric] = make([]schema.Float, 0)
for _, scopedMetric := range scopedMetrics {
// Collect Info
collectorTimestep[metric] = scopedMetric.Timestep
collectorUnit[metric] = scopedMetric.Unit
// Collect Initial Data
for _, ser := range scopedMetric.Series {
collectorData[metric] = append(collectorData[metric], ser.Data...)
}
}
} else {
// Sum up values by index
for _, scopedMetric := range scopedMetrics {
// For This Purpose (Cluster_Wide-Sum of Node Metrics) OK
for _, ser := range scopedMetric.Series {
for i, val := range ser.Data {
collectorData[metric][i] += val
}
}
}
}
}
}

for metricName, data := range collectorData {
cu := collectorUnit[metricName]
roundedData := make([]schema.Float, 0)
for _, val := range data {
roundedData = append(roundedData, schema.Float((math.Round(float64(val)*100.0) / 100.0)))
}

cm := model.ClusterMetricWithName{
Name: metricName,
Unit: &cu,
Timestep: collectorTimestep[metricName],
Data: roundedData,
}

clusterMetrics.Metrics = append(clusterMetrics.Metrics, &cm)
}

return &clusterMetrics, nil
}

// NumberOfNodes is the resolver for the numberOfNodes field.
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
nodeList, err := archive.ParseNodeList(obj.Nodes)
@@ -484,6 +963,9 @@ func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricV
// Mutation returns generated.MutationResolver implementation.
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }

// Node returns generated.NodeResolver implementation.
func (r *Resolver) Node() generated.NodeResolver { return &nodeResolver{r} }

// Query returns generated.QueryResolver implementation.
func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }

@@ -495,6 +977,7 @@ type (
jobResolver struct{ *Resolver }
metricValueResolver struct{ *Resolver }
mutationResolver struct{ *Resolver }
nodeResolver struct{ *Resolver }
queryResolver struct{ *Resolver }
subClusterResolver struct{ *Resolver }
)

@@ -1,20 +1,21 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package graph

import (
"context"
"fmt"
"math"
"slices"

"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
)

const MAX_JOBS_FOR_ANALYSIS = 500
@@ -28,7 +29,7 @@ func (r *queryResolver) rooflineHeatmap(
) ([][]float64, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for roofline")
cclog.Error("Error while querying jobs for roofline")
return nil, err
}
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
@@ -56,13 +57,13 @@ func (r *queryResolver) rooflineHeatmap(

jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
if err != nil {
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
cclog.Errorf("Error while loading roofline metrics for job %d", job.ID)
return nil, err
}

flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
if flops_ == nil && membw_ == nil {
log.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
cclog.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
continue
// return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
}
@@ -70,7 +71,7 @@ func (r *queryResolver) rooflineHeatmap(
flops, ok1 := flops_["node"]
membw, ok2 := membw_["node"]
if !ok1 || !ok2 {
log.Info("rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
cclog.Info("rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
continue
// TODO/FIXME:
// return nil, errors.New("GRAPH/UTIL > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
@@ -105,7 +106,7 @@ func (r *queryResolver) rooflineHeatmap(
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
log.Error("Error while querying jobs for footprint")
cclog.Error("Error while querying jobs for footprint")
return nil, err
}
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
@@ -128,7 +129,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
}

if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint")
cclog.Error("Error while loading averages for footprint")
return nil, err
}

@@ -186,11 +187,5 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
func requireField(ctx context.Context, name string) bool {
fields := graphql.CollectAllFields(ctx)

for _, f := range fields {
if f == name {
return true
}
}

return false
return slices.Contains(fields, name)
}

internal/importer/README.md (new file, 132 lines)
@@ -0,0 +1,132 @@
# Importer Package

The `importer` package provides functionality for importing job data into the ClusterCockpit database from archived job files.

## Overview

This package supports two primary import workflows:

1. **Bulk Database Initialization** - Reinitialize the entire job database from archived jobs
2. **Individual Job Import** - Import specific jobs from metadata/data file pairs

Both workflows enrich job metadata by calculating performance footprints and energy consumption metrics before persisting to the database.

## Main Entry Points

### InitDB()

Reinitializes the job database from all archived jobs.

```go
if err := importer.InitDB(); err != nil {
    log.Fatal(err)
}
```

This function:
- Flushes existing job, tag, and jobtag tables
- Iterates through all jobs in the configured archive
- Enriches each job with calculated metrics
- Inserts jobs into the database in batched transactions (100 jobs per batch)
- Continues on individual job failures, logging errors

**Use Case**: Initial database setup or complete database rebuild from archive.

### HandleImportFlag(flag string)

Imports jobs from specified file pairs.

```go
// Format: "<meta.json>:<data.json>[,<meta2.json>:<data2.json>,...]"
flag := "/path/to/meta.json:/path/to/data.json"
if err := importer.HandleImportFlag(flag); err != nil {
    log.Fatal(err)
}
```

This function:
- Parses the comma-separated file pairs
- Validates metadata and job data against schemas (if validation is enabled)
- Enriches each job with footprints and energy metrics
- Imports jobs into both the archive and database
- Fails fast on the first error

**Use Case**: Importing specific jobs from external sources or manual job additions.

## Job Enrichment

Both import workflows use `enrichJobMetadata()` to calculate:

### Performance Footprints

Performance footprints are calculated from metric statistics (the average by default, or the statistic configured per metric) based on the subcluster configuration:

```go
job.Footprint["mem_used_avg"] = 45.2 // GB
job.Footprint["cpu_load_avg"] = 0.87 // percentage
```
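
The footprint keys combine the metric name with the per-metric statistic type. A minimal sketch of how such a key could be derived, using a hypothetical `footprintKey` helper (the actual code builds the key inline with `fmt.Sprintf`):

```go
import "fmt"

// footprintKey illustrates how footprint map keys such as "mem_used_avg"
// are built: metric name plus the configured statistic type ("avg" by default).
func footprintKey(metric, statType string) string {
    if statType == "" {
        statType = "avg"
    }
    return fmt.Sprintf("%s_%s", metric, statType)
}
```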

### Energy Metrics

Energy consumption is calculated from power metrics using the formula:

```
Energy (kWh) = (Power (W) × Duration (s) / 3600) / 1000
```

For each energy metric:
```go
job.EnergyFootprint["acc_power"] = 12.5 // kWh
job.Energy = 150.2 // Total energy in kWh
```
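
A minimal sketch of the power-to-energy conversion, using a hypothetical `powerToKWh` helper; the actual implementation additionally multiplies the node-average power by the number of nodes before converting:

```go
import "math"

// powerToKWh converts an average power draw in Watts over a duration in
// seconds into energy in kWh, rounded to two decimal places.
func powerToKWh(avgPowerWatts float64, durationSeconds int64) float64 {
    kwh := (avgPowerWatts * float64(durationSeconds) / 3600.0) / 1000.0
    return math.Round(kwh*100.0) / 100.0
}
```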

**Note**: Energy calculations for metrics with unit "energy" (Joules) are not yet implemented.

## Data Validation

### SanityChecks(job *schema.Job)

Validates job metadata before database insertion (a usage sketch follows the list):

- Cluster exists in configuration
- Subcluster is valid (assigns it if needed)
- Job state is valid
- Resources and user fields are populated
- Node counts and hardware thread counts are positive
- Resource count matches declared node count
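
A short usage sketch, assuming a decoded `job` of type `*schema.Job`; note that `SanityChecks` may also assign `job.SubCluster` as a side effect:

```go
// Validate the decoded job before touching the database.
if err := importer.SanityChecks(job); err != nil {
    log.Fatalf("rejecting job %d: %v", job.JobID, err)
}
```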

## Normalization Utilities

The package includes utilities for normalizing metric values to appropriate SI prefixes:

### Normalize(avg float64, prefix string)

Adjusts values and SI prefixes for readability:

```go
factor, newPrefix := importer.Normalize(2048.0, "M")
// Converts 2048 MB → ~2.0 GB
// Returns: factor for conversion, "G"
```

This is useful for automatically scaling metrics (e.g., memory, storage) to human-readable units.
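
A slightly fuller usage sketch (values illustrative): the returned factor is applied to the raw values, while the new prefix replaces the old one in the displayed unit:

```go
avg := 2048.0 // average value, currently with SI prefix "M"
factor, prefix := importer.Normalize(avg, "M")
scaled := avg * factor                   // ~2.05
fmt.Printf("%.2f %sB\n", scaled, prefix) // e.g. "2.05 GB"
```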

## Dependencies

- `github.com/ClusterCockpit/cc-backend/internal/repository` - Database operations
- `github.com/ClusterCockpit/cc-backend/pkg/archive` - Job archive access
- `github.com/ClusterCockpit/cc-lib/schema` - Job schema definitions
- `github.com/ClusterCockpit/cc-lib/ccLogger` - Logging
- `github.com/ClusterCockpit/cc-lib/ccUnits` - SI unit handling

## Error Handling

- **InitDB**: Continues processing on individual job failures, logs errors, returns a summary
- **HandleImportFlag**: Fails fast on the first error, returns immediately
- Both functions log detailed error context for debugging

## Performance

- **Transaction Batching**: InitDB processes jobs in batches of 100 for optimal database performance (see the sketch below)
- **Tag Caching**: Tag IDs are cached during import to minimize database queries
- **Progress Reporting**: InitDB prints progress updates during bulk operations
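
A condensed sketch of the batching pattern, using the repository transaction helpers as they appear in `InitDB()` (enrichment and error counting trimmed for brevity):

```go
t, err := r.TransactionInit()
if err != nil {
    return err
}
i := 0
for jobContainer := range ar.Iter(false) {
    // Commit every 100 inserts and start a fresh transaction.
    if i%100 == 0 && i > 0 {
        if err := t.Commit(); err != nil {
            return err
        }
        if t, err = r.TransactionInit(); err != nil {
            return err
        }
    }
    // ... enrich and insert jobContainer.Meta here ...
    i++
}
```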
@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
@@ -8,18 +8,32 @@ import (
"bytes"
"encoding/json"
"fmt"
"math"
"os"
"strings"

"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
)

// Import all jobs specified as `<path-to-meta.json>:<path-to-data.json>,...`
// HandleImportFlag imports jobs from file pairs specified in a comma-separated flag string.
//
// The flag format is: "<path-to-meta.json>:<path-to-data.json>[,<path-to-meta2.json>:<path-to-data2.json>,...]"
//
// For each job pair, this function:
// 1. Reads and validates the metadata JSON file (schema.Job)
// 2. Reads and validates the job data JSON file (schema.JobData)
// 3. Enriches the job with calculated footprints and energy metrics
// 4. Validates the job using SanityChecks()
// 5. Imports the job into the archive
// 6. Inserts the job into the database with associated tags
//
// Schema validation is performed if config.Keys.Validate is true.
//
// Returns an error if file reading, validation, enrichment, or database operations fail.
// The function stops processing on the first error encountered.
func HandleImportFlag(flag string) error {
r := repository.GetJobRepository()

@@ -31,7 +45,7 @@ func HandleImportFlag(flag string) error {

raw, err := os.ReadFile(files[0])
if err != nil {
log.Warn("Error while reading metadata file for import")
cclog.Warn("Error while reading metadata file for import")
return err
}

@@ -42,15 +56,18 @@ func HandleImportFlag(flag string) error {
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
job := schema.JobMeta{BaseJob: schema.JobDefaults}
job := schema.Job{
Shared: "none",
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
}
if err = dec.Decode(&job); err != nil {
log.Warn("Error while decoding raw json metadata for import")
cclog.Warn("Error while decoding raw json metadata for import")
return err
}

raw, err = os.ReadFile(files[1])
if err != nil {
log.Warn("Error while reading jobdata file for import")
cclog.Warn("Error while reading jobdata file for import")
return err
}

@@ -63,100 +80,41 @@ func HandleImportFlag(flag string) error {
dec.DisallowUnknownFields()
jobData := schema.JobData{}
if err = dec.Decode(&jobData); err != nil {
log.Warn("Error while decoding raw json jobdata for import")
cclog.Warn("Error while decoding raw json jobdata for import")
return err
}

job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful

sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
if err = enrichJobMetadata(&job); err != nil {
cclog.Errorf("Error enriching job metadata: %v", err)
return err
}

job.Footprint = make(map[string]float64)

for _, fp := range sc.Footprint {
statType := "avg"

if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}

name := fmt.Sprintf("%s_%s", fp, statType)

job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
}

job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}

job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64

for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}

job.EnergyFootprint[fp] = energy
totalEnergy += energy
}

job.Energy = (math.Round(totalEnergy*100) / 100)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err
}

job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Warn("Error while marshaling job resources")
return err
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Warn("Error while marshaling job metadata")
return err
}

if err = SanityChecks(&job.BaseJob); err != nil {
log.Warn("BaseJob SanityChecks failed")
if err = SanityChecks(&job); err != nil {
cclog.Warn("BaseJob SanityChecks failed")
return err
}

if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
log.Error("Error while importing job")
cclog.Error("Error while importing job")
return err
}

id, err := r.InsertJob(&job)
if err != nil {
log.Warn("Error while job db insert")
cclog.Warn("Error while job db insert")
return err
}

for _, tag := range job.Tags {
if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
log.Error("Error while adding or creating tag on import")
cclog.Error("Error while adding or creating tag on import")
return err
}
}

log.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
cclog.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
}
return nil
}

@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer_test
@@ -16,9 +16,12 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

// copyFile copies a file from source path to destination path.
// Used by tests to set up test fixtures.
func copyFile(s string, d string) error {
r, err := os.Open(s)
if err != nil {
@@ -34,17 +37,26 @@ func copyFile(s string, d string) error {
return nil
}

// setup initializes a test environment for importer tests.
//
// Creates a temporary directory with:
// - A test job archive with cluster configuration
// - A SQLite database initialized with schema
// - Configuration files loaded
//
// Returns a JobRepository instance for test assertions.
func setup(t *testing.T) *repository.JobRepository {
const testconfig = `{
"main": {
"addr": "0.0.0.0:8080",
"validate": false,
"apiAllowedIPs": [
"*"
]},
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2m"
},
"clusters": [
{
"name": "testcluster",
@@ -75,18 +87,18 @@ func setup(t *testing.T) *repository.JobRepository {
}
]}`

log.Init("info", true)
cclog.Init("info", true)
tmpdir := t.TempDir()

jobarchive := filepath.Join(tmpdir, "job-archive")
if err := os.Mkdir(jobarchive, 0777); err != nil {
if err := os.Mkdir(jobarchive, 0o777); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
t.Fatal(err)
}
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
if err := os.Mkdir(fritzArchive, 0777); err != nil {
if err := os.Mkdir(fritzArchive, 0o777); err != nil {
t.Fatal(err)
}
if err := copyFile(filepath.Join("testdata", "cluster-fritz.json"),
@@ -101,11 +113,23 @@ func setup(t *testing.T) *repository.JobRepository {
}

cfgFilePath := filepath.Join(tmpdir, "config.json")
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
t.Fatal(err)
}

config.Init(cfgFilePath)
ccconf.Init(cfgFilePath)

// Load and check main configuration
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
config.Init(cfg, clustercfg)
} else {
t.Fatal("Cluster configuration must be present")
}
} else {
t.Fatal("Main configuration must be present")
}

archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)

if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
@@ -116,6 +140,7 @@ func setup(t *testing.T) *repository.JobRepository {
return repository.GetJobRepository()
}

// Result represents the expected test result for job import verification.
type Result struct {
JobId int64
Cluster string
@@ -123,6 +148,8 @@ type Result struct {
Duration int32
}

// readResult reads the expected test result from a golden file.
// Golden files contain the expected job attributes after import.
func readResult(t *testing.T, testname string) Result {
var r Result

@@ -140,6 +167,13 @@ func readResult(t *testing.T, testname string) Result {
return r
}

// TestHandleImportFlag tests the HandleImportFlag function with various job import scenarios.
//
// The test uses golden files in testdata/ to verify that jobs are correctly:
// - Parsed from metadata and data JSON files
// - Enriched with footprints and energy metrics
// - Inserted into the database
// - Retrievable with correct attributes
func TestHandleImportFlag(t *testing.T) {
r := setup(t)

@@ -163,7 +197,7 @@ func TestHandleImportFlag(t *testing.T) {
}

result := readResult(t, testname)
job, err := r.Find(&result.JobId, &result.Cluster, &result.StartTime)
job, err := r.FindCached(&result.JobId, &result.Cluster, &result.StartTime)
if err != nil {
t.Fatal(err)
}

@@ -1,7 +1,16 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package importer provides functionality for importing job data into the ClusterCockpit database.
//
// The package supports two primary use cases:
// 1. Bulk database initialization from archived jobs via InitDB()
// 2. Individual job import from file pairs via HandleImportFlag()
//
// Both operations enrich job metadata by calculating footprints and energy metrics
// before persisting to the database.
package importer

import (
@@ -13,8 +22,8 @@ import (

"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
)

const (
@@ -22,25 +31,38 @@ const (
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
)

// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
// InitDB reinitializes the job database from archived job data.
//
// This function performs the following operations:
// 1. Flushes existing job, tag, and jobtag tables
// 2. Iterates through all jobs in the archive
// 3. Enriches each job with calculated footprints and energy metrics
// 4. Inserts jobs and tags into the database in batched transactions
//
// Jobs are processed in batches of 100 for optimal performance. The function
// continues processing even if individual jobs fail, logging errors and
// returning a summary at the end.
//
// Returns an error if database initialization, transaction management, or
// critical operations fail. Individual job failures are logged but do not
// stop the overall import process.
func InitDB() error {
r := repository.GetJobRepository()
if err := r.Flush(); err != nil {
log.Errorf("repository initDB(): %v", err)
cclog.Errorf("repository initDB(): %v", err)
return err
}
starttime := time.Now()
log.Print("Building job table...")
cclog.Print("Building job table...")

t, err := r.TransactionInit()
if err != nil {
log.Warn("Error while initializing SQL transactions")
cclog.Warn("Error while initializing SQL transactions")
return err
}
tags := make(map[string]int64)

// Not using log.Print because we want the line to end with `\r` and
// Not using cclog.Print because we want the line to end with `\r` and
// this function is only ever called when a special command line flag
// is passed anyways.
fmt.Printf("%d jobs inserted...\r", 0)
@@ -52,142 +74,195 @@ func InitDB() error {
for jobContainer := range ar.Iter(false) {

jobMeta := jobContainer.Meta
if jobMeta == nil {
cclog.Warn("skipping job with nil metadata")
errorOccured++
continue
}

// Bundle 100 inserts into one transaction for better performance
if i%100 == 0 {
r.TransactionCommit(t)
if i > 0 {
if err := t.Commit(); err != nil {
cclog.Errorf("transaction commit error: %v", err)
return err
}
// Start a new transaction for the next batch
t, err = r.TransactionInit()
if err != nil {
cclog.Errorf("transaction init error: %v", err)
return err
}
}
fmt.Printf("%d jobs inserted...\r", i)
}

jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}

sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return err
}

job.Footprint = make(map[string]float64)

for _, fp := range sc.Footprint {
statType := "avg"

if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}

name := fmt.Sprintf("%s_%s", fp, statType)

job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
}

job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}

job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64

for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}

job.EnergyFootprint[fp] = energy
totalEnergy += energy
}

job.Energy = (math.Round(totalEnergy*100) / 100)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err
}

job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
log.Errorf("repository initDB(): %v", err)
if err := enrichJobMetadata(jobMeta); err != nil {
cclog.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}

job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Errorf("repository initDB(): %v", err)
if err := SanityChecks(jobMeta); err != nil {
cclog.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}

if err := SanityChecks(&job.BaseJob); err != nil {
log.Errorf("repository initDB(): %v", err)
id, jobErr := r.TransactionAddNamed(t,
repository.NamedJobInsert, jobMeta)
if jobErr != nil {
cclog.Errorf("repository initDB(): %v", jobErr)
errorOccured++
continue
}

id, err := r.TransactionAddNamed(t,
repository.NamedJobInsert, job)
if err != nil {
log.Errorf("repository initDB(): %v", err)
errorOccured++
continue
}
// Job successfully inserted, increment counter
i += 1

for _, tag := range job.Tags {
for _, tag := range jobMeta.Tags {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
tagID, ok := tags[tagstr]
if !ok {
tagId, err = r.TransactionAdd(t,
var err error
tagID, err = r.TransactionAdd(t,
addTagQuery,
tag.Name, tag.Type)
if err != nil {
log.Errorf("Error adding tag: %v", err)
cclog.Errorf("Error adding tag: %v", err)
errorOccured++
continue
}
tags[tagstr] = tagId
tags[tagstr] = tagID
}

r.TransactionAdd(t,
setTagQuery,
id, tagId)
}

if err == nil {
i += 1
id, tagID)
}
}

if errorOccured > 0 {
log.Warnf("Error in import of %d jobs!", errorOccured)
cclog.Warnf("Error in import of %d jobs!", errorOccured)
}

r.TransactionEnd(t)
log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
cclog.Infof("A total of %d jobs have been registered in %.3f seconds.", i, time.Since(starttime).Seconds())
return nil
}

// This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error {
// enrichJobMetadata calculates and populates job footprints, energy metrics, and serialized fields.
//
// This function performs the following enrichment operations:
// 1. Calculates job footprint metrics based on the subcluster configuration
// 2. Computes energy footprint and total energy consumption in kWh
// 3. Marshals footprints, resources, and metadata into JSON for database storage
//
// The function expects the job's MonitoringStatus and SubCluster to be already set.
// Energy calculations convert power metrics (Watts) to energy (kWh) using the formula:
//
// Energy (kWh) = (Power (W) * Duration (s) / 3600) / 1000
//
// Returns an error if subcluster retrieval, metric indexing, or JSON marshaling fails.
func enrichJobMetadata(job *schema.Job) error {
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
if err != nil {
cclog.Errorf("cannot get subcluster: %s", err.Error())
return err
}

job.Footprint = make(map[string]float64)

for _, fp := range sc.Footprint {
statType := "avg"

if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}

name := fmt.Sprintf("%s_%s", fp, statType)

job.Footprint[name] = repository.LoadJobStat(job, fp, statType)
}

job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
cclog.Warn("Error while marshaling job footprint")
return err
}

job.EnergyFootprint = make(map[string]float64)

// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
switch sc.MetricConfig[i].Energy {
case "energy": // this metric has energy as unit (Joules)
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, fp, job.Cluster)
// FIXME: Needs sum as stats type
case "power": // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}

job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}

job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err
}

job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
cclog.Warn("Error while marshaling job resources")
return err
}

job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
cclog.Warn("Error while marshaling job metadata")
return err
}

return nil
}

// SanityChecks validates job metadata and ensures cluster/subcluster configuration is valid.
//
// This function performs the following validations:
// 1. Verifies the cluster exists in the archive configuration
// 2. Assigns and validates the subcluster (may modify job.SubCluster)
// 3. Validates job state is a recognized value
// 4. Ensures resources and user fields are populated
// 5. Validates node counts and hardware thread counts are positive
// 6. Verifies the number of resources matches the declared node count
//
// The function may modify the job's SubCluster field if it needs to be assigned.
//
// Returns an error if any validation check fails.
func SanityChecks(job *schema.Job) error {
if c := archive.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %v", job.Cluster)
}
if err := archive.AssignSubCluster(job); err != nil {
log.Warn("Error while assigning subcluster to job")
cclog.Warn("Error while assigning subcluster to job")
return err
}
if !job.State.Valid() {
@@ -206,6 +281,14 @@ func SanityChecks(job *schema.BaseJob) error {
return nil
}

// checkJobData normalizes metric units in job data based on average values.
//
// NOTE: This function is currently unused and contains an incomplete implementation.
// It was intended to normalize byte and file-related metrics to appropriate SI prefixes,
// but the normalization logic is commented out. Consider removing or completing this
// function based on project requirements.
//
// TODO: Either implement the metric normalization or remove this dead code.
func checkJobData(d *schema.JobData) error {
for _, scopes := range *d {
// var newUnit schema.Unit

@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
@@ -7,13 +7,27 @@ package importer
import (
"math"

ccunits "github.com/ClusterCockpit/cc-units"
ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
)

// getNormalizationFactor calculates the scaling factor needed to normalize a value
// to a more readable range (typically between 1.0 and 1000.0).
//
// For values greater than 1000, the function scales down by factors of 1000 (returns negative exponent).
// For values less than 1.0, the function scales up by factors of 1000 (returns positive exponent).
//
// Returns:
//   - factor: The multiplicative factor to apply (10^(count*scale))
//   - exponent: The power of 10 representing the adjustment (multiple of 3 for SI prefixes)
func getNormalizationFactor(v float64) (float64, int) {
count := 0
scale := -3

// Prevent infinite loop for zero or negative values
if v <= 0.0 {
return 1.0, 0
}

if v > 1000.0 {
for v > 1000.0 {
v *= 1e-3
@@ -29,9 +43,22 @@ func getNormalizationFactor(v float64) (float64, int) {
return math.Pow10(count * scale), count * scale
}

// getExponent calculates the SI prefix exponent from a numeric prefix value.
//
// For example:
//   - Input: 1000.0 (kilo) returns 3
//   - Input: 1000000.0 (mega) returns 6
//   - Input: 1000000000.0 (giga) returns 9
//
// Returns the exponent representing the power of 10 for the SI prefix.
func getExponent(p float64) int {
count := 0

// Prevent infinite loop for infinity or NaN values
if math.IsInf(p, 0) || math.IsNaN(p) || p <= 0.0 {
return 0
}

for p > 1.0 {
p = p / 1000.0
count++
@@ -40,12 +67,42 @@ func getExponent(p float64) int {
return count * 3
}

// newPrefixFromFactor computes a new SI unit prefix after applying a normalization factor.
//
// Given an original prefix and an exponent adjustment, this function calculates
// the resulting SI prefix. For example, if normalizing from bytes (no prefix) by
// a factor of 10^9, the result would be the "G" (giga) prefix.
//
// Parameters:
//   - op: The original SI prefix value
//   - e: The exponent adjustment to apply
//
// Returns the new SI prefix after adjustment.
func newPrefixFromFactor(op ccunits.Prefix, e int) ccunits.Prefix {
f := float64(op)
exp := math.Pow10(getExponent(f) - e)
return ccunits.Prefix(exp)
}

// Normalize adjusts a metric value and its SI unit prefix to a more readable range.
//
// This function is useful for automatically scaling metrics to appropriate units.
// For example, normalizing 2048 MiB might result in ~2.0 GiB.
//
// The function analyzes the average value and determines if a different SI prefix
// would make the number more human-readable (typically keeping values between 1 and 1000).
//
// Parameters:
//   - avg: The metric value to normalize
//   - p: The current SI prefix as a string (e.g., "K", "M", "G")
//
// Returns:
//   - factor: The multiplicative factor to apply to convert the value
//   - newPrefix: The new SI prefix string to use
//
// Example:
//
//	factor, newPrefix := Normalize(2048.0, "M") // returns factor for MB->GB conversion, "G"
func Normalize(avg float64, p string) (float64, string) {
f, e := getNormalizationFactor(avg)

@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package importer
@@ -8,9 +8,11 @@ import (
"fmt"
"testing"

ccunits "github.com/ClusterCockpit/cc-units"
ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
)

// TestNormalizeFactor tests the normalization of large byte values to gigabyte prefix.
// Verifies that values in the billions are correctly scaled to the "G" (giga) prefix.
func TestNormalizeFactor(t *testing.T) {
// var us string
s := []float64{2890031237, 23998994567, 389734042344, 390349424345}
@@ -38,6 +40,8 @@ func TestNormalizeFactor(t *testing.T) {
}
}

// TestNormalizeKeep tests that values already in an appropriate range maintain their prefix.
// Verifies that when values don't require rescaling, the original "G" prefix is preserved.
func TestNormalizeKeep(t *testing.T) {
s := []float64{3.0, 24.0, 390.0, 391.0}

@@ -1 +1 @@
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n 
StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"shared":"none","monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n 
StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}
@@ -1 +1 @@
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"exclusive":1,"jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"shared":"none","jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}
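The fixture hunks above show the job-meta test data being migrated from the integer "exclusive" field to the string "shared" field. A minimal sketch of the corresponding struct change, not part of this changeset (the field names are taken from the JSON above; the Go types and enum values are assumptions):

// Illustrative sketch only: the fixture migration replaces an integer
// flag with a string enumeration.
type jobMetaBefore struct {
    Exclusive int32 `json:"exclusive"` // 1 appears to mean the node was used exclusively
}

type jobMetaAfter struct {
    Shared string `json:"shared"` // "none" in the migrated fixtures above
}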
227 internal/memorystore/api.go Normal file
@@ -0,0 +1,227 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
    "errors"
    "math"

    "github.com/ClusterCockpit/cc-lib/schema"
    "github.com/ClusterCockpit/cc-lib/util"
)

var (
    ErrInvalidTimeRange = errors.New("[METRICSTORE]> invalid time range: 'from' must be before 'to'")
    ErrEmptyCluster     = errors.New("[METRICSTORE]> cluster name cannot be empty")
)

type APIMetricData struct {
    Error      *string           `json:"error,omitempty"`
    Data       schema.FloatArray `json:"data,omitempty"`
    From       int64             `json:"from"`
    To         int64             `json:"to"`
    Resolution int64             `json:"resolution"`
    Avg        schema.Float      `json:"avg"`
    Min        schema.Float      `json:"min"`
    Max        schema.Float      `json:"max"`
}

type APIQueryRequest struct {
    Cluster     string     `json:"cluster"`
    Queries     []APIQuery `json:"queries"`
    ForAllNodes []string   `json:"for-all-nodes"`
    From        int64      `json:"from"`
    To          int64      `json:"to"`
    WithStats   bool       `json:"with-stats"`
    WithData    bool       `json:"with-data"`
    WithPadding bool       `json:"with-padding"`
}

type APIQueryResponse struct {
    Queries []APIQuery        `json:"queries,omitempty"`
    Results [][]APIMetricData `json:"results"`
}

type APIQuery struct {
    Type        *string      `json:"type,omitempty"`
    SubType     *string      `json:"subtype,omitempty"`
    Metric      string       `json:"metric"`
    Hostname    string       `json:"host"`
    Resolution  int64        `json:"resolution"`
    TypeIds     []string     `json:"type-ids,omitempty"`
    SubTypeIds  []string     `json:"subtype-ids,omitempty"`
    ScaleFactor schema.Float `json:"scale-by,omitempty"`
    Aggregate   bool         `json:"aggreg"`
}

// TODO: Optimize this, just like the stats endpoint!
func (data *APIMetricData) AddStats() {
    n := 0
    sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
    for _, x := range data.Data {
        if x.IsNaN() {
            continue
        }

        n += 1
        sum += float64(x)
        min = math.Min(min, float64(x))
        max = math.Max(max, float64(x))
    }

    if n > 0 {
        avg := sum / float64(n)
        data.Avg = schema.Float(avg)
        data.Min = schema.Float(min)
        data.Max = schema.Float(max)
    } else {
        data.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN
    }
}

func (data *APIMetricData) ScaleBy(f schema.Float) {
    if f == 0 || f == 1 {
        return
    }

    data.Avg *= f
    data.Min *= f
    data.Max *= f
    for i := 0; i < len(data.Data); i++ {
        data.Data[i] *= f
    }
}

func (data *APIMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {
    minfo, ok := ms.Metrics[metric]
    if !ok {
        return
    }

    if (data.From / minfo.Frequency) > (from / minfo.Frequency) {
        padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))
        ndata := make([]schema.Float, 0, padfront+len(data.Data))
        for range padfront {
            ndata = append(ndata, schema.NaN)
        }
        for j := 0; j < len(data.Data); j++ {
            ndata = append(ndata, data.Data[j])
        }
        data.Data = ndata
    }
}

func FetchData(req APIQueryRequest) (*APIQueryResponse, error) {
    if req.From > req.To {
        return nil, ErrInvalidTimeRange
    }
    if req.Cluster == "" && req.ForAllNodes != nil {
        return nil, ErrEmptyCluster
    }

    req.WithData = true
    ms := GetMemoryStore()

    response := APIQueryResponse{
        Results: make([][]APIMetricData, 0, len(req.Queries)),
    }
    if req.ForAllNodes != nil {
        nodes := ms.ListChildren([]string{req.Cluster})
        for _, node := range nodes {
            for _, metric := range req.ForAllNodes {
                q := APIQuery{
                    Metric:   metric,
                    Hostname: node,
                }
                req.Queries = append(req.Queries, q)
                response.Queries = append(response.Queries, q)
            }
        }
    }

    for _, query := range req.Queries {
        sels := make([]util.Selector, 0, 1)
        if query.Aggregate || query.Type == nil {
            sel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}
            if query.Type != nil {
                if len(query.TypeIds) == 1 {
                    sel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})
                } else {
                    ids := make([]string, len(query.TypeIds))
                    for i, id := range query.TypeIds {
                        ids[i] = *query.Type + id
                    }
                    sel = append(sel, util.SelectorElement{Group: ids})
                }

                if query.SubType != nil {
                    if len(query.SubTypeIds) == 1 {
                        sel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
                    } else {
                        ids := make([]string, len(query.SubTypeIds))
                        for i, id := range query.SubTypeIds {
                            ids[i] = *query.SubType + id
                        }
                        sel = append(sel, util.SelectorElement{Group: ids})
                    }
                }
            }
            sels = append(sels, sel)
        } else {
            for _, typeID := range query.TypeIds {
                if query.SubType != nil {
                    for _, subTypeID := range query.SubTypeIds {
                        sels = append(sels, util.Selector{
                            {String: req.Cluster},
                            {String: query.Hostname},
                            {String: *query.Type + typeID},
                            {String: *query.SubType + subTypeID},
                        })
                    }
                } else {
                    sels = append(sels, util.Selector{
                        {String: req.Cluster},
                        {String: query.Hostname},
                        {String: *query.Type + typeID},
                    })
                }
            }
        }

        // log.Printf("query: %#v\n", query)
        // log.Printf("sels: %#v\n", sels)
        var err error
        res := make([]APIMetricData, 0, len(sels))
        for _, sel := range sels {
            data := APIMetricData{}

            data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
            if err != nil {
                msg := err.Error()
                data.Error = &msg
                res = append(res, data)
                continue
            }

            if req.WithStats {
                data.AddStats()
            }
            if query.ScaleFactor != 0 {
                data.ScaleBy(query.ScaleFactor)
            }
            if req.WithPadding {
                data.PadDataWithNull(ms, req.From, req.To, query.Metric)
            }
            if !req.WithData {
                data.Data = nil
            }
            res = append(res, data)
        }
        response.Results = append(response.Results, res)
    }

    return &response, nil
}
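To illustrate how the query API above fits together, here is a minimal caller sketch, not part of this changeset, written as if it lived inside the memorystore package (the cluster, host, and metric names are taken from the fixtures above but are otherwise arbitrary; assumes the store is initialized and the standard fmt and time imports):

// Illustrative sketch only: query one metric for one host over the last
// hour, with statistics and NaN padding enabled.
func exampleFetch() {
    now := time.Now().Unix()
    req := APIQueryRequest{
        Cluster:     "fritz", // hypothetical cluster name
        From:        now - 3600,
        To:          now,
        WithStats:   true,
        WithPadding: true,
        Queries: []APIQuery{
            {
                Metric:     "flops_any", // hypothetical metric
                Hostname:   "f0720",     // hypothetical host
                Resolution: 60,
            },
        },
    }

    resp, err := FetchData(req)
    if err != nil {
        fmt.Println(err)
        return
    }
    // Results is index-aligned with req.Queries; each entry holds one
    // APIMetricData per matched selector.
    for _, res := range resp.Results {
        for _, data := range res {
            fmt.Println(data.From, data.To, data.Avg, data.Min, data.Max)
        }
    }
}

Note that FetchData forces req.WithData to true, so the raw series is always returned to callers of this in-process API.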
196 internal/memorystore/archive.go Normal file
@@ -0,0 +1,196 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
    "archive/zip"
    "bufio"
    "context"
    "errors"
    "fmt"
    "io"
    "os"
    "path/filepath"
    "sync"
    "sync/atomic"
    "time"

    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

func Archiving(wg *sync.WaitGroup, ctx context.Context) {
    go func() {
        defer wg.Done()
        d, err := time.ParseDuration(Keys.Archive.Interval)
        if err != nil {
            cclog.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
        }
        if d <= 0 {
            return
        }

        ticker := time.NewTicker(d)
        defer ticker.Stop()

        for {
            select {
            case <-ctx.Done():
                return
            case <-ticker.C:
                t := time.Now().Add(-d)
                cclog.Infof("[METRICSTORE]> start archiving checkpoints (older than %s)...", t.Format(time.RFC3339))
                n, err := ArchiveCheckpoints(Keys.Checkpoints.RootDir,
                    Keys.Archive.RootDir, t.Unix(), Keys.Archive.DeleteInstead)

                if err != nil {
                    cclog.Errorf("[METRICSTORE]> archiving failed: %s", err.Error())
                } else {
                    cclog.Infof("[METRICSTORE]> done: %d files zipped and moved to archive", n)
                }
            }
        }
    }()
}

var ErrNoNewArchiveData error = errors.New("all data already archived")

// ZIP all checkpoint files older than `from` together and write them to the `archiveDir`,
// deleting them from the `checkpointsDir`.
func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) {
    entries1, err := os.ReadDir(checkpointsDir)
    if err != nil {
        return 0, err
    }

    type workItem struct {
        cdir, adir    string
        cluster, host string
    }

    var wg sync.WaitGroup
    n, errs := int32(0), int32(0)
    work := make(chan workItem, Keys.NumWorkers)

    wg.Add(Keys.NumWorkers)
    for worker := 0; worker < Keys.NumWorkers; worker++ {
        go func() {
            defer wg.Done()
            for workItem := range work {
                m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead)
                if err != nil {
                    cclog.Errorf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
                    atomic.AddInt32(&errs, 1)
                }
                atomic.AddInt32(&n, int32(m))
            }
        }()
    }

    for _, de1 := range entries1 {
        entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
        if e != nil {
            err = e
        }

        for _, de2 := range entries2 {
            cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
            adir := filepath.Join(archiveDir, de1.Name(), de2.Name())
            work <- workItem{
                adir: adir, cdir: cdir,
                cluster: de1.Name(), host: de2.Name(),
            }
        }
    }

    close(work)
    wg.Wait()

    if err != nil {
        return int(n), err
    }

    if errs > 0 {
        return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
    }
    return int(n), nil
}

// Helper function for `ArchiveCheckpoints`.
func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) {
    entries, err := os.ReadDir(dir)
    if err != nil {
        return 0, err
    }

    extension := Keys.Checkpoints.FileFormat
    files, err := findFiles(entries, from, extension, false)
    if err != nil {
        return 0, err
    }

    if deleteInstead {
        n := 0
        for _, checkpoint := range files {
            filename := filepath.Join(dir, checkpoint)
            if err = os.Remove(filename); err != nil {
                return n, err
            }
            n += 1
        }
        return n, nil
    }

    filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
    f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
    if err != nil && os.IsNotExist(err) {
        err = os.MkdirAll(archiveDir, CheckpointDirPerms)
        if err == nil {
            f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
        }
    }
    if err != nil {
        return 0, err
    }
    defer f.Close()
    bw := bufio.NewWriter(f)
    defer bw.Flush()
    zw := zip.NewWriter(bw)
    defer zw.Close()

    n := 0
    for _, checkpoint := range files {
        // Use closure to ensure file is closed immediately after use,
        // avoiding file descriptor leak from defer in loop
        err := func() error {
            filename := filepath.Join(dir, checkpoint)
            r, err := os.Open(filename)
            if err != nil {
                return err
            }
            defer r.Close()

            w, err := zw.Create(checkpoint)
            if err != nil {
                return err
            }

            if _, err = io.Copy(w, r); err != nil {
                return err
            }

            if err = os.Remove(filename); err != nil {
                return err
            }
            return nil
        }()
        if err != nil {
            return n, err
        }
        n += 1
    }

    return n, nil
}
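Besides the periodic Archiving ticker, the archiver could in principle be invoked once by hand. A hedged sketch, not part of this changeset (assumes the global Keys configuration is already loaded, since ArchiveCheckpoints reads Keys.NumWorkers and Keys.Checkpoints.FileFormat internally; the directory paths are invented):

// Illustrative sketch only: archive everything older than 24 hours in
// one shot. Per host, the matching checkpoint files end up in a single
// <from>.zip inside that host's archive directory.
func exampleArchiveOnce() {
    cutoff := time.Now().Add(-24 * time.Hour).Unix()
    n, err := ArchiveCheckpoints("./var/checkpoints", "./var/archive", cutoff, false)
    if err != nil {
        fmt.Printf("archiving failed after %d files: %v\n", n, err)
        return
    }
    fmt.Printf("%d checkpoint files zipped and removed\n", n)
}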
477 internal/memorystore/avroCheckpoint.go Normal file
@@ -0,0 +1,477 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
    "bufio"
    "encoding/json"
    "errors"
    "fmt"
    "os"
    "path"
    "sort"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "time"

    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    "github.com/ClusterCockpit/cc-lib/schema"
    "github.com/linkedin/goavro/v2"
)

var NumAvroWorkers int = DefaultAvroWorkers
var startUp bool = true

func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
    levels := make([]*AvroLevel, 0)
    selectors := make([][]string, 0)
    as.root.lock.RLock()
    // Cluster
    for sel1, l1 := range as.root.children {
        l1.lock.RLock()
        // Node
        for sel2, l2 := range l1.children {
            l2.lock.RLock()
            // Frequency
            for sel3, l3 := range l2.children {
                levels = append(levels, l3)
                selectors = append(selectors, []string{sel1, sel2, sel3})
            }
            l2.lock.RUnlock()
        }
        l1.lock.RUnlock()
    }
    as.root.lock.RUnlock()

    type workItem struct {
        level    *AvroLevel
        dir      string
        selector []string
    }

    n, errs := int32(0), int32(0)

    var wg sync.WaitGroup
    wg.Add(NumAvroWorkers)
    work := make(chan workItem, NumAvroWorkers*2)
    for range NumAvroWorkers {
        go func() {
            defer wg.Done()

            for workItem := range work {
                from := getTimestamp(workItem.dir)

                if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
                    if err == ErrNoNewArchiveData {
                        continue
                    }

                    cclog.Errorf("error while checkpointing %#v: %s", workItem.selector, err.Error())
                    atomic.AddInt32(&errs, 1)
                } else {
                    atomic.AddInt32(&n, 1)
                }
            }
        }()
    }

    for i := range len(levels) {
        dir := path.Join(dir, path.Join(selectors[i]...))
        work <- workItem{
            level:    levels[i],
            dir:      dir,
            selector: selectors[i],
        }
    }

    close(work)
    wg.Wait()

    if errs > 0 {
        return int(n), fmt.Errorf("%d errors happened while creating avro checkpoints (%d successes)", errs, n)
    }

    startUp = false

    return int(n), nil
}

// getTimestamp returns the timestamp from the directory name
func getTimestamp(dir string) int64 {
    // Extract the resolution and timestamp from the directory name.
    // The existing avro file will be in epoch timestamp format.
    // Iterate over all the files in the directory, find the maximum timestamp,
    // and return it.

    resolution := path.Base(dir)
    dir = path.Dir(dir)

    files, err := os.ReadDir(dir)
    if err != nil {
        return 0
    }
    var maxTS int64 = 0

    if len(files) == 0 {
        return 0
    }

    for _, file := range files {
        if file.IsDir() {
            continue
        }
        name := file.Name()

        if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
            continue
        }

        ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
        if err != nil {
            fmt.Printf("error while parsing timestamp: %s\n", err.Error())
            continue
        }

        if ts > maxTS {
            maxTS = ts
        }
    }

    interval, _ := time.ParseDuration(Keys.Checkpoints.Interval)
    updateTime := time.Unix(maxTS, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()

    if startUp {
        return 0
    }

    if updateTime < time.Now().Unix() {
        return 0
    }

    return maxTS
}

func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
    l.lock.Lock()
    defer l.lock.Unlock()

    // fmt.Printf("Checkpointing directory: %s\n", dir)
    // filepath contains the resolution
    intRes, _ := strconv.Atoi(path.Base(dir))

    // find smallest overall timestamp in l.data map and delete it from l.data
    minTS := int64(1<<63 - 1)
    for ts, dat := range l.data {
        if ts < minTS && len(dat) != 0 {
            minTS = ts
        }
    }

    if from == 0 && minTS != int64(1<<63-1) {
        from = minTS
    }

    if from == 0 {
        return ErrNoNewArchiveData
    }

    var schema string
    var codec *goavro.Codec
    recordList := make([]map[string]any, 0)

    var f *os.File

    filePath := dir + fmt.Sprintf("_%d.avro", from)

    var err error

    fp_, err_ := os.Stat(filePath)
    if errors.Is(err_, os.ErrNotExist) {
        err = os.MkdirAll(path.Dir(dir), 0o755)
        if err != nil {
            return fmt.Errorf("failed to create directory: %v", err)
        }
    } else if fp_.Size() != 0 {
        f, err = os.Open(filePath)
        if err != nil {
            return fmt.Errorf("failed to open existing avro file: %v", err)
        }

        br := bufio.NewReader(f)

        reader, err := goavro.NewOCFReader(br)
        if err != nil {
            return fmt.Errorf("failed to create OCF reader: %v", err)
        }
        codec = reader.Codec()
        schema = codec.Schema()

        f.Close()
    }

    timeRef := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()

    if dumpAll {
        timeRef = time.Now().Unix()
    }

    // Empty values
    if len(l.data) == 0 {
        // we checkpoint avro files every 60 seconds
        repeat := 60 / intRes

        for range repeat {
            recordList = append(recordList, make(map[string]any))
        }
    }

    readFlag := true

    for ts := range l.data {
        flag := false
        if ts < timeRef {
            data := l.data[ts]

            schemaGen, err := generateSchema(data)
            if err != nil {
                return err
            }

            flag, schema, err = compareSchema(schema, schemaGen)
            if err != nil {
                return fmt.Errorf("failed to compare read and generated schema: %v", err)
            }
            if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {

                f.Close()

                f, err = os.Open(filePath)
                if err != nil {
                    return fmt.Errorf("failed to open Avro file: %v", err)
                }

                br := bufio.NewReader(f)

                ocfReader, err := goavro.NewOCFReader(br)
                if err != nil {
                    return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
                }

                for ocfReader.Scan() {
                    record, err := ocfReader.Read()
                    if err != nil {
                        return fmt.Errorf("failed to read record: %v", err)
                    }

                    recordList = append(recordList, record.(map[string]any))
                }

                f.Close()

                err = os.Remove(filePath)
                if err != nil {
                    return fmt.Errorf("failed to delete file: %v", err)
                }

                readFlag = false
            }
            codec, err = goavro.NewCodec(schema)
            if err != nil {
                return fmt.Errorf("failed to create codec after merged schema: %v", err)
            }

            recordList = append(recordList, generateRecord(data))
            delete(l.data, ts)
        }
    }

    if len(recordList) == 0 {
        return ErrNoNewArchiveData
    }

    f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
    if err != nil {
        return fmt.Errorf("failed to append new avro file: %v", err)
    }

    // fmt.Printf("Codec : %#v\n", codec)

    writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
        W:               f,
        Codec:           codec,
        CompressionName: goavro.CompressionDeflateLabel,
    })
    if err != nil {
        return fmt.Errorf("failed to create OCF writer: %v", err)
    }

    // Append the new record
    if err := writer.Append(recordList); err != nil {
        return fmt.Errorf("failed to append record: %v", err)
    }

    f.Close()

    return nil
}

func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
    var genSchema, readSchema AvroSchema

    if schemaRead == "" {
        return false, schemaGen, nil
    }

    // Unmarshal the schema strings into AvroSchema structs
    if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
        return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
    }
    if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
        return false, "", fmt.Errorf("failed to parse read schema: %v", err)
    }

    sort.Slice(genSchema.Fields, func(i, j int) bool {
        return genSchema.Fields[i].Name < genSchema.Fields[j].Name
    })

    sort.Slice(readSchema.Fields, func(i, j int) bool {
        return readSchema.Fields[i].Name < readSchema.Fields[j].Name
    })

    // Check if schemas are identical
    schemasEqual := true
    if len(genSchema.Fields) <= len(readSchema.Fields) {

        for i := range genSchema.Fields {
            if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
                schemasEqual = false
                break
            }
        }

        // If schemas are identical, return the read schema
        if schemasEqual {
            return false, schemaRead, nil
        }
    }

    // Create a map to hold unique fields from both schemas
    fieldMap := make(map[string]AvroField)

    // Add fields from the read schema
    for _, field := range readSchema.Fields {
        fieldMap[field.Name] = field
    }

    // Add or update fields from the generated schema
    for _, field := range genSchema.Fields {
        fieldMap[field.Name] = field
    }

    // Create a union schema by collecting fields from the map
    var mergedFields []AvroField
    for _, field := range fieldMap {
        mergedFields = append(mergedFields, field)
    }

    // Sort fields by name for consistency
    sort.Slice(mergedFields, func(i, j int) bool {
        return mergedFields[i].Name < mergedFields[j].Name
    })

    // Create the merged schema
    mergedSchema := AvroSchema{
        Type:   "record",
        Name:   genSchema.Name,
        Fields: mergedFields,
    }

    // Check if schemas are identical
    schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
    if schemasEqual {
        for i := range mergedSchema.Fields {
            if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
                schemasEqual = false
                break
            }
        }

        if schemasEqual {
            return false, schemaRead, nil
        }
    }

    // Marshal the merged schema back to JSON
    mergedSchemaJSON, err := json.Marshal(mergedSchema)
    if err != nil {
        return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
    }

    return true, string(mergedSchemaJSON), nil
}

func generateSchema(data map[string]schema.Float) (string, error) {
    // Define the Avro schema structure
    schema := map[string]any{
        "type":   "record",
        "name":   "DataRecord",
        "fields": []map[string]any{},
    }

    fieldTracker := make(map[string]struct{})

    for key := range data {
        if _, exists := fieldTracker[key]; !exists {
            key = correctKey(key)

            field := map[string]any{
                "name":    key,
                "type":    "double",
                "default": -1.0,
            }
            schema["fields"] = append(schema["fields"].([]map[string]any), field)
            fieldTracker[key] = struct{}{}
        }
    }

    schemaString, err := json.Marshal(schema)
    if err != nil {
        return "", fmt.Errorf("failed to marshal schema: %v", err)
    }

    return string(schemaString), nil
}

func generateRecord(data map[string]schema.Float) map[string]any {
    record := make(map[string]any)

    // Iterate through each map in data
    for key, value := range data {
        key = correctKey(key)

        // Set the value in the record
        // avro only accepts basic types
        record[key] = value.Double()
    }

    return record
}

func correctKey(key string) string {
    key = strings.ReplaceAll(key, "_", "_0x5F_")
    key = strings.ReplaceAll(key, ":", "_0x3A_")
    key = strings.ReplaceAll(key, ".", "_0x2E_")
    return key
}

func ReplaceKey(key string) string {
    key = strings.ReplaceAll(key, "_0x2E_", ".")
    key = strings.ReplaceAll(key, "_0x3A_", ":")
    key = strings.ReplaceAll(key, "_0x5F_", "_")
    return key
}
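The key escaping at the end of this file exists because Avro field names cannot contain characters like ":" or ".", which do occur in metric keys. Because correctKey escapes "_" first and ReplaceKey unescapes it last, the mapping is reversible. A quick round-trip sketch, not part of this changeset (the metric key is invented for illustration):

// Illustrative sketch only: the intended invariant is
// ReplaceKey(correctKey(k)) == k.
func exampleKeyRoundTrip() {
    k := "cpu_load:core.0" // hypothetical key containing all three special characters
    esc := correctKey(k)   // "cpu_0x5F_load_0x3A_core_0x2E_0"
    if ReplaceKey(esc) != k {
        panic("escaping is not reversible")
    }
}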
84 internal/memorystore/avroHelper.go Normal file
@@ -0,0 +1,84 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
    "context"
    "slices"
    "strconv"
    "sync"

    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
    // AvroPool is a pool of Avro writers.
    go func() {
        if Keys.Checkpoints.FileFormat == "json" {
            wg.Done() // Mark this goroutine as done
            return    // Exit the goroutine
        }

        defer wg.Done()

        var avroLevel *AvroLevel
        oldSelector := make([]string, 0)

        for {
            select {
            case <-ctx.Done():
                return
            case val := <-LineProtocolMessages:
                // Fetch the frequency of the metric from the global configuration
                freq, err := GetMetricFrequency(val.MetricName)
                if err != nil {
                    cclog.Errorf("Error fetching metric frequency: %s\n", err)
                    continue
                }

                metricName := ""

                for _, selectorName := range val.Selector {
                    metricName += selectorName + SelectorDelimiter
                }

                metricName += val.MetricName

                // Create a new selector for the Avro level
                // The selector is a slice of strings that represents the path to the
                // Avro level. It is created by appending the cluster, node, and metric
                // name to the selector.
                var selector []string
                selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))

                if !stringSlicesEqual(oldSelector, selector) {
                    // Get the Avro level for the metric
                    avroLevel = avroStore.root.findAvroLevelOrCreate(selector)

                    // If the Avro level is nil, create a new one
                    if avroLevel == nil {
                        cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
                    }
                    oldSelector = slices.Clone(selector)
                }

                avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq))
            }
        }
    }()
}

func stringSlicesEqual(a, b []string) bool {
    if len(a) != len(b) {
        return false
    }
    for i := range a {
        if a[i] != b[i] {
            return false
        }
    }
    return true
}
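For orientation, the producer side of the staging loop above could look like the following hedged sketch, not part of this changeset (in the real code the messages come from the line-protocol receiver; all field values here are invented, and time/schema imports are assumed):

// Illustrative sketch only: feeding one measurement into the staging
// channel consumed by DataStaging.
func exampleStageMetric() {
    LineProtocolMessages <- &AvroStruct{
        MetricName: "cpu_load", // hypothetical metric
        Cluster:    "fritz",    // hypothetical cluster
        Node:       "f0649",    // hypothetical node
        Selector:   []string{}, // node-level metric, no hwthread/accelerator scope
        Value:      schema.Float(42.0),
        Timestamp:  time.Now().Unix(),
    }
}

Note that LineProtocolMessages is created unbuffered (see avroStruct.go below), so each send blocks until the staging goroutine is ready to receive.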
167 internal/memorystore/avroStruct.go Normal file
@@ -0,0 +1,167 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
    "sync"

    "github.com/ClusterCockpit/cc-lib/schema"
)

var (
    LineProtocolMessages = make(chan *AvroStruct)
    // SelectorDelimiter separates hierarchical selector components in metric names for Avro encoding
    SelectorDelimiter = "_SEL_"
)

var CheckpointBufferMinutes = DefaultCheckpointBufferMin

type AvroStruct struct {
    MetricName string
    Cluster    string
    Node       string
    Selector   []string
    Value      schema.Float
    Timestamp  int64
}

type AvroStore struct {
    root AvroLevel
}

var avroStore AvroStore

type AvroLevel struct {
    children map[string]*AvroLevel
    data     map[int64]map[string]schema.Float
    lock     sync.RWMutex
}

type AvroField struct {
    Name    string `json:"name"`
    Type    any    `json:"type"`
    Default any    `json:"default,omitempty"`
}

type AvroSchema struct {
    Type   string      `json:"type"`
    Name   string      `json:"name"`
    Fields []AvroField `json:"fields"`
}

func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
    if len(selector) == 0 {
        return l
    }

    // Allow concurrent reads:
    l.lock.RLock()
    var child *AvroLevel
    var ok bool
    if l.children == nil {
        // Children map needs to be created...
        l.lock.RUnlock()
    } else {
        child, ok := l.children[selector[0]]
        l.lock.RUnlock()
        if ok {
            return child.findAvroLevelOrCreate(selector[1:])
        }
    }

    // The level does not exist, take write lock for unique access:
    l.lock.Lock()
    // While this thread waited for the write lock, another thread
    // could have created the child node.
    if l.children != nil {
        child, ok = l.children[selector[0]]
        if ok {
            l.lock.Unlock()
            return child.findAvroLevelOrCreate(selector[1:])
        }
    }

    child = &AvroLevel{
        data:     make(map[int64]map[string]schema.Float, 0),
        children: nil,
    }

    if l.children != nil {
        l.children[selector[0]] = child
    } else {
        l.children = map[string]*AvroLevel{selector[0]: child}
    }
    l.lock.Unlock()
    return child.findAvroLevelOrCreate(selector[1:])
}

func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
    l.lock.Lock()
    defer l.lock.Unlock()

    KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)

    // Create keys in advance for the given amount of time
    if len(l.data) != KeyCounter {
        if len(l.data) == 0 {
            for i := range KeyCounter {
                l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
            }
        } else {
            // Get the last timestamp
            var lastTS int64
            for ts := range l.data {
                if ts > lastTS {
                    lastTS = ts
                }
            }
            // Create keys for the next KeyCounter timestamps
            l.data[lastTS+int64(Freq)] = make(map[string]schema.Float, 0)
        }
    }

    closestTS := int64(0)
    minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
    found := false

    // Iterate over timestamps and choose the one which is within range.
    // Since it's epoch time, we check if the difference is less than 60 seconds.
    for ts, dat := range l.data {
        // Check if timestamp is within range
        diff := timestamp - ts
        if diff < -int64(Freq) || diff > int64(Freq) {
            continue
        }

        // Metric already present at this timestamp, skip
        if _, ok := dat[metricName]; ok {
            continue
        }

        // Check if this is the closest timestamp so far
        if Abs(diff) < minDiff {
            minDiff = Abs(diff)
            closestTS = ts
            found = true
        }
    }

    if found {
        l.data[closestTS][metricName] = value
    }
}

func GetAvroStore() *AvroStore {
    return &avroStore
}

// Abs returns the absolute value of x.
func Abs(x int64) int64 {
    if x < 0 {
        return -x
    }
    return x
}
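To make the bucketing in addMetric concrete: the level pre-creates evenly spaced timestamp keys, and each incoming sample is attached to the nearest free key within plus or minus one frequency interval. A hedged sketch, not part of this changeset (the numbers are invented; how many keys get pre-created depends on CheckpointBufferMinutes):

// Illustrative sketch only: bucketing behaviour of addMetric with a
// 60-second metric frequency.
func exampleBucketing() {
    lvl := &AvroLevel{data: make(map[int64]map[string]schema.Float)}

    base := int64(1_675_954_300)
    // First call pre-creates keys base, base+60, base+120, ... and
    // stores the value under the exactly matching key `base`.
    lvl.addMetric("mem_used", schema.Float(6.1), base, 60)
    // A sample 55 s later skips `base` (already holds mem_used) and
    // lands on `base+60`, the closest free key within the +/-60 s window.
    lvl.addMetric("mem_used", schema.Float(6.2), base+55, 60)
}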
190 internal/memorystore/buffer.go Normal file
@@ -0,0 +1,190 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
    "errors"
    "sync"

    "github.com/ClusterCockpit/cc-lib/schema"
)

// BufferCap is the default buffer capacity.
// buffer.data will only ever grow up to its capacity and a new link
// in the buffer chain will be created if needed so that no copying
// of data or reallocation needs to happen on writes.
const BufferCap int = DefaultBufferCapacity

var bufferPool sync.Pool = sync.Pool{
    New: func() any {
        return &buffer{
            data: make([]schema.Float, 0, BufferCap),
        }
    },
}

var (
    ErrNoData           error = errors.New("[METRICSTORE]> no data for this metric/level")
    ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
)

// Each metric on each level has its own buffer.
// This is where the actual values go.
// If `cap(data)` is reached, a new buffer is created and
// becomes the new head of a buffer list.
type buffer struct {
    prev      *buffer
    next      *buffer
    data      []schema.Float
    frequency int64
    start     int64
    archived  bool
    closed    bool
}

func newBuffer(ts, freq int64) *buffer {
    b := bufferPool.Get().(*buffer)
    b.frequency = freq
    b.start = ts - (freq / 2)
    b.prev = nil
    b.next = nil
    b.archived = false
    b.closed = false
    b.data = b.data[:0]
    return b
}

// If a new buffer was created, the new head is returned.
// Otherwise, the existing buffer is returned.
// Normally, only "newer" data should be written, but if the value would
// end up in the same buffer anyways it is allowed.
func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
    if ts < b.start {
        return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
    }

    // idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
    idx := int((ts - b.start) / b.frequency)
    if idx >= cap(b.data) {
        newbuf := newBuffer(ts, b.frequency)
        newbuf.prev = b
        b.next = newbuf
        b = newbuf
        idx = 0
    }

    // Overwriting value or writing value from past
    if idx < len(b.data) {
        b.data[idx] = value
        return b, nil
    }

    // Fill up unwritten slots with NaN
    for i := len(b.data); i < idx; i++ {
        b.data = append(b.data, schema.NaN)
    }

    b.data = append(b.data, value)
    return b, nil
}

func (b *buffer) end() int64 {
    return b.firstWrite() + int64(len(b.data))*b.frequency
}

func (b *buffer) firstWrite() int64 {
    return b.start + (b.frequency / 2)
}

// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Simple linear interpolation is done between the two neighboring cells if possible.
// If values at the start or end are missing, instead of NaN values, the second and third
// return values contain the actual `from`/`to`.
// This function goes back the buffer chain if `from` is older than the current buffer's start.
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
// If `data` is not long enough to hold all values, this function will panic!
func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
    if from < b.firstWrite() {
        if b.prev != nil {
            return b.prev.read(from, to, data)
        }
        from = b.firstWrite()
    }

    i := 0
    t := from
    for ; t < to; t += b.frequency {
        idx := int((t - b.start) / b.frequency)
        if idx >= cap(b.data) {
            if b.next == nil {
                break
            }
            b = b.next
            idx = 0
        }

        if idx >= len(b.data) {
            if b.next == nil || to <= b.next.start {
                break
            }
            data[i] += schema.NaN
        } else if t < b.start {
            data[i] += schema.NaN
        } else {
            data[i] += b.data[idx]
        }
        i++
    }

    return data[:i], from, t, nil
}

// Returns true if this buffer needs to be freed.
func (b *buffer) free(t int64) (delme bool, n int) {
    if b.prev != nil {
        delme, m := b.prev.free(t)
        n += m
        if delme {
            b.prev.next = nil
            if cap(b.prev.data) == BufferCap {
                bufferPool.Put(b.prev)
            }
            b.prev = nil
        }
    }

    end := b.end()
    if end < t {
        return true, n + 1
    }

    return false, n
}

// Call `callback` on every buffer that contains data in the range from `from` to `to`.
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
    if b == nil {
        return nil
    }

    if err := b.prev.iterFromTo(from, to, callback); err != nil {
        return err
    }

    if from <= b.end() && b.start <= to {
        return callback(b)
    }

    return nil
}

func (b *buffer) count() int64 {
    res := int64(len(b.data))
    if b.prev != nil {
        res += b.prev.count()
    }
    return res
}
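The `start = ts - freq/2` trick in newBuffer centers each slot on its nominal timestamp, so a sample may arrive up to half a period early and still land in slot 0. A small round-trip sketch, not part of this changeset (timestamps and frequency are invented; the output slice must be zero-initialized because read accumulates with `+=`):

// Illustrative sketch only: write two samples one period apart and read
// them back from the same buffer.
func exampleBufferRoundTrip() {
    b := newBuffer(1000, 10)              // start = 995
    b, _ = b.write(1000, schema.Float(1)) // slot 0
    b, _ = b.write(1010, schema.Float(2)) // slot 1

    out := make([]schema.Float, 2) // zeroed, as read() requires
    data, from, to, _ := b.read(1000, 1020, out)
    // data == [1 2], from == 1000, to == 1020
    _, _, _ = data, from, to
}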
761 internal/memorystore/checkpoint.go Normal file
@@ -0,0 +1,761 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package memorystore
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
"github.com/linkedin/goavro/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
CheckpointFilePerms = 0o644
|
||||
CheckpointDirPerms = 0o755
|
||||
GCTriggerInterval = DefaultGCTriggerInterval
|
||||
)
|
||||
|
||||
// Whenever changed, update MarshalJSON as well!
|
||||
type CheckpointMetrics struct {
|
||||
Data []schema.Float `json:"data"`
|
||||
Frequency int64 `json:"frequency"`
|
||||
Start int64 `json:"start"`
|
||||
}
|
||||
|
||||
type CheckpointFile struct {
|
||||
Metrics map[string]*CheckpointMetrics `json:"metrics"`
|
||||
Children map[string]*CheckpointFile `json:"children"`
|
||||
From int64 `json:"from"`
|
||||
To int64 `json:"to"`
|
||||
}
|
||||
|
||||
var lastCheckpoint time.Time
|
||||
|
||||
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
|
||||
lastCheckpoint = time.Now()
|
||||
|
||||
if Keys.Checkpoints.FileFormat == "json" {
|
||||
ms := GetMemoryStore()
|
||||
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
d, err := time.ParseDuration(Keys.Checkpoints.Interval)
|
||||
if err != nil {
|
||||
cclog.Fatal(err)
|
||||
}
|
||||
if d <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
cclog.Infof("[METRICSTORE]> start checkpointing (starting at %s)...", lastCheckpoint.Format(time.RFC3339))
|
||||
now := time.Now()
|
||||
n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir,
|
||||
lastCheckpoint.Unix(), now.Unix())
|
||||
if err != nil {
|
||||
cclog.Errorf("[METRICSTORE]> checkpointing failed: %s", err.Error())
|
||||
} else {
|
||||
cclog.Infof("[METRICSTORE]> done: %d checkpoint files created", n)
|
||||
lastCheckpoint = now
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
} else {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(time.Duration(CheckpointBufferMinutes) * time.Minute):
|
||||
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(DefaultAvroCheckpointInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
// As `Float` implements a custom MarshalJSON() function,
|
||||
// serializing an array of such types has more overhead
|
||||
// than one would assume (because of extra allocations, interfaces and so on).
|
||||
func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
|
||||
buf := make([]byte, 0, 128+len(cm.Data)*8)
|
||||
buf = append(buf, `{"frequency":`...)
|
||||
buf = strconv.AppendInt(buf, cm.Frequency, 10)
|
||||
buf = append(buf, `,"start":`...)
|
||||
buf = strconv.AppendInt(buf, cm.Start, 10)
|
||||
buf = append(buf, `,"data":[`...)
|
||||
for i, x := range cm.Data {
|
||||
if i != 0 {
|
||||
buf = append(buf, ',')
|
||||
}
|
||||
if x.IsNaN() {
|
||||
buf = append(buf, `null`...)
|
||||
} else {
|
||||
buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)
|
||||
}
|
||||
}
|
||||
buf = append(buf, `]}`...)
|
||||
return buf, nil
|
||||
}
|
||||
|
||||
// Metrics stored at the lowest 2 levels are not stored away (root and cluster)!
|
||||
// On a per-host basis a new JSON file is created. I have no idea if this will scale.
|
||||
// The good thing: Only a host at a time is locked, so this function can run
|
||||
// in parallel to writes/reads.
|
||||
func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
|
||||
levels := make([]*Level, 0)
|
||||
selectors := make([][]string, 0)
|
||||
m.root.lock.RLock()
|
||||
for sel1, l1 := range m.root.children {
|
||||
l1.lock.RLock()
|
||||
for sel2, l2 := range l1.children {
|
||||
levels = append(levels, l2)
|
||||
selectors = append(selectors, []string{sel1, sel2})
|
||||
}
|
||||
l1.lock.RUnlock()
|
||||
}
|
||||
m.root.lock.RUnlock()
|
||||
|
||||
type workItem struct {
|
||||
level *Level
|
||||
dir string
|
||||
selector []string
|
||||
}
|
||||
|
||||
n, errs := int32(0), int32(0)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(Keys.NumWorkers)
|
||||
work := make(chan workItem, Keys.NumWorkers*2)
|
||||
for worker := 0; worker < Keys.NumWorkers; worker++ {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
for workItem := range work {
|
||||
if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {
|
||||
if err == ErrNoNewArchiveData {
|
||||
continue
|
||||
}
|
||||
|
||||
cclog.Errorf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error())
|
||||
atomic.AddInt32(&errs, 1)
|
||||
} else {
|
||||
atomic.AddInt32(&n, 1)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
for i := 0; i < len(levels); i++ {
|
||||
dir := path.Join(dir, path.Join(selectors[i]...))
|
||||
work <- workItem{
|
||||
level: levels[i],
|
||||
dir: dir,
|
||||
selector: selectors[i],
|
||||
}
|
||||
}
|
||||
|
||||
close(work)
|
||||
wg.Wait()
|
||||
|
||||
if errs > 0 {
|
||||
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
|
||||
}
|
||||
return int(n), nil
|
||||
}
|
||||
|
||||
func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
|
||||
l.lock.RLock()
|
||||
defer l.lock.RUnlock()
|
||||
|
||||
retval := &CheckpointFile{
|
||||
From: from,
|
||||
To: to,
|
||||
Metrics: make(map[string]*CheckpointMetrics),
|
||||
Children: make(map[string]*CheckpointFile),
|
||||
}
|
||||
|
||||
for metric, minfo := range m.Metrics {
|
||||
b := l.metrics[minfo.offset]
|
||||
if b == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
allArchived := true
|
||||
b.iterFromTo(from, to, func(b *buffer) error {
|
||||
if !b.archived {
|
||||
allArchived = false
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if allArchived {
|
||||
continue
|
||||
}
|
||||
|
||||
data := make([]schema.Float, (to-from)/b.frequency+1)
|
||||
data, start, end, err := b.read(from, to, data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for i := int((end - start) / b.frequency); i < len(data); i++ {
|
||||
data[i] = schema.NaN
|
||||
}
|
||||
|
||||
retval.Metrics[metric] = &CheckpointMetrics{
|
||||
Frequency: b.frequency,
|
||||
Start: start,
|
||||
Data: data,
|
||||
}
|
||||
}
|
||||
|
||||
for name, child := range l.children {
|
||||
val, err := child.toCheckpointFile(from, to, m)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if val != nil {
|
||||
retval.Children[name] = val
|
||||
}
|
||||
}
|
||||
|
||||
if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
return retval, nil
|
||||
}
|
||||
|
||||
func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
|
||||
cf, err := l.toCheckpointFile(from, to, m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cf == nil {
|
||||
return ErrNoNewArchiveData
|
||||
}
|
||||
|
||||
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
|
||||
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
err = os.MkdirAll(dir, CheckpointDirPerms)
|
||||
if err == nil {
|
||||
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bw := bufio.NewWriter(f)
|
||||
if err = json.NewEncoder(bw).Encode(cf); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return bw.Flush()
|
||||
}
|
||||
|
||||
func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
|
||||
var wg sync.WaitGroup
|
||||
	work := make(chan [2]string, Keys.NumWorkers)
	n, errs := int32(0), int32(0)

	wg.Add(Keys.NumWorkers)
	for worker := 0; worker < Keys.NumWorkers; worker++ {
		go func() {
			defer wg.Done()
			for host := range work {
				lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))
				nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension)
				if err != nil {
					cclog.Errorf("[METRICSTORE]> error while loading checkpoints for %s/%s: %s", host[0], host[1], err.Error())
					atomic.AddInt32(&errs, 1)
				}
				atomic.AddInt32(&n, int32(nn))
			}
		}()
	}

	i := 0
	clustersDir, err := os.ReadDir(dir)
	for _, clusterDir := range clustersDir {
		if !clusterDir.IsDir() {
			err = errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
			goto done
		}

		hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
		if e != nil {
			err = e
			goto done
		}

		for _, hostDir := range hostsDir {
			if !hostDir.IsDir() {
				err = errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
				goto done
			}

			i++
			if i%Keys.NumWorkers == 0 && i > GCTriggerInterval {
				// Forcing garbage collection runs here regularly while loading
				// checkpoints decreases the total heap size once everything has
				// been loaded back into memory. While loading data, the heap
				// grows fast, so the GC target size would almost always double.
				// By forcing GCs here, we keep it growing more slowly, so that
				// at the end less memory is wasted.
				runtime.GC()
			}

			work <- [2]string{clusterDir.Name(), hostDir.Name()}
		}
	}
done:
	close(work)
	wg.Wait()

	if err != nil {
		return int(n), err
	}

	if errs > 0 {
		return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while loading checkpoints (%d successes)", errs, n)
	}
	return int(n), nil
}

// Metrics stored at the lowest 2 levels are not loaded (root and cluster)!
// This function can only be called once and before the very first write or read.
// Different hosts' data is loaded into memory in parallel.
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
	if _, err := os.Stat(dir); os.IsNotExist(err) {
		// The directory does not exist, so create it using os.MkdirAll()
		err := os.MkdirAll(dir, CheckpointDirPerms) // CheckpointDirPerms sets the permissions for the directory
		if err != nil {
			cclog.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
		}
		cclog.Debugf("[METRICSTORE]> %#v Directory created successfully", dir)
	}

	// Determine the configured checkpoint file format; 'avro' is the default.
	fileFormat := Keys.Checkpoints.FileFormat
	if fileFormat == "" {
		fileFormat = "avro"
	}

	// Map to easily get the fallback format
	oppositeFormat := map[string]string{
		"json": "avro",
		"avro": "json",
	}

	// First, attempt to load the specified format
	if found, err := checkFilesWithExtension(dir, fileFormat); err != nil {
		return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
	} else if found {
		cclog.Infof("[METRICSTORE]> Loading %s files because fileformat is %s", fileFormat, fileFormat)
		return m.FromCheckpoint(dir, from, fileFormat)
	}

	// If not found, attempt the opposite format
	altFormat := oppositeFormat[fileFormat]
	if found, err := checkFilesWithExtension(dir, altFormat); err != nil {
		return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
	} else if found {
		cclog.Infof("[METRICSTORE]> Loading %s files but fileformat is %s", altFormat, fileFormat)
		return m.FromCheckpoint(dir, from, altFormat)
	}

	cclog.Print("[METRICSTORE]> No valid checkpoint files found in the directory")
	return 0, nil
}
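
// For illustration, the on-disk layout this loader works with, derived from
// the path joins and file-name parsing in this file (the cluster and host
// names below are placeholders, not part of the implementation):
//
//	checkpoints/
//	    fritz/                     first level: cluster
//	        f0101/                 second level: host
//	            60_1700000000.avro <resolution>_<start-timestamp>.<extension>
//	            60_1700003600.json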

func checkFilesWithExtension(dir string, extension string) (bool, error) {
	found := false

	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return fmt.Errorf("[METRICSTORE]> error accessing path %s: %v", path, err)
		}
		if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension {
			found = true
			return nil
		}
		return nil
	})
	if err != nil {
		return false, fmt.Errorf("[METRICSTORE]> error walking through directories: %s", err)
	}

	return found, nil
}

func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
	br := bufio.NewReader(f)

	fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
	resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
	if err != nil {
		return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing): %s", err)
	}

	fromTimestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)
	if err != nil {
		return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file: %s", err)
	}

	// Same offset logic as in the line-protocol decoder
	fromTimestamp -= (resolution / 2)

	// fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)

	var recordCounter int64 = 0

	// Create a new OCF reader from the buffered reader
	ocfReader, err := goavro.NewOCFReader(br)
	if err != nil {
		return fmt.Errorf("[METRICSTORE]> error creating OCF reader: %w", err)
	}

	metricsData := make(map[string]schema.FloatArray)

	for ocfReader.Scan() {
		datum, err := ocfReader.Read()
		if err != nil {
			return fmt.Errorf("[METRICSTORE]> error while reading avro file: %s", err)
		}

		record, ok := datum.(map[string]any)
		if !ok {
			return fmt.Errorf("[METRICSTORE]> failed to assert datum as map[string]any")
		}

		for key, value := range record {
			metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64)))
		}

		recordCounter += 1
	}

	to := (fromTimestamp + (recordCounter / (60 / resolution) * 60))
	if to < from {
		return nil
	}

	for key, floatArray := range metricsData {
		metricName := ReplaceKey(key)

		if strings.Contains(metricName, SelectorDelimiter) {
			subString := strings.Split(metricName, SelectorDelimiter)

			lvl := l

			for i := 0; i < len(subString)-1; i++ {
				sel := subString[i]

				if lvl.children == nil {
					lvl.children = make(map[string]*Level)
				}

				child, ok := lvl.children[sel]
				if !ok {
					child = &Level{
						metrics:  make([]*buffer, len(m.Metrics)),
						children: nil,
					}
					lvl.children[sel] = child
				}
				lvl = child
			}

			leafMetricName := subString[len(subString)-1]
			err = lvl.createBuffer(m, leafMetricName, floatArray, fromTimestamp, resolution)
			if err != nil {
				return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader: %s", err)
			}
		} else {
			err = l.createBuffer(m, metricName, floatArray, fromTimestamp, resolution)
			if err != nil {
				return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader: %s", err)
			}
		}
	}

	return nil
}

func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
	n := len(floatArray)
	b := &buffer{
		frequency: resolution,
		start:     from,
		data:      floatArray[0:n:n],
		prev:      nil,
		next:      nil,
		archived:  true,
	}

	minfo, ok := m.Metrics[metricName]
	if !ok {
		return nil
	}

	prev := l.metrics[minfo.offset]
	if prev == nil {
		l.metrics[minfo.offset] = b
	} else {
		if prev.start > b.start {
			return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
		}

		b.prev = prev
		prev.next = b

		missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
		if missingCount > 0 {
			missingCount /= int(b.frequency)

			for range missingCount {
				prev.data = append(prev.data, schema.NaN)
			}

			prev.data = prev.data[0:len(prev.data):len(prev.data)]
		}
	}
	l.metrics[minfo.offset] = b

	return nil
}

func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {
	br := bufio.NewReader(f)
	cf := &CheckpointFile{}
	if err := json.NewDecoder(br).Decode(cf); err != nil {
		return err
	}

	if cf.To != 0 && cf.To < from {
		return nil
	}

	if err := l.loadFile(cf, m); err != nil {
		return err
	}

	return nil
}

func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
	for name, metric := range cf.Metrics {
		n := len(metric.Data)
		b := &buffer{
			frequency: metric.Frequency,
			start:     metric.Start,
			data:      metric.Data[0:n:n],
			prev:      nil,
			next:      nil,
			archived:  true,
		}

		minfo, ok := m.Metrics[name]
		if !ok {
			continue
		}

		prev := l.metrics[minfo.offset]
		if prev == nil {
			l.metrics[minfo.offset] = b
		} else {
			if prev.start > b.start {
				return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
			}

			b.prev = prev
			prev.next = b
		}
		l.metrics[minfo.offset] = b
	}

	if len(cf.Children) > 0 && l.children == nil {
		l.children = make(map[string]*Level)
	}

	for sel, childCf := range cf.Children {
		child, ok := l.children[sel]
		if !ok {
			child = &Level{
				metrics:  make([]*buffer, len(m.Metrics)),
				children: nil,
			}
			l.children[sel] = child
		}

		if err := child.loadFile(childCf, m); err != nil {
			return err
		}
	}

	return nil
}

func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) {
	direntries, err := os.ReadDir(dir)
	if err != nil {
		if os.IsNotExist(err) {
			return 0, nil
		}

		return 0, err
	}

	allFiles := make([]fs.DirEntry, 0)
	filesLoaded := 0
	for _, e := range direntries {
		if e.IsDir() {
			child := &Level{
				metrics:  make([]*buffer, len(m.Metrics)),
				children: make(map[string]*Level),
			}

			files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension)
			filesLoaded += files
			if err != nil {
				return filesLoaded, err
			}

			l.children[e.Name()] = child
		} else if strings.HasSuffix(e.Name(), "."+extension) {
			allFiles = append(allFiles, e)
		} else {
			continue
		}
	}

	files, err := findFiles(allFiles, from, extension, true)
	if err != nil {
		return filesLoaded, err
	}

	loaders := map[string]func(*MemoryStore, *os.File, int64) error{
		"json": l.loadJSONFile,
		"avro": l.loadAvroFile,
	}

	loader := loaders[extension]

	for _, filename := range files {
		// Use a closure to ensure the file is closed immediately after use
		err := func() error {
			f, err := os.Open(path.Join(dir, filename))
			if err != nil {
				return err
			}
			defer f.Close()

			return loader(m, f, from)
		}()
		if err != nil {
			return filesLoaded, err
		}

		filesLoaded += 1
	}

	return filesLoaded, nil
}

// This will probably get very slow over time!
// A solution could be some sort of index file in which all other files
// and the timespans they contain are listed.
func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) {
	nums := map[string]int64{}
	for _, e := range direntries {
		if !strings.HasSuffix(e.Name(), "."+extension) {
			continue
		}

		ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64)
		if err != nil {
			return nil, err
		}
		nums[e.Name()] = ts
	}

	sort.Slice(direntries, func(i, j int) bool {
		a, b := direntries[i], direntries[j]
		return nums[a.Name()] < nums[b.Name()]
	})

	filenames := make([]string, 0)
	for i := range direntries {
		e := direntries[i]
		ts1 := nums[e.Name()]

		if findMoreRecentFiles && t <= ts1 {
			filenames = append(filenames, e.Name())
		}
		if i == len(direntries)-1 {
			continue
		}

		enext := direntries[i+1]
		ts2 := nums[enext.Name()]

		if findMoreRecentFiles {
			if ts1 < t && t < ts2 {
				filenames = append(filenames, e.Name())
			}
		} else {
			if ts2 < t {
				filenames = append(filenames, e.Name())
			}
		}
	}

	return filenames, nil
}
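
// A minimal sketch of the index-file idea mentioned above (an assumption, not
// part of the current implementation): one small JSON file per directory that
// maps checkpoint file names to the timespan they cover, so findFiles could
// look entries up instead of stat-ing, parsing, and sorting every file:
//
//	type checkpointIndexEntry struct {
//		File string `json:"file"` // e.g. "60_1700000000.avro"
//		From int64  `json:"from"` // first timestamp contained
//		To   int64  `json:"to"`   // last timestamp contained
//	}
//
//	type checkpointIndex struct {
//		Entries []checkpointIndexEntry `json:"entries"` // kept sorted by From
//	}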

117
internal/memorystore/config.go
Normal file
@@ -0,0 +1,117 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
	"fmt"
	"time"
)

const (
	DefaultMaxWorkers             = 10
	DefaultBufferCapacity         = 512
	DefaultGCTriggerInterval      = 100
	DefaultAvroWorkers            = 4
	DefaultCheckpointBufferMin    = 3
	DefaultAvroCheckpointInterval = time.Minute
)

var InternalCCMSFlag bool = false

type MetricStoreConfig struct {
	// Number of concurrent workers for checkpoint and archive operations.
	// If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10)
	NumWorkers  int `json:"num-workers"`
	Checkpoints struct {
		FileFormat string `json:"file-format"`
		Interval   string `json:"interval"`
		RootDir    string `json:"directory"`
		Restore    string `json:"restore"`
	} `json:"checkpoints"`
	Debug struct {
		DumpToFile string `json:"dump-to-file"`
		EnableGops bool   `json:"gops"`
	} `json:"debug"`
	RetentionInMemory string `json:"retention-in-memory"`
	Archive           struct {
		Interval      string `json:"interval"`
		RootDir       string `json:"directory"`
		DeleteInstead bool   `json:"delete-instead"`
	} `json:"archive"`
	Subscriptions []struct {
		// Channel name
		SubscribeTo string `json:"subscribe-to"`

		// Allow lines without a cluster tag, use this as default, optional
		ClusterTag string `json:"cluster-tag"`
	} `json:"subscriptions"`
}

var Keys MetricStoreConfig

// AggregationStrategy for aggregation over multiple values at different cpus/sockets/..., not time!
type AggregationStrategy int

const (
	NoAggregation AggregationStrategy = iota
	SumAggregation
	AvgAggregation
)

func AssignAggregationStrategy(str string) (AggregationStrategy, error) {
	switch str {
	case "":
		return NoAggregation, nil
	case "sum":
		return SumAggregation, nil
	case "avg":
		return AvgAggregation, nil
	default:
		return NoAggregation, fmt.Errorf("[METRICSTORE]> unknown aggregation strategy: %s", str)
	}
}

type MetricConfig struct {
	// Interval in seconds at which measurements are stored
	Frequency int64

	// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
	Aggregation AggregationStrategy

	// Private, used internally...
	offset int
}

var Metrics map[string]MetricConfig

func GetMetricFrequency(metricName string) (int64, error) {
	if metric, ok := Metrics[metricName]; ok {
		return metric.Frequency, nil
	}
	return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}

// AddMetric adds a metric to the global Metrics map.
// If a metric with that name already exists (a redundant definition), it is
// kept and its frequency is raised to the larger of the two values; a lower
// frequency never downgrades an existing entry.
func AddMetric(name string, metric MetricConfig) error {
	if Metrics == nil {
		Metrics = make(map[string]MetricConfig, 0)
	}

	if existingMetric, ok := Metrics[name]; ok {
		if existingMetric.Frequency != metric.Frequency {
			if existingMetric.Frequency < metric.Frequency {
				existingMetric.Frequency = metric.Frequency
				Metrics[name] = existingMetric
			}
		}
	} else {
		Metrics[name] = metric
	}

	return nil
}
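
// A hedged example of a JSON snippet this struct would decode. The field
// names come from the struct tags above; the concrete values are made up:
//
//	{
//	  "num-workers": 4,
//	  "checkpoints": {
//	    "file-format": "avro",
//	    "interval": "12h",
//	    "directory": "./var/checkpoints",
//	    "restore": "48h"
//	  },
//	  "retention-in-memory": "48h",
//	  "archive": {
//	    "interval": "168h",
//	    "directory": "./var/archive",
//	    "delete-instead": false
//	  },
//	  "subscriptions": [
//	    { "subscribe-to": "hpc-nats", "cluster-tag": "fritz" }
//	  ]
//	}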

95
internal/memorystore/configSchema.go
Normal file
@@ -0,0 +1,95 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

const configSchema = `{
	"type": "object",
	"description": "Configuration specific to built-in metric-store.",
	"properties": {
		"checkpoints": {
			"description": "Configuration for checkpointing the metrics within metric-store",
			"type": "object",
			"properties": {
				"file-format": {
					"description": "Specify the type of checkpoint file. There are 2 variants: 'avro' and 'json'. If nothing is specified, 'avro' is the default.",
					"type": "string"
				},
				"interval": {
					"description": "Interval at which the metrics should be checkpointed.",
					"type": "string"
				},
				"directory": {
					"description": "Specify the parent directory in which the checkpointed files should be placed.",
					"type": "string"
				},
				"restore": {
					"description": "When cc-backend starts up, look for checkpointed files that are less than X hours old and load metrics from these selected checkpoint files.",
					"type": "string"
				}
			}
		},
		"archive": {
			"description": "Configuration for archiving the already checkpointed files.",
			"type": "object",
			"properties": {
				"interval": {
					"description": "Interval at which the checkpointed files should be archived.",
					"type": "string"
				},
				"directory": {
					"description": "Specify the parent directory in which the archived files should be placed.",
					"type": "string"
				}
			}
		},
		"retention-in-memory": {
			"description": "Keep the metrics within memory for the given time interval. Retention for X hours; after that, the metrics are freed.",
			"type": "string"
		},
		"nats": {
			"description": "Configuration for accepting published data through NATS.",
			"type": "array",
			"items": {
				"type": "object",
				"properties": {
					"address": {
						"description": "Address of the NATS server.",
						"type": "string"
					},
					"username": {
						"description": "Optional: If configured with username/password method.",
						"type": "string"
					},
					"password": {
						"description": "Optional: If configured with username/password method.",
						"type": "string"
					},
					"creds-file-path": {
						"description": "Optional: If configured with Credential File method. Path to your NATS cred file.",
						"type": "string"
					},
					"subscriptions": {
						"description": "Array of various subscriptions. Allows subscribing to different subjects and publishers.",
						"type": "array",
						"items": {
							"type": "object",
							"properties": {
								"subscribe-to": {
									"description": "Channel name",
									"type": "string"
								},
								"cluster-tag": {
									"description": "Optional: Allow lines without a cluster tag, use this as default",
									"type": "string"
								}
							}
						}
					}
				}
			}
		}
	}
}`

112
internal/memorystore/debug.go
Normal file
@@ -0,0 +1,112 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
	"bufio"
	"fmt"
	"strconv"
)

func (b *buffer) debugDump(buf []byte) []byte {
	if b.prev != nil {
		buf = b.prev.debugDump(buf)
	}

	start, length, end := b.start, len(b.data), b.start+b.frequency*int64(len(b.data))
	buf = append(buf, `{"start":`...)
	buf = strconv.AppendInt(buf, start, 10)
	buf = append(buf, `,"len":`...)
	buf = strconv.AppendInt(buf, int64(length), 10)
	buf = append(buf, `,"end":`...)
	buf = strconv.AppendInt(buf, end, 10)
	if b.archived {
		buf = append(buf, `,"saved":true`...)
	}
	if b.next != nil {
		buf = append(buf, `},`...)
	} else {
		buf = append(buf, `}`...)
	}
	return buf
}

func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) {
	l.lock.RLock()
	defer l.lock.RUnlock()
	for i := 0; i < depth; i++ {
		buf = append(buf, '\t')
	}
	buf = append(buf, '"')
	buf = append(buf, lvlname...)
	buf = append(buf, "\":{\n"...)
	depth += 1
	objitems := 0
	for name, mc := range m.Metrics {
		if b := l.metrics[mc.offset]; b != nil {
			for i := 0; i < depth; i++ {
				buf = append(buf, '\t')
			}

			buf = append(buf, '"')
			buf = append(buf, name...)
			buf = append(buf, `":[`...)
			buf = b.debugDump(buf)
			buf = append(buf, "],\n"...)
			objitems++
		}
	}

	for name, lvl := range l.children {
		_, err := w.Write(buf)
		if err != nil {
			return nil, err
		}

		buf = buf[0:0]
		buf, err = lvl.debugDump(m, w, name, buf, depth)
		if err != nil {
			return nil, err
		}

		buf = append(buf, ',', '\n')
		objitems++
	}

	// remove final `,` (the last two bytes are ",\n"):
	if objitems > 0 {
		buf = append(buf[0:len(buf)-2], '\n')
	}

	depth -= 1
	for i := 0; i < depth; i++ {
		buf = append(buf, '\t')
	}
	buf = append(buf, '}')
	return buf, nil
}

func (m *MemoryStore) DebugDump(w *bufio.Writer, selector []string) error {
	lvl := m.root.findLevel(selector)
	if lvl == nil {
		return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
	}

	buf := make([]byte, 0, 2048)
	buf = append(buf, "{"...)

	buf, err := lvl.debugDump(m, w, "data", buf, 0)
	if err != nil {
		return err
	}

	buf = append(buf, "}\n"...)
	if _, err = w.Write(buf); err != nil {
		return err
	}

	return w.Flush()
}
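
// A small usage sketch for DebugDump (the selector values are placeholders;
// the caller owns the writer):
//
//	ms := GetMemoryStore()
//	w := bufio.NewWriter(os.Stdout)
//	if err := ms.DebugDump(w, []string{"fritz", "f0101"}); err != nil {
//		cclog.Errorf("debug dump failed: %s", err.Error())
//	}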

92
internal/memorystore/healthcheck.go
Normal file
@@ -0,0 +1,92 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
	"bufio"
	"fmt"
	"time"
)

// MaxMissingDataPoints is a threshold that allows a node to be considered healthy
// with a certain number of data points missing. If a node misses up to the last 5
// data points of a metric, the healthCheck endpoint still reports that metric as
// healthy; anything beyond that deems the metric unhealthy.
const MaxMissingDataPoints int64 = 5

// MaxUnhealthyMetrics is a threshold that allows up to a certain number of metrics
// on a node to be unhealthy. It works together with MaxMissingDataPoints: as long
// as fewer than 5 metrics (including submetrics) miss their last
// MaxMissingDataPoints data points, the node is deemed healthy. At 5 or more such
// metrics, the node is deemed unhealthy.
const MaxUnhealthyMetrics int64 = 5

func (b *buffer) healthCheck() int64 {
	// Check if the buffer is empty
	if b.data == nil {
		return 1
	}

	bufferEnd := b.start + b.frequency*int64(len(b.data))
	t := time.Now().Unix()

	// Check if the buffer is too old
	if t-bufferEnd > MaxMissingDataPoints*b.frequency {
		return 1
	}

	return 0
}

func (l *Level) healthCheck(m *MemoryStore, count int64) (int64, error) {
	l.lock.RLock()
	defer l.lock.RUnlock()

	for _, mc := range m.Metrics {
		if b := l.metrics[mc.offset]; b != nil {
			count += b.healthCheck()
		}
	}

	for _, lvl := range l.children {
		c, err := lvl.healthCheck(m, 0)
		if err != nil {
			return 0, err
		}
		count += c
	}

	return count, nil
}

func (m *MemoryStore) HealthCheck(w *bufio.Writer, selector []string) error {
	lvl := m.root.findLevel(selector)
	if lvl == nil {
		return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
	}

	buf := make([]byte, 0, 25)

	var count int64 = 0

	unhealthyMetricsCount, err := lvl.healthCheck(m, count)
	if err != nil {
		return err
	}

	if unhealthyMetricsCount < MaxUnhealthyMetrics {
		buf = append(buf, "Healthy"...)
	} else {
		buf = append(buf, "Unhealthy"...)
	}

	if _, err = w.Write(buf); err != nil {
		return err
	}

	return w.Flush()
}
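
// Worked example of the thresholds above: for a metric with a 60 s frequency,
// a buffer counts as unhealthy once its newest sample is older than
// MaxMissingDataPoints * frequency = 5 * 60 s = 300 s. A node stays "Healthy"
// as long as fewer than MaxUnhealthyMetrics (5) of its buffers are in that
// state; at 5 or more, HealthCheck reports "Unhealthy".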

192
internal/memorystore/level.go
Normal file
@@ -0,0 +1,192 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
	"sync"
	"unsafe"

	"github.com/ClusterCockpit/cc-lib/util"
)

// Could also be called "node" as this forms a node in a tree structure.
// Called Level because "node" might be confusing here.
// Can be both a leaf or an inner node. In this tree structure, inner nodes
// can also hold data (in `metrics`).
type Level struct {
	children map[string]*Level
	metrics  []*buffer
	lock     sync.RWMutex
}

// Find the correct level for the given selector, creating it if
// it does not exist. An example selector in the context of
// ClusterCockpit could be: []string{"emmy", "host123", "cpu0"}.
// This function would probably benefit a lot from `level.children` being a `sync.Map`.
func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
	if len(selector) == 0 {
		return l
	}

	// Allow concurrent reads:
	l.lock.RLock()
	var child *Level
	var ok bool
	if l.children == nil {
		// Children map needs to be created...
		l.lock.RUnlock()
	} else {
		child, ok = l.children[selector[0]]
		l.lock.RUnlock()
		if ok {
			return child.findLevelOrCreate(selector[1:], nMetrics)
		}
	}

	// The level does not exist, take write lock for unique access:
	l.lock.Lock()
	// While this thread waited for the write lock, another thread
	// could have created the child node.
	if l.children != nil {
		child, ok = l.children[selector[0]]
		if ok {
			l.lock.Unlock()
			return child.findLevelOrCreate(selector[1:], nMetrics)
		}
	}

	child = &Level{
		metrics:  make([]*buffer, nMetrics),
		children: nil,
	}

	if l.children != nil {
		l.children[selector[0]] = child
	} else {
		l.children = map[string]*Level{selector[0]: child}
	}
	l.lock.Unlock()
	return child.findLevelOrCreate(selector[1:], nMetrics)
}
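
// For illustration (the names are placeholders): starting at the root,
// findLevelOrCreate([]string{"emmy", "host123", "cpu0"}, n) walks or creates
// root -> "emmy" -> "host123" -> "cpu0" and returns the "cpu0" level. A second
// call with the same selector only takes the read-lock fast path.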

func (l *Level) free(t int64) (int, error) {
	l.lock.Lock()
	defer l.lock.Unlock()

	n := 0
	for i, b := range l.metrics {
		if b != nil {
			delme, m := b.free(t)
			n += m
			if delme {
				if cap(b.data) == BufferCap {
					bufferPool.Put(b)
				}
				l.metrics[i] = nil
			}
		}
	}

	for _, child := range l.children {
		m, err := child.free(t)
		n += m
		if err != nil {
			return n, err
		}
	}

	return n, nil
}

func (l *Level) sizeInBytes() int64 {
	l.lock.RLock()
	defer l.lock.RUnlock()
	size := int64(0)

	for _, b := range l.metrics {
		if b != nil {
			size += b.count() * int64(unsafe.Sizeof(util.Float(0)))
		}
	}

	for _, child := range l.children {
		size += child.sizeInBytes()
	}

	return size
}

func (l *Level) findLevel(selector []string) *Level {
	if len(selector) == 0 {
		return l
	}

	l.lock.RLock()
	defer l.lock.RUnlock()

	lvl := l.children[selector[0]]
	if lvl == nil {
		return nil
	}

	return lvl.findLevel(selector[1:])
}

func (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error {
	l.lock.RLock()
	defer l.lock.RUnlock()

	if len(selector) == 0 {
		b := l.metrics[offset]
		if b != nil {
			return f(b)
		}

		for _, lvl := range l.children {
			err := lvl.findBuffers(nil, offset, f)
			if err != nil {
				return err
			}
		}
		return nil
	}

	sel := selector[0]
	if len(sel.String) != 0 && l.children != nil {
		lvl, ok := l.children[sel.String]
		if ok {
			err := lvl.findBuffers(selector[1:], offset, f)
			if err != nil {
				return err
			}
		}
		return nil
	}

	if sel.Group != nil && l.children != nil {
		for _, key := range sel.Group {
			lvl, ok := l.children[key]
			if ok {
				err := lvl.findBuffers(selector[1:], offset, f)
				if err != nil {
					return err
				}
			}
		}
		return nil
	}

	if sel.Any && l.children != nil {
		for _, lvl := range l.children {
			if err := lvl.findBuffers(selector[1:], offset, f); err != nil {
				return err
			}
		}
		return nil
	}

	return nil
}

258
internal/memorystore/lineprotocol.go
Normal file
@@ -0,0 +1,258 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-backend/pkg/nats"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/influxdata/line-protocol/v2/lineprotocol"
)

func ReceiveNats(ms *MemoryStore,
	workers int,
	ctx context.Context,
) error {
	nc := nats.GetClient()

	if nc == nil {
		cclog.Warn("NATS client not initialized")
		return nil
	}

	var wg sync.WaitGroup

	msgs := make(chan []byte, workers*2)

	for _, sc := range Keys.Subscriptions {
		clusterTag := sc.ClusterTag
		if workers > 1 {
			wg.Add(workers)

			for range workers {
				go func() {
					for m := range msgs {
						dec := lineprotocol.NewDecoderWithBytes(m)
						if err := DecodeLine(dec, ms, clusterTag); err != nil {
							cclog.Errorf("error: %s", err.Error())
						}
					}

					wg.Done()
				}()
			}

			nc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {
				msgs <- data
			})
		} else {
			nc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {
				dec := lineprotocol.NewDecoderWithBytes(data)
				if err := DecodeLine(dec, ms, clusterTag); err != nil {
					cclog.Errorf("error: %s", err.Error())
				}
			})
		}
		cclog.Infof("NATS subscription to '%s' established", sc.SubscribeTo)
	}

	close(msgs)
	wg.Wait()

	return nil
}

// Place `prefix` in front of `buf`, but if possible do that in place in `buf`.
func reorder(buf, prefix []byte) []byte {
	n := len(prefix)
	m := len(buf)
	if cap(buf) < m+n {
		return append(prefix[:n:n], buf...)
	} else {
		buf = buf[:n+m]
		for i := m - 1; i >= 0; i-- {
			buf[i+n] = buf[i]
		}
		for i := range n {
			buf[i] = prefix[i]
		}
		return buf
	}
}
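
// Example of reorder's contract (hypothetical tag values): if a "type-id" tag
// arrives first, typeBuf holds "0"; when the "type" tag "cpu" arrives later,
// reorder(typeBuf, []byte("cpu")) yields "cpu0" — shifted in place if typeBuf
// has enough spare capacity, otherwise via a single new allocation.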

// Decode lines using dec and make write calls to the MemoryStore.
// If a line is missing its cluster tag, use clusterDefault as default.
func DecodeLine(dec *lineprotocol.Decoder,
	ms *MemoryStore,
	clusterDefault string,
) error {
	// Reduce allocations in loop:
	t := time.Now()
	metric, metricBuf := Metric{}, make([]byte, 0, 16)
	selector := make([]string, 0, 4)
	typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)

	// Optimize for the case where all lines in a "batch" are about the same
	// cluster and host. By using `WriteToLevel` (level = host), we do not need
	// to take the root- and cluster-level lock as often.
	var lvl *Level = nil
	prevCluster, prevHost := "", ""

	var ok bool
	for dec.Next() {
		rawmeasurement, err := dec.Measurement()
		if err != nil {
			return err
		}

		// Needs to be copied because another call to dec.* would
		// invalidate the returned slice.
		metricBuf = append(metricBuf[:0], rawmeasurement...)

		// The go compiler optimizes map[string(byteslice)] lookups:
		metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
		if !ok {
			continue
		}

		typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0]
		cluster, host := clusterDefault, ""
		for {
			key, val, err := dec.NextTag()
			if err != nil {
				return err
			}
			if key == nil {
				break
			}

			// The go compiler optimizes string([]byte{...}) == "...":
			switch string(key) {
			case "cluster":
				if string(val) == prevCluster {
					cluster = prevCluster
				} else {
					cluster = string(val)
					lvl = nil
				}
			case "hostname", "host":
				if string(val) == prevHost {
					host = prevHost
				} else {
					host = string(val)
					lvl = nil
				}
			case "type":
				if string(val) == "node" {
					break
				}

				// We cannot be sure that the "type" tag comes before the "type-id" tag:
				if len(typeBuf) == 0 {
					typeBuf = append(typeBuf, val...)
				} else {
					typeBuf = reorder(typeBuf, val)
				}
			case "type-id":
				typeBuf = append(typeBuf, val...)
			case "subtype":
				// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
				if len(subTypeBuf) == 0 {
					subTypeBuf = append(subTypeBuf, val...)
				} else {
					subTypeBuf = reorder(subTypeBuf, val)
				}
			case "stype-id":
				subTypeBuf = append(subTypeBuf, val...)
			default:
			}
		}

		// If the cluster or host changed, the lvl was set to nil
		if lvl == nil {
			selector = selector[:2]
			selector[0], selector[1] = cluster, host
			lvl = ms.GetLevel(selector)
			prevCluster, prevHost = cluster, host
		}

		// subtypes:
		selector = selector[:0]
		if len(typeBuf) > 0 {
			selector = append(selector, string(typeBuf)) // <- Allocation :(
			if len(subTypeBuf) > 0 {
				selector = append(selector, string(subTypeBuf))
			}
		}

		for {
			key, val, err := dec.NextField()
			if err != nil {
				return err
			}

			if key == nil {
				break
			}

			if string(key) != "value" {
				return fmt.Errorf("host %s: unknown field: '%s' (value: %#v)", host, string(key), val)
			}

			if val.Kind() == lineprotocol.Float {
				metric.Value = schema.Float(val.FloatV())
			} else if val.Kind() == lineprotocol.Int {
				metric.Value = schema.Float(val.IntV())
			} else if val.Kind() == lineprotocol.Uint {
				metric.Value = schema.Float(val.UintV())
			} else {
				return fmt.Errorf("host %s: unsupported value type in message: %s", host, val.Kind().String())
			}
		}

		if t, err = dec.Time(lineprotocol.Second, t); err != nil {
			t = time.Now()
			if t, err = dec.Time(lineprotocol.Millisecond, t); err != nil {
				t = time.Now()
				if t, err = dec.Time(lineprotocol.Microsecond, t); err != nil {
					t = time.Now()
					if t, err = dec.Time(lineprotocol.Nanosecond, t); err != nil {
						return fmt.Errorf("host %s: timestamp: %#v with error: %#v", host, t, err.Error())
					}
				}
			}
		}

		if err != nil {
			return fmt.Errorf("host %s: timestamp: %#v with error: %#v", host, t, err.Error())
		}

		ts := t.Unix()

		if Keys.Checkpoints.FileFormat != "json" {
			LineProtocolMessages <- &AvroStruct{
				MetricName: string(metricBuf),
				Cluster:    cluster,
				Node:       host,
				Selector:   append([]string{}, selector...),
				Value:      metric.Value,
				Timestamp:  ts,
			}
		}

		if err := ms.WriteToLevel(lvl, selector, ts, []Metric{metric}); err != nil {
			return err
		}
	}
	return nil
}
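
// For reference, lines this decoder accepts look like the following
// (hypothetical measurement, tag, and field values):
//
//	flops_any,cluster=fritz,hostname=f0101,type=cpu,type-id=0 value=42.0 1700000000
//	mem_used,cluster=fritz,hostname=f0101,type=node value=128943.0 1700000000
//
// The measurement must match a configured metric name, "type=node" data is
// stored at the host level, the only accepted field is "value", and the
// timestamp precision is tried in seconds first.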

423
internal/memorystore/memorystore.go
Normal file
@@ -0,0 +1,423 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package memorystore provides an efficient in-memory time-series metric storage system
// with support for hierarchical data organization, checkpointing, and archiving.
//
// The package organizes metrics in a tree structure (cluster → host → component) and
// provides concurrent read/write access to metric data with configurable aggregation strategies.
// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,
// and enforcing retention policies.
//
// Key features:
//   - In-memory metric storage with configurable retention
//   - Hierarchical data organization (selectors)
//   - Concurrent checkpoint/archive workers
//   - Support for sum and average aggregation
//   - NATS integration for metric ingestion
package memorystore

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"runtime"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/resampler"
	"github.com/ClusterCockpit/cc-lib/schema"
	"github.com/ClusterCockpit/cc-lib/util"
)

var (
	singleton  sync.Once
	msInstance *MemoryStore
	// shutdownFunc stores the context cancellation function created in Init
	// and is called during Shutdown to cancel all background goroutines
	shutdownFunc context.CancelFunc
)

type Metric struct {
	Name         string
	Value        schema.Float
	MetricConfig MetricConfig
}

type MemoryStore struct {
	Metrics map[string]MetricConfig
	root    Level
}

func Init(rawConfig json.RawMessage, wg *sync.WaitGroup) {
	startupTime := time.Now()

	if rawConfig != nil {
		config.Validate(configSchema, rawConfig)
		dec := json.NewDecoder(bytes.NewReader(rawConfig))
		// dec.DisallowUnknownFields()
		if err := dec.Decode(&Keys); err != nil {
			cclog.Abortf("[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", rawConfig, err.Error())
		}
	}

	// Set NumWorkers from config or use default
	if Keys.NumWorkers <= 0 {
		Keys.NumWorkers = min(runtime.NumCPU()/2+1, DefaultMaxWorkers)
	}
	cclog.Debugf("[METRICSTORE]> Using %d workers for checkpoint/archive operations\n", Keys.NumWorkers)

	// Helper function to add metric configuration
	addMetricConfig := func(mc schema.MetricConfig) {
		agg, err := AssignAggregationStrategy(mc.Aggregation)
		if err != nil {
			cclog.Warnf("Could not find aggregation strategy for metric config '%s': %s", mc.Name, err.Error())
		}

		AddMetric(mc.Name, MetricConfig{
			Frequency:   int64(mc.Timestep),
			Aggregation: agg,
		})
	}

	for _, c := range archive.Clusters {
		for _, mc := range c.MetricConfig {
			addMetricConfig(*mc)
		}

		for _, sc := range c.SubClusters {
			for _, mc := range sc.MetricConfig {
				addMetricConfig(mc)
			}
		}
	}

	// Pass the config.MetricStoreKeys
	InitMetrics(Metrics)

	ms := GetMemoryStore()

	d, err := time.ParseDuration(Keys.Checkpoints.Restore)
	if err != nil {
		cclog.Fatal(err)
	}

	restoreFrom := startupTime.Add(-d)
	cclog.Infof("[METRICSTORE]> Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
	files, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())
	loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
	if err != nil {
		cclog.Fatalf("[METRICSTORE]> Loading checkpoints failed: %s\n", err.Error())
	} else {
		cclog.Infof("[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds())
	}

	// Try to use less memory by forcing a GC run here and then
	// lowering the target percentage. The default of 100 means
	// that only once the ratio of new allocations exceeds the
	// previously active heap, a GC is triggered.
	// Forcing a GC here will set the "previously active heap"
	// to a minimum.
	runtime.GC()

	ctx, shutdown := context.WithCancel(context.Background())

	wg.Add(4)

	Retention(wg, ctx)
	Checkpointing(wg, ctx)
	Archiving(wg, ctx)
	DataStaging(wg, ctx)

	// Note: Signal handling has been removed from this function.
	// The caller is responsible for handling shutdown signals and calling
	// the shutdown() function when appropriate.
	// Store the shutdown function for later use by Shutdown()
	shutdownFunc = shutdown

	err = ReceiveNats(ms, 1, ctx)
	if err != nil {
		cclog.Fatal(err)
	}
}

// InitMetrics creates a new, initialized instance of a MemoryStore.
// Will panic if values in the metric configurations are invalid.
func InitMetrics(metrics map[string]MetricConfig) {
	singleton.Do(func() {
		offset := 0
		for key, cfg := range metrics {
			if cfg.Frequency == 0 {
				panic("[METRICSTORE]> invalid frequency")
			}

			metrics[key] = MetricConfig{
				Frequency:   cfg.Frequency,
				Aggregation: cfg.Aggregation,
				offset:      offset,
			}
			offset += 1
		}

		msInstance = &MemoryStore{
			root: Level{
				metrics:  make([]*buffer, len(metrics)),
				children: make(map[string]*Level),
			},
			Metrics: metrics,
		}
	})
}

func GetMemoryStore() *MemoryStore {
	if msInstance == nil {
		cclog.Fatalf("[METRICSTORE]> MemoryStore not initialized!")
	}

	return msInstance
}

func Shutdown() {
	// Cancel the context to signal all background goroutines to stop
	if shutdownFunc != nil {
		shutdownFunc()
	}

	cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
	var files int
	var err error

	ms := GetMemoryStore()

	if Keys.Checkpoints.FileFormat == "json" {
		files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
	} else {
		files, err = GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true)
		close(LineProtocolMessages)
	}

	if err != nil {
		cclog.Errorf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
	}
	cclog.Infof("[METRICSTORE]> Done! (%d files written)\n", files)
}

func getName(m *MemoryStore, i int) string {
	for key, val := range m.Metrics {
		if val.offset == i {
			return key
		}
	}
	return ""
}

func Retention(wg *sync.WaitGroup, ctx context.Context) {
	ms := GetMemoryStore()

	go func() {
		defer wg.Done()
		d, err := time.ParseDuration(Keys.RetentionInMemory)
		if err != nil {
			cclog.Fatal(err)
		}
		if d <= 0 {
			return
		}

		tickInterval := d / 2
		if tickInterval <= 0 {
			return
		}
		ticker := time.NewTicker(tickInterval)
		defer ticker.Stop()

		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				t := time.Now().Add(-d)
				cclog.Infof("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339))
				freed, err := ms.Free(nil, t.Unix())
				if err != nil {
					cclog.Errorf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error())
				} else {
					cclog.Infof("[METRICSTORE]> done: %d buffers freed\n", freed)
				}
			}
		}
	}()
}

// Write all values in `metrics` to the level specified by `selector` for time `ts`.
// Look at `findLevelOrCreate` for how selectors work.
func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {
	var ok bool
	for i, metric := range metrics {
		if metric.MetricConfig.Frequency == 0 {
			metric.MetricConfig, ok = m.Metrics[metric.Name]
			if !ok {
				metric.MetricConfig.Frequency = 0
			}
			metrics[i] = metric
		}
	}

	return m.WriteToLevel(&m.root, selector, ts, metrics)
}

func (m *MemoryStore) GetLevel(selector []string) *Level {
	return m.root.findLevelOrCreate(selector, len(m.Metrics))
}

// WriteToLevel assumes that `minfo` in `metrics` is filled in
func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {
	l = l.findLevelOrCreate(selector, len(m.Metrics))
	l.lock.Lock()
	defer l.lock.Unlock()

	for _, metric := range metrics {
		if metric.MetricConfig.Frequency == 0 {
			continue
		}

		b := l.metrics[metric.MetricConfig.offset]
		if b == nil {
			// First write to this metric and level
			b = newBuffer(ts, metric.MetricConfig.Frequency)
			l.metrics[metric.MetricConfig.offset] = b
		}

		nb, err := b.write(ts, metric.Value)
		if err != nil {
			return err
		}

		// Last write created a new buffer...
		if b != nb {
			l.metrics[metric.MetricConfig.offset] = nb
		}
	}
	return nil
}

// Read returns all values for metric `metric` from `from` to `to` for the selected level(s).
// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
// The second and third return value are the actual from/to for the data. Those can be different from
// the range asked for if no data was available.
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
	if from > to {
		return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range")
	}

	minfo, ok := m.Metrics[metric]
	if !ok {
		return nil, 0, 0, 0, errors.New("[METRICSTORE]> unknown metric: " + metric)
	}

	n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)

	err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {
		cdata, cfrom, cto, err := b.read(from, to, data)
		if err != nil {
			return err
		}

		if n == 0 {
			from, to = cfrom, cto
		} else if from != cfrom || to != cto || len(data) != len(cdata) {
			missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)
			if missingfront != 0 {
				return ErrDataDoesNotAlign
			}

			newlen := len(cdata) - missingback
			if newlen < 1 {
				return ErrDataDoesNotAlign
			}
			cdata = cdata[0:newlen]
			if len(cdata) != len(data) {
				return ErrDataDoesNotAlign
			}

			from, to = cfrom, cto
		}

		data = cdata
		n += 1
		return nil
	})

	if err != nil {
		return nil, 0, 0, 0, err
	} else if n == 0 {
		return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found")
	} else if n > 1 {
		if minfo.Aggregation == AvgAggregation {
			normalize := 1. / schema.Float(n)
			for i := 0; i < len(data); i++ {
				data[i] *= normalize
			}
		} else if minfo.Aggregation != SumAggregation {
			return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid aggregation")
		}
	}

	data, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)
	if err != nil {
		return nil, 0, 0, 0, err
	}

	return data, from, to, resolution, nil
}
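
// A small usage sketch for the write/read pair. The selector values and
// metric name are placeholders and must exist in the configuration; the
// selector element fields (String/Group/Any) are the ones consulted by
// findBuffers:
//
//	ms := GetMemoryStore()
//	now := time.Now().Unix()
//	_ = ms.Write([]string{"fritz", "f0101"}, now,
//		[]Metric{{Name: "flops_any", Value: schema.Float(42.0)}})
//	data, from, to, res, err := ms.Read(
//		util.Selector{{String: "fritz"}, {String: "f0101"}},
//		"flops_any", now-3600, now, 60)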

// Free releases all buffers for the selected level and all its children that
// contain only values older than `t`.
func (m *MemoryStore) Free(selector []string, t int64) (int, error) {
	return m.GetLevel(selector).free(t)
}

func (m *MemoryStore) FreeAll() error {
	for k := range m.root.children {
		delete(m.root.children, k)
	}

	return nil
}

func (m *MemoryStore) SizeInBytes() int64 {
	return m.root.sizeInBytes()
}

// ListChildren, given a selector, returns a list of all children of the level
// selected.
func (m *MemoryStore) ListChildren(selector []string) []string {
	lvl := &m.root
	for lvl != nil && len(selector) != 0 {
		lvl.lock.RLock()
		next := lvl.children[selector[0]]
		lvl.lock.RUnlock()
		lvl = next
		selector = selector[1:]
	}

	if lvl == nil {
		return nil
	}

	lvl.lock.RLock()
	defer lvl.lock.RUnlock()

	children := make([]string, 0, len(lvl.children))
	for child := range lvl.children {
		children = append(children, child)
	}

	return children
}

156
internal/memorystore/memorystore_test.go
Normal file
@@ -0,0 +1,156 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
	"testing"

	"github.com/ClusterCockpit/cc-lib/schema"
)

func TestAssignAggregationStrategy(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		expected AggregationStrategy
		wantErr  bool
	}{
		{"empty string", "", NoAggregation, false},
		{"sum", "sum", SumAggregation, false},
		{"avg", "avg", AvgAggregation, false},
		{"invalid", "invalid", NoAggregation, true},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := AssignAggregationStrategy(tt.input)
			if (err != nil) != tt.wantErr {
				t.Errorf("AssignAggregationStrategy(%q) error = %v, wantErr %v", tt.input, err, tt.wantErr)
				return
			}
			if result != tt.expected {
				t.Errorf("AssignAggregationStrategy(%q) = %v, want %v", tt.input, result, tt.expected)
			}
		})
	}
}

func TestAddMetric(t *testing.T) {
	// Reset Metrics before test
	Metrics = make(map[string]MetricConfig)

	err := AddMetric("test_metric", MetricConfig{
		Frequency:   60,
		Aggregation: SumAggregation,
	})
	if err != nil {
		t.Errorf("AddMetric() error = %v", err)
	}

	if _, ok := Metrics["test_metric"]; !ok {
		t.Error("AddMetric() did not add metric to Metrics map")
	}

	// Test updating with higher frequency
	err = AddMetric("test_metric", MetricConfig{
		Frequency:   120,
		Aggregation: SumAggregation,
	})
	if err != nil {
		t.Errorf("AddMetric() error = %v", err)
	}

	if Metrics["test_metric"].Frequency != 120 {
		t.Errorf("AddMetric() frequency = %d, want 120", Metrics["test_metric"].Frequency)
	}

	// Test updating with lower frequency (should not update)
	err = AddMetric("test_metric", MetricConfig{
		Frequency:   30,
		Aggregation: SumAggregation,
	})
	if err != nil {
		t.Errorf("AddMetric() error = %v", err)
	}

	if Metrics["test_metric"].Frequency != 120 {
		t.Errorf("AddMetric() frequency = %d, want 120 (should not downgrade)", Metrics["test_metric"].Frequency)
	}
}

func TestGetMetricFrequency(t *testing.T) {
	// Reset Metrics before test
	Metrics = map[string]MetricConfig{
		"test_metric": {
			Frequency:   60,
			Aggregation: SumAggregation,
		},
	}

	freq, err := GetMetricFrequency("test_metric")
	if err != nil {
		t.Errorf("GetMetricFrequency() error = %v", err)
	}
	if freq != 60 {
		t.Errorf("GetMetricFrequency() = %d, want 60", freq)
	}

	_, err = GetMetricFrequency("nonexistent")
	if err == nil {
		t.Error("GetMetricFrequency() expected error for nonexistent metric")
	}
}

func TestBufferWrite(t *testing.T) {
	b := newBuffer(100, 10)

	// Test writing value
	nb, err := b.write(100, schema.Float(42.0))
	if err != nil {
		t.Errorf("buffer.write() error = %v", err)
	}
	if nb != b {
		t.Error("buffer.write() created new buffer unexpectedly")
	}
	if len(b.data) != 1 {
		t.Errorf("buffer.write() len(data) = %d, want 1", len(b.data))
	}
	if b.data[0] != schema.Float(42.0) {
		t.Errorf("buffer.write() data[0] = %v, want 42.0", b.data[0])
	}

	// Test writing value from the past (should error)
	_, err = b.write(50, schema.Float(10.0))
	if err == nil {
		t.Error("buffer.write() expected error for past timestamp")
	}
}

func TestBufferRead(t *testing.T) {
	b := newBuffer(100, 10)

	// Write some test data
	b.write(100, schema.Float(1.0))
	b.write(110, schema.Float(2.0))
	b.write(120, schema.Float(3.0))

	// Read data
	data := make([]schema.Float, 3)
	result, from, to, err := b.read(100, 130, data)
	if err != nil {
		t.Errorf("buffer.read() error = %v", err)
	}
	// The returned bounds should cover exactly the requested window here
	if from != 100 {
		t.Errorf("buffer.read() from = %d, want 100", from)
	}
	if to != 130 {
		t.Errorf("buffer.read() to = %d, want 130", to)
	}
	if len(result) != 3 {
		t.Errorf("buffer.read() len(result) = %d, want 3", len(result))
	}
}

124
internal/memorystore/stats.go
Normal file
@@ -0,0 +1,124 @@

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package memorystore

import (
	"errors"
	"math"

	"github.com/ClusterCockpit/cc-lib/util"
)

type Stats struct {
	Samples int
	Avg     util.Float
	Min     util.Float
	Max     util.Float
}

func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {
	if from < b.start {
		if b.prev != nil {
			return b.prev.stats(from, to)
		}
		from = b.start
	}

	// TODO: Check if b.closed and if so and the full buffer is queried,
	// use b.statistics instead of iterating over the buffer.

	samples := 0
	sum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32

	var t int64
	for t = from; t < to; t += b.frequency {
		idx := int((t - b.start) / b.frequency)
		if idx >= cap(b.data) {
			b = b.next
			if b == nil {
				break
			}
			idx = 0
		}

		if t < b.start || idx >= len(b.data) {
			continue
		}

		xf := float64(b.data[idx])
		if math.IsNaN(xf) {
			continue
		}

		samples += 1
		sum += xf
		min = math.Min(min, xf)
		max = math.Max(max, xf)
	}

	return Stats{
		Samples: samples,
		Avg:     util.Float(sum) / util.Float(samples),
		Min:     util.Float(min),
		Max:     util.Float(max),
	}, from, t, nil
}

// Stats returns statistics for the requested metric on the selected node/level.
// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.
// If `Stats.Samples` is zero, the statistics should not be considered as valid.
func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) {
	if from > to {
		return nil, 0, 0, errors.New("invalid time range")
	}

	minfo, ok := m.Metrics[metric]
	if !ok {
		return nil, 0, 0, errors.New("unknown metric: " + metric)
	}

	n, samples := 0, 0
	avg, min, max := util.Float(0), math.MaxFloat32, -math.MaxFloat32
	err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {
		stats, cfrom, cto, err := b.stats(from, to)
		if err != nil {
			return err
		}

		if n == 0 {
			from, to = cfrom, cto
		} else if from != cfrom || to != cto {
			return ErrDataDoesNotAlign
		}

		samples += stats.Samples
		avg += stats.Avg
		min = math.Min(min, float64(stats.Min))
		max = math.Max(max, float64(stats.Max))
		n += 1
		return nil
	})
	if err != nil {
		return nil, 0, 0, err
	}

	if n == 0 {
		return nil, 0, 0, ErrNoData
	}

	if minfo.Aggregation == AvgAggregation {
		avg /= util.Float(n)
	} else if n > 1 && minfo.Aggregation != SumAggregation {
		return nil, 0, 0, errors.New("invalid aggregation")
	}

	return &Stats{
		Samples: samples,
		Avg:     avg,
		Min:     util.Float(min),
		Max:     util.Float(max),
	}, from, to, nil
}
|
||||
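For orientation, a hedged caller-side sketch of MemoryStore.Stats; the two-level cluster/host selector literal is an assumption, since util.Selector construction does not appear in this diff.

// Hypothetical caller, not part of the diff. The selector literal assumes
// a cluster/host hierarchy; adjust to the actual tree layout.
func exampleStats(m *MemoryStore) {
	sel := util.Selector{{String: "fritz"}, {String: "f0001"}}
	stats, from, to, err := m.Stats(sel, "flops_any", 1700000000, 1700003600)
	if err != nil || stats.Samples == 0 {
		return // zero samples means the statistics are not valid
	}
	fmt.Printf("[%d, %d] avg=%v min=%v max=%v\n", from, to, stats.Avg, stats.Min, stats.Max)
}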
@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricDataDispatcher

@@ -7,15 +7,16 @@ package metricDataDispatcher
import (
	"context"
	"fmt"
	"math"
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/internal/metricdata"
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
	"github.com/ClusterCockpit/cc-backend/pkg/resampler"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/lrucache"
	"github.com/ClusterCockpit/cc-lib/resampler"
	"github.com/ClusterCockpit/cc-lib/schema"
)

var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)

@@ -39,7 +40,7 @@ func LoadData(job *schema.Job,
	ctx context.Context,
	resolution int,
) (schema.JobData, error) {
	data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
	data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ any, ttl time.Duration, size int) {
		var jd schema.JobData
		var err error

@@ -66,10 +67,10 @@ func LoadData(job *schema.Job,
			jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
			if err != nil {
				if len(jd) != 0 {
					log.Warnf("partial error: %s", err.Error())
					cclog.Warnf("partial error: %s", err.Error())
					// return err, 0, 0 // Reactivating will block archiving on one partial error
				} else {
					log.Error("Error while loading job data from metric repository")
					cclog.Error("Error while loading job data from metric repository")
					return err, 0, 0
				}
			}
@@ -78,25 +79,25 @@ func LoadData(job *schema.Job,
			var jd_temp schema.JobData
			jd_temp, err = archive.GetHandle().LoadJobData(job)
			if err != nil {
				log.Error("Error while loading job data from archive")
				cclog.Error("Error while loading job data from archive")
				return err, 0, 0
			}

			//Deep copy the cached archive hashmap
			// Deep copy the cached archive hashmap
			jd = metricdata.DeepCopy(jd_temp)

			//Resampling for archived data.
			//Pass the resolution from frontend here.
			// Resampling for archived data.
			// Pass the resolution from frontend here.
			for _, v := range jd {
				for _, v_ := range v {
					timestep := 0
					timestep := int64(0)
					for i := 0; i < len(v_.Series); i += 1 {
						v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
						v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
						if err != nil {
							return err, 0, 0
						}
					}
					v_.Timestep = timestep
					v_.Timestep = int(timestep)
				}
			}

@@ -169,11 +170,14 @@ func LoadData(job *schema.Job,
			jd.AddNodeScope("mem_bw")
		}

		// Round Resulting Stat Values
		jd.RoundMetricStats()

		return jd, ttl, size
	})

	if err, ok := data.(error); ok {
		log.Error("Error in returned dataset")
		cclog.Error("Error in returned dataset")
		return nil, err
	}

@@ -198,7 +202,7 @@ func LoadAverages(

	stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalization?
	if err != nil {
		log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
		cclog.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
		return err
	}

@@ -219,7 +223,78 @@ func LoadAverages(
	return nil
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
// Used for statsTable in frontend: Return scoped statistics by metric.
func LoadScopedJobStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
		return archive.LoadScopedStatsFromArchive(job, metrics, scopes)
	}

	repo, err := metricdata.GetMetricDataRepo(job.Cluster)
	if err != nil {
		return nil, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
	}

	scopedStats, err := repo.LoadScopedStats(job, metrics, scopes, ctx)
	if err != nil {
		cclog.Errorf("error while loading scoped statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
		return nil, err
	}

	return scopedStats, nil
}
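LoadScopedJobStats, like the other Load* entry points in this dispatcher, first decides between archive and live repository; the rule reduces to a one-liner (sketch, mirroring the condition above):

// Sketch mirroring the dispatch condition shared by the Load* functions:
// finished jobs are served from the job archive unless archiving is
// disabled; running jobs always hit the live metric data repository.
func useArchive(state schema.JobState, disableArchive bool) bool {
	return state != schema.JobStateRunning && !disableArchive
}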
// Used for polar plots in frontend: Aggregates statistics for all nodes to single values for job per metric.
func LoadJobStats(
	job *schema.Job,
	metrics []string,
	ctx context.Context,
) (map[string]schema.MetricStatistics, error) {
	if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
		return archive.LoadStatsFromArchive(job, metrics)
	}

	data := make(map[string]schema.MetricStatistics, len(metrics))
	repo, err := metricdata.GetMetricDataRepo(job.Cluster)
	if err != nil {
		return data, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
	}

	stats, err := repo.LoadStats(job, metrics, ctx)
	if err != nil {
		cclog.Errorf("error while loading statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
		return data, err
	}

	for _, m := range metrics {
		sum, avg, min, max := 0.0, 0.0, 0.0, 0.0
		nodes, ok := stats[m]
		if !ok {
			data[m] = schema.MetricStatistics{Min: min, Avg: avg, Max: max}
			continue
		}

		for _, node := range nodes {
			sum += node.Avg
			min = math.Min(min, node.Min)
			max = math.Max(max, node.Max)
		}

		data[m] = schema.MetricStatistics{
			Avg: (math.Round((sum/float64(job.NumNodes))*100) / 100),
			Min: (math.Round(min*100) / 100),
			Max: (math.Round(max*100) / 100),
		}
	}

	return data, nil
}
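The per-metric reduction above can be read in isolation: per-node averages are summed and divided by the job's node count, min/max are taken across nodes, and all three values are rounded to two decimals. A standalone restatement (note that min and max start at 0.0, so a strictly positive node minimum is clamped to zero):

// Standalone restatement of the reduction above (sketch, assumed inputs).
func reduceNodeStats(nodes map[string]schema.MetricStatistics, numNodes int) schema.MetricStatistics {
	sum, min, max := 0.0, 0.0, 0.0
	for _, n := range nodes {
		sum += n.Avg // node averages are summed, then normalized below
		min = math.Min(min, n.Min)
		max = math.Max(max, n.Max)
	}
	return schema.MetricStatistics{
		Avg: math.Round((sum/float64(numNodes))*100) / 100, // two decimals
		Min: math.Round(min*100) / 100,
		Max: math.Round(max*100) / 100,
	}
}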
// Used for the classic node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
	cluster string,
	metrics, nodes []string,
@@ -241,9 +316,9 @@ func LoadNodeData(
	data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
	if err != nil {
		if len(data) != 0 {
			log.Warnf("partial error: %s", err.Error())
			cclog.Warnf("partial error: %s", err.Error())
		} else {
			log.Error("Error while loading node data from metric repository")
			cclog.Error("Error while loading node data from metric repository")
			return nil, err
		}
	}
@@ -254,3 +329,53 @@ func LoadNodeData(

	return data, nil
}

func LoadNodeListData(
	cluster, subCluster string,
	nodes []string,
	metrics []string,
	scopes []schema.MetricScope,
	resolution int,
	from, to time.Time,
	ctx context.Context,
) (map[string]schema.JobData, error) {
	repo, err := metricdata.GetMetricDataRepo(cluster)
	if err != nil {
		return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
	}

	if metrics == nil {
		for _, m := range archive.GetCluster(cluster).MetricConfig {
			metrics = append(metrics, m.Name)
		}
	}

	data, err := repo.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx)
	if err != nil {
		if len(data) != 0 {
			cclog.Warnf("partial error: %s", err.Error())
		} else {
			cclog.Error("Error while loading node data from metric repository")
			return nil, err
		}
	}

	// NOTE: New StatsSeries will always be calculated as 'min/median/max'
	const maxSeriesSize int = 8
	for _, jd := range data {
		for _, scopes := range jd {
			for _, jm := range scopes {
				if jm.StatisticsSeries != nil || len(jm.Series) < maxSeriesSize {
					continue
				}
				jm.AddStatisticsSeries()
			}
		}
	}

	if data == nil {
		return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
	}

	return data, nil
}
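The condensation step above only fires for crowded plots; restated as a helper (sketch, same rule as the loop body):

// Sketch of the condensation rule applied in the loop above: once a
// metric/scope holds maxSeriesSize or more individual series, render a
// single min/median/max band instead, and never recompute it.
func maybeCondense(jm *schema.JobMetric) {
	const maxSeriesSize = 8
	if jm.StatisticsSeries == nil && len(jm.Series) >= maxSeriesSize {
		jm.AddStatisticsSeries()
	}
}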
internal/metricdata/cc-metric-store-internal.go | 1107 (new file)
File diff suppressed because it is too large
@@ -11,13 +11,12 @@ import (
	"encoding/json"
	"fmt"
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
)

type CCMetricStoreConfig struct {
@@ -80,7 +79,7 @@ type ApiMetricData struct {
func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
	var config CCMetricStoreConfig
	if err := json.Unmarshal(rawConfig, &config); err != nil {
		log.Warn("Error while unmarshaling raw json config")
		cclog.Warn("Error while unmarshaling raw json config")
		return err
	}

@@ -127,13 +126,13 @@ func (ccms *CCMetricStore) doRequest(
) (*ApiQueryResponse, error) {
	buf := &bytes.Buffer{}
	if err := json.NewEncoder(buf).Encode(body); err != nil {
		log.Warn("Error while encoding request body")
		cclog.Errorf("Error while encoding request body: %s", err.Error())
		return nil, err
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
	if err != nil {
		log.Warn("Error while building request body")
		cclog.Errorf("Error while building request body: %s", err.Error())
		return nil, err
	}
	if ccms.jwt != "" {
@@ -149,7 +148,7 @@ func (ccms *CCMetricStore) doRequest(

	res, err := ccms.client.Do(req)
	if err != nil {
		log.Error("Error while performing request")
		cclog.Errorf("Error while performing request: %s", err.Error())
		return nil, err
	}

@@ -159,7 +158,7 @@ func (ccms *CCMetricStore) doRequest(

	var resBody ApiQueryResponse
	if err := json.NewDecoder(bufio.NewReader(res.Body)).Decode(&resBody); err != nil {
		log.Warn("Error while decoding result body")
		cclog.Errorf("Error while decoding result body: %s", err.Error())
		return nil, err
	}

@@ -175,14 +174,14 @@ func (ccms *CCMetricStore) LoadData(
) (schema.JobData, error) {
	queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
	if err != nil {
		log.Warn("Error while building queries")
		cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error())
		return nil, err
	}

	req := ApiQueryRequest{
		Cluster:   job.Cluster,
		From:      job.StartTime.Unix(),
		To:        job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
		From:      job.StartTime,
		To:        job.StartTime + int64(job.Duration),
		Queries:   queries,
		WithStats: true,
		WithData:  true,
@@ -190,7 +189,7 @@ func (ccms *CCMetricStore) LoadData(

	resBody, err := ccms.doRequest(ctx, &req)
	if err != nil {
		log.Error("Error while performing request")
		cclog.Errorf("Error while performing request: %s", err.Error())
		return nil, err
	}

@@ -205,13 +204,12 @@ func (ccms *CCMetricStore) LoadData(
			jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
		}

		res := row[0].Resolution
		if res == 0 {
			res = mc.Timestep
		res := mc.Timestep
		if len(row) > 0 {
			res = row[0].Resolution
		}

		jobMetric, ok := jobData[metric][scope]

		if !ok {
			jobMetric = &schema.JobMetric{
				Unit: mc.Unit,
@@ -235,8 +233,7 @@ func (ccms *CCMetricStore) LoadData(
		}

		if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
			// TODO: use schema.Float instead of float64?
			// This is done because regular float64 can not be JSONed when NaN.
			// "schema.Float()" because regular float64 can not be JSONed when NaN.
			res.Avg = schema.Float(0)
			res.Min = schema.Float(0)
			res.Max = schema.Float(0)
@@ -270,14 +267,6 @@ func (ccms *CCMetricStore) LoadData(
	return jobData, nil
}

var (
	hwthreadString     = string(schema.MetricScopeHWThread)
	coreString         = string(schema.MetricScopeCore)
	memoryDomainString = string(schema.MetricScopeMemoryDomain)
	socketString       = string(schema.MetricScopeSocket)
	acceleratorString  = string(schema.MetricScopeAccelerator)
)

func (ccms *CCMetricStore) buildQueries(
	job *schema.Job,
	metrics []string,
@@ -298,10 +287,24 @@ func (ccms *CCMetricStore) buildQueries(
		mc := archive.GetMetricConfig(job.Cluster, metric)
		if mc == nil {
			// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
			log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
			cclog.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
			continue
		}

		// Skip if metric is removed for subcluster
		if len(mc.SubClusters) != 0 {
			isRemoved := false
			for _, scConfig := range mc.SubClusters {
				if scConfig.Name == job.SubCluster && scConfig.Remove {
					isRemoved = true
					break
				}
			}
			if isRemoved {
				continue
			}
		}

		// Avoid duplicates...
		handledScopes := make([]schema.MetricScope, 0, 3)

@@ -440,6 +443,23 @@ func (ccms *CCMetricStore) buildQueries(
				continue
			}

			// Core -> Socket
			if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
				sockets, _ := topology.GetSocketsFromCores(hwthreads)
				for _, socket := range sockets {
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   host.Hostname,
						Aggregate:  true,
						Type:       &coreString,
						TypeIds:    intToStringSlice(topology.Socket[socket]),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
				}
				continue
			}

			// Core -> Node
			if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
				cores, _ := topology.GetCoresFromHWThreads(hwthreads)
@@ -540,23 +560,16 @@ func (ccms *CCMetricStore) LoadStats(
	ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {

	// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
	// resolution := 9000

	// for _, mc := range metricConfigs {
	// 	resolution = min(resolution, mc.Timestep)
	// }

	queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope here for analysis view accelerator normalization?
	if err != nil {
		log.Warn("Error while building query")
		cclog.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error())
		return nil, err
	}

	req := ApiQueryRequest{
		Cluster:   job.Cluster,
		From:      job.StartTime.Unix(),
		To:        job.StartTime.Add(time.Duration(job.Duration) * time.Second).Unix(),
		From:      job.StartTime,
		To:        job.StartTime + int64(job.Duration),
		Queries:   queries,
		WithStats: true,
		WithData:  false,
@@ -564,7 +577,7 @@ func (ccms *CCMetricStore) LoadStats(

	resBody, err := ccms.doRequest(ctx, &req)
	if err != nil {
		log.Error("Error while performing request")
		cclog.Errorf("Error while performing request: %s", err.Error())
		return nil, err
	}

@@ -574,9 +587,8 @@ func (ccms *CCMetricStore) LoadStats(
		metric := ccms.toLocalName(query.Metric)
		data := res[0]
		if data.Error != nil {
			log.Infof("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
			cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
			continue
			// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
		}

		metricdata, ok := stats[metric]
@@ -586,9 +598,8 @@ func (ccms *CCMetricStore) LoadStats(
		}

		if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {
			log.Infof("fetching %s for node %s failed: one of avg/min/max is NaN", metric, query.Hostname)
			cclog.Warnf("fetching %s for node %s failed: one of avg/min/max is NaN", metric, query.Hostname)
			continue
			// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
		}

		metricdata[query.Hostname] = schema.MetricStatistics{
@@ -601,7 +612,98 @@ func (ccms *CCMetricStore) LoadStats(
	return stats, nil
}

// TODO: Support sub-node-scope metrics! For this, the partition of a node needs to be known!
// Used for Job-View Statistics Table
func (ccms *CCMetricStore) LoadScopedStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, 0)
	if err != nil {
		cclog.Errorf("Error while building queries for jobId %d, Metrics %v, Scopes %v: %s", job.JobID, metrics, scopes, err.Error())
		return nil, err
	}

	req := ApiQueryRequest{
		Cluster:   job.Cluster,
		From:      job.StartTime,
		To:        job.StartTime + int64(job.Duration),
		Queries:   queries,
		WithStats: true,
		WithData:  false,
	}

	resBody, err := ccms.doRequest(ctx, &req)
	if err != nil {
		cclog.Errorf("Error while performing request: %s", err.Error())
		return nil, err
	}

	var errors []string
	scopedJobStats := make(schema.ScopedJobStats)

	for i, row := range resBody.Results {
		query := req.Queries[i]
		metric := ccms.toLocalName(query.Metric)
		scope := assignedScope[i]

		if _, ok := scopedJobStats[metric]; !ok {
			scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
		}

		if _, ok := scopedJobStats[metric][scope]; !ok {
			scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
		}

		for ndx, res := range row {
			if res.Error != nil {
				/* Build list for "partial errors", if any */
				errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
				continue
			}

			id := (*string)(nil)
			if query.Type != nil {
				id = new(string)
				*id = query.TypeIds[ndx]
			}

			if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
				// "schema.Float()" because regular float64 can not be JSONed when NaN.
				res.Avg = schema.Float(0)
				res.Min = schema.Float(0)
				res.Max = schema.Float(0)
			}

			scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
				Hostname: query.Hostname,
				Id:       id,
				Data: &schema.MetricStatistics{
					Avg: float64(res.Avg),
					Min: float64(res.Min),
					Max: float64(res.Max),
				},
			})
		}

		// So that one can later check len(scopedJobStats[metric][scope]): Remove from map if empty
		if len(scopedJobStats[metric][scope]) == 0 {
			delete(scopedJobStats[metric], scope)
			if len(scopedJobStats[metric]) == 0 {
				delete(scopedJobStats, metric)
			}
		}
	}

	if len(errors) != 0 {
		/* Returns list for "partial errors" */
		return scopedJobStats, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
	}
	return scopedJobStats, nil
}
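A hedged sketch of consuming the ScopedJobStats value built above; because empty scope and metric entries were deleted, every slice reached in this walk is non-empty:

// Hedged consumer sketch, not part of the diff: walks the nested
// metric -> scope -> per-host/type-id structure returned above.
func printScopedStats(s schema.ScopedJobStats) {
	for metric, byScope := range s {
		for scope, entries := range byScope {
			for _, e := range entries { // guaranteed non-empty, see delete() above
				id := "-"
				if e.Id != nil {
					id = *e.Id
				}
				fmt.Printf("%s/%s %s[%s]: avg=%.2f\n", metric, scope, e.Hostname, id, e.Data.Avg)
			}
		}
	}
}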
// Used for Systems-View Node-Overview
func (ccms *CCMetricStore) LoadNodeData(
	cluster string,
	metrics, nodes []string,
@@ -627,7 +729,7 @@ func (ccms *CCMetricStore) LoadNodeData(
			req.Queries = append(req.Queries, ApiQuery{
				Hostname:   node,
				Metric:     ccms.toRemoteName(metric),
				Resolution: 60, // Default for Node Queries
				Resolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution
			})
		}
	}
@@ -635,7 +737,7 @@ func (ccms *CCMetricStore) LoadNodeData(

	resBody, err := ccms.doRequest(ctx, &req)
	if err != nil {
		log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
		cclog.Errorf("Error while performing request: %s", err.Error())
		return nil, err
	}

@@ -693,10 +795,428 @@ func (ccms *CCMetricStore) LoadNodeData(
	return data, nil
}
func intToStringSlice(is []int) []string {
	ss := make([]string, len(is))
	for i, x := range is {
		ss[i] = strconv.Itoa(x)
// Used for Systems-View Node-List
func (ccms *CCMetricStore) LoadNodeListData(
	cluster, subCluster string,
	nodes []string,
	metrics []string,
	scopes []schema.MetricScope,
	resolution int,
	from, to time.Time,
	ctx context.Context,
) (map[string]schema.JobData, error) {

	// Note: Order of node data is not guaranteed after this point
	queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
	if err != nil {
		cclog.Errorf("Error while building node queries for Cluster %s, SubCluster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error())
		return nil, err
	}
	return ss

	req := ApiQueryRequest{
		Cluster:   cluster,
		Queries:   queries,
		From:      from.Unix(),
		To:        to.Unix(),
		WithStats: true,
		WithData:  true,
	}

	resBody, err := ccms.doRequest(ctx, &req)
	if err != nil {
		cclog.Errorf("Error while performing request: %s", err.Error())
		return nil, err
	}

	var errors []string
	data := make(map[string]schema.JobData)
	for i, row := range resBody.Results {
		var query ApiQuery
		if resBody.Queries != nil {
			query = resBody.Queries[i]
		} else {
			query = req.Queries[i]
		}
		// qdata := res[0]
		metric := ccms.toLocalName(query.Metric)
		scope := assignedScope[i]
		mc := archive.GetMetricConfig(cluster, metric)

		res := mc.Timestep
		if len(row) > 0 {
			res = row[0].Resolution
		}

		// Init Nested Map Data Structures If Not Found
		hostData, ok := data[query.Hostname]
		if !ok {
			hostData = make(schema.JobData)
			data[query.Hostname] = hostData
		}

		metricData, ok := hostData[metric]
		if !ok {
			metricData = make(map[schema.MetricScope]*schema.JobMetric)
			data[query.Hostname][metric] = metricData
		}

		scopeData, ok := metricData[scope]
		if !ok {
			scopeData = &schema.JobMetric{
				Unit:     mc.Unit,
				Timestep: res,
				Series:   make([]schema.Series, 0),
			}
			data[query.Hostname][metric][scope] = scopeData
		}

		for ndx, res := range row {
			if res.Error != nil {
				/* Build list for "partial errors", if any */
				errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
				continue
			}

			id := (*string)(nil)
			if query.Type != nil {
				id = new(string)
				*id = query.TypeIds[ndx]
			}

			if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
				// "schema.Float()" because regular float64 can not be JSONed when NaN.
				res.Avg = schema.Float(0)
				res.Min = schema.Float(0)
				res.Max = schema.Float(0)
			}

			scopeData.Series = append(scopeData.Series, schema.Series{
				Hostname: query.Hostname,
				Id:       id,
				Statistics: schema.MetricStatistics{
					Avg: float64(res.Avg),
					Min: float64(res.Min),
					Max: float64(res.Max),
				},
				Data: res.Data,
			})
		}
	}

	if len(errors) != 0 {
		/* Returns list of "partial errors" */
		return data, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
	}

	return data, nil
}
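Both LoadScopedStats and LoadNodeListData use the same "partial errors" convention: return whatever data was assembled together with one joined error. Reduced to its skeleton (hypothetical names):

// Skeleton of the partial-error convention (hypothetical names): collect
// good results, record failures as strings, and hand back both so callers
// can decide whether partial data is acceptable.
func fetchAll(hosts []string, fetch func(string) (float64, error)) (map[string]float64, error) {
	out := make(map[string]float64)
	var errs []string
	for _, h := range hosts {
		v, err := fetch(h)
		if err != nil {
			errs = append(errs, fmt.Sprintf("host '%s': %s", h, err.Error()))
			continue
		}
		out[h] = v
	}
	if len(errs) != 0 {
		return out, fmt.Errorf("partial errors: %s", strings.Join(errs, ", "))
	}
	return out, nil
}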
func (ccms *CCMetricStore) buildNodeQueries(
	cluster string,
	subCluster string,
	nodes []string,
	metrics []string,
	scopes []schema.MetricScope,
	resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {

	queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
	assignedScope := []schema.MetricScope{}

	// Get topology before loop if subCluster given
	var subClusterTopol *schema.SubCluster
	var scterr error
	if subCluster != "" {
		subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)
		if scterr != nil {
			cclog.Errorf("could not load cluster %s subCluster %s topology: %s", cluster, subCluster, scterr.Error())
			return nil, nil, scterr
		}
	}

	for _, metric := range metrics {
		remoteName := ccms.toRemoteName(metric)
		mc := archive.GetMetricConfig(cluster, metric)
		if mc == nil {
			// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
			cclog.Warnf("metric '%s' is not specified for cluster '%s'", metric, cluster)
			continue
		}

		// Skip if metric is removed for subcluster
		if mc.SubClusters != nil {
			isRemoved := false
			for _, scConfig := range mc.SubClusters {
				if scConfig.Name == subCluster && scConfig.Remove {
					isRemoved = true
					break
				}
			}
			if isRemoved {
				continue
			}
		}

		// Avoid duplicates...
		handledScopes := make([]schema.MetricScope, 0, 3)

	scopesLoop:
		for _, requestedScope := range scopes {
			nativeScope := mc.Scope

			scope := nativeScope.Max(requestedScope)
			for _, s := range handledScopes {
				if scope == s {
					continue scopesLoop
				}
			}
			handledScopes = append(handledScopes, scope)

			for _, hostname := range nodes {

				// If no subCluster given, get it by node
				if subCluster == "" {
					subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)
					if scnerr != nil {
						return nil, nil, scnerr
					}
					subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)
					if scterr != nil {
						return nil, nil, scterr
					}
				}

				// Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable
				// Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable
				topology := subClusterTopol.Topology
				acceleratorIds := topology.GetAcceleratorIDs()

				// Moved check here if metric matches hardware specs
				if nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 {
					continue scopesLoop
				}

				// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
				if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
					if scope != schema.MetricScopeAccelerator {
						// Skip all other caught cases
						continue
					}

					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  false,
						Type:       &acceleratorString,
						TypeIds:    acceleratorIds,
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
					continue
				}

				// Accelerator -> Node
				if nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode {
					if len(acceleratorIds) == 0 {
						continue
					}

					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  true,
						Type:       &acceleratorString,
						TypeIds:    acceleratorIds,
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// HWThread -> HWThread
				if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  false,
						Type:       &hwthreadString,
						TypeIds:    intToStringSlice(topology.Node),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// HWThread -> Core
				if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {
					cores, _ := topology.GetCoresFromHWThreads(topology.Node)
					for _, core := range cores {
						queries = append(queries, ApiQuery{
							Metric:     remoteName,
							Hostname:   hostname,
							Aggregate:  true,
							Type:       &hwthreadString,
							TypeIds:    intToStringSlice(topology.Core[core]),
							Resolution: resolution,
						})
						assignedScope = append(assignedScope, scope)
					}
					continue
				}

				// HWThread -> Socket
				if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {
					sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
					for _, socket := range sockets {
						queries = append(queries, ApiQuery{
							Metric:     remoteName,
							Hostname:   hostname,
							Aggregate:  true,
							Type:       &hwthreadString,
							TypeIds:    intToStringSlice(topology.Socket[socket]),
							Resolution: resolution,
						})
						assignedScope = append(assignedScope, scope)
					}
					continue
				}

				// HWThread -> Node
				if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  true,
						Type:       &hwthreadString,
						TypeIds:    intToStringSlice(topology.Node),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// Core -> Core
				if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
					cores, _ := topology.GetCoresFromHWThreads(topology.Node)
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  false,
						Type:       &coreString,
						TypeIds:    intToStringSlice(cores),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// Core -> Socket
				if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {
					sockets, _ := topology.GetSocketsFromCores(topology.Node)
					for _, socket := range sockets {
						queries = append(queries, ApiQuery{
							Metric:     remoteName,
							Hostname:   hostname,
							Aggregate:  true,
							Type:       &coreString,
							TypeIds:    intToStringSlice(topology.Socket[socket]),
							Resolution: resolution,
						})
						assignedScope = append(assignedScope, scope)
					}
					continue
				}

				// Core -> Node
				if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
					cores, _ := topology.GetCoresFromHWThreads(topology.Node)
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  true,
						Type:       &coreString,
						TypeIds:    intToStringSlice(cores),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// MemoryDomain -> MemoryDomain
				if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
					sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  false,
						Type:       &memoryDomainString,
						TypeIds:    intToStringSlice(sockets),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// MemoryDomain -> Node
				if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
					sockets, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  true,
						Type:       &memoryDomainString,
						TypeIds:    intToStringSlice(sockets),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// Socket -> Socket
				if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
					sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  false,
						Type:       &socketString,
						TypeIds:    intToStringSlice(sockets),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// Socket -> Node
				if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
					sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Aggregate:  true,
						Type:       &socketString,
						TypeIds:    intToStringSlice(sockets),
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				// Node -> Node
				if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
					queries = append(queries, ApiQuery{
						Metric:     remoteName,
						Hostname:   hostname,
						Resolution: resolution,
					})
					assignedScope = append(assignedScope, scope)
					continue
				}

				return nil, nil, fmt.Errorf("METRICDATA/CCMS > TODO: unhandled case: native-scope=%s, requested-scope=%s", nativeScope, requestedScope)
			}
		}
	}

	return queries, assignedScope, nil
}
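buildNodeQueries maps each (native scope, requested scope) pair onto one of the handled cases above; the underlying widening rule can be stated compactly (illustrative sketch using the scope helpers from the diff):

// Illustrative sketch of the widening rule: a requested scope finer than
// the metric's native scope is clamped up to the native one, and any move
// to a coarser scope becomes an aggregated query over the topology's ids.
func effectiveScope(native, requested schema.MetricScope) (scope schema.MetricScope, aggregate bool) {
	scope = native.Max(requested) // never finer than natively recorded
	aggregate = scope != native   // crossing levels implies aggregation
	return scope, aggregate
}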
@@ -1,314 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricdata

import (
	"context"
	"crypto/tls"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
	"time"

	"github.com/ClusterCockpit/cc-backend/pkg/archive"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	influxdb2 "github.com/influxdata/influxdb-client-go/v2"
	influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
)

type InfluxDBv2DataRepositoryConfig struct {
	Url     string `json:"url"`
	Token   string `json:"token"`
	Bucket  string `json:"bucket"`
	Org     string `json:"org"`
	SkipTls bool   `json:"skiptls"`
}

type InfluxDBv2DataRepository struct {
	client              influxdb2.Client
	queryClient         influxdb2Api.QueryAPI
	bucket, measurement string
}

func (idb *InfluxDBv2DataRepository) Init(rawConfig json.RawMessage) error {
	var config InfluxDBv2DataRepositoryConfig
	if err := json.Unmarshal(rawConfig, &config); err != nil {
		log.Warn("Error while unmarshaling raw json config")
		return err
	}

	idb.client = influxdb2.NewClientWithOptions(config.Url, config.Token, influxdb2.DefaultOptions().SetTLSConfig(&tls.Config{InsecureSkipVerify: config.SkipTls}))
	idb.queryClient = idb.client.QueryAPI(config.Org)
	idb.bucket = config.Bucket

	return nil
}

func (idb *InfluxDBv2DataRepository) formatTime(t time.Time) string {
	return t.Format(time.RFC3339) // Like "2006-01-02T15:04:05Z07:00"
}

func (idb *InfluxDBv2DataRepository) epochToTime(epoch int64) time.Time {
	return time.Unix(epoch, 0)
}

func (idb *InfluxDBv2DataRepository) LoadData(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
	resolution int) (schema.JobData, error) {

	measurementsConds := make([]string, 0, len(metrics))
	for _, m := range metrics {
		measurementsConds = append(measurementsConds, fmt.Sprintf(`r["_measurement"] == "%s"`, m))
	}
	measurementsCond := strings.Join(measurementsConds, " or ")

	hostsConds := make([]string, 0, len(job.Resources))
	for _, h := range job.Resources {
		if h.HWThreads != nil || h.Accelerators != nil {
			// TODO
			return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
		}
		hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
	}
	hostsCond := strings.Join(hostsConds, " or ")

	jobData := make(schema.JobData) // Empty Schema: map[<string>FIELD]map[<MetricScope>SCOPE]<*JobMetric>METRIC
	// Requested Scopes
	for _, scope := range scopes {
		query := ""
		switch scope {
		case "node":
			// Get Finest Granularity, Group By Measurement and Hostname (== Metric / Node), Calculate Mean for 60s windows
			// log.Info("Scope 'node' requested. ")
			query = fmt.Sprintf(`
				from(bucket: "%s")
				|> range(start: %s, stop: %s)
				|> filter(fn: (r) => (%s) and (%s) )
				|> drop(columns: ["_start", "_stop"])
				|> group(columns: ["hostname", "_measurement"])
				|> aggregateWindow(every: 60s, fn: mean)
				|> drop(columns: ["_time"])`,
				idb.bucket,
				idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
				measurementsCond, hostsCond)
		case "socket":
			log.Info("Scope 'socket' requested, but not yet supported: Will return 'node' scope only. ")
			continue
		case "core":
			log.Info("Scope 'core' requested, but not yet supported: Will return 'node' scope only. ")
			continue
			// Get Finest Granularity only, Set NULL to 0.0
			// query = fmt.Sprintf(`
			// 	from(bucket: "%s")
			// 	|> range(start: %s, stop: %s)
			// 	|> filter(fn: (r) => %s )
			// 	|> filter(fn: (r) => %s )
			// 	|> drop(columns: ["_start", "_stop", "cluster"])
			// 	|> map(fn: (r) => (if exists r._value then {r with _value: r._value} else {r with _value: 0.0}))`,
			// 	idb.bucket,
			// 	idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix + int64(job.Duration) + int64(1) )),
			// 	measurementsCond, hostsCond)
		default:
			log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
			continue
			// return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support other scopes than 'node'")
		}

		rows, err := idb.queryClient.Query(ctx, query)
		if err != nil {
			log.Error("Error while performing query")
			return nil, err
		}

		// Init Metrics: Only Node level now -> TODO: Matching / check on scope level ...
		for _, metric := range metrics {
			jobMetric, ok := jobData[metric]
			if !ok {
				mc := archive.GetMetricConfig(job.Cluster, metric)
				jobMetric = map[schema.MetricScope]*schema.JobMetric{
					scope: { // uses scope var from above!
						Unit:             mc.Unit,
						Timestep:         mc.Timestep,
						Series:           make([]schema.Series, 0, len(job.Resources)),
						StatisticsSeries: nil, // Should be: &schema.StatsSeries{},
					},
				}
			}
			jobData[metric] = jobMetric
		}

		// Process Result: Time-Data
		field, host, hostSeries := "", "", schema.Series{}
		// typeId := 0
		switch scope {
		case "node":
			for rows.Next() {
				row := rows.Record()
				if host == "" || host != row.ValueByKey("hostname").(string) || rows.TableChanged() {
					if host != "" {
						// Append Series before reset
						jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
					}
					field, host = row.Measurement(), row.ValueByKey("hostname").(string)
					hostSeries = schema.Series{
						Hostname:   host,
						Statistics: schema.MetricStatistics{}, // TODO Add Statistics
						Data:       make([]schema.Float, 0),
					}
				}
				val, ok := row.Value().(float64)
				if ok {
					hostSeries.Data = append(hostSeries.Data, schema.Float(val))
				} else {
					hostSeries.Data = append(hostSeries.Data, schema.Float(0))
				}
			}
		case "socket":
			continue
		case "core":
			continue
			// Include Series.Id in hostSeries
			// for rows.Next() {
			// 	row := rows.Record()
			// 	if ( host == "" || host != row.ValueByKey("hostname").(string) || typeId != row.ValueByKey("type-id").(int) || rows.TableChanged() ) {
			// 		if ( host != "" ) {
			// 			// Append Series before reset
			// 			jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
			// 		}
			// 		field, host, typeId = row.Measurement(), row.ValueByKey("hostname").(string), row.ValueByKey("type-id").(int)
			// 		hostSeries = schema.Series{
			// 			Hostname:   host,
			// 			Id:         &typeId,
			// 			Statistics: nil,
			// 			Data:       make([]schema.Float, 0),
			// 		}
			// 	}
			// 	val := row.Value().(float64)
			// 	hostSeries.Data = append(hostSeries.Data, schema.Float(val))
			// }
		default:
			log.Infof("Unknown scope '%s' requested: Will return 'node' scope.", scope)
			continue
			// return nil, errors.New("the InfluxDB metric data repository does not yet support other scopes than 'node, core'")
		}
		// Append last Series
		jobData[field][scope].Series = append(jobData[field][scope].Series, hostSeries)
	}

	// Get Stats
	stats, err := idb.LoadStats(job, metrics, ctx)
	if err != nil {
		log.Warn("Error while loading statistics")
		return nil, err
	}

	for _, scope := range scopes {
		if scope == "node" { // No 'socket/core' support yet
			for metric, nodes := range stats {
				for node, stats := range nodes {
					for index := range jobData[metric][scope].Series {
						if jobData[metric][scope].Series[index].Hostname == node {
							jobData[metric][scope].Series[index].Statistics = schema.MetricStatistics{Avg: stats.Avg, Min: stats.Min, Max: stats.Max}
						}
					}
				}
			}
		}
	}

	return jobData, nil
}

func (idb *InfluxDBv2DataRepository) LoadStats(
	job *schema.Job,
	metrics []string,
	ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {

	stats := map[string]map[string]schema.MetricStatistics{}

	hostsConds := make([]string, 0, len(job.Resources))
	for _, h := range job.Resources {
		if h.HWThreads != nil || h.Accelerators != nil {
			// TODO
			return nil, errors.New("METRICDATA/INFLUXV2 > the InfluxDB metric data repository does not yet support HWThreads or Accelerators")
		}
		hostsConds = append(hostsConds, fmt.Sprintf(`r["hostname"] == "%s"`, h.Hostname))
	}
	hostsCond := strings.Join(hostsConds, " or ")

	// lenMet := len(metrics)

	for _, metric := range metrics {
		// log.Debugf("<< You are here: %s (Index %d of %d metrics)", metric, index, lenMet)

		query := fmt.Sprintf(`
			data = from(bucket: "%s")
				|> range(start: %s, stop: %s)
				|> filter(fn: (r) => r._measurement == "%s" and r._field == "value" and (%s))
			union(tables: [data |> mean(column: "_value") |> set(key: "_field", value: "avg"),
				data |> min(column: "_value") |> set(key: "_field", value: "min"),
				data |> max(column: "_value") |> set(key: "_field", value: "max")])
			|> pivot(rowKey: ["hostname"], columnKey: ["_field"], valueColumn: "_value")
			|> group()`,
			idb.bucket,
			idb.formatTime(job.StartTime), idb.formatTime(idb.epochToTime(job.StartTimeUnix+int64(job.Duration)+int64(1))),
			metric, hostsCond)

		rows, err := idb.queryClient.Query(ctx, query)
		if err != nil {
			log.Error("Error while performing query")
			return nil, err
		}

		nodes := map[string]schema.MetricStatistics{}
		for rows.Next() {
			row := rows.Record()
			host := row.ValueByKey("hostname").(string)

			avg, avgok := row.ValueByKey("avg").(float64)
			if !avgok {
				// log.Debugf(">> Assertion error for metric %s, statistic AVG. Expected 'float64', got %v", metric, avg)
				avg = 0.0
			}
			min, minok := row.ValueByKey("min").(float64)
			if !minok {
				// log.Debugf(">> Assertion error for metric %s, statistic MIN. Expected 'float64', got %v", metric, min)
				min = 0.0
			}
			max, maxok := row.ValueByKey("max").(float64)
			if !maxok {
				// log.Debugf(">> Assertion error for metric %s, statistic MAX. Expected 'float64', got %v", metric, max)
				max = 0.0
			}

			nodes[host] = schema.MetricStatistics{
				Avg: avg,
				Min: min,
				Max: max,
			}
		}
		stats[metric] = nodes
	}

	return stats, nil
}

func (idb *InfluxDBv2DataRepository) LoadNodeData(
	cluster string,
	metrics, nodes []string,
	scopes []schema.MetricScope,
	from, to time.Time,
	ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {

	// TODO: Implement to be used in Analysis and System/Node view
	log.Infof("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes)

	return nil, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
}
@@ -1,7 +1,8 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package metricdata

import (
@@ -11,8 +12,9 @@ import (
	"time"

	"github.com/ClusterCockpit/cc-backend/internal/config"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
	"github.com/ClusterCockpit/cc-backend/internal/memorystore"
	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
	"github.com/ClusterCockpit/cc-lib/schema"
)

type MetricDataRepository interface {
@@ -23,23 +25,29 @@ type MetricDataRepository interface {
	// Return the JobData for the given job, only with the requested metrics.
	LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)

	// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
	// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope only.
	LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)

	// Return a map of hosts to a map of metrics at the requested scopes for that node.
	// Return a map of metrics to a map of scopes to the scoped metric statistics of the job.
	LoadScopedStats(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.ScopedJobStats, error)

	// Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
	LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)

	// Return a map of hosts to a map of metrics to a map of scopes for multiple nodes.
	LoadNodeListData(cluster, subCluster string, nodes, metrics []string, scopes []schema.MetricScope, resolution int, from, to time.Time, ctx context.Context) (map[string]schema.JobData, error)
}

var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}

func Init() error {
	for _, cluster := range config.Keys.Clusters {
	for _, cluster := range config.Clusters {
		if cluster.MetricDataRepository != nil {
			var kind struct {
				Kind string `json:"kind"`
			}
			if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil {
				log.Warn("Error while unmarshaling raw json MetricDataRepository")
				cclog.Warn("Error while unmarshaling raw json MetricDataRepository")
				return err
			}

@@ -47,8 +55,9 @@ func Init() error {
			switch kind.Kind {
			case "cc-metric-store":
				mdr = &CCMetricStore{}
			case "influxdb":
				mdr = &InfluxDBv2DataRepository{}
			case "cc-metric-store-internal":
				mdr = &CCMetricStoreInternal{}
				memorystore.InternalCCMSFlag = true
			case "prometheus":
				mdr = &PrometheusDataRepository{}
			case "test":
@@ -58,7 +67,7 @@ func Init() error {
			}

			if err := mdr.Init(cluster.MetricDataRepository); err != nil {
				log.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
				cclog.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
				return err
			}
			metricDataRepos[cluster.Name] = mdr
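The switch above dispatches on a `kind` discriminator inside each cluster's MetricDataRepository JSON blob; a hedged example of such a blob follows (only `kind` is guaranteed by this code, the url/token fields are assumptions for illustration):

// Hypothetical per-cluster config fragment; only "kind" is read by the
// code above, the remaining fields are assumptions for illustration.
var exampleRepoConfig = json.RawMessage(`{
	"kind": "cc-metric-store",
	"url": "http://localhost:8082",
	"token": "<jwt>"
}`)

func exampleKindDispatch() string {
	var kind struct {
		Kind string `json:"kind"`
	}
	if err := json.Unmarshal(exampleRepoConfig, &kind); err != nil {
		return ""
	}
	return kind.Kind // "cc-metric-store" selects &CCMetricStore{}
}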
@@ -1,5 +1,5 @@
// Copyright (C) 2022 DKRZ
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricdata
@@ -21,8 +21,8 @@ import (
"time"

"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
promapi "github.com/prometheus/client_golang/api"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
promcfg "github.com/prometheus/common/config"
@@ -159,7 +159,7 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
var config PrometheusDataRepositoryConfig
// parse config
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config")
cclog.Warn("Error while unmarshaling raw json config")
return err
}
// support basic authentication
@@ -178,7 +178,7 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
RoundTripper: rt,
})
if err != nil {
log.Error("Error while initializing new prometheus client")
cclog.Error("Error while initializing new prometheus client")
return err
}
// init query client
@@ -191,9 +191,9 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
for metric, templ := range config.Templates {
pdb.templates[metric], err = template.New(metric).Parse(templ)
if err == nil {
log.Debugf("Added PromQL template for %s: %s", metric, templ)
cclog.Debugf("Added PromQL template for %s: %s", metric, templ)
} else {
log.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric)
cclog.Warnf("Failed to parse PromQL template %s for metric %s", templ, metric)
}
}
return nil
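Init() parses one text/template per metric, and FormatQuery (next hunk) executes it to produce the final PromQL string. A self-contained sketch of that mechanism, using only the standard library; the template text and the field name Nodes are invented for illustration, the real data struct lives in FormatQuery:

package main

import (
	"bytes"
	"fmt"
	"text/template"
)

func main() {
	// Hypothetical PromQL template, in the style a cluster config might define.
	const templ = `avg(cpu_load{exported_instance=~"{{.Nodes}}"})`

	t := template.Must(template.New("cpu_load").Parse(templ))

	var buf bytes.Buffer
	if err := t.Execute(&buf, struct{ Nodes string }{Nodes: "node0[1-4]"}); err != nil {
		panic(err)
	}
	fmt.Println(buf.String()) // avg(cpu_load{exported_instance=~"node0[1-4]"})
}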
@@ -220,7 +220,7 @@ func (pdb *PrometheusDataRepository) FormatQuery(
return "", errors.New(fmt.Sprintf("METRICDATA/PROMETHEUS > Error compiling template %v", templ))
} else {
query := buf.String()
log.Debugf("PromQL: %s", query)
cclog.Debugf("PromQL: %s", query)
return query, nil
}
} else {
@@ -278,13 +278,13 @@ func (pdb *PrometheusDataRepository) LoadData(
for i, resource := range job.Resources {
nodes[i] = resource.Hostname
}
from := job.StartTime
to := job.StartTime.Add(time.Duration(job.Duration) * time.Second)
from := time.Unix(job.StartTime, 0)
to := time.Unix(job.StartTime+int64(job.Duration), 0)

for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func() {
log.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
cclog.Infof("Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}
@@ -292,12 +292,12 @@ func (pdb *PrometheusDataRepository) LoadData(
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(job.Cluster, metric)
if metricConfig == nil {
log.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
cclog.Warnf("Error in LoadData: Metric %s for cluster %s not configured", metric, job.Cluster)
return nil, errors.New("Prometheus config error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, job.Cluster)
if err != nil {
log.Warn("Error while formatting prometheus query")
cclog.Warn("Error while formatting prometheus query")
return nil, err
}

@@ -309,11 +309,11 @@ func (pdb *PrometheusDataRepository) LoadData(
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
cclog.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("Prometheus query error")
}
if len(warnings) > 0 {
log.Warnf("Warnings: %v\n", warnings)
cclog.Warnf("Warnings: %v\n", warnings)
}

// init data structures
@@ -359,7 +359,7 @@ func (pdb *PrometheusDataRepository) LoadStats(

data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
if err != nil {
log.Warn("Error while loading job for stats")
cclog.Warn("Error while loading job for stats")
return nil, err
}
for metric, metricData := range data {
@@ -390,19 +390,19 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func() {
log.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}
for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(cluster, metric)
if metricConfig == nil {
log.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
cclog.Warnf("Error in LoadNodeData: Metric %s for cluster %s not configured", metric, cluster)
return nil, errors.New("Prometheus config error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
if err != nil {
log.Warn("Error while formatting prometheus query")
cclog.Warn("Error while formatting prometheus query")
return nil, err
}

@@ -414,11 +414,11 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
cclog.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus query error")
}
if len(warnings) > 0 {
log.Warnf("Warnings: %v\n", warnings)
cclog.Warnf("Warnings: %v\n", warnings)
}

step := int64(metricConfig.Timestep)
@@ -443,6 +443,145 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
}
}
t1 := time.Since(t0)
log.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
cclog.Debugf("LoadNodeData of %v nodes took %s", len(data), t1)
return data, nil
}

// Implemented by NHR@FAU; Used in Job-View StatsTable
func (pdb *PrometheusDataRepository) LoadScopedStats(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
) (schema.ScopedJobStats, error) {
// Assumption: pdb.loadData() only returns series node-scope - use node scope for statsTable
scopedJobStats := make(schema.ScopedJobStats)
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
if err != nil {
cclog.Warn("Error while loading job for scopedJobStats")
return nil, err
}

for metric, metricData := range data {
for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func() {
cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}

if _, ok := scopedJobStats[metric]; !ok {
scopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)
}

if _, ok := scopedJobStats[metric][scope]; !ok {
scopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)
}

for _, series := range metricData[scope].Series {
scopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{
Hostname: series.Hostname,
Data: &series.Statistics,
})
}
}
}

return scopedJobStats, nil
}

// Implemented by NHR@FAU; Used in NodeList-View
func (pdb *PrometheusDataRepository) LoadNodeListData(
cluster, subCluster string,
nodes []string,
metrics []string,
scopes []schema.MetricScope,
resolution int,
from, to time.Time,
ctx context.Context,
) (map[string]schema.JobData, error) {
// Assumption: pdb.loadData() only returns series node-scope - use node scope for NodeList

// Fetch Data, based on pdb.LoadNodeData()
t0 := time.Now()
// Map of hosts of jobData
data := make(map[string]schema.JobData)

// query db for each metric
// TODO: scopes seems to be always empty
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode)
}

for _, scope := range scopes {
if scope != schema.MetricScopeNode {
logOnce.Do(func() {
cclog.Infof("Note: Scope '%s' requested, but not yet supported: Will return 'node' scope only.", scope)
})
continue
}

for _, metric := range metrics {
metricConfig := archive.GetMetricConfig(cluster, metric)
if metricConfig == nil {
cclog.Warnf("Error in LoadNodeListData: Metric %s for cluster %s not configured", metric, cluster)
return nil, errors.New("Prometheus config error")
}
query, err := pdb.FormatQuery(metric, scope, nodes, cluster)
if err != nil {
cclog.Warn("Error while formatting prometheus query")
return nil, err
}

// ranged query over all nodes
r := promv1.Range{
Start: from,
End: to,
Step: time.Duration(metricConfig.Timestep * 1e9),
}
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil {
cclog.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus query error")
}
if len(warnings) > 0 {
cclog.Warnf("Warnings: %v\n", warnings)
}

step := int64(metricConfig.Timestep)
steps := int64(to.Sub(from).Seconds()) / step

// iter rows of host, metric, values
for _, row := range result.(promm.Matrix) {
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)

hostdata, ok := data[hostname]
if !ok {
hostdata = make(schema.JobData)
data[hostname] = hostdata
}

metricdata, ok := hostdata[metric]
if !ok {
metricdata = make(map[schema.MetricScope]*schema.JobMetric)
data[hostname][metric] = metricdata
}

// output per host, metric and scope
scopeData, ok := metricdata[scope]
if !ok {
scopeData = &schema.JobMetric{
Unit: metricConfig.Unit,
Timestep: metricConfig.Timestep,
Series: []schema.Series{pdb.RowToSeries(from, step, steps, row)},
}
data[hostname][metric][scope] = scopeData
}
}
}
}
t1 := time.Since(t0)
cclog.Debugf("LoadNodeListData of %v nodes took %s", len(data), t1)
return data, nil
}
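LoadNodeListData derives the ranged query from the metric's Timestep: Step is the timestep in nanoseconds, and the expected sample count is the window length divided by the timestep. A standalone sketch of that arithmetic, with illustrative values:

package main

import (
	"fmt"
	"time"
)

func main() {
	timestep := 60 // seconds, as in a typical metricConfig.Timestep
	to := time.Now()
	from := to.Add(-1 * time.Hour)

	step := time.Duration(timestep) * time.Second // same value as time.Duration(timestep * 1e9)
	steps := int64(to.Sub(from).Seconds()) / int64(timestep)

	fmt.Println(step, steps) // 1m0s 60 -> sixty node-scope samples per series
}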
@@ -1,7 +1,8 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package metricdata

import (
@@ -9,14 +10,14 @@ import (
"encoding/json"
"time"

"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-lib/schema"
)

var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
panic("TODO")
}

// Only a mock for unit-testing.
// TestMetricDataRepository is only a mock for unit-testing.
type TestMetricDataRepository struct{}

func (tmdr *TestMetricDataRepository) Init(_ json.RawMessage) error {
@@ -28,15 +29,25 @@ func (tmdr *TestMetricDataRepository) LoadData(
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int) (schema.JobData, error) {

resolution int,
) (schema.JobData, error) {
return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
}

func (tmdr *TestMetricDataRepository) LoadStats(
job *schema.Job,
metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
metrics []string,
ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
panic("TODO")
}

func (tmdr *TestMetricDataRepository) LoadScopedStats(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
) (schema.ScopedJobStats, error) {
panic("TODO")
}

@@ -45,17 +56,27 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {

ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
panic("TODO")
}

func DeepCopy(jd_temp schema.JobData) schema.JobData {
var jd schema.JobData
func (tmdr *TestMetricDataRepository) LoadNodeListData(
cluster, subCluster string,
nodes []string,
metrics []string,
scopes []schema.MetricScope,
resolution int,
from, to time.Time,
ctx context.Context,
) (map[string]schema.JobData, error) {
panic("TODO")
}

jd = make(schema.JobData, len(jd_temp))
for k, v := range jd_temp {
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
func DeepCopy(jdTemp schema.JobData) schema.JobData {
jd := make(schema.JobData, len(jdTemp))
for k, v := range jdTemp {
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jdTemp[k]))
for k_, v_ := range v {
jd[k][k_] = new(schema.JobMetric)
jd[k][k_].Series = make([]schema.Series, len(v_.Series))
||||
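TestMetricDataRepository routes LoadData through the package-level TestLoadDataCallback, so unit tests can stub metric data without a live backend. A sketch of how a test might override it; the empty fixture and the surrounding test function are invented for illustration:

func TestExample(t *testing.T) {
	metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string,
		scopes []schema.MetricScope, ctx context.Context, resolution int,
	) (schema.JobData, error) {
		// Return an empty, well-formed JobData fixture instead of real data.
		return schema.JobData{}, nil
	}
	// ... exercise code under test that eventually calls LoadData ...
}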
internal/repository/config.go (new file, 68 lines)
@@ -0,0 +1,68 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package repository

import "time"

// RepositoryConfig holds configuration for repository operations.
// All fields have sensible defaults, so this configuration is optional.
type RepositoryConfig struct {
// CacheSize is the LRU cache size in bytes for job metadata and energy footprints.
// Default: 1MB (1024 * 1024 bytes)
CacheSize int

// MaxOpenConnections is the maximum number of open database connections.
// Default: 4
MaxOpenConnections int

// MaxIdleConnections is the maximum number of idle database connections.
// Default: 4
MaxIdleConnections int

// ConnectionMaxLifetime is the maximum amount of time a connection may be reused.
// Default: 1 hour
ConnectionMaxLifetime time.Duration

// ConnectionMaxIdleTime is the maximum amount of time a connection may be idle.
// Default: 1 hour
ConnectionMaxIdleTime time.Duration

// MinRunningJobDuration is the minimum duration in seconds for a job to be
// considered in "running jobs" queries. This filters out very short jobs.
// Default: 600 seconds (10 minutes)
MinRunningJobDuration int
}

// DefaultConfig returns the default repository configuration.
// These values are optimized for typical deployments.
func DefaultConfig() *RepositoryConfig {
return &RepositoryConfig{
CacheSize: 1 * 1024 * 1024, // 1MB
MaxOpenConnections: 4,
MaxIdleConnections: 4,
ConnectionMaxLifetime: time.Hour,
ConnectionMaxIdleTime: time.Hour,
MinRunningJobDuration: 600, // 10 minutes
}
}

// repoConfig is the package-level configuration instance.
// It is initialized with defaults and can be overridden via SetConfig.
var repoConfig *RepositoryConfig = DefaultConfig()

// SetConfig sets the repository configuration.
// This must be called before any repository initialization (Connect, GetJobRepository, etc.).
// If not called, default values from DefaultConfig() are used.
func SetConfig(cfg *RepositoryConfig) {
if cfg != nil {
repoConfig = cfg
}
}

// GetConfig returns the current repository configuration.
func GetConfig() *RepositoryConfig {
return repoConfig
}
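Because repoConfig is read when Connect and GetJobRepository run, SetConfig only takes effect if it is called first. A minimal sketch of tuning the defaults for a larger deployment; the specific values are illustrative, not recommendations from the diff:

func main() {
	repository.SetConfig(&repository.RepositoryConfig{
		CacheSize:             4 * 1024 * 1024, // 4MB LRU cache
		MaxOpenConnections:    16,
		MaxIdleConnections:    16,
		ConnectionMaxLifetime: time.Hour,
		ConnectionMaxIdleTime: 30 * time.Minute,
		MinRunningJobDuration: 300, // count jobs running longer than 5 minutes
	})
	repository.Connect("sqlite3", "./var/job.db") // must come after SetConfig
}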
@@ -1,15 +1,18 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package repository

import (
"database/sql"
"fmt"
"net/url"
"sync"
"time"

"github.com/ClusterCockpit/cc-backend/pkg/log"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/jmoiron/sqlx"
"github.com/mattn/go-sqlite3"
"github.com/qustavo/sqlhooks/v2"
@@ -33,6 +36,21 @@ type DatabaseOptions struct {
ConnectionMaxIdleTime time.Duration
}

func setupSqlite(db *sql.DB) error {
pragmas := []string{
"temp_store = memory",
}

for _, pragma := range pragmas {
_, err := db.Exec("PRAGMA " + pragma)
if err != nil {
return err
}
}

return nil
}

func Connect(driver string, db string) {
var err error
var dbHandle *sqlx.DB
@@ -40,36 +58,45 @@ func Connect(driver string, db string) {
dbConnOnce.Do(func() {
opts := DatabaseOptions{
URL: db,
MaxOpenConnections: 4,
MaxIdleConnections: 4,
ConnectionMaxLifetime: time.Hour,
ConnectionMaxIdleTime: time.Hour,
MaxOpenConnections: repoConfig.MaxOpenConnections,
MaxIdleConnections: repoConfig.MaxIdleConnections,
ConnectionMaxLifetime: repoConfig.ConnectionMaxLifetime,
ConnectionMaxIdleTime: repoConfig.ConnectionMaxIdleTime,
}

switch driver {
case "sqlite3":
// - Set WAL mode (not strictly necessary each time because it's persisted in the database, but good for first run)
// - Set busy timeout, so concurrent writers wait on each other instead of erroring immediately
// - Enable foreign key checks
opts.URL += "?_journal=WAL&_timeout=5000&_fk=true"
// TODO: Have separate DB handles for Writes and Reads
// Optimize SQLite connection: https://kerkour.com/sqlite-for-servers
connectionURLParams := make(url.Values)
connectionURLParams.Add("_txlock", "immediate")
connectionURLParams.Add("_journal_mode", "WAL")
connectionURLParams.Add("_busy_timeout", "5000")
connectionURLParams.Add("_synchronous", "NORMAL")
connectionURLParams.Add("_cache_size", "1000000000")
connectionURLParams.Add("_foreign_keys", "true")
opts.URL = fmt.Sprintf("file:%s?%s", opts.URL, connectionURLParams.Encode())

if log.Loglevel() == "debug" {
if cclog.Loglevel() == "debug" {
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
dbHandle, err = sqlx.Open("sqlite3WithHooks", opts.URL)
} else {
dbHandle, err = sqlx.Open("sqlite3", opts.URL)
}

err = setupSqlite(dbHandle.DB)
if err != nil {
log.Fatal(err)
cclog.Abortf("Failed sqlite db setup.\nError: %s\n", err.Error())
}
case "mysql":
opts.URL += "?multiStatements=true"
dbHandle, err = sqlx.Open("mysql", opts.URL)
if err != nil {
log.Fatalf("sqlx.Open() error: %v", err)
}
default:
log.Fatalf("unsupported database driver: %s", driver)
cclog.Abortf("DB Connection: Unsupported database driver '%s'.\n", driver)
}

if err != nil {
cclog.Abortf("DB Connection: Could not connect to '%s' database with sqlx.Open().\nError: %s\n", driver, err.Error())
}

dbHandle.SetMaxOpenConns(opts.MaxOpenConnections)
@@ -80,14 +107,14 @@ func Connect(driver string, db string) {
dbConnInstance = &DBConnection{DB: dbHandle, Driver: driver}
err = checkDBVersion(driver, dbHandle.DB)
if err != nil {
log.Fatal(err)
cclog.Abortf("DB Connection: Failed DB version check.\nError: %s\n", err.Error())
}
})
}

func GetConnection() *DBConnection {
if dbConnInstance == nil {
log.Fatalf("Database connection not initialized!")
cclog.Fatalf("Database connection not initialized!")
}

return dbConnInstance
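The url.Values construction above yields a single DSN string understood by the mattn/go-sqlite3 driver. A standalone sketch showing the resulting format; note that Encode() emits parameters in alphabetical key order:

package main

import (
	"fmt"
	"net/url"
)

func main() {
	params := make(url.Values)
	params.Add("_txlock", "immediate")
	params.Add("_journal_mode", "WAL")
	params.Add("_busy_timeout", "5000")
	params.Add("_synchronous", "NORMAL")
	params.Add("_cache_size", "1000000000")
	params.Add("_foreign_keys", "true")

	fmt.Printf("file:%s?%s\n", "./var/job.db", params.Encode())
	// file:./var/job.db?_busy_timeout=5000&_cache_size=1000000000&_foreign_keys=true&_journal_mode=WAL&_synchronous=NORMAL&_txlock=immediate
}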
@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
@@ -8,21 +8,21 @@ import (
"context"
"time"

"github.com/ClusterCockpit/cc-backend/pkg/log"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)

// Hooks satisfies the sqlhook.Hooks interface
type Hooks struct{}

// Before hook will print the query with its args and return the context with the timestamp
func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
log.Debugf("SQL query %s %q", query, args)
func (h *Hooks) Before(ctx context.Context, query string, args ...any) (context.Context, error) {
cclog.Debugf("SQL query %s %q", query, args)
return context.WithValue(ctx, "begin", time.Now()), nil
}

// After hook will get the timestamp registered on the Before hook and print the elapsed time
func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
func (h *Hooks) After(ctx context.Context, query string, args ...any) (context.Context, error) {
begin := ctx.Value("begin").(time.Time)
log.Debugf("Took: %s\n", time.Since(begin))
cclog.Debugf("Took: %s\n", time.Since(begin))
return ctx, nil
}
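These hooks only fire when the driver is wrapped, which Connect does at debug log level. A condensed sketch of that wiring, restating the sqlite3WithHooks registration shown in the Connect diff above rather than introducing new API:

// Register the wrapped driver once, then open through it; every query now
// passes through Hooks.Before and Hooks.After for query and timing output.
sql.Register("sqlite3WithHooks", sqlhooks.Wrap(&sqlite3.SQLiteDriver{}, &Hooks{}))
db, err := sqlx.Open("sqlite3WithHooks", dsn)
if err != nil {
	// handle the open error
}
_ = db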
@@ -1,7 +1,64 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

// Package repository provides the data access layer for cc-backend using the repository pattern.
//
// The repository pattern abstracts database operations and provides a clean interface for
// data access. Each major entity (Job, User, Node, Tag) has its own repository with CRUD
// operations and specialized queries.
//
// # Database Connection
//
// Initialize the database connection before using any repository:
//
// repository.Connect("sqlite3", "./var/job.db")
// // or for MySQL:
// repository.Connect("mysql", "user:password@tcp(localhost:3306)/dbname")
//
// # Configuration
//
// Optional: Configure repository settings before initialization:
//
// repository.SetConfig(&repository.RepositoryConfig{
// CacheSize: 2 * 1024 * 1024, // 2MB cache
// MaxOpenConnections: 8, // Connection pool size
// MinRunningJobDuration: 300, // Filter threshold
// })
//
// If not configured, sensible defaults are used automatically.
//
// # Repositories
//
// - JobRepository: Job lifecycle management and querying
// - UserRepository: User management and authentication
// - NodeRepository: Cluster node state tracking
// - Tags: Job tagging and categorization
//
// # Caching
//
// Repositories use LRU caching to improve performance. Cache keys are constructed
// as "type:id" (e.g., "metadata:123"). Cache is automatically invalidated on
// mutations to maintain consistency.
//
// # Transaction Support
//
// For batch operations, use transactions:
//
// t, err := jobRepo.TransactionInit()
// if err != nil {
// return err
// }
// defer t.Rollback() // Rollback if not committed
//
// // Perform operations...
// jobRepo.TransactionAdd(t, query, args...)
//
// // Commit when done
// if err := t.Commit(); err != nil {
// return err
// }
package repository

import (
@@ -9,16 +66,16 @@ import (
"encoding/json"
"errors"
"fmt"
"maps"
"math"
"strconv"
"sync"
"time"

"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/lrucache"
"github.com/ClusterCockpit/cc-lib/schema"
sq "github.com/Masterminds/squirrel"
"github.com/jmoiron/sqlx"
)
@@ -33,6 +90,7 @@ type JobRepository struct {
stmtCache *sq.StmtCache
cache *lrucache.Cache
driver string
Mutex sync.Mutex
}

func GetJobRepository() *JobRepository {
@@ -44,45 +102,56 @@ func GetJobRepository() *JobRepository {
driver: db.Driver,

stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024),
cache: lrucache.New(repoConfig.CacheSize),
}
})
return jobRepoInstance
}

var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
"job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
"job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status",
"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
"job.footprint", "job.energy",
}

func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
var jobCacheColumns []string = []string{
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
"job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
"job_cache.footprint", "job_cache.energy",
}

func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
job := &schema.Job{}

if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster,
&job.StartTime, &job.Partition, &job.ArrayJobID, &job.NumNodes, &job.NumHWThreads,
&job.NumAcc, &job.Shared, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
log.Warnf("Error while scanning rows (Job): %v", err)
cclog.Warnf("Error while scanning rows (Job): %v", err)
return nil, err
}

if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
log.Warn("Error while unmarshaling raw resources json")
cclog.Warn("Error while unmarshaling raw resources json")
return nil, err
}
job.RawResources = nil

if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warnf("Error while unmarshaling raw footprint json: %v", err)
cclog.Warnf("Error while unmarshaling raw footprint json: %v", err)
return nil, err
}
job.RawFootprint = nil

job.StartTime = time.Unix(job.StartTimeUnix, 0)
// Always ensure accurate duration for running jobs
if job.State == schema.JobStateRunning {
job.Duration = int32(time.Since(job.StartTime).Seconds())
job.Duration = int32(time.Now().Unix() - job.StartTime)
}

return job, nil
@@ -97,7 +166,7 @@ func (r *JobRepository) Optimize() error {
return err
}
case "mysql":
log.Info("Optimize currently not supported for mysql driver")
cclog.Info("Optimize currently not supported for mysql driver")
}

return nil
@@ -138,17 +207,6 @@ func (r *JobRepository) Flush() error {
return nil
}

func scanJobLink(row interface{ Scan(...interface{}) error }) (*model.JobLink, error) {
jobLink := &model.JobLink{}
if err := row.Scan(
&jobLink.ID, &jobLink.JobID); err != nil {
log.Warn("Error while scanning rows (jobLink)")
return nil, err
}

return jobLink, nil
}

func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", job.ID)
@@ -159,7 +217,7 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error

if err := sq.Select("job.meta_data").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawMetaData); err != nil {
log.Warn("Error while scanning for job metadata")
cclog.Warn("Error while scanning for job metadata")
return nil, err
}

@@ -168,12 +226,12 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
}

if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
log.Warn("Error while unmarshaling raw metadata json")
cclog.Warn("Error while unmarshaling raw metadata json")
return nil, err
}

r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Debugf("Timer FetchMetadata %s", time.Since(start))
cclog.Debugf("Timer FetchMetadata %s", time.Since(start))
return job.MetaData, nil
}

@@ -182,16 +240,14 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
r.cache.Del(cachekey)
if job.MetaData == nil {
if _, err = r.FetchMetadata(job); err != nil {
log.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
cclog.Warnf("Error while fetching metadata for job, DB ID '%v'", job.ID)
return err
}
}

if job.MetaData != nil {
cpy := make(map[string]string, len(job.MetaData)+1)
for k, v := range job.MetaData {
cpy[k] = v
}
maps.Copy(cpy, job.MetaData)
cpy[key] = val
job.MetaData = cpy
} else {
@@ -199,7 +255,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
}

if job.RawMetaData, err = json.Marshal(job.MetaData); err != nil {
log.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
cclog.Warnf("Error while marshaling metadata for job, DB ID '%v'", job.ID)
return err
}

@@ -207,7 +263,7 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
Set("meta_data", job.RawMetaData).
Where("job.id = ?", job.ID).
RunWith(r.stmtCache).Exec(); err != nil {
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
cclog.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err
}

@@ -217,15 +273,10 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er

func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
start := time.Now()
cachekey := fmt.Sprintf("footprint:%d", job.ID)
if cached := r.cache.Get(cachekey, nil); cached != nil {
job.Footprint = cached.(map[string]float64)
return job.Footprint, nil
}

if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
log.Warn("Error while scanning for job footprint")
cclog.Warn("Error while scanning for job footprint")
return nil, err
}

@@ -234,12 +285,11 @@ func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, err
}

if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warn("Error while unmarshaling raw footprint json")
cclog.Warn("Error while unmarshaling raw footprint json")
return nil, err
}

r.cache.Put(cachekey, job.Footprint, len(job.Footprint), 24*time.Hour)
log.Debugf("Timer FetchFootprint %s", time.Since(start))
cclog.Debugf("Timer FetchFootprint %s", time.Since(start))
return job.Footprint, nil
}

@@ -253,7 +303,7 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6

if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
log.Warn("Error while scanning for job energy_footprint")
cclog.Warn("Error while scanning for job energy_footprint")
return nil, err
}

@@ -262,40 +312,83 @@ func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float6
}

if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
log.Warn("Error while unmarshaling raw energy footprint json")
cclog.Warn("Error while unmarshaling raw energy footprint json")
return nil, err
}

r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
cclog.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
return job.EnergyFootprint, nil
}

func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
func (r *JobRepository) DeleteJobsBefore(startTime int64, omitTagged bool) (int, error) {
var cnt int
q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
q.RunWith(r.DB).QueryRow().Scan(cnt)

if omitTagged {
q = q.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)")
}

if err := q.RunWith(r.DB).QueryRow().Scan(&cnt); err != nil {
cclog.Errorf("Error counting jobs before %d: %v", startTime, err)
return 0, err
}

// Invalidate cache for jobs being deleted (get job IDs first)
if cnt > 0 {
var jobIds []int64
selectQuery := sq.Select("id").From("job").Where("job.start_time < ?", startTime)

if omitTagged {
selectQuery = selectQuery.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)")
}

rows, err := selectQuery.RunWith(r.DB).Query()
if err == nil {
defer rows.Close()
for rows.Next() {
var id int64
if err := rows.Scan(&id); err == nil {
jobIds = append(jobIds, id)
}
}
// Invalidate cache entries
for _, id := range jobIds {
r.cache.Del(fmt.Sprintf("metadata:%d", id))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", id))
}
}
}

qd := sq.Delete("job").Where("job.start_time < ?", startTime)

if omitTagged {
qd = qd.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)")
}
_, err := qd.RunWith(r.DB).Exec()

if err != nil {
s, _, _ := qd.ToSql()
log.Errorf(" DeleteJobsBefore(%d) with %s: error %#v", startTime, s, err)
cclog.Errorf(" DeleteJobsBefore(%d) with %s: error %#v", startTime, s, err)
} else {
log.Debugf("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
cclog.Debugf("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
}
return cnt, err
}
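A sketch of a retention call site for the revised DeleteJobsBefore, assuming a six-month cutoff and tagged jobs kept; the policy values are invented for illustration:

// Remove untagged jobs that started more than six months ago.
cutoff := time.Now().AddDate(0, -6, 0).Unix()
deleted, err := jobRepo.DeleteJobsBefore(cutoff, true /* omitTagged: keep tagged jobs */)
if err != nil {
	cclog.Errorf("retention sweep failed: %v", err)
} else {
	cclog.Infof("retention sweep removed %d jobs", deleted)
}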
func (r *JobRepository) DeleteJobById(id int64) error {
func (r *JobRepository) DeleteJobByID(id int64) error {
// Invalidate cache entries before deletion
r.cache.Del(fmt.Sprintf("metadata:%d", id))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", id))

qd := sq.Delete("job").Where("job.id = ?", id)
_, err := qd.RunWith(r.DB).Exec()

if err != nil {
s, _, _ := qd.ToSql()
log.Errorf("DeleteJobById(%d) with %s : error %#v", id, s, err)
cclog.Errorf("DeleteJobById(%d) with %s : error %#v", id, s, err)
} else {
log.Debugf("DeleteJobById(%d): Success", id)
cclog.Debugf("DeleteJobById(%d): Success", id)
}
return err
}
@@ -344,10 +437,10 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta

// theSql, args, theErr := theQuery.ToSql()
// if theErr != nil {
// log.Warn("Error while converting query to sql")
// cclog.Warn("Error while converting query to sql")
// return "", err
// }
// log.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args)
// cclog.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args)

err := theQuery.RunWith(r.stmtCache).QueryRow().Scan(&result)

@@ -358,7 +451,7 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta
}
return "", ErrNotFound
} else {
log.Infof("Non-Admin User %s : Requested Query '%s' on table '%s' : Forbidden", user.Name, query, table)
cclog.Infof("Non-Admin User %s : Requested Query '%s' on table '%s' : Forbidden", user.Name, query, table)
return "", ErrForbidden
}
}
@@ -377,7 +470,7 @@ func (r *JobRepository) FindColumnValues(user *schema.User, query string, table
err := rows.Scan(&result)
if err != nil {
rows.Close()
log.Warnf("Error while scanning rows: %v", err)
cclog.Warnf("Error while scanning rows: %v", err)
return emptyResult, err
}
results = append(results, result)
@@ -387,7 +480,7 @@ func (r *JobRepository) FindColumnValues(user *schema.User, query string, table
return emptyResult, ErrNotFound

} else {
log.Infof("Non-Admin User %s : Requested Query '%s' on table '%s' : Forbidden", user.Name, query, table)
cclog.Infof("Non-Admin User %s : Requested Query '%s' on table '%s' : Forbidden", user.Name, query, table)
return emptyResult, ErrForbidden
}
}
@@ -395,7 +488,7 @@ func (r *JobRepository) FindColumnValues(user *schema.User, query string, table
func (r *JobRepository) Partitions(cluster string) ([]string, error) {
var err error
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
return nil, 0, 1000
@@ -406,7 +499,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
if err != nil {
return nil, err
}
log.Debugf("Timer Partitions %s", time.Since(start))
cclog.Debugf("Timer Partitions %s", time.Since(start))
return partitions.([]string), nil
}

@@ -420,7 +513,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
Where("job.cluster = ?", cluster).
RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
cclog.Error("Error while running query")
return nil, err
}

@@ -431,11 +524,11 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
var resources []*schema.Resource
var subcluster string
if err := rows.Scan(&raw, &subcluster); err != nil {
log.Warn("Error while scanning rows")
cclog.Warn("Error while scanning rows")
return nil, err
}
if err := json.Unmarshal(raw, &resources); err != nil {
log.Warn("Error while unmarshaling raw resources json")
cclog.Warn("Error while unmarshaling raw resources json")
return nil, err
}

@@ -450,63 +543,91 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
}
}

log.Debugf("Timer AllocatedNodes %s", time.Since(start))
cclog.Debugf("Timer AllocatedNodes %s", time.Since(start))
return subclusters, nil
}

// FIXME: Set duration to requested walltime?
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now()
currentTime := time.Now().Unix()
res, err := sq.Update("job").
Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
Set("duration", 0).
Set("job_state", schema.JobStateFailed).
Where("job.job_state = 'running'").
Where("job.walltime > 0").
Where(fmt.Sprintf("(%d - job.start_time) > (job.walltime + %d)", time.Now().Unix(), seconds)).
Where("(? - job.start_time) > (job.walltime + ?)", currentTime, seconds).
RunWith(r.DB).Exec()
if err != nil {
log.Warn("Error while stopping jobs exceeding walltime")
cclog.Warn("Error while stopping jobs exceeding walltime")
return err
}

rowsAffected, err := res.RowsAffected()
if err != nil {
log.Warn("Error while fetching affected rows after stopping due to exceeded walltime")
cclog.Warn("Error while fetching affected rows after stopping due to exceeded walltime")
return err
}

if rowsAffected > 0 {
log.Infof("%d jobs have been marked as failed due to running too long", rowsAffected)
cclog.Infof("%d jobs have been marked as failed due to running too long", rowsAffected)
}
log.Debugf("Timer StopJobsExceedingWalltimeBy %s", time.Since(start))
cclog.Debugf("Timer StopJobsExceedingWalltimeBy %s", time.Since(start))
return nil
}

func (r *JobRepository) FindJobIdsByTag(tagID int64) ([]int64, error) {
query := sq.Select("job.id").From("job").
Join("jobtag ON jobtag.job_id = job.id").
Where(sq.Eq{"jobtag.tag_id": tagID}).Distinct()
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
return nil, err
}
jobIds := make([]int64, 0, 100)

for rows.Next() {
var jobID int64

if err := rows.Scan(&jobID); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows")
return nil, err
}

jobIds = append(jobIds, jobID)
}

return jobIds, nil
}

// FIXME: Reconsider filtering short jobs with hardcoded threshold
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where("job.cluster = ?", cluster).
Where("job.job_state = 'running'").
Where("job.duration > 600")
Where("job.duration > ?", repoConfig.MinRunningJobDuration)

rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
cclog.Error("Error while running query")
return nil, err
}
defer rows.Close()

jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows")
cclog.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}

log.Infof("Return job count %d", len(jobs))
cclog.Infof("Return job count %d", len(jobs))
return jobs, nil
}

@@ -523,7 +644,7 @@ func (r *JobRepository) UpdateDuration() error {
return nil
}

func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64, omitTagged bool) ([]*schema.Job, error) {
var query sq.SelectBuilder

if startTimeBegin == startTimeEnd || startTimeBegin > startTimeEnd {
@@ -531,43 +652,49 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
}

if startTimeBegin == 0 {
log.Infof("Find jobs before %d", startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
"job.start_time < %d", startTimeEnd))
cclog.Infof("Find jobs before %d", startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where("job.start_time < ?", startTimeEnd)
} else {
log.Infof("Find jobs between %d and %d", startTimeBegin, startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
"job.start_time BETWEEN %d AND %d", startTimeBegin, startTimeEnd))
cclog.Infof("Find jobs between %d and %d", startTimeBegin, startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where("job.start_time BETWEEN ? AND ?", startTimeBegin, startTimeEnd)
}

if omitTagged {
query = query.Where("NOT EXISTS (SELECT 1 FROM jobtag WHERE jobtag.job_id = job.id)")
}

rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
cclog.Error("Error while running query")
return nil, err
}
defer rows.Close()

jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows")
cclog.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}

log.Infof("Return job count %d", len(jobs))
cclog.Infof("Return job count %d", len(jobs))
return jobs, nil
}

func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
// Invalidate cache entries as monitoring status affects job state
r.cache.Del(fmt.Sprintf("metadata:%d", job))
r.cache.Del(fmt.Sprintf("energyFootprint:%d", job))

stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", job)

_, err = stmt.RunWith(r.stmtCache).Exec()
return
return err
}

func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
@@ -587,53 +714,63 @@ func (r *JobRepository) MarkArchived(

func (r *JobRepository) UpdateEnergy(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
jobMeta *schema.Job,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
cclog.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
}
energyFootprint := make(map[string]float64)
var totalEnergy float64
var energy float64

// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
switch sc.MetricConfig[i].Energy {
case "energy": // this metric has energy as unit (Joules or Wh)
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
case "power": // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}

energyFootprint[fp] = energy
totalEnergy += energy
energyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy

// cclog.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
}

var rawFootprint []byte
if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}

return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
}
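To make the revised kWh formula concrete: a "power" metric with a 250 W all-node average on 4 nodes over a 7200 s job yields (250 * 4) * (7200 / 3600) / 1000 = 2.0 kWh. A sketch of the same arithmetic with those invented values:

package main

import (
	"fmt"
	"math"
)

func main() {
	avgPowerW := 250.0 // all-node average, as returned by LoadJobStat(jobMeta, fp, "avg")
	numNodes := 4.0
	durationS := 7200.0

	rawEnergy := ((avgPowerW * numNodes) * (durationS / 3600.0)) / 1000.0
	kWh := math.Round(rawEnergy*100.0) / 100.0
	fmt.Println(kWh) // 2 -> stored as 2.00 kWh in the energy footprint
}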
func (r *JobRepository) UpdateFootprint(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
jobMeta *schema.Job,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
cclog.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
}
footprint := make(map[string]float64)
@@ -647,7 +784,7 @@ func (r *JobRepository) UpdateFootprint(
}

if statType != "avg" && statType != "min" && statType != "max" {
log.Warnf("unknown statType for footprint update: %s", statType)
cclog.Warnf("unknown statType for footprint update: %s", statType)
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
}

@@ -661,7 +798,7 @@ func (r *JobRepository) UpdateFootprint(

var rawFootprint []byte
if rawFootprint, err = json.Marshal(footprint); err != nil {
log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
cclog.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}
@@ -1,44 +1,95 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package repository
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
const NamedJobCacheInsert string = `INSERT INTO job_cache (
|
||||
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||
) VALUES (
|
||||
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||
);`
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
|
||||
res, err := r.DB.NamedExec(NamedJobInsert, job)
|
||||
const NamedJobInsert string = `INSERT INTO job (
|
||||
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
|
||||
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
|
||||
) VALUES (
|
||||
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
|
||||
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
|
||||
);`
|
||||
|
||||
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
|
||||
r.Mutex.Lock()
|
||||
defer r.Mutex.Unlock()
|
||||
|
||||
res, err := r.DB.NamedExec(NamedJobCacheInsert, job)
|
||||
if err != nil {
|
||||
log.Warn("Error while NamedJobInsert")
|
||||
cclog.Warn("Error while NamedJobInsert")
|
||||
return 0, err
|
||||
}
|
||||
id, err := res.LastInsertId()
|
||||
if err != nil {
|
||||
log.Warn("Error while getting last insert ID")
|
||||
cclog.Warn("Error while getting last insert ID")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return id, nil
|
||||
}

func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
    r.Mutex.Lock()
    defer r.Mutex.Unlock()

    query := sq.Select(jobCacheColumns...).From("job_cache")

    rows, err := query.RunWith(r.stmtCache).Query()
    if err != nil {
        cclog.Errorf("Error while running query %v", err)
        return nil, err
    }
    defer rows.Close()

    jobs := make([]*schema.Job, 0, 50)
    for rows.Next() {
        job, err := scanJob(rows)
        if err != nil {
            cclog.Warn("Error while scanning rows")
            return nil, err
        }
        jobs = append(jobs, job)
    }

    _, err = r.DB.Exec(
        "INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
    if err != nil {
        cclog.Warnf("Error while Job sync: %v", err)
        return nil, err
    }

    _, err = r.DB.Exec("DELETE FROM job_cache")
    if err != nil {
        cclog.Warnf("Error while Job cache clean: %v", err)
        return nil, err
    }

    return jobs, nil
}
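
SyncJobs issues the INSERT ... SELECT and the DELETE as two separate statements; if the process dies between them, the copied rows stay in job_cache and the next sync run would collide with the UNIQUE (job_id, cluster, start_time) constraint on job. A minimal sketch of the same copy-then-clear step inside a single transaction, using plain database/sql; the column lists are shortened and the helper name is illustrative:

    package main

    import (
        "database/sql"

        _ "github.com/mattn/go-sqlite3"
    )

    // syncOnce copies cached rows into job and clears the cache atomically.
    // Column lists are shortened for the sketch; the real statement names all columns.
    func syncOnce(db *sql.DB) error {
        tx, err := db.Begin()
        if err != nil {
            return err
        }
        defer tx.Rollback() // no-op after a successful Commit

        if _, err := tx.Exec(`INSERT INTO job (job_id, cluster) SELECT job_id, cluster FROM job_cache`); err != nil {
            return err
        }
        if _, err := tx.Exec(`DELETE FROM job_cache`); err != nil {
            return err
        }
        return tx.Commit()
    }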

// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transferred!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
func (r *JobRepository) Start(job *schema.Job) (id int64, err error) {
    job.RawFootprint, err = json.Marshal(job.Footprint)
    if err != nil {
        return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
@@ -59,17 +110,39 @@ func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {

// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) Stop(
    jobId int64,
    jobID int64,
    duration int32,
    state schema.JobState,
    monitoringStatus int32,
) (err error) {
    // Invalidate cache entries as job state is changing
    r.cache.Del(fmt.Sprintf("metadata:%d", jobID))
    r.cache.Del(fmt.Sprintf("energyFootprint:%d", jobID))

    stmt := sq.Update("job").
        Set("job_state", state).
        Set("duration", duration).
        Set("monitoring_status", monitoringStatus).
        Where("job.id = ?", jobId)
        Where("job.id = ?", jobID)

    _, err = stmt.RunWith(r.stmtCache).Exec()
    return
    return err
}

func (r *JobRepository) StopCached(
    jobID int64,
    duration int32,
    state schema.JobState,
    monitoringStatus int32,
) (err error) {
    // Note: StopCached updates job_cache table, not the main job table
    // Cache invalidation happens when job is synced to main table
    stmt := sq.Update("job_cache").
        Set("job_state", state).
        Set("duration", duration).
        Set("monitoring_status", monitoringStatus).
        Where("job_cache.id = ?", jobID)

    _, err = stmt.RunWith(r.stmtCache).Exec()
    return err
}
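
Stop and StopCached are deliberately symmetric: jobs that have already been synced live in job, jobs still waiting in job_cache must be updated there instead. A sketch of how a caller might route a stop request, assuming Find and FindCached return sql.ErrNoRows for a missing row (as their doc comments state) and that job.ID carries the database id; stopAnywhere is a hypothetical helper inside package repository:

    // stopAnywhere is a hypothetical routing helper.
    func (r *JobRepository) stopAnywhere(jobID int64, cluster string, startTime int64,
        duration int32, state schema.JobState, monitoringStatus int32) error {
        if job, err := r.Find(&jobID, &cluster, &startTime); err == nil {
            // Already synced into the main table.
            return r.Stop(*job.ID, duration, state, monitoringStatus)
        }
        // Not synced yet: the row still sits in job_cache.
        job, err := r.FindCached(&jobID, &cluster, &startTime)
        if err != nil {
            return err
        }
        return r.StopCached(*job.ID, duration, state, monitoringStatus)
    }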

@@ -1,7 +1,8 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package repository

import (
@@ -11,8 +12,8 @@ import (
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    "github.com/ClusterCockpit/cc-lib/schema"
    sq "github.com/Masterminds/squirrel"
)

@@ -22,13 +23,13 @@ import (
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
    jobId *int64,
    jobID *int64,
    cluster *string,
    startTime *int64,
) (*schema.Job, error) {
    start := time.Now()
    q := sq.Select(jobColumns...).From("job").
        Where("job.job_id = ?", *jobId)
        Where("job.job_id = ?", *jobID)

    if cluster != nil {
        q = q.Where("job.cluster = ?", *cluster)
@@ -39,23 +40,43 @@ func (r *JobRepository) Find(

    q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match

    log.Debugf("Timer Find %s", time.Since(start))
    cclog.Debugf("Timer Find %s", time.Since(start))
    return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// FindCached executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindCached(
    jobID *int64,
    cluster *string,
    startTime *int64,
) (*schema.Job, error) {
    q := sq.Select(jobCacheColumns...).From("job_cache").
        Where("job_cache.job_id = ?", *jobID)

    if cluster != nil {
        q = q.Where("job_cache.cluster = ?", *cluster)
    }
    if startTime != nil {
        q = q.Where("job_cache.start_time = ?", *startTime)
    }

    q = q.OrderBy("job_cache.id DESC") // always use newest matching job by db id if more than one match

    return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
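
FindCached mirrors Find but reads from job_cache, which lets lookups fall back to the cache for jobs that have not been synced into the main table yet. A minimal sketch of that read-side fallback, assuming sql.ErrNoRows signals a missing row and that database/sql is imported; findAnywhere is a hypothetical helper:

    // findAnywhere is a hypothetical helper: main table first, cache second.
    func findAnywhere(r *JobRepository, jobID int64, cluster string, startTime int64) (*schema.Job, error) {
        job, err := r.Find(&jobID, &cluster, &startTime)
        if err == sql.ErrNoRows {
            // Not synced yet; the row may still sit in job_cache.
            return r.FindCached(&jobID, &cluster, &startTime)
        }
        return job, err
    }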

// FindAll executes a SQL query to find all batch jobs matching the given criteria.
// Jobs are queried using the batch job id, and optionally filtered by cluster name
// and start time (UNIX epoch time seconds).
// It returns a slice of pointers to schema.Job data structures and an error variable.
// An empty slice is returned if no matching jobs are found.
func (r *JobRepository) FindAll(
    jobId *int64,
    jobID *int64,
    cluster *string,
    startTime *int64,
) ([]*schema.Job, error) {
    start := time.Now()
    q := sq.Select(jobColumns...).From("job").
        Where("job.job_id = ?", *jobId)
        Where("job.job_id = ?", *jobID)

    if cluster != nil {
        q = q.Where("job.cluster = ?", *cluster)
@@ -66,30 +87,66 @@ func (r *JobRepository) FindAll(

    rows, err := q.RunWith(r.stmtCache).Query()
    if err != nil {
        log.Error("Error while running query")
        cclog.Error("Error while running query")
        return nil, err
    }
    defer rows.Close()

    jobs := make([]*schema.Job, 0, 10)
    for rows.Next() {
        job, err := scanJob(rows)
        if err != nil {
            log.Warn("Error while scanning rows")
            cclog.Warn("Error while scanning rows")
            return nil, err
        }
        jobs = append(jobs, job)
    }
    log.Debugf("Timer FindAll %s", time.Since(start))
    cclog.Debugf("Timer FindAll %s", time.Since(start))
    return jobs, nil
}

// FindById executes a SQL query to find a specific batch job.
// GetJobList returns job IDs for non-running jobs.
// This is useful to process large job counts and intended to be used
// together with FindById to process jobs one by one.
// Use limit and offset for pagination. Use limit=0 to get all results (not recommended for large datasets).
func (r *JobRepository) GetJobList(limit int, offset int) ([]int64, error) {
    query := sq.Select("id").From("job").
        Where("job.job_state != 'running'")

    // Add pagination if limit is specified
    if limit > 0 {
        query = query.Limit(uint64(limit)).Offset(uint64(offset))
    }

    rows, err := query.RunWith(r.stmtCache).Query()
    if err != nil {
        cclog.Error("Error while running query")
        return nil, err
    }
    defer rows.Close()

    jl := make([]int64, 0, 1000)
    for rows.Next() {
        var id int64
        err := rows.Scan(&id)
        if err != nil {
            cclog.Warn("Error while scanning rows")
            return nil, err
        }
        jl = append(jl, id)
    }

    cclog.Infof("Return job count %d", len(jl))
    return jl, nil
}
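
GetJobList is meant to be paired with FindByIDDirect so large job counts can be processed one page at a time without holding all rows in memory. A sketch of that loop, with an arbitrary page size and a hypothetical process callback:

    // processAllJobs walks all non-running jobs page by page.
    func processAllJobs(r *JobRepository, process func(*schema.Job) error) error {
        const pageSize = 1000
        for offset := 0; ; offset += pageSize {
            ids, err := r.GetJobList(pageSize, offset)
            if err != nil {
                return err
            }
            if len(ids) == 0 {
                return nil // past the last page
            }
            for _, id := range ids {
                job, err := r.FindByIDDirect(id)
                if err != nil {
                    return err
                }
                if err := process(job); err != nil {
                    return err
                }
            }
        }
    }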

// FindByID executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
func (r *JobRepository) FindByID(ctx context.Context, jobID int64) (*schema.Job, error) {
    q := sq.Select(jobColumns...).
        From("job").Where("job.id = ?", jobId)
        From("job").Where("job.id = ?", jobID)

    q, qerr := SecurityCheck(ctx, q)
    if qerr != nil {
@@ -99,14 +156,14 @@ func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job,
    return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// FindByIdWithUser executes a SQL query to find a specific batch job.
// FindByIDWithUser executes a SQL query to find a specific batch job.
// The job is queried using the database id. The user is passed directly,
// instead of as part of the context.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
func (r *JobRepository) FindByIDWithUser(user *schema.User, jobID int64) (*schema.Job, error) {
    q := sq.Select(jobColumns...).
        From("job").Where("job.id = ?", jobId)
        From("job").Where("job.id = ?", jobID)

    q, qerr := SecurityCheckWithUser(user, q)
    if qerr != nil {
@@ -116,24 +173,24 @@ func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schem
    return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// FindByIdDirect executes a SQL query to find a specific batch job.
// FindByIDDirect executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
func (r *JobRepository) FindByIDDirect(jobID int64) (*schema.Job, error) {
    q := sq.Select(jobColumns...).
        From("job").Where("job.id = ?", jobId)
        From("job").Where("job.id = ?", jobID)
    return scanJob(q.RunWith(r.stmtCache).QueryRow())
}

// FindByJobId executes a SQL query to find a specific batch job.
// FindByJobID executes a SQL query to find a specific batch job.
// The job is queried using the slurm id and the clustername.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
func (r *JobRepository) FindByJobID(ctx context.Context, jobID int64, startTime int64, cluster string) (*schema.Job, error) {
    q := sq.Select(jobColumns...).
        From("job").
        Where("job.job_id = ?", jobId).
        Where("job.job_id = ?", jobID).
        Where("job.cluster = ?", cluster).
        Where("job.start_time = ?", startTime)

@@ -149,10 +206,10 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
// The job is queried using the slurm id, a username and the cluster.
// It returns a bool.
// If job was found, user is owner: test err != sql.ErrNoRows
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
func (r *JobRepository) IsJobOwner(jobID int64, startTime int64, user string, cluster string) bool {
    q := sq.Select("id").
        From("job").
        Where("job.job_id = ?", jobId).
        Where("job.job_id = ?", jobID).
        Where("job.hpc_user = ?", user).
        Where("job.cluster = ?", cluster).
        Where("job.start_time = ?", startTime)
@@ -178,7 +235,7 @@ func (r *JobRepository) FindConcurrentJobs(
    var startTime int64
    var stopTime int64

    startTime = job.StartTimeUnix
    startTime = job.StartTime
    hostname := job.Resources[0].Hostname

    if job.State == schema.JobStateRunning {
@@ -194,59 +251,63 @@ func (r *JobRepository) FindConcurrentJobs(

    queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
        "running", startTimeTail, stopTimeTail, startTime)
    queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
    // Get At Least One Exact Hostname Match from JSON Resources Array in Database
    queryRunning = queryRunning.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)

    query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
        "running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
    query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
    // Get At Least One Exact Hostname Match from JSON Resources Array in Database
    query = query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, '$.hostname') = ?)", hostname)

    rows, err := query.RunWith(r.stmtCache).Query()
    if err != nil {
        log.Errorf("Error while running query: %v", err)
        cclog.Errorf("Error while running query: %v", err)
        return nil, err
    }
    defer rows.Close()

    items := make([]*model.JobLink, 0, 10)
    queryString := fmt.Sprintf("cluster=%s", job.Cluster)

    for rows.Next() {
        var id, jobId, startTime sql.NullInt64
        var id, jobID, startTime sql.NullInt64

        if err = rows.Scan(&id, &jobId, &startTime); err != nil {
            log.Warn("Error while scanning rows")
        if err = rows.Scan(&id, &jobID, &startTime); err != nil {
            cclog.Warn("Error while scanning rows")
            return nil, err
        }

        if id.Valid {
            queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
            queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
            items = append(items,
                &model.JobLink{
                    ID:    fmt.Sprint(id.Int64),
                    JobID: int(jobId.Int64),
                    JobID: int(jobID.Int64),
                })
        }
    }

    rows, err = queryRunning.RunWith(r.stmtCache).Query()
    if err != nil {
        log.Errorf("Error while running query: %v", err)
        cclog.Errorf("Error while running query: %v", err)
        return nil, err
    }
    defer rows.Close()

    for rows.Next() {
        var id, jobId, startTime sql.NullInt64
        var id, jobID, startTime sql.NullInt64

        if err := rows.Scan(&id, &jobId, &startTime); err != nil {
            log.Warn("Error while scanning rows")
        if err := rows.Scan(&id, &jobID, &startTime); err != nil {
            cclog.Warn("Error while scanning rows")
            return nil, err
        }

        if id.Valid {
            queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
            queryString += fmt.Sprintf("&jobId=%d", int(jobID.Int64))
            items = append(items,
                &model.JobLink{
                    ID:    fmt.Sprint(id.Int64),
                    JobID: int(jobId.Int64),
                    JobID: int(jobID.Int64),
                })
        }
    }

57
internal/repository/jobHooks.go
Normal file
@@ -0,0 +1,57 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

import (
    "sync"

    "github.com/ClusterCockpit/cc-lib/schema"
)

type JobHook interface {
    JobStartCallback(job *schema.Job)
    JobStopCallback(job *schema.Job)
}

var (
    initOnce sync.Once
    hooks    []JobHook
)

func RegisterJobJook(hook JobHook) {
    initOnce.Do(func() {
        hooks = make([]JobHook, 0)
    })

    if hook != nil {
        hooks = append(hooks, hook)
    }
}

func CallJobStartHooks(jobs []*schema.Job) {
    if hooks == nil {
        return
    }

    for _, hook := range hooks {
        if hook != nil {
            for _, job := range jobs {
                hook.JobStartCallback(job)
            }
        }
    }
}

func CallJobStopHooks(job *schema.Job) {
    if hooks == nil {
        return
    }

    for _, hook := range hooks {
        if hook != nil {
            hook.JobStopCallback(job)
        }
    }
}
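
The registry above fans job start/stop events out to all registered hooks. A minimal sketch of a subscriber as it could look inside package repository, registered once at startup; loggingHook is illustrative, and note that the registration function is spelled RegisterJobJook in this revision:

    // loggingHook is an illustrative JobHook implementation.
    type loggingHook struct{}

    func (h *loggingHook) JobStartCallback(job *schema.Job) {
        cclog.Infof("job %d started on cluster %s", job.JobID, job.Cluster)
    }

    func (h *loggingHook) JobStopCallback(job *schema.Job) {
        cclog.Infof("job %d stopped", job.JobID)
    }

    func init() {
        RegisterJobJook(&loggingHook{})
    }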

@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

@@ -12,9 +12,10 @@ import (
    "strings"
    "time"

    "github.com/ClusterCockpit/cc-backend/internal/config"
    "github.com/ClusterCockpit/cc-backend/internal/graph/model"
    "github.com/ClusterCockpit/cc-backend/pkg/log"
    "github.com/ClusterCockpit/cc-backend/pkg/schema"
    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    "github.com/ClusterCockpit/cc-lib/schema"
    sq "github.com/Masterminds/squirrel"
)

@@ -67,7 +68,8 @@ func (r *JobRepository) QueryJobs(

    rows, err := query.RunWith(r.stmtCache).Query()
    if err != nil {
        log.Errorf("Error while running query: %v", err)
        queryString, queryVars, _ := query.ToSql()
        cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
        return nil, err
    }

@@ -76,7 +78,7 @@ func (r *JobRepository) QueryJobs(
        job, err := scanJob(rows)
        if err != nil {
            rows.Close()
            log.Warn("Error while scanning rows (Jobs)")
            cclog.Warn("Error while scanning rows (Jobs)")
            return nil, err
        }
        jobs = append(jobs, job)
@@ -122,7 +124,7 @@ func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.Select
        if len(user.Projects) != 0 {
            return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
        } else {
            log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
            cclog.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
            return query.Where("job.hpc_user = ?", user.Username), nil
        }
    case user.HasRole(schema.RoleUser): // User : Only personal jobs
@@ -145,6 +147,11 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
        // This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
        query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
    }
    if filter.DbID != nil {
        dbIDs := make([]string, len(filter.DbID))
        copy(dbIDs, filter.DbID)
        query = query.Where(sq.Eq{"job.id": dbIDs})
    }
    if filter.JobID != nil {
        query = buildStringCondition("job.job_id", filter.JobID, query)
    }
@@ -176,6 +183,9 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
        now := time.Now().Unix() // There does not seem to be a portable way to get the current unix timestamp across different DBs.
        query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
    }
    if filter.Shared != nil {
        query = query.Where("job.shared = ?", *filter.Shared)
    }
    if filter.State != nil {
        states := make([]string, len(filter.State))
        for i, val := range filter.State {
@@ -194,7 +204,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
        query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
    }
    if filter.Node != nil {
        query = buildStringCondition("job.resources", filter.Node, query)
        query = buildResourceJsonCondition("hostname", filter.Node, query)
    }
    if filter.Energy != nil {
        query = buildFloatCondition("job.energy", filter.Energy, query)
@@ -207,7 +217,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
    return query
}

func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
    return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}

@@ -215,7 +225,7 @@ func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBu
    return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}

func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
    if cond.From != nil && cond.To != nil {
        return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
    } else if cond.From != nil {
@@ -235,7 +245,7 @@ func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBui
    case "last30d":
        then = now - (60 * 60 * 24 * 30)
    default:
        log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
        cclog.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
        return query
    }
    return query.Where(field+" BETWEEN ? AND ?", then, now)
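
With the move from schema.TimeRange to config.TimeRange, a caller can still pass either absolute bounds or one of the named ranges resolved above relative to now. A sketch of both variants, assuming config.TimeRange keeps the From/To/Range fields used in this function:

    // Absolute window: both bounds given as timestamps.
    from := time.Unix(1675954712, 0)
    to := time.Now()
    q := sq.Select("id").From("job")
    q = buildTimeCondition("job.start_time", &config.TimeRange{From: &from, To: &to}, q)

    // Named window: resolved by buildTimeCondition to (now - 30 days, now).
    q2 := sq.Select("id").From("job")
    q2 = buildTimeCondition("job.start_time", &config.TimeRange{Range: "last30d"}, q2)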

@@ -296,6 +306,28 @@ func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.
    return query
}

func buildResourceJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
    // Verify and Search Only in Valid Jsons
    query = query.Where("JSON_VALID(resources)")
    // add "AND" Sql query Block for field match
    if cond.Eq != nil {
        return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") = ?)", *cond.Eq)
    }
    if cond.Neq != nil { // Currently Unused
        return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") != ?)", *cond.Neq)
    }
    if cond.StartsWith != nil { // Currently Unused
        return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") LIKE ?)", fmt.Sprint(*cond.StartsWith, "%"))
    }
    if cond.EndsWith != nil { // Currently Unused
        return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") LIKE ?)", fmt.Sprint("%", *cond.EndsWith))
    }
    if cond.Contains != nil {
        return query.Where("EXISTS (SELECT 1 FROM json_each(job.resources) WHERE json_extract(value, \"$."+jsonField+"\") LIKE ?)", fmt.Sprint("%", *cond.Contains, "%"))
    }
    return query
}
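
buildResourceJsonCondition replaces the old substring match on the raw resources JSON: LIKE '%node1%' would also match node10 and node11, while json_each plus json_extract compares the hostname field of every array element exactly. A sketch of the query an exact-hostname filter produces, using the model.StringInput shape from above:

    hostname := "node07"
    q := sq.Select("id").From("job")
    q = buildResourceJsonCondition("hostname", &model.StringInput{Eq: &hostname}, q)

    sqlStr, args, _ := q.ToSql()
    // sqlStr ends in:
    //   WHERE JSON_VALID(resources) AND EXISTS (SELECT 1 FROM json_each(job.resources)
    //     WHERE json_extract(value, "$.hostname") = ?)
    // args == []interface{}{"node07"}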

var (
    matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
    matchAllCap   = regexp.MustCompile("([a-z0-9])([A-Z])")
@@ -304,7 +336,7 @@ var (
func toSnakeCase(str string) string {
    for _, c := range str {
        if c == '\'' || c == '\\' {
            log.Panic("toSnakeCase() attack vector!")
            cclog.Panic("toSnakeCase() attack vector!")
        }
    }

@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository

@@ -8,39 +8,40 @@ import (
    "context"
    "fmt"
    "testing"
    "time"

    "github.com/ClusterCockpit/cc-backend/pkg/schema"
    "github.com/ClusterCockpit/cc-lib/schema"
    _ "github.com/mattn/go-sqlite3"
)

func TestFind(t *testing.T) {
    r := setup(t)

    jobId, cluster, startTime := int64(398998), "fritz", int64(1675957496)
    job, err := r.Find(&jobId, &cluster, &startTime)
    jobID, cluster, startTime := int64(398800), "fritz", int64(1675954712)
    job, err := r.Find(&jobID, &cluster, &startTime)
    if err != nil {
        t.Fatal(err)
    }

    // fmt.Printf("%+v", job)

    if job.ID != 5 {
        t.Errorf("wrong summary for diagnostic 3\ngot: %d \nwant: 1366", job.JobID)
    if *job.ID != 345 {
        t.Errorf("wrong database id \ngot: %d \nwant: 345", *job.ID)
    }
}

func TestFindById(t *testing.T) {
    r := setup(t)

    job, err := r.FindById(getContext(t), 5)
    job, err := r.FindByID(getContext(t), 338)
    if err != nil {
        t.Fatal(err)
    }

    // fmt.Printf("%+v", job)

    if job.JobID != 398998 {
        t.Errorf("wrong summary for diagnostic 3\ngot: %d \nwant: 1404396", job.JobID)
    if job.JobID != 398793 {
        t.Errorf("wrong job id \ngot: %d \nwant: 398793", job.JobID)
    }
}

@@ -71,3 +72,61 @@ func TestGetTags(t *testing.T) {
        t.Errorf("wrong tag count \ngot: %d \nwant: 0", counts["bandwidth"])
    }
}

func TestFindJobsBetween(t *testing.T) {
    r := setup(t)

    // 1. Find a job to use (Find all jobs)
    // We use a large time range to ensure we get something if it exists
    jobs, err := r.FindJobsBetween(0, 9999999999, false)
    if err != nil {
        t.Fatal(err)
    }
    if len(jobs) == 0 {
        t.Fatal("No jobs in test db")
    }

    targetJob := jobs[0]

    // 2. Create a tag
    tagName := fmt.Sprintf("testtag_%d", time.Now().UnixNano())
    tagId, err := r.CreateTag("testtype", tagName, "global")
    if err != nil {
        t.Fatal(err)
    }

    // 3. Link Tag (Manually to avoid archive dependency side-effects in unit test)
    _, err = r.DB.Exec("INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)", *targetJob.ID, tagId)
    if err != nil {
        t.Fatal(err)
    }

    // 4. Search with omitTagged = false (Should find the job)
    jobsFound, err := r.FindJobsBetween(0, 9999999999, false)
    if err != nil {
        t.Fatal(err)
    }

    var found bool
    for _, j := range jobsFound {
        if *j.ID == *targetJob.ID {
            found = true
            break
        }
    }
    if !found {
        t.Errorf("Target job %d should be found when omitTagged=false", *targetJob.ID)
    }

    // 5. Search with omitTagged = true (Should NOT find the job)
    jobsFiltered, err := r.FindJobsBetween(0, 9999999999, true)
    if err != nil {
        t.Fatal(err)
    }

    for _, j := range jobsFiltered {
        if *j.ID == *targetJob.ID {
            t.Errorf("Target job %d should NOT be found when omitTagged=true", *targetJob.ID)
        }
    }
}

@@ -1,7 +1,8 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package repository

import (
@@ -9,14 +10,14 @@ import (
    "embed"
    "fmt"

    "github.com/ClusterCockpit/cc-backend/pkg/log"
    cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
    "github.com/golang-migrate/migrate/v4"
    "github.com/golang-migrate/migrate/v4/database/mysql"
    "github.com/golang-migrate/migrate/v4/database/sqlite3"
    "github.com/golang-migrate/migrate/v4/source/iofs"
)

const Version uint = 8
const Version uint = 10

//go:embed migrations/*
var migrationFiles embed.FS
@@ -54,13 +55,13 @@ func checkDBVersion(backend string, db *sql.DB) error {
            return err
        }
    default:
        log.Fatalf("unsupported database backend: %s", backend)
        cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
    }

    v, dirty, err := m.Version()
    if err != nil {
        if err == migrate.ErrNilVersion {
            log.Warn("Legacy database without version or missing database file!")
            cclog.Warn("Legacy database without version or missing database file!")
        } else {
            return err
        }
@@ -84,7 +85,7 @@ func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err erro
    case "sqlite3":
        d, err := iofs.New(migrationFiles, "migrations/sqlite3")
        if err != nil {
            log.Fatal(err)
            cclog.Fatal(err)
        }

        m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
@@ -102,7 +103,7 @@ func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err erro
            return m, err
        }
    default:
        log.Fatalf("unsupported database backend: %s", backend)
        cclog.Abortf("Migration: Unsupported database backend '%s'.\n", backend)
    }

    return m, nil
@@ -115,8 +116,17 @@ func MigrateDB(backend string, db string) error {
    }

    v, dirty, err := m.Version()
    if err != nil {
        if err == migrate.ErrNilVersion {
            cclog.Info("Legacy database without version or missing database file!")
        } else {
            return err
        }
    }

    log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
    if v < Version {
        cclog.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
    }

    if dirty {
        return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
@@ -124,7 +134,7 @@ func MigrateDB(backend string, db string) error {

    if err := m.Up(); err != nil {
        if err == migrate.ErrNoChange {
            log.Info("DB already up to date!")
            cclog.Info("DB already up to date!")
        } else {
            return err
        }
@@ -142,7 +152,7 @@ func RevertDB(backend string, db string) error {

    if err := m.Migrate(Version - 1); err != nil {
        if err == migrate.ErrNoChange {
            log.Info("DB already up to date!")
            cclog.Info("DB already up to date!")
        } else {
            return err
        }

@@ -0,0 +1 @@
DROP TABLE IF EXISTS job_cache;

231
internal/repository/migrations/sqlite3/09_add-job-cache.up.sql
Normal file
@@ -0,0 +1,231 @@
CREATE TABLE "job_cache" (
|
||||
id INTEGER PRIMARY KEY,
|
||||
job_id BIGINT NOT NULL,
|
||||
cluster VARCHAR(255) NOT NULL,
|
||||
subcluster VARCHAR(255) NOT NULL,
|
||||
submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
|
||||
start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
|
||||
hpc_user VARCHAR(255) NOT NULL,
|
||||
project VARCHAR(255) NOT NULL,
|
||||
cluster_partition VARCHAR(255),
|
||||
array_job_id BIGINT,
|
||||
duration INT NOT NULL,
|
||||
walltime INT NOT NULL,
|
||||
job_state VARCHAR(255) NOT NULL
|
||||
CHECK (job_state IN (
|
||||
'boot_fail', 'cancelled', 'completed', 'deadline',
|
||||
'failed', 'node_fail', 'out_of_memory', 'pending',
|
||||
'preempted', 'running', 'suspended', 'timeout'
|
||||
)),
|
||||
meta_data TEXT, -- JSON
|
||||
resources TEXT NOT NULL, -- JSON
|
||||
num_nodes INT NOT NULL,
|
||||
num_hwthreads INT,
|
||||
num_acc INT,
|
||||
smt TINYINT NOT NULL DEFAULT 1 CHECK (smt IN (0, 1)),
|
||||
shared TEXT NOT NULL
|
||||
CHECK (shared IN ("none", "single_user", "multi_user")),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1
|
||||
CHECK (monitoring_status IN (0, 1, 2, 3)),
|
||||
energy REAL NOT NULL DEFAULT 0.0,
|
||||
energy_footprint TEXT DEFAULT NULL,
|
||||
footprint TEXT DEFAULT NULL,
|
||||
UNIQUE (job_id, cluster, start_time)
|
||||
);
|
||||
|
||||
CREATE TABLE "job_new" (
|
||||
id INTEGER PRIMARY KEY,
|
||||
job_id BIGINT NOT NULL,
|
||||
cluster TEXT NOT NULL,
|
||||
subcluster TEXT NOT NULL,
|
||||
submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
|
||||
start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
|
||||
hpc_user TEXT NOT NULL,
|
||||
project TEXT NOT NULL,
|
||||
cluster_partition TEXT,
|
||||
array_job_id BIGINT,
|
||||
duration INT NOT NULL,
|
||||
walltime INT NOT NULL,
|
||||
job_state TEXT NOT NULL
|
||||
CHECK (job_state IN (
|
||||
'boot_fail', 'cancelled', 'completed', 'deadline',
|
||||
'failed', 'node_fail', 'out_of_memory', 'pending',
|
||||
'preempted', 'running', 'suspended', 'timeout'
|
||||
)),
|
||||
meta_data TEXT, -- JSON
|
||||
resources TEXT NOT NULL, -- JSON
|
||||
num_nodes INT NOT NULL,
|
||||
num_hwthreads INT,
|
||||
num_acc INT,
|
||||
smt INT NOT NULL DEFAULT 1,
|
||||
shared TEXT NOT NULL
|
||||
CHECK (shared IN ("none", "single_user", "multi_user")),
|
||||
monitoring_status TINYINT NOT NULL DEFAULT 1
|
||||
CHECK (monitoring_status IN (0, 1, 2, 3)),
|
||||
energy REAL NOT NULL DEFAULT 0.0,
|
||||
energy_footprint TEXT DEFAULT NULL,
|
||||
footprint TEXT DEFAULT NULL,
|
||||
UNIQUE (job_id, cluster, start_time)
|
||||
);
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS lookup_exclusive (
|
||||
id INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL UNIQUE
|
||||
);
|
||||
|
||||
INSERT INTO lookup_exclusive (id, name) VALUES
|
||||
(0, 'multi_user'),
|
||||
(1, 'none'),
|
||||
(2, 'single_user');
|
||||
|
||||
INSERT INTO job_new (
|
||||
id, job_id, cluster, subcluster, submit_time, start_time, hpc_user, project,
|
||||
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
|
||||
num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy,
|
||||
energy_footprint, footprint
|
||||
) SELECT
|
||||
id,
|
||||
job_id,
|
||||
cluster,
|
||||
subcluster,
|
||||
0,
|
||||
start_time,
|
||||
hpc_user,
|
||||
project,
|
||||
cluster_partition,
|
||||
array_job_id,
|
||||
duration,
|
||||
walltime,
|
||||
job_state,
|
||||
meta_data,
|
||||
resources,
|
||||
num_nodes,
|
||||
num_hwthreads,
|
||||
num_acc,
|
||||
smt,
|
||||
(
|
||||
SELECT name FROM lookup_exclusive
|
||||
WHERE id = job.exclusive
|
||||
),
|
||||
monitoring_status,
|
||||
energy,
|
||||
energy_footprint,
|
||||
footprint
|
||||
FROM job;
|
||||
|
||||
DROP TABLE lookup_exclusive;
|
||||
DROP TABLE job; -- Deletes All Existing 'job' Indices; Recreate after Renaming
|
||||
ALTER TABLE job_new RENAME TO job;
|
||||
|
||||
-- Recreate Indices from 08_add-footprint, include new submit_time indices
|
||||
-- Cluster Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
|
||||
-- Cluster Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_submittime ON job (cluster, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
|
||||
|
||||
-- Cluster+Partition Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
|
||||
-- Cluster+Partition Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_submittime ON job (cluster, cluster_partition, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
|
||||
|
||||
-- Cluster+Partition+Jobstate Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
|
||||
-- Cluster+Partition+Jobstate Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_submittime ON job (cluster, cluster_partition, job_state, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
|
||||
|
||||
-- Cluster+JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
|
||||
-- Cluster+JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_submittime ON job (cluster, job_state, submit_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
|
||||
|
||||
-- User Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
|
||||
-- User Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
|
||||
|
||||
-- Project Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
|
||||
-- Project Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
|
||||
|
||||
-- JobState Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
|
||||
-- JobState Filter Sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
|
||||
|
||||
-- ArrayJob Filter
|
||||
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
|
||||
|
||||
-- Sorting without active filters
|
||||
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
|
||||
|
||||
-- Single filters with default starttime sorting
|
||||
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
|
||||
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
|
||||
|
||||
-- Optimize DB index usage
|
||||
PRAGMA optimize;
|
||||
|
||||
-- Optimize DB size: https://sqlite.org/lang_vacuum.html
|
||||
-- Not allowed within a migration transaction; Keep command here for documentation and recommendation
|
||||
-- Command: 'VACUUM;'
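
The temporary lookup_exclusive table exists only to translate the old integer exclusive column into the new textual shared column during the INSERT ... SELECT, and is dropped immediately afterwards. The value mapping it encodes, restated in Go for reference; the per-value meanings in the comments are inferred from the names, not stated in the migration:

    // Mapping applied per row by 09_add-job-cache.up.sql via lookup_exclusive.
    var exclusiveToShared = map[int]string{
        0: "multi_user",  // inferred: node shared between users
        1: "none",        // inferred: node used exclusively
        2: "single_user", // inferred: node shared between jobs of one user
    }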

@@ -0,0 +1 @@
DROP TABLE IF EXISTS node;
Some files were not shown because too many files have changed in this diff.