mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-28 21:37:31 +01:00
Compare commits
1722 Commits
v1.0.0-rc.
...
optimize-c
| Author | SHA1 | Date | |
|---|---|---|---|
| 1ec41d8389 | |||
| 888d7fb235 | |||
| adebffd251 | |||
|
|
2e5d85c223 | ||
|
|
07b989cb81 | ||
| a418abc7d5 | |||
| a1db8263d7 | |||
| 4c3cd8e66a | |||
| 6ecb934967 | |||
|
|
348b6010e8 | ||
| ca0f9a42c7 | |||
|
|
0a0db36433 | ||
| 92236b1d1d | |||
|
|
df3bc111a4 | ||
| cc21e0e62c | |||
| 45a1bc78b7 | |||
|
|
ff180affd7 | ||
| 5d136634a2 | |||
| 998aff2345 | |||
|
|
0dea959391 | ||
| dadcb983e7 | |||
| 8989b7a410 | |||
|
|
bcfe9022de | ||
|
|
31f3c28294 | ||
|
|
12c01655c3 | ||
| ab55ce91a1 | |||
| 03c65e06f6 | |||
| defa8fa994 | |||
| c9d8de0d56 | |||
| 86fbecc679 | |||
| 8ee6c09e9b | |||
| fc1ba1f5b3 | |||
| 82e79b074a | |||
|
|
bae7ec11b4 | ||
|
|
2d90fd05d6 | ||
|
e1c1148160
|
|||
|
dc161ec421
|
|||
|
abd11d783b
|
|||
|
064aa0a238
|
|||
|
f00f9fcee0
|
|||
|
|
705d70ddc0 | ||
|
|
7789489d08 | ||
|
|
62cd21eb83 | ||
|
90b52f997d
|
|||
|
745c0357f3
|
|||
|
|
087c00eb7f | ||
|
|
facfc9d4c9 | ||
|
|
89e06c3530 | ||
|
57536d982c
|
|||
|
|
0c0f423b84 | ||
|
7bd79dcc3c
|
|||
|
2a659915a4
|
|||
|
|
c77e2969c5 | ||
|
d8ad6dd3f0
|
|||
|
cd8b574cca
|
|||
|
2da35909c1
|
|||
|
2e24fde430
|
|||
|
757be60b22
|
|||
|
589149790f
|
|||
|
29c440a637
|
|||
|
6035b62734
|
|||
|
|
fe3ebe0abc | ||
|
|
a52d7d017f | ||
|
9af44779aa
|
|||
|
|
1cf2c41bd7 | ||
|
|
2eeefc2720 | ||
|
|
0dd894890f | ||
| 25ff094bdf | |||
| d2ff4a2e02 | |||
| 4b983e3b9b | |||
| 98f9c18f72 | |||
| 7d8b305cd9 | |||
| 6e3462f962 | |||
| 2c8608f5a4 | |||
|
|
3215bc3de0 | ||
| 140c7f6e47 | |||
| c15f1117f5 | |||
|
|
48729b172d | ||
|
|
76ce8122e2 | ||
| f016bd4232 | |||
| 54ea5d7900 | |||
| 865cd3db54 | |||
| 90c8fbf07c | |||
| f4ee0d1042 | |||
|
|
e75da7f8cc | ||
|
|
12e9f6700e | ||
|
bca7dd743b
|
|||
|
8d6c6b819b
|
|||
|
|
d07a537f0b | ||
|
|
5e4994a64c | ||
|
|
a5a1fd1a6a | ||
|
|
49a1748641 | ||
|
|
a2f0b57ab9 | ||
| 0dff9fa07f | |||
| 1feb3baf68 | |||
| d21943a514 | |||
| 035ac2384e | |||
| ac7eb93141 | |||
|
|
51e9d33f9f | ||
|
|
d1e7ea09bc | ||
|
|
7dd3ee3084 | ||
| 1980ef5f43 | |||
| fd9b76c6a7 | |||
| abdd7ea6f1 | |||
| c7b366f35f | |||
|
|
84e715a273 | ||
| 396a628175 | |||
| 624746f34b | |||
| 2b395a94e6 | |||
| f6aa40d927 | |||
| c920c57f5d | |||
| 363e839c49 | |||
|
|
e681e9e7ec | ||
| a8194de492 | |||
|
|
1145b31a49 | ||
|
|
b7df1f56ef | ||
|
|
c43d4a0f16 | ||
| a8d385a1ee | |||
| 5579b6f40c | |||
| 7123a8c1cc | |||
|
|
9cc09145ec | ||
| 6294f8e263 | |||
| 0adf2bad92 | |||
| a85f72fccd | |||
| db8772dc0b | |||
| fa7727c6ca | |||
|
|
5655639320 | ||
|
|
720f40c9c9 | ||
| f671d8df90 | |||
| 265da42385 | |||
| b160284a1b | |||
|
|
fcb37b0367 | ||
| 0984c1d431 | |||
|
|
276559d120 | ||
|
|
e3148b16eb | ||
|
|
4d13c37008 | ||
|
|
84d7a7aa7d | ||
|
|
5616801f3e | ||
|
|
b5f6ee9c0c | ||
|
|
af73ce9c6d | ||
|
|
a7a95bb866 | ||
|
5d7dd62b72
|
|||
|
46fb52d67e
|
|||
|
|
39b8356683 | ||
|
42ce598865
|
|||
|
0d62a300e7
|
|||
|
|
3cf88f757c | ||
|
75a74c162d
|
|||
|
248f11f4f8
|
|||
|
|
75e849922d | ||
|
|
d39b955b25 | ||
|
00a41373e8
|
|||
|
|
e9cd6b4225 | ||
|
|
13cca1ee62 | ||
|
|
7b4e2fcf59 | ||
|
|
f2285e603b | ||
|
|
b7bd8210e5 | ||
|
|
1791e665aa | ||
|
|
a71341064e | ||
|
|
74dbbaa794 | ||
|
|
732fab4a04 | ||
|
|
aa3fcbfe17 | ||
| 5e58c9f376 | |||
| b600eeca5e | |||
|
|
e6662c4592 | ||
|
|
1ffcc5e241 | ||
|
|
32f0664012 | ||
|
|
2ef1826b12 | ||
|
|
d397457ce6 | ||
|
|
e8c81ba7d4 | ||
|
|
318dbd65e0 | ||
|
|
dd56e75b50 | ||
|
|
df93786474 | ||
|
|
4deec9a170 | ||
|
|
f26cabbdf1 | ||
|
|
195a1edcfe | ||
|
|
7101d2bb3b | ||
|
|
3452891613 | ||
|
|
b25abc5f16 | ||
|
|
60a847922e | ||
| 0d857b49a2 | |||
| eb5aa9ad02 | |||
| 98661aad15 | |||
| 69739ffdfd | |||
| 95689e3c99 | |||
|
|
9d9babe94d | ||
| 9d15a87c88 | |||
|
|
719aaaff4b | ||
| bbde91a1f9 | |||
| 55cb2cb6d6 | |||
| 752e19c276 | |||
|
|
dac382af53 | ||
| 28a3ff8d67 | |||
|
|
ae81687da9 | ||
|
|
2173d3527d | ||
| 2859f12dc1 | |||
| b307e885ce | |||
| 4853814228 | |||
| ca08717b9d | |||
|
|
934bc13c2c | ||
|
|
4aa337ccc8 | ||
|
|
700f2aad55 | ||
|
|
836e6e4242 | ||
| b5182c4c13 | |||
|
|
808e281ee8 | ||
| b9e65b50db | |||
| 0ea836c69c | |||
|
|
e074bb315c | ||
|
|
084d00cb0d | ||
| 7ecfc8468e | |||
| c782043c64 | |||
|
|
fbf4004e92 | ||
|
|
a2c1b65f91 | ||
|
|
0af550bf4e | ||
|
|
436194e46d | ||
|
|
49938bcef8 | ||
|
|
da2a78faa3 | ||
|
|
98dc8cf5b5 | ||
|
|
cd810b45ec | ||
|
|
22b1d4d276 | ||
|
|
25c5457ef3 | ||
|
|
ea6b9d910b | ||
|
|
5567371ccd | ||
|
|
585c4fcace | ||
| 525d99140f | |||
| 499b4287f9 | |||
|
|
b7df4f7cca | ||
| f41301036b | |||
| 30516776e5 | |||
| 07afcc4cd4 | |||
|
|
05abea87e7 | ||
|
|
4459840f5f | ||
|
|
55e0456aac | ||
|
|
f18ae35030 | ||
|
|
f416be77f7 | ||
| 1d4c79c821 | |||
| d4edbd7d1a | |||
| 5281f3bb60 | |||
|
|
e91fbf405f | ||
|
|
1d41ff8190 | ||
|
|
77a2a256e4 | ||
|
|
eb09504306 | ||
|
|
b912be5978 | ||
|
|
1a41629535 | ||
|
|
b81d9b05ac | ||
|
|
1d62ee1e22 | ||
|
|
55d2c7d7eb | ||
|
|
bb527fb410 | ||
| 9a97d0e8eb | |||
| 93dcfee8c5 | |||
| 76139ef53c | |||
|
|
32319adf72 | ||
|
|
10a5c89a16 | ||
|
|
40bff1eff9 | ||
|
|
ceba4eb0c6 | ||
|
|
faacf3f343 | ||
|
|
7cd98c4f25 | ||
|
|
489ad44b9f | ||
|
|
02a8cf05d1 | ||
| 7db2bbe6b0 | |||
| b6f0faa97f | |||
| a3fffa8e8b | |||
| 72248defbf | |||
| 155e05495e | |||
| 9c92a7796b | |||
| 7c78407c49 | |||
| cb219b3c74 | |||
| d59aa2e855 | |||
|
|
cd3d133f0d | ||
|
|
3b7fc44ce9 | ||
| e1efc68476 | |||
| 8f0bb907ff | |||
|
|
e5c620ca20 | ||
|
|
d0bcfb90e6 | ||
|
|
9deee54e41 | ||
|
|
94b86ef11a | ||
|
|
d8cd752dcb | ||
|
|
5d376e6865 | ||
| 9c3beddf54 | |||
| c6465ad9e5 | |||
| d415381d4a | |||
| 211d4fae54 | |||
|
|
3276ed7785 | ||
|
|
77b7548ef3 | ||
|
|
59851f410e | ||
|
|
4cb8d648cb | ||
| c8627a13f4 | |||
| 0ea0270fe1 | |||
| 19402d30af | |||
| b2f870e3c0 | |||
| 9e542dc200 | |||
| 6cf59043a3 | |||
| 71b75eea0e | |||
|
e900a686db
|
|||
|
fb8db3c3ae
|
|||
|
|
170a9ace8a | ||
|
|
518e9950ea | ||
|
25c8fca561
|
|||
| 754f7e16f6 | |||
| 04a2e460ae | |||
| 2ebab1e2e2 | |||
| a9366d14c6 | |||
| 42809e3f75 | |||
| 4cec933349 | |||
| d3f3c532b1 | |||
| ad1e87d0b8 | |||
|
|
affa85c086 | ||
|
|
aa053d78f7 | ||
|
|
fae6d9d835 | ||
|
|
78f1db7ad1 | ||
| f1367f84f8 | |||
|
|
4c81696f4d | ||
|
|
a91f8f72e3 | ||
|
|
87f7ed329c | ||
|
|
8641d9053d | ||
|
|
4a5ab8a279 | ||
|
|
d179412ab6 | ||
|
|
968c7d179d | ||
| 56399523d7 | |||
| 4d6326b8be | |||
|
|
a2414791bf | ||
|
|
faf3a19f0c | ||
|
|
4e6038d6c1 | ||
|
|
ddc2ecf829 | ||
| ecb5aef735 | |||
| 11ec2267da | |||
| 8576ae458d | |||
|
|
c66445acb5 | ||
|
|
29a20f7b0b | ||
|
|
874c019fb6 | ||
|
|
54825626de | ||
| 9bf5c5dc1a | |||
| 64fef9774c | |||
| 999667ec0c | |||
| c1135531ba | |||
| 287256e5f1 | |||
| 0bc26aa194 | |||
|
|
502d7e9084 | ||
|
|
89875db4a9 | ||
|
|
5a8b929448 | ||
|
|
fe78f2f433 | ||
|
|
e37591ce6d | ||
| 1cd4a57bd3 | |||
| b35172e2f7 | |||
| 3cfcd30128 | |||
| e56532e5c8 | |||
| fdee4f8938 | |||
|
|
7acc89e42d | ||
|
|
af7d208c21 | ||
|
|
91b90d033e | ||
|
|
7a0975b94d | ||
|
|
c58b01a602 | ||
|
|
8244449646 | ||
|
|
436afa4a61 | ||
|
|
998f800632 | ||
| 06ed056d43 | |||
| d446c13546 | |||
| 6e74fa294a | |||
|
|
43bdb56072 | ||
|
|
10a0b0add8 | ||
| e707fd0893 | |||
|
|
19c8e9beb1 | ||
|
|
32e5353847 | ||
|
|
d2f2d78954 | ||
| b8fdfc30c0 | |||
| 79a2ca8ae8 | |||
| d1a78c13a4 | |||
| f4b00e9de1 | |||
| 0a5e155096 | |||
| 4ecc050c4c | |||
| 88dc5036b3 | |||
| d30c6ef3bf | |||
|
0419fec810
|
|||
|
43e5fd1131
|
|||
|
|
11e94124cc | ||
|
|
102109388b | ||
|
|
60a69aa0a2 | ||
| 5e2cbd75fa | |||
| 14f1192ccb | |||
| 72b2560ecf | |||
| 7fce6fa401 | |||
| e6286768a7 | |||
| 0306723307 | |||
| 6f49998ad3 | |||
| 457c944ec6 | |||
| 33c38f9464 | |||
| 46351389b6 | |||
|
|
d56b0e93db | ||
|
|
f9aa47ea1c | ||
| d567a5312e | |||
| 97a322354f | |||
| 554527445b | |||
|
|
c5aff1a2ca | ||
| 987cc40318 | |||
| 104fd1576a | |||
| 72ce3954b4 | |||
| cfa7461855 | |||
| 44cda8a232 | |||
| cf119e6843 | |||
|
|
451744f321 | ||
|
|
ca6682b94b | ||
|
|
cbad2341c3 | ||
|
|
a956c7b135 | ||
|
|
ea6caeb2f0 | ||
|
|
c17e8b1156 | ||
|
|
b993b1e096 | ||
| d7d81e352d | |||
| 078c608bda | |||
| f2e57f9edd | |||
| 5698d5216f | |||
| 10aa2bfbd3 | |||
| 6cfed989ff | |||
| ab70acd582 | |||
|
|
79e1c236fe | ||
|
|
fed62b6c45 | ||
|
|
0d62181272 | ||
|
|
290a71bd48 | ||
|
|
6e385db378 | ||
|
|
ffe8329b84 | ||
| f13be109c2 | |||
| d24d85b970 | |||
| 8d44ac90ad | |||
|
|
4083de2a51 | ||
|
|
131df075db | ||
|
|
afd6f50ba2 | ||
|
|
ad01366705 | ||
| 6325793902 | |||
|
|
8ea176f9da | ||
| 03b5272e44 | |||
| 7da01975f7 | |||
| 7cff8bbfd2 | |||
|
|
c98cbb33f8 | ||
| f3ea95535b | |||
| b9b84b7971 | |||
| be7340ca30 | |||
| 881c4566dd | |||
| 7efbb0217f | |||
| 9e2ce39cde | |||
|
|
0ff6cae1c3 | ||
|
|
d02ba3d717 | ||
| 6aa830adb6 | |||
| be6603cbb9 | |||
|
|
8d208929d5 | ||
|
|
cb0f96b737 | ||
|
|
83723ab050 | ||
|
|
3abaefa550 | ||
|
|
389010dbbd | ||
| 81fe2c043e | |||
| c76e9bb3fe | |||
| 48b68d3410 | |||
| 2b64b31393 | |||
| 2333068de7 | |||
| 78530029ef | |||
| 329b6e5640 | |||
|
|
967f0a3294 | ||
|
|
6eb779d359 | ||
|
|
414147177a | ||
|
|
3b37f3630c | ||
|
|
7c1a818582 | ||
|
|
c4cf7e9707 | ||
|
|
1ceb681521 | ||
|
|
443176a0d1 | ||
|
|
261905a364 | ||
| e00288b160 | |||
| f141ca926f | |||
| f7a0954213 | |||
|
|
da8d562eba | ||
|
|
399af8592c | ||
| 6239e7f19b | |||
| d0e1b7186c | |||
| fea3292f50 | |||
| 9973aa9ffa | |||
| 0b38a980d2 | |||
| 20838b6882 | |||
| 8f4ef1e274 | |||
| e1c7583670 | |||
| 39a2157d46 | |||
| dd63e7157a | |||
| 340efd7926 | |||
| ecc6194b57 | |||
|
|
90c3381954 | ||
|
|
21334c8026 | ||
|
|
cbdef6ce9e | ||
|
|
591cd9fd66 | ||
|
|
e8d2a45afb | ||
|
|
3b533938a6 | ||
|
|
9fe342a7e9 | ||
|
|
2152ced97a | ||
|
|
404be5f317 | ||
|
|
f56783a439 | ||
|
|
fb278182d3 | ||
|
|
c2c63d2f67 | ||
|
|
7f740455fe | ||
|
|
946b992746 | ||
|
|
a6c43e6f2f | ||
|
|
ecad52c18d | ||
|
|
e49e5a0474 | ||
|
|
9231b3cfca | ||
|
|
68e0159292 | ||
|
|
1a674590bf | ||
|
|
1ef47e7b3f | ||
|
|
214a2762df | ||
| cb5d06decd | |||
| 8555a88202 | |||
|
|
2287f4493a | ||
|
|
bb357f7cab | ||
|
|
d9b240cd2d | ||
|
|
bea5ee96d9 | ||
|
|
7d205fd526 | ||
|
|
c15b2a0cbb | ||
|
|
7ccba30a3d | ||
|
|
8091485588 | ||
|
|
1413f968d6 | ||
|
|
d1d1bb09e9 | ||
|
|
3c1a7e0171 | ||
|
|
0cb50f2f01 | ||
|
|
2287586700 | ||
|
|
ea7660ddb3 | ||
|
|
44e98e8f2f | ||
|
|
856ccbb969 | ||
|
|
0920286b4c | ||
|
|
f34e10cfd9 | ||
| ae5d202661 | |||
| bc43c844fc | |||
| 67be9aa27b | |||
| 047b997a22 | |||
| bac51891b7 | |||
|
|
714d6af7cd | ||
| 6efd6334bb | |||
| 91f4475d76 | |||
|
|
de309784b4 | ||
|
|
a623cf53f3 | ||
| 440cd59e50 | |||
| eefb6f6265 | |||
| f5e1226837 | |||
| 151f7e701f | |||
| 40398497c2 | |||
|
|
cda10788fb | ||
|
|
845905d9c8 | ||
| 89055506d6 | |||
|
|
5908ae7905 | ||
|
|
4131665284 | ||
|
|
6a43dfb0d7 | ||
| 3d38d78845 | |||
| 600f19ac80 | |||
|
|
0a3a664653 | ||
|
|
471ec1cd2e | ||
|
|
e296cd7ca0 | ||
|
|
31cfa8cd7c | ||
|
|
70fe8aa367 | ||
|
|
cc9dafac6f | ||
|
|
32429f1481 | ||
| 9485a463b8 | |||
| 35c6ab4a08 | |||
| e58b0fa015 | |||
| beb92967e5 | |||
| 015583f1cd | |||
| d40c54b802 | |||
| 647665b6b9 | |||
| 4fc78bc382 | |||
| 50d000e7e2 | |||
|
|
ad500c4bef | ||
|
|
916077c6f8 | ||
|
|
935fb238a4 | ||
|
|
d03e5b4562 | ||
|
|
05c45c6468 | ||
| 9020613a8b | |||
| be92d5943d | |||
|
|
b2368a0751 | ||
| 7948d5f773 | |||
|
|
1a16851ad0 | ||
|
|
810c14a839 | ||
|
|
df0e8eb228 | ||
| 79605c8a9e | |||
|
|
9b644119ae | ||
|
|
ffa9919019 | ||
|
55ca892f90
|
|||
|
eaca187032
|
|||
|
|
3b9d05cc6d | ||
| d00881de2e | |||
| d8e85cf75d | |||
| 39f21763e4 | |||
|
|
af43901ca3 | ||
|
|
62565b9ae2 | ||
|
|
bca176170c | ||
|
|
2a91ca0cff | ||
|
|
19a75554b0 | ||
|
|
58ae476a3e | ||
|
|
44d8254a0b | ||
|
|
bd2cdfcef2 | ||
| a50b832c2a | |||
|
|
10194105e3 | ||
|
|
b474288df7 | ||
|
|
f338209f32 | ||
|
|
bef832e45b | ||
|
|
71cfb4db77 | ||
| 86453e7e11 | |||
|
|
98b9f8e62d | ||
| 44cd8d258d | |||
| 764b65d094 | |||
|
|
4d2c64b012 | ||
|
|
35c0b0be58 | ||
|
|
7a54e2cfb3 | ||
|
|
54283f6d3c | ||
|
|
697acd1d88 | ||
|
|
5cdb80b4d6 | ||
|
|
e48ff8be73 | ||
|
|
096217eea6 | ||
|
|
ed5290be86 | ||
|
|
b036c3903c | ||
|
|
57b43b7b60 | ||
| ab1ddb7bd1 | |||
| 881f2f32f4 | |||
| 0754ba5292 | |||
|
|
743a89c3a2 | ||
|
|
6692c3ab7c | ||
|
|
c16a5fdac4 | ||
|
|
60ec7e54f5 | ||
| dd48f5ab87 | |||
|
|
db674ec31d | ||
|
|
48150ffc8b | ||
|
|
1ad80efab6 | ||
|
|
aa8789f8f8 | ||
|
|
56e3f2da5c | ||
|
|
a4104822e2 | ||
|
|
c13f386e3b | ||
| 4bd73450b5 | |||
| 64da28e814 | |||
| 639e1b9c6d | |||
|
|
63e828d2df | ||
|
|
b8c30b5703 | ||
|
|
805ea91fc2 | ||
|
|
c4c422da57 | ||
| 544fb35121 | |||
| 43edccb284 | |||
| 7531ba4b5c | |||
| 983aa592d8 | |||
| 8378784231 | |||
| dca25cc601 | |||
|
|
c8fe81cd80 | ||
| c0a4724f57 | |||
| 484c52d813 | |||
|
|
47843b2087 | ||
|
|
c3a6126799 | ||
|
|
e94b250541 | ||
|
|
db5f6c7540 | ||
|
|
79a6c9e90d | ||
| e2e67e3977 | |||
| 6c06450701 | |||
|
|
d7379a1af2 | ||
|
|
d731611e0c | ||
|
|
dceb92ba8e | ||
|
|
1e039cb1bf | ||
|
6f3e1ffbe3
|
|||
|
|
6a6dca3fce | ||
|
|
d6d92071bf | ||
|
|
d40657dc64 | ||
|
|
6dde2a1e59 | ||
|
|
b7823cec16 | ||
|
|
eabd7b8d51 | ||
|
|
27ec445e54 | ||
|
|
ad108b285f | ||
|
|
f471214ef7 | ||
|
|
a0190f8f40 | ||
| 82af984023 | |||
| 0373010497 | |||
|
|
c22d869aa7 | ||
| 87c93e90cd | |||
| 3d6dca9386 | |||
|
|
f946e7e6ab | ||
|
|
d50dfa5867 | ||
| 249128e011 | |||
| ca16a80b1f | |||
|
|
e789e7ba9b | ||
|
|
5048f7be14 | ||
|
|
0e3603f596 | ||
| 9cd4b3c1cc | |||
| 1d9aa75960 | |||
|
|
0a24ef70e0 | ||
| 3b5d3d671e | |||
| 7db83d216e | |||
| d1a7002422 | |||
| 1d8e7e072f | |||
|
7466fe7a34
|
|||
|
|
24cf5047da | ||
|
|
1f103e5ef5 | ||
|
|
9e87974eb1 | ||
|
|
d806cf76c4 | ||
|
|
6e2703998d | ||
| 6f9737c2c2 | |||
|
|
5e696c10d5 | ||
|
|
927e25c72c | ||
| 8b1b99ba35 | |||
| 2c102cd1ff | |||
|
|
42c4926c47 | ||
|
|
703556d893 | ||
|
|
0b529a5c3c | ||
|
|
5186b3f61e | ||
| 4dc0da5099 | |||
| 1bad6ba065 | |||
| 3efee22536 | |||
| eef48ac3a3 | |||
| e35cfbc3dd | |||
| 4a5fd96b32 | |||
|
|
bdffe73f59 | ||
| cdfe722457 | |||
| 0aecea6de2 | |||
| 5a88c77171 | |||
| 8003217092 | |||
| 9b325041c1 | |||
| 1e7fbe5d56 | |||
| 0261c263f9 | |||
| 8d6ae85b0d | |||
| f14bdb3068 | |||
| 3c66840f95 | |||
| 733e3ea9d5 | |||
|
ca634bb707
|
|||
| 9abc206d1a | |||
| 85f17c0fd8 | |||
| 14bad81b9f | |||
|
|
ffd596e2c7 | ||
| 99f8187092 | |||
| f30b784f45 | |||
| f06b5f8fc0 | |||
| 2e781b900d | |||
| d76b1ae75d | |||
| 40110580e0 | |||
| eab7961a83 | |||
| 432e06e801 | |||
| fe1ff5c7a3 | |||
| 6e66b8e08b | |||
| 7abdd0545e | |||
|
|
3f1768e467 | ||
|
|
f464921ae3 | ||
|
|
7603ad3fb0 | ||
|
|
be7ccc78b8 | ||
|
|
b3135c982f | ||
|
13386175f5
|
|||
|
23e8f3dc2d
|
|||
|
|
b323ce2eef | ||
|
|
08e323ba51 | ||
|
|
9f50f36b1d | ||
|
|
4399c1d590 | ||
|
|
f7376f6dca | ||
|
|
518cb34340 | ||
|
|
f210a5f508 | ||
|
|
9ebc49dd1c | ||
|
|
c119eeb468 | ||
|
|
ab616f8f79 | ||
|
|
69286881e4 | ||
|
|
4419df8d1b | ||
|
|
aed2bd48fc | ||
|
|
d3d752f90c | ||
|
|
33ecfe88ef | ||
|
|
fd52fdd35b | ||
|
|
1d13d3dccf | ||
|
|
1c84bcae35 | ||
|
|
df497d5952 | ||
|
|
f65e122f8d | ||
| 161f0744aa | |||
| 95de9ad3b3 | |||
|
|
d5c170055f | ||
|
|
61f0521072 | ||
|
|
6ca14c55f2 | ||
|
|
1309d09aee | ||
| aba75b3a19 | |||
|
|
e87481d8db | ||
| acaad69917 | |||
|
|
ff588ad57a | ||
| 65df27154c | |||
| 8dfa1957f4 | |||
| 570eba3794 | |||
| 94a39fc61f | |||
| 2d359e5f99 | |||
|
|
04692e0c44 | ||
|
|
809fd23b88 | ||
|
|
e3653daea3 | ||
|
|
48fa75386c | ||
|
|
1b3a12a4dc | ||
|
|
543ddf540e | ||
|
|
a3fb471546 | ||
|
|
277f964b30 | ||
|
|
9bcf7adb67 | ||
|
|
f343fa0071 | ||
|
|
e5862e9218 | ||
|
|
29ae2423f8 | ||
|
|
1755a4a7df | ||
|
|
25d3325049 | ||
|
|
fb6a4c3b87 | ||
| 317f80a984 | |||
| 28cdc1d9e5 | |||
| c2087b15d5 | |||
| a8d785beb3 | |||
|
|
a6784b5549 | ||
|
|
d770292be8 | ||
|
|
b3a1037ade | ||
|
|
02946cf0b4 | ||
|
|
cf051d5108 | ||
|
|
96977c6183 | ||
|
|
73d83164fc | ||
|
|
1064f5e4a8 | ||
|
|
5be98c7087 | ||
|
|
0d689c7dff | ||
|
|
1f24ed46a0 | ||
|
|
92b4159f9e | ||
|
|
5817b41e29 | ||
| d6b132e3a6 | |||
|
|
318f70f34c | ||
|
|
e41525d40a | ||
|
|
a102220e52 | ||
|
|
e9a214c5b2 | ||
|
|
c53f5eb144 | ||
|
|
9ed64e0388 | ||
|
|
93040d4629 | ||
|
|
0144ad43f5 | ||
|
|
8da2fc30c3 | ||
| 0e27ae7795 | |||
| 33c6cdb9fe | |||
|
|
73b7014469 | ||
| 25aaf55b93 | |||
| 6a7546c43b | |||
| 0adda4bf7b | |||
|
|
f5f36427a4 | ||
|
|
590bfd3a10 | ||
|
|
16db9bd1a2 | ||
|
|
d0af933b35 | ||
|
|
2b56b40e6d | ||
|
|
4b2d7068b3 | ||
|
|
bd93b8be8e | ||
|
|
aa3fe2b872 | ||
|
|
a61ff915ac | ||
|
|
0a3e678329 | ||
|
|
d4336b0dcb | ||
|
|
65d2698af4 | ||
|
|
6454576417 | ||
|
|
a485bd5977 | ||
|
|
e733688fd0 | ||
|
|
e86f6a8cbd | ||
|
|
fcc9e17664 | ||
|
|
5c9d4ffa9a | ||
|
|
419bc2747b | ||
|
|
1ee99d6866 | ||
|
|
3ab8973895 | ||
|
|
acfa3baeb5 | ||
|
|
c21d7cf101 | ||
|
|
ec895e1d9e | ||
|
|
c964f09a4f | ||
|
|
0bc32f27df | ||
|
|
6640e93ce9 | ||
|
|
d7aefe0cf0 | ||
|
|
187fe5b361 | ||
|
|
b31aea7bc5 | ||
|
c661baf058
|
|||
|
|
0fe0461340 | ||
|
|
d5394c9e92 | ||
|
|
42135fd26c | ||
|
|
38569f55c7 | ||
|
|
5ce03c2db3 | ||
|
|
1031b3eb79 | ||
|
|
fcdf4cd476 | ||
| 6268dffff8 | |||
| c10737bfd7 | |||
|
|
bd0cc69668 | ||
|
|
84fffac264 | ||
|
|
5bf968010e | ||
|
|
61bc095d01 | ||
|
|
e376f97547 | ||
|
|
f2428d3cb3 | ||
|
|
2fdac85d31 | ||
|
|
b731395689 | ||
|
|
07405e3466 | ||
|
|
fc0c76bd77 | ||
|
|
d209547968 | ||
| 632b9fc5ea | |||
| 702591b4ec | |||
|
|
c562746e5f | ||
|
|
c0443cbec2 | ||
|
|
0191bc3821 | ||
|
|
633bd42036 | ||
|
|
998ef8d834 | ||
|
|
c25b076ca9 | ||
|
|
f43379f365 | ||
|
|
d902c0acf4 | ||
|
|
58e678d72c | ||
|
|
cbc49669d0 | ||
|
|
78bb638fd6 | ||
|
|
7a61bae471 | ||
|
|
e1b992526e | ||
|
|
1b043838ea | ||
|
|
07e72294dc | ||
|
|
b6b37ee68b | ||
|
|
43cb1f1bff | ||
|
|
f7a67c72bf | ||
|
|
c5476d08fa | ||
|
|
8af92b1557 | ||
|
|
eaa826bb8a | ||
|
|
140b3c371d | ||
|
|
f158eaa29c | ||
|
|
c4b98ade53 | ||
|
|
f2e85306ca | ||
|
|
42b9de8360 | ||
|
|
6c244f3121 | ||
|
|
9f56213d2f | ||
|
|
fb2f7cf680 | ||
|
|
8fcdd24f84 | ||
|
|
aaafde4a7c | ||
|
|
2b23003556 | ||
|
|
5681062f01 | ||
|
|
d61bf212f5 | ||
|
|
2bd7c8d51e | ||
|
|
1e63cdbcda | ||
|
|
86d85f12be | ||
|
|
dd470d49ec | ||
|
|
95d8062b00 | ||
|
|
8f82399214 | ||
|
|
6247150e9c | ||
|
5266644725
|
|||
|
81d9e96552
|
|||
|
|
4ec9f06114 | ||
|
0033e9f6c0
|
|||
|
571652c314
|
|||
|
|
7ec233e18a | ||
|
|
13c9a12336 | ||
|
|
83d472ecd6 | ||
|
|
c21da6512a | ||
|
|
4b4374e0df | ||
|
|
407276a04d | ||
|
|
64f60905b4 | ||
|
|
9e6072fed2 | ||
|
|
a3e5c424fd | ||
|
|
6683a350aa | ||
|
|
05bfa9b546 | ||
|
|
735988decb | ||
|
|
d0580592be | ||
|
|
817076bdbf | ||
|
|
736236e9ca | ||
|
|
3f4114c51b | ||
|
|
5c2c493c56 | ||
|
|
2c383ebea1 | ||
|
|
91e73450cf | ||
|
|
e55798944e | ||
|
|
5ea11a5ad2 | ||
|
|
2a3383e9e6 | ||
|
|
e871703724 | ||
|
|
1ee367d7be | ||
|
|
bce536b9b4 | ||
|
|
7c9182e0b0 | ||
|
|
aa915d639d | ||
|
|
9489ebc7d6 | ||
|
2a5c525193
|
|||
|
9e2d981c60
|
|||
|
|
53dfe9e4f5 | ||
|
48e95fbdb0
|
|||
|
fd94d85edf
|
|||
|
f2d1a85afb
|
|||
|
0bdbcb8bab
|
|||
|
|
7b91a819be | ||
| bc89025924 | |||
|
|
16bcaef4c3 | ||
|
|
fcbfa451f2 | ||
|
|
559ce53ca4 | ||
|
|
ee2c5b58d7 | ||
|
|
d98d998106 | ||
|
212c45e070
|
|||
|
143fa9b6ed
|
|||
|
4849928288
|
|||
|
|
9248ee8868 | ||
|
|
1616d96732 | ||
| 0bbedd1600 | |||
|
|
c7e49644d8 | ||
|
010c903c74
|
|||
|
e4d12e3537
|
|||
|
051cc8384e
|
|||
|
49a94170d2
|
|||
|
|
42e8e37bd4 | ||
|
|
5d2c350ce2 | ||
|
|
85dc0362c1 | ||
|
|
01c06728eb | ||
|
|
257250714d | ||
|
|
3b769c3059 | ||
|
|
a7395ed45b | ||
|
|
ab07c7928f | ||
|
|
b0c0d15505 | ||
|
|
fcf50790da | ||
|
|
1e43654607 | ||
|
|
4fecbe820d | ||
|
|
763c9dfa6b | ||
|
9de5879786
|
|||
|
|
9396e7492c | ||
|
3ac3415178
|
|||
|
1aae1c59d0
|
|||
|
907e80a01c
|
|||
|
|
8a10b69716 | ||
|
|
1a3cf7edd6 | ||
|
|
76d0fc979b | ||
|
|
a42d8ece35 | ||
|
|
93377f53fc | ||
|
|
c853d74ba0 | ||
|
|
0b9f74f4f4 | ||
|
|
5da6baf828 | ||
|
5766945006
|
|||
|
a53d473b58
|
|||
|
|
d1207ad80e | ||
|
|
e2efe71b33 | ||
|
|
2aef6ed9c0 | ||
|
|
fcb6db0603 | ||
| 01b1136316 | |||
|
|
2512fe9e75 | ||
|
|
f89b5cd2ec | ||
|
|
ab284ed208 | ||
|
|
00a578657c | ||
|
|
38ce40ae7d | ||
| e1be6c7138 | |||
| 28539e60b0 | |||
|
adb11b3ed0
|
|||
|
|
f1e6dedd44 | ||
|
|
8ea1454c06 | ||
| 81b8d578f2 | |||
|
|
16b11db39c | ||
| 0d923cc920 | |||
| c523e93564 | |||
| d588798ea1 | |||
| a11f165f2a | |||
|
|
d4f487d554 | ||
|
|
93d5a0e532 | ||
|
|
00ddc462d2 | ||
|
|
5f4a74f8ba | ||
|
|
a8eff6fbd1 | ||
|
|
baa7367ebe | ||
|
|
69f8a34aac | ||
|
|
21b3a67988 | ||
|
|
d89574ce73 | ||
| ddeac6b9d9 | |||
| 17906ec0eb | |||
|
|
311c088d3d | ||
| a2584d6083 | |||
| 35bd7739c6 | |||
| 7f43c88a39 | |||
|
|
fc1c54a141 | ||
|
|
2af111c584 | ||
| c093cca8b1 | |||
|
|
2bb1b78ba4 | ||
| 3ab26172c4 | |||
| cdd45ce88b | |||
|
210a7d3136
|
|||
|
92ec64d80f
|
|||
|
ff37f71fdb
|
|||
|
6056341525
|
|||
|
|
075612f5bd | ||
| 1a87ed8210 | |||
|
|
c05ffeb16d | ||
| ee3710c5ed | |||
| 4327c4b1f7 | |||
| 492e56a098 | |||
| f0257a2784 | |||
| ec1ead89ab | |||
|
|
ae53e87aba | ||
|
|
939dd2320a | ||
|
|
2c8b73e2e2 | ||
|
|
eabc6212ea | ||
|
|
c120d6517f | ||
|
|
597ee1dad7 | ||
|
|
c4a901504d | ||
|
|
f5cc5d07fd | ||
|
|
8a0e6c921c | ||
|
|
bf1bff9ace | ||
|
|
06f24e988f | ||
|
|
ae327f545e | ||
|
|
35012b18c5 | ||
|
|
9688bad622 | ||
|
|
447b8d3372 | ||
|
|
01102cb9b0 | ||
|
|
934d1a6114 | ||
|
|
6f74c8cb77 | ||
|
|
63b9e619a4 | ||
|
|
82e28f26d7 | ||
|
|
ca9fd96baa | ||
|
|
39b22267d6 | ||
|
|
60d7984d66 | ||
|
|
33d219d2ac | ||
|
|
85a77e05af | ||
|
|
3dfeabcec6 | ||
|
|
673fdc443c | ||
|
|
2f6e5a7648 | ||
|
|
2cbe8e9517 | ||
|
|
2f0460d6ec | ||
|
|
37f4ed7770 | ||
|
|
e3104c61cb | ||
|
|
bc434ee8cb | ||
|
|
f4102b948e | ||
|
|
ed991de11a | ||
|
|
322e161064 | ||
|
|
1adc741cc2 | ||
|
|
4eff87bbf7 | ||
|
|
fc6970d08a | ||
|
|
f616c7e1c6 | ||
|
|
89ec749172 | ||
|
|
182f0f2c64 | ||
|
|
e3681495ce | ||
|
|
37415fa261 | ||
|
|
7243dbe763 | ||
|
|
0ff5c4bedd | ||
|
|
f047f89ad5 | ||
|
|
0eb0aa1d3b | ||
|
|
6019891591 | ||
|
|
615281601c | ||
|
|
82baf5d384 | ||
|
|
6fe93ecb7e | ||
|
|
b3222f3523 | ||
|
|
3b94863521 | ||
|
|
582dc8bf46 | ||
|
|
a9868fd275 | ||
|
|
218e56576a | ||
|
|
c50e79375a | ||
|
|
dcb8308f35 | ||
|
|
183b310696 | ||
|
|
c7d0c86d52 | ||
|
|
48225662b1 | ||
|
|
f53fc088ec | ||
|
|
05517fcbcd | ||
|
|
18af51b0a4 | ||
|
|
ede3da7a87 | ||
|
|
8e3327ef6a | ||
|
|
827f6daabc | ||
|
|
2567442321 | ||
|
|
9cf5478519 | ||
|
|
e5275311c2 | ||
|
|
21e4870e4c | ||
|
|
beba7c8d2e | ||
|
|
fe35313305 | ||
|
|
d7a8bbf40b | ||
|
|
f1893c596e | ||
|
|
6367c1ab4d | ||
|
|
9579887fc4 | ||
|
|
e29be2f140 | ||
|
|
2736b5d1ef | ||
|
|
ff52fb16b6 | ||
|
|
ccbf3867e1 | ||
|
|
f0de422c6e | ||
|
|
64cc19b252 | ||
|
|
26226009f0 | ||
|
|
d10e09da02 | ||
|
|
00a2e58fee | ||
|
|
b1cb45dfe6 | ||
|
|
a2951d1f05 | ||
|
|
c0b1e97602 | ||
|
|
71621a9dc4 | ||
|
|
b3ed2afebe | ||
|
|
704620baff | ||
|
|
8feb805167 | ||
|
|
065b32755a | ||
|
|
1b5f4bff2c | ||
|
|
8e1c5a485f | ||
| 5fa6c9db35 | |||
| 5482b9be2c | |||
|
|
7400273b0a | ||
|
|
0b7cdde4a0 | ||
|
|
d5382aec4f | ||
|
|
df484dc816 | ||
|
|
7ea4086807 | ||
|
|
b04bf6a951 | ||
| 7c33dcf630 | |||
| 5e65e21f0b | |||
| 53ca38ce53 | |||
|
|
398e3c1b91 | ||
| 508978d586 | |||
| e267481f71 | |||
|
|
193bee5ac8 | ||
| f58efa2871 | |||
| 6568b6d723 | |||
|
|
4b1b34d8a7 | ||
| 39c09f8565 | |||
|
|
275a77807e | ||
|
|
6443541a79 | ||
|
|
5eb6f7d307 | ||
|
|
bce2a66177 | ||
|
|
7602641909 | ||
|
|
54f3a261c5 | ||
|
|
906bac965f | ||
|
|
4ec1de6900 | ||
|
|
8ded131666 | ||
| 47b14f932e | |||
|
|
838ebb3f69 | ||
| c459724114 | |||
| b0c9d1164d | |||
| 7c51d88501 | |||
| 5b03cf826b | |||
| f305863616 | |||
| db5809d522 | |||
|
|
83df6f015c | ||
| e7231b0e13 | |||
|
|
cff60eb51c | ||
|
f914a312f5
|
|||
| 56ebb301ca | |||
|
|
a59df12595 | ||
|
|
5cc7fc6ccb | ||
|
|
55027cb630 | ||
|
|
036eba68e1 | ||
|
|
d34e0d9348 | ||
|
|
31765ce0ef | ||
|
|
9fe7cdca92 | ||
|
|
adc3502b6b | ||
|
|
95fe369648 | ||
|
|
01845a0cb7 | ||
|
|
708eaf4178 | ||
|
|
d629a58712 | ||
|
|
90886b63d6 | ||
|
|
084f89fa32 | ||
|
|
ceb3a095d8 | ||
|
|
1758275f11 | ||
|
|
e74e506ffe | ||
|
|
599a36466a | ||
|
|
613e128cab | ||
|
|
e4f8022b7a | ||
|
|
5603c41900 | ||
| a8a27c9b51 | |||
|
|
b70de5a4be | ||
|
|
b1fd07cd30 | ||
|
|
6ab2e02fe6 | ||
|
|
5535c5780c | ||
|
|
49e0a2c055 | ||
|
|
efbe53b6b4 | ||
|
5e074dad10
|
|||
|
d6a88896d0
|
|||
|
5c99f5f8bb
|
|||
|
e1faba0ff2
|
|||
|
ba2f406bc0
|
|||
|
9b6db4684a
|
|||
|
|
561fd41d5d | ||
|
|
ce9995dac7 | ||
|
|
0afaea9513 | ||
|
|
9b5c6e3164 | ||
|
|
e6ebec8c1e | ||
|
|
2551921ed6 | ||
|
|
e02575aad7 | ||
|
|
ff3502c87a | ||
|
|
017f9b2140 | ||
|
|
c80d3a6958 | ||
|
|
3ca1127685 | ||
|
|
18369da5bc | ||
|
|
e65100cdc8 | ||
|
|
6a1cb51c2f | ||
|
c4d93e492b
|
|||
|
c2f72f72ac
|
|||
|
721b6b2afa
|
|||
|
b6f011c669
|
|||
|
801607fc16
|
|||
|
01a4d33514
|
|||
|
e348ec74fd
|
|||
|
0458675608
|
|||
|
c61ffce0e9
|
|||
|
68a97dc980
|
|||
|
a07d167390
|
|||
|
|
a8721dcc69 | ||
|
|
68cf952ac6 | ||
|
|
e14d6a81fe | ||
|
|
a4912893a8 | ||
|
0adfb631ef
|
|||
|
b64ce1f67f
|
|||
|
e8e3b1595d
|
|||
|
f1427d5272
|
|||
|
|
bf6b87d65c | ||
|
|
0240997257 | ||
|
|
f1e341f0b9 | ||
|
a54acb8c42
|
|||
|
c6ede67589
|
|||
|
|
11176da5d8 | ||
|
|
0a604336c4 | ||
|
|
be9df7649f | ||
|
|
63fb923995 | ||
|
|
3afe40083d | ||
|
|
9d4767539c | ||
|
ac9bba8b5b
|
|||
|
80c46bea7f
|
|||
|
|
614f694777 | ||
|
|
1072d7b449 | ||
|
1b70596735
|
|||
|
|
61eebc9fbd | ||
|
b05909969f
|
|||
|
bd89ce7cc9
|
|||
|
130613b717
|
|||
|
b3c1f39a0e
|
|||
|
97c807cd33
|
|||
|
aede5f71ec
|
|||
|
786770f56a
|
|||
|
|
74d4f00784 | ||
|
d61c4235dc
|
|||
|
e8794b8c79
|
|||
|
552da005dc
|
|||
|
|
51452d2e68 | ||
|
5c5484b4d2
|
|||
|
|
9974a851e8 | ||
| 6c0bfc6c35 | |||
|
|
41bbd203cc | ||
|
|
4344c26bef | ||
|
|
e1c1c06fb2 | ||
|
|
70e63764ff | ||
|
|
d10f3e3af6 | ||
|
|
a4397d5447 | ||
|
|
320c87a1db | ||
|
|
8d1228c9e8 | ||
|
|
420bec7c46 | ||
|
|
ba1658beac | ||
|
|
575753038b | ||
|
|
061c9f0979 | ||
|
|
b48d1b8ad6 | ||
| dff7aeefb8 | |||
| 54f7980162 | |||
|
|
684cb5a376 | ||
|
|
597bccc080 | ||
|
|
72557fd0bf | ||
|
|
0b2f2214f9 | ||
|
|
ef51e69ffb | ||
|
|
c9eb40f455 | ||
|
|
b66750339d | ||
|
|
136460567c | ||
|
|
f80123c85d | ||
|
|
a22340196f | ||
|
|
cbaeffde2c | ||
| 649d50812b | |||
| b67f5436f8 | |||
| b637ddeb28 | |||
|
|
a20b7eacd6 | ||
| 6df639a0c3 | |||
|
|
d4a9887532 | ||
|
|
79b08a181d | ||
|
|
758cef1bd3 | ||
| fb8bbea99d | |||
| 9b261a4778 | |||
|
|
aafa29db8b | ||
|
|
896c39f9bc | ||
|
|
3a97ff7f57 | ||
|
|
7518c41fab | ||
|
|
8cb00a5340 | ||
|
|
baa51db26c | ||
|
|
fc260b2291 | ||
|
|
43ebb01b63 | ||
|
|
067dc0df5d | ||
| 6828c97415 | |||
| 50401e0030 | |||
| c3d2508693 | |||
| 642fd5cc91 | |||
| e8fb5a0030 | |||
|
|
0dee5073c6 | ||
|
|
b9b452f043 | ||
|
|
ddd3fad1c6 | ||
|
|
1f5723a97e | ||
|
|
5a177c952d | ||
|
|
86e456d152 | ||
| 03895f9e45 | |||
| 5c79f44055 | |||
| 83c38e74db | |||
| 1e5f2944cf | |||
|
|
e45ecbdef7 | ||
| c65694b36c | |||
| 0005469101 | |||
|
|
60b56bd41a | ||
|
|
81fe492655 | ||
|
|
849b7e038d | ||
|
|
82f5257cf1 | ||
|
|
e347659db4 | ||
|
|
7940317857 | ||
|
|
58415ab5c3 | ||
|
|
1176974a78 | ||
|
|
ce792426e6 | ||
| e92e727279 | |||
| f761900a3e | |||
| 32a57661fd | |||
|
|
5004e44934 | ||
| 99d55f05f8 | |||
| 9fd839fad8 | |||
|
|
1c7cc9e16f | ||
| 06d01962a6 | |||
| 2c2c1accb5 | |||
| 105b7eabf0 | |||
|
|
de1d83e1a9 | ||
|
|
ce97780741 | ||
| e81e56ea1d | |||
| aa6336ea1e | |||
| dd887cbb1f | |||
|
|
860053be67 | ||
| 5a4671b7b1 | |||
|
|
ec581e3509 | ||
| e7ae9dd06d | |||
| 0c7f55ff8d | |||
| bcd7f47409 | |||
| 476caebe7f | |||
|
|
dadc81c026 | ||
|
|
cc719d0ae5 | ||
|
|
53af79cf0d | ||
|
|
f534ad66e1 | ||
|
|
2d8cf02296 | ||
|
|
71386f8466 | ||
|
|
c897c8e56b | ||
|
|
2036069051 | ||
|
|
be6c63e526 | ||
|
|
a2af9c152a | ||
|
|
63f3dc926c | ||
|
|
21dde870c6 | ||
|
|
04f37a85ce | ||
| 10a332083b | |||
|
|
6818d1de62 | ||
|
|
1b10b75e25 | ||
|
|
b829a5aafe | ||
|
|
07073e290a | ||
|
|
ee6d286cd7 | ||
|
|
119637cb9b | ||
|
|
ee4097a2dd | ||
|
|
1185737eaa | ||
|
|
7d14086e54 | ||
|
|
78494cd30e | ||
|
|
ead5c54bcb | ||
|
|
b5b355c16c | ||
|
|
3067d7b250 | ||
|
|
9bc36152d9 | ||
|
|
c1b944b838 | ||
|
|
175a88f1c4 | ||
|
|
aac3e7d2f4 | ||
|
|
c0488b8cbe | ||
|
|
d66703c4d0 | ||
|
|
173975aadd | ||
|
|
d97fa37d2c | ||
|
|
782262b52e | ||
|
|
b8213ef6be | ||
|
|
e34623b1ce | ||
|
|
4e375ff32b | ||
|
|
f7529be3ea | ||
|
|
1aa9720405 | ||
|
|
709880ff5a | ||
|
|
6b78b4e12b | ||
|
|
f342a65aba | ||
|
|
dc860f8fd9 | ||
|
|
f8f900151a | ||
|
|
8d409eed0f | ||
|
|
dc86523cce | ||
|
|
506d112cce | ||
|
|
eb7f92282d | ||
|
|
3468e987b6 | ||
|
|
5acd9ece7f | ||
|
|
8bc43baf2c | ||
|
|
a2c99fb56d | ||
|
|
9689f95ea1 | ||
|
|
84d6b48353 | ||
|
|
bf64fc5213 | ||
|
|
d9f9c8aaf5 | ||
| 2502989ca2 | |||
| ba7cc9168e | |||
| dc0d9fe038 | |||
| 0e6c6937cd | |||
|
|
280b16c11c | ||
| 4b922c575e | |||
|
|
09528ed6b9 | ||
|
|
e61ff01518 | ||
|
|
a4c68bf7fe | ||
| bb1c8cc25d | |||
| 4b06fa788d | |||
|
|
ab08600486 | ||
| 7a5ccff6da | |||
| a407a5cf01 | |||
| 2b3e2f25ec | |||
| ed5ecbd914 | |||
| 2d4759114e | |||
| c68b9fec42 | |||
|
|
0f34c8cac6 | ||
|
|
d388a45630 | ||
|
|
84b63af080 | ||
| 20902f842d | |||
| df7217f79c | |||
|
|
bd6f38b4f3 | ||
|
|
827a85412e | ||
|
|
c4a9fcc1ca | ||
|
|
0993549cac | ||
|
|
1b8c4e293c | ||
|
|
b449b77b95 | ||
|
|
f235b1a99c | ||
|
|
b2b4beaeaa | ||
|
|
8d7f942de4 | ||
|
|
c1b5134627 | ||
|
|
f5c43d60d3 | ||
|
|
69ee19bed0 | ||
|
|
4d7819802d | ||
|
|
07acbf673a | ||
| 47a82bf843 | |||
| cdb66365bf | |||
|
|
a2a4b2e6c2 | ||
|
|
cffdd055c9 | ||
|
|
64796519c6 | ||
|
|
98f1255d4f | ||
|
|
b52330ebf0 | ||
|
|
f857ac0c4e | ||
| 369757b35b | |||
|
|
68f5b0bba4 | ||
| 1da0e3a747 | |||
|
|
a0b8f36dbb | ||
|
|
c401e195f6 | ||
|
|
b836eee1e7 | ||
|
|
b623092721 | ||
| 9533f06eaf | |||
|
|
8143ca1741 | ||
|
|
59c749a164 | ||
|
|
1771883754 | ||
|
|
3014f59cc2 | ||
|
|
f933cad87f | ||
|
|
fd94d30a8e | ||
|
|
81d9015d59 | ||
|
|
5772f38deb | ||
| 4f9b7b4e52 | |||
|
|
73a6f6c13c | ||
|
|
69519ec040 | ||
|
|
c84b819212 | ||
|
|
ce758610b6 | ||
|
|
08bfd3edff | ||
| 1d0db276e8 | |||
| d7117f3d49 | |||
|
|
4a622aae41 | ||
|
|
c84a0fb8c3 | ||
|
|
ba42f4efc0 | ||
| 13d99a6ae0 | |||
| 3b8bcf7b32 | |||
| a7dd3fbc0b | |||
|
|
77677a9f1b | ||
|
|
28609a3372 | ||
|
|
4eceab4dc7 | ||
|
|
6a1e35107f | ||
| 4aa9c4831f | |||
| 69b3f767f6 | |||
| 204901189d | |||
| 80be78604f | |||
|
|
61c83c375d | ||
|
|
b7aacd1b33 | ||
|
|
2f35482aff | ||
| d839c53642 | |||
|
|
f36f62fb47 | ||
|
|
2ddc24b7ee | ||
| dc67a1f103 | |||
|
|
f6c4c963ec | ||
|
|
987f77170f | ||
| ebcae32e23 | |||
| 8a0977561f | |||
|
|
3b9b37a0f3 | ||
| 9fec8a4822 | |||
| c87db1dfe6 | |||
| e99d1a1e90 | |||
| cda46141cc | |||
| 3028f60807 | |||
|
|
734e818b19 | ||
|
|
57bda63506 | ||
| da551a0bb4 | |||
| 32b0c8bdd7 | |||
|
|
129e6a69b8 | ||
| 14c487c9e4 | |||
| 6185635aa9 | |||
| 56d559fdd7 | |||
| cfcf939339 | |||
| d51be5c308 | |||
| 29552fadc3 | |||
| 15231bc683 | |||
| c7a04328d9 | |||
| 87ce4f63d4 | |||
| 80aed87415 | |||
| 65cf86586a | |||
| 4f6d1fec68 | |||
| 202521cbfd | |||
| 90bdfcfbb6 | |||
| 42e05fc999 | |||
|
|
fe6de5bc68 | ||
|
|
e550e57ac0 | ||
| e69f2c4253 | |||
| 4a2afc7a5a | |||
|
|
cf91563912 | ||
| 9e3ba41746 | |||
| 19d645f65c | |||
|
|
05b43c0f21 | ||
|
|
cf04f420e0 | ||
|
|
f758e52ccd | ||
|
|
ea0c0de687 | ||
|
|
a2cc1bd226 | ||
|
|
8910a612ac | ||
|
|
c04344bfde | ||
|
|
6834f07df3 | ||
| b8273a9b02 | |||
|
|
32420fb531 | ||
|
|
e91cdf6b79 | ||
|
|
e80ce7a474 | ||
|
|
da8cefe153 | ||
|
|
bbd8637ca6 | ||
|
|
b42a11d30e | ||
|
|
ed056b065e | ||
|
|
163462b29c | ||
|
|
f286872a33 | ||
|
|
e32042204b | ||
|
|
bc6e6250e1 | ||
|
|
423e800d9e | ||
|
|
d929bdc9a1 | ||
|
|
298051c334 | ||
|
|
8a473de793 | ||
|
|
6393035e55 | ||
|
|
4244a37440 | ||
|
|
6e0c13df89 | ||
|
|
1921be661b | ||
|
|
284a7079d6 | ||
|
|
36abed2093 | ||
|
|
742c2e399e | ||
|
|
2655bda644 | ||
|
|
f7571211fd | ||
|
|
38f58047f2 | ||
| 3e73df76dd | |||
| 284258fbc6 | |||
|
|
968434be49 | ||
|
|
ae79f3e98a | ||
| f81ffbe83d | |||
|
|
c0ab5de2f1 | ||
|
|
25f5a889d0 | ||
|
|
71739e301c | ||
|
|
650bcae6be | ||
|
|
1062989686 | ||
|
|
536a51b93f | ||
|
|
19f2e16bae | ||
| 5923070191 | |||
| 04e8279ae4 | |||
| aab50775d8 | |||
| c6a0d442cc | |||
| 2674f2a769 | |||
|
|
eed8bb2d44 | ||
|
|
58c7b0d1b4 | ||
|
|
55943cacbf | ||
| b25ceccae9 | |||
| c5633e9e6d | |||
| df9fd77d06 | |||
|
|
56f66aa706 | ||
| e7ecc260f8 | |||
|
|
c935af2ba2 | ||
| cd5cc9bc2e | |||
|
|
07a6d113a7 | ||
| fd38e1ddc0 | |||
| 07a3fa00a9 | |||
|
|
5362164970 | ||
|
|
5a67569fd3 | ||
|
|
185f7144b0 | ||
| 8fee8fcab2 | |||
|
|
9bd5468089 | ||
| 120567269d | |||
|
|
036685cbe7 | ||
|
|
2f471dc192 | ||
|
|
4729905322 | ||
|
|
aacb7489e6 | ||
|
|
c04aea89c9 | ||
| b5a7249ad5 | |||
|
|
ae543447b8 | ||
|
|
2c56bfd89e | ||
|
|
b6be76eb07 | ||
|
|
08063feef2 | ||
|
|
affcba441b | ||
| dd80d5af9e | |||
| 7174f27a89 | |||
| 3828c138b8 | |||
|
|
e3f195dad0 | ||
| 6d8a3aa256 | |||
| ab1a9fa781 | |||
| cf1b7e2db4 | |||
| 34050c8ce0 | |||
| eb626db9c2 | |||
|
|
6cd98693c3 | ||
| d1e3e06b10 | |||
|
|
60a3b653af | ||
| 8aaa5722e9 | |||
| 99a2888431 | |||
| f3a8061dfc | |||
|
|
7d85c0e9ad | ||
| c0a319ba7e | |||
| 24678ff952 | |||
| d289360cb2 | |||
| 463b60acb6 | |||
|
|
af67022a5d | ||
|
|
fe78c8b851 | ||
|
|
f992ffc2da | ||
|
|
981551e9c6 | ||
| c1397ef7d5 | |||
|
|
45a3346783 | ||
|
|
2f6c16a7d8 | ||
|
|
76d3018b5c | ||
|
|
95b170e542 | ||
|
|
26875e06f5 | ||
| 34f15cc407 | |||
|
|
3c26deda30 | ||
|
|
1761754865 | ||
|
|
9afb6dc933 | ||
| 8eda4b306d | |||
| 82b8e8c284 | |||
|
|
bcadb1adda | ||
| eb867528e3 | |||
| feba722a53 | |||
| bb7c1005c9 | |||
|
|
a9544f5609 | ||
|
|
fc33bfb47b | ||
|
|
b450cdd20f | ||
|
|
d0516f12b0 | ||
| c973a29734 | |||
| 33613cdda0 | |||
| 25acb2eaa5 | |||
| 9cd65bb20c | |||
|
|
5ba84efab6 | ||
|
|
1f60963cbb | ||
|
|
c8068f45eb | ||
|
|
fb78b3f1e2 | ||
|
|
10ca86e583 | ||
|
|
c5834e69d3 | ||
|
|
984cf46bf0 | ||
|
|
4c708dba3f | ||
|
|
f466312015 | ||
|
|
34fc6b1541 | ||
|
|
19ad462abf | ||
|
|
9dc2e9c679 | ||
|
|
4cfe52e7c9 | ||
|
|
07f8950838 | ||
| 369d20930b | |||
| 9b7b3812d9 | |||
|
|
e577e086a6 | ||
|
|
ece57bf65e | ||
|
|
dbd2b491ed | ||
|
|
2146fccaae | ||
| f0685919fd |
331
.github/workflows/Release.yml
vendored
331
.github/workflows/Release.yml
vendored
@@ -1,331 +0,0 @@
|
||||
# See: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions
|
||||
|
||||
# Workflow name
|
||||
name: Release
|
||||
|
||||
# Run on tag push
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '**'
|
||||
|
||||
jobs:
|
||||
|
||||
#
|
||||
# Build on AlmaLinux 8.5 using golang-1.18.2
|
||||
#
|
||||
AlmaLinux-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://hub.docker.com/_/almalinux
|
||||
container: almalinux:8.5
|
||||
# The job outputs link to the outputs of the 'rpmrename' step
|
||||
# Only job outputs can be used in child jobs
|
||||
outputs:
|
||||
rpm : ${{steps.rpmrename.outputs.RPM}}
|
||||
srpm : ${{steps.rpmrename.outputs.SRPM}}
|
||||
steps:
|
||||
|
||||
# Use dnf to install development packages
|
||||
- name: Install development packages
|
||||
run: |
|
||||
dnf --assumeyes group install "Development Tools" "RPM Development Tools"
|
||||
dnf --assumeyes install wget openssl-devel diffutils delve which npm
|
||||
dnf --assumeyes install 'dnf-command(builddep)'
|
||||
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# Use dnf to install build dependencies
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
|
||||
rpm -i go*.rpm
|
||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
||||
#dnf --assumeyes builddep build/package/cc-backend.spec
|
||||
|
||||
- name: RPM build ClusterCockpit
|
||||
id: rpmbuild
|
||||
run: make RPM
|
||||
|
||||
# AlmaLinux 8.5 is a derivate of RedHat Enterprise Linux 8 (UBI8),
|
||||
# so the created RPM both contain the substring 'el8' in the RPM file names
|
||||
# This step replaces the substring 'el8' to 'alma85'. It uses the move operation
|
||||
# because it is unclear whether the default AlmaLinux 8.5 container contains the
|
||||
# 'rename' command. This way we also get the new names for output.
|
||||
- name: Rename RPMs (s/el8/alma85/)
|
||||
id: rpmrename
|
||||
run: |
|
||||
OLD_RPM="${{steps.rpmbuild.outputs.RPM}}"
|
||||
OLD_SRPM="${{steps.rpmbuild.outputs.SRPM}}"
|
||||
NEW_RPM="${OLD_RPM/el8/alma85}"
|
||||
NEW_SRPM=${OLD_SRPM/el8/alma85}
|
||||
mv "${OLD_RPM}" "${NEW_RPM}"
|
||||
mv "${OLD_SRPM}" "${NEW_SRPM}"
|
||||
echo "::set-output name=SRPM::${NEW_SRPM}"
|
||||
echo "::set-output name=RPM::${NEW_RPM}"
|
||||
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save RPM as artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cc-backend RPM for AlmaLinux 8.5
|
||||
path: ${{ steps.rpmrename.outputs.RPM }}
|
||||
- name: Save SRPM as artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cc-backend SRPM for AlmaLinux 8.5
|
||||
path: ${{ steps.rpmrename.outputs.SRPM }}
|
||||
|
||||
#
|
||||
# Build on UBI 8 using golang-1.18.2
|
||||
#
|
||||
UBI-8-RPM-build:
|
||||
runs-on: ubuntu-latest
|
||||
# See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
|
||||
container: registry.access.redhat.com/ubi8/ubi:8.5-226.1645809065
|
||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
||||
outputs:
|
||||
rpm : ${{steps.rpmbuild.outputs.RPM}}
|
||||
srpm : ${{steps.rpmbuild.outputs.SRPM}}
|
||||
steps:
|
||||
|
||||
# Use dnf to install development packages
|
||||
- name: Install development packages
|
||||
run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros rpm-build-libs rpm-libs gcc make python38 git wget openssl-devel diffutils delve which
|
||||
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
|
||||
# Use dnf to install build dependencies
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
|
||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
|
||||
rpm -i go*.rpm
|
||||
dnf --assumeyes --disableplugin=subscription-manager install npm
|
||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
||||
#dnf --assumeyes builddep build/package/cc-backend.spec
|
||||
|
||||
- name: RPM build ClusterCockpit
|
||||
id: rpmbuild
|
||||
run: make RPM
|
||||
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save RPM as artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cc-backend RPM for UBI 8
|
||||
path: ${{ steps.rpmbuild.outputs.RPM }}
|
||||
- name: Save SRPM as artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cc-backend SRPM for UBI 8
|
||||
path: ${{ steps.rpmbuild.outputs.SRPM }}
|
||||
|
||||
#
|
||||
# Build on Ubuntu 20.04 using official go 1.19.1 package
|
||||
#
|
||||
Ubuntu-focal-build:
|
||||
runs-on: ubuntu-latest
|
||||
container: ubuntu:20.04
|
||||
# The job outputs link to the outputs of the 'debrename' step
|
||||
# Only job outputs can be used in child jobs
|
||||
outputs:
|
||||
deb : ${{steps.debrename.outputs.DEB}}
|
||||
steps:
|
||||
# Use apt to install development packages
|
||||
- name: Install development packages
|
||||
run: |
|
||||
apt update && apt --assume-yes upgrade
|
||||
apt --assume-yes install build-essential sed git wget bash
|
||||
apt --assume-yes install npm
|
||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
# Use official golang package
|
||||
- name: Install Golang
|
||||
run: |
|
||||
wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
|
||||
tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
|
||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||
go version
|
||||
- name: DEB build ClusterCockpit
|
||||
id: dpkg-build
|
||||
run: |
|
||||
ls -la
|
||||
pwd
|
||||
env
|
||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||
git config --global --add safe.directory $(pwd)
|
||||
make DEB
|
||||
- name: Rename DEB (add '_ubuntu20.04')
|
||||
id: debrename
|
||||
run: |
|
||||
OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
|
||||
NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb"
|
||||
mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
|
||||
echo "::set-output name=DEB::${NEW_DEB_FILE}"
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save DEB as artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cc-backend DEB for Ubuntu 20.04
|
||||
path: ${{ steps.debrename.outputs.DEB }}
|
||||
|
||||
#
|
||||
# Build on Ubuntu 20.04 using official go 1.19.1 package
|
||||
#
|
||||
Ubuntu-jammy-build:
|
||||
runs-on: ubuntu-latest
|
||||
container: ubuntu:22.04
|
||||
# The job outputs link to the outputs of the 'debrename' step
|
||||
# Only job outputs can be used in child jobs
|
||||
outputs:
|
||||
deb : ${{steps.debrename.outputs.DEB}}
|
||||
steps:
|
||||
# Use apt to install development packages
|
||||
- name: Install development packages
|
||||
run: |
|
||||
apt update && apt --assume-yes upgrade
|
||||
apt --assume-yes install build-essential sed git wget bash npm
|
||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
||||
# Checkout git repository and submodules
|
||||
# fetch-depth must be 0 to use git describe
|
||||
# See: https://github.com/marketplace/actions/checkout
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: recursive
|
||||
fetch-depth: 0
|
||||
# Use official golang package
|
||||
- name: Install Golang
|
||||
run: |
|
||||
wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
|
||||
tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
|
||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||
go version
|
||||
- name: DEB build ClusterCockpit
|
||||
id: dpkg-build
|
||||
run: |
|
||||
ls -la
|
||||
pwd
|
||||
env
|
||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
||||
git config --global --add safe.directory $(pwd)
|
||||
make DEB
|
||||
- name: Rename DEB (add '_ubuntu22.04')
|
||||
id: debrename
|
||||
run: |
|
||||
OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
|
||||
NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu22.04.deb"
|
||||
mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
|
||||
echo "::set-output name=DEB::${NEW_DEB_FILE}"
|
||||
# See: https://github.com/actions/upload-artifact
|
||||
- name: Save DEB as artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: cc-backend DEB for Ubuntu 22.04
|
||||
path: ${{ steps.debrename.outputs.DEB }}
|
||||
|
||||
#
|
||||
# Create release with fresh RPMs
|
||||
#
|
||||
Release:
|
||||
runs-on: ubuntu-latest
|
||||
# We need the RPMs, so add dependency
|
||||
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build, Ubuntu-jammy-build]
|
||||
|
||||
steps:
|
||||
# See: https://github.com/actions/download-artifact
|
||||
- name: Download AlmaLinux 8.5 RPM
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: cc-backend RPM for AlmaLinux 8.5
|
||||
- name: Download AlmaLinux 8.5 SRPM
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: cc-backend SRPM for AlmaLinux 8.5
|
||||
|
||||
- name: Download UBI 8 RPM
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: cc-backend RPM for UBI 8
|
||||
- name: Download UBI 8 SRPM
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: cc-backend SRPM for UBI 8
|
||||
|
||||
- name: Download Ubuntu 20.04 DEB
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: cc-backend DEB for Ubuntu 20.04
|
||||
|
||||
- name: Download Ubuntu 22.04 DEB
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: cc-backend DEB for Ubuntu 22.04
|
||||
|
||||
# The download actions do not publish the name of the downloaded file,
|
||||
# so we re-use the job outputs of the parent jobs. The files are all
|
||||
# downloaded to the current folder.
|
||||
# The gh-release action afterwards does not accept file lists but all
|
||||
# files have to be listed at 'files'. The step creates one output per
|
||||
# RPM package (2 per distro)
|
||||
- name: Set RPM variables
|
||||
id: files
|
||||
run: |
|
||||
ALMA_85_RPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.rpm}}")
|
||||
ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
|
||||
UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
|
||||
UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
|
||||
U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}")
|
||||
U_2204_DEB=$(basename "${{ needs.Ubuntu-jammy-build.outputs.deb}}")
|
||||
echo "ALMA_85_RPM::${ALMA_85_RPM}"
|
||||
echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
|
||||
echo "UBI_8_RPM::${UBI_8_RPM}"
|
||||
echo "UBI_8_SRPM::${UBI_8_SRPM}"
|
||||
echo "U_2004_DEB::${U_2004_DEB}"
|
||||
echo "U_2204_DEB::${U_2204_DEB}"
|
||||
echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
|
||||
echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
|
||||
echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
|
||||
echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
|
||||
echo "::set-output name=U_2004_DEB::${U_2004_DEB}"
|
||||
echo "::set-output name=U_2204_DEB::${U_2204_DEB}"
|
||||
|
||||
# See: https://github.com/softprops/action-gh-release
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v1
|
||||
if: startsWith(github.ref, 'refs/tags/')
|
||||
with:
|
||||
name: cc-backend-${{github.ref_name}}
|
||||
files: |
|
||||
${{ steps.files.outputs.ALMA_85_RPM }}
|
||||
${{ steps.files.outputs.ALMA_85_SRPM }}
|
||||
${{ steps.files.outputs.UBI_8_RPM }}
|
||||
${{ steps.files.outputs.UBI_8_SRPM }}
|
||||
${{ steps.files.outputs.U_2004_DEB }}
|
||||
${{ steps.files.outputs.U_2204_DEB }}
|
||||
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: 1.19.x
|
||||
go-version: 1.25.x
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v3
|
||||
- name: Build, Vet & Test
|
||||
|
||||
32
.gitignore
vendored
32
.gitignore
vendored
@@ -1,16 +1,32 @@
|
||||
/cc-backend
|
||||
|
||||
/var/job-archive
|
||||
/var/*.db
|
||||
/var/machine-state
|
||||
|
||||
/.env
|
||||
/config.json
|
||||
/uiConfig.json
|
||||
|
||||
/var/job-archive
|
||||
/var/machine-state
|
||||
/var/*.db-shm
|
||||
/var/*.db-wal
|
||||
/var/*.db
|
||||
/var/*.txt
|
||||
|
||||
/var/checkpoints*
|
||||
|
||||
migrateTimestamps.pl
|
||||
test_ccms_*
|
||||
|
||||
/web/frontend/public/build
|
||||
/web/frontend/node_modules
|
||||
/.vscode/*
|
||||
|
||||
/archive-migration
|
||||
/archive-manager
|
||||
var/job.db-shm
|
||||
var/job.db-wal
|
||||
|
||||
/internal/repository/testdata/job.db-shm
|
||||
/internal/repository/testdata/job.db-wal
|
||||
|
||||
/.vscode/*
|
||||
dist/
|
||||
*.db
|
||||
.idea
|
||||
tools/archive-migration/archive-migration
|
||||
tools/archive-manager/archive-manager
|
||||
|
||||
93
.goreleaser.yaml
Normal file
93
.goreleaser.yaml
Normal file
@@ -0,0 +1,93 @@
|
||||
before:
|
||||
hooks:
|
||||
- go mod tidy
|
||||
builds:
|
||||
- env:
|
||||
- CGO_ENABLED=1
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
goamd64:
|
||||
- v3
|
||||
id: "cc-backend"
|
||||
binary: cc-backend
|
||||
main: ./cmd/cc-backend
|
||||
ldflags:
|
||||
- -s -w -X main.version={{.Version}}
|
||||
- -X main.commit={{.Commit}} -X main.date={{.Date}}
|
||||
- -linkmode external -extldflags -static
|
||||
tags:
|
||||
- static_build
|
||||
hooks:
|
||||
pre: make frontend
|
||||
- env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
goamd64:
|
||||
- v3
|
||||
id: "archive-manager"
|
||||
binary: archive-manager
|
||||
main: ./tools/archive-manager
|
||||
tags:
|
||||
- static_build
|
||||
- env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
goarch:
|
||||
- amd64
|
||||
goamd64:
|
||||
- v3
|
||||
id: "gen-keypair"
|
||||
binary: gen-keypair
|
||||
main: ./tools/gen-keypair
|
||||
tags:
|
||||
- static_build
|
||||
archives:
|
||||
- format: tar.gz
|
||||
# this name template makes the OS and Arch compatible with the results of uname.
|
||||
name_template: >-
|
||||
{{ .ProjectName }}_
|
||||
{{- title .Os }}_
|
||||
{{- if eq .Arch "amd64" }}x86_64
|
||||
{{- else }}{{ .Arch }}{{ end }}
|
||||
{{- if .Arm }}v{{ .Arm }}{{ end }}
|
||||
checksum:
|
||||
name_template: "checksums.txt"
|
||||
snapshot:
|
||||
name_template: "{{ incpatch .Version }}-next"
|
||||
changelog:
|
||||
sort: asc
|
||||
filters:
|
||||
include:
|
||||
- "^feat:"
|
||||
- "^fix:"
|
||||
- "^sec:"
|
||||
- "^docs:"
|
||||
groups:
|
||||
- title: "Dependency updates"
|
||||
regexp: '^.*?(feat|fix)\(deps\)!?:.+$'
|
||||
order: 300
|
||||
- title: "New Features"
|
||||
regexp: '^.*?feat(\([[:word:]]+\))??!?:.+$'
|
||||
order: 100
|
||||
- title: "Security updates"
|
||||
regexp: '^.*?sec(\([[:word:]]+\))??!?:.+$'
|
||||
order: 150
|
||||
- title: "Bug fixes"
|
||||
regexp: '^.*?fix(\([[:word:]]+\))??!?:.+$'
|
||||
order: 200
|
||||
- title: "Documentation updates"
|
||||
regexp: ^.*?doc(\([[:word:]]+\))??!?:.+$
|
||||
order: 400
|
||||
release:
|
||||
draft: false
|
||||
footer: |
|
||||
Supports job archive version 2 and database version 8.
|
||||
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
|
||||
|
||||
# vim: set ts=2 sw=2 tw=0 fo=cnqoj
|
||||
26
AGENTS.md
Normal file
26
AGENTS.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# ClusterCockpit Backend - Agent Guidelines
|
||||
|
||||
## Build/Test Commands
|
||||
|
||||
- Build: `make` or `go build ./cmd/cc-backend`
|
||||
- Run all tests: `make test` (runs: `go clean -testcache && go build ./... && go vet ./... && go test ./...`)
|
||||
- Run single test: `go test -run TestName ./path/to/package`
|
||||
- Run single test file: `go test ./path/to/package -run TestName`
|
||||
- Frontend build: `cd web/frontend && npm install && npm run build`
|
||||
- Generate GraphQL: `make graphql` (uses gqlgen)
|
||||
- Generate Swagger: `make swagger` (uses swaggo/swag)
|
||||
|
||||
## Code Style
|
||||
|
||||
- **Formatting**: Use `gofumpt` for all Go files (strict requirement)
|
||||
- **Copyright header**: All files must include copyright header (see existing files)
|
||||
- **Package docs**: Document packages with comprehensive package-level comments explaining purpose, usage, configuration
|
||||
- **Imports**: Standard library first, then external packages, then internal packages (grouped with blank lines)
|
||||
- **Naming**: Use camelCase for private, PascalCase for exported; descriptive names (e.g., `JobRepository`, `handleError`)
|
||||
- **Error handling**: Return errors, don't panic; use custom error types where appropriate; log with cclog package
|
||||
- **Logging**: Use `cclog` package (e.g., `cclog.Errorf()`, `cclog.Warnf()`, `cclog.Debugf()`)
|
||||
- **Testing**: Use standard `testing` package; use `testify/assert` for assertions; name tests `TestFunctionName`
|
||||
- **Comments**: Document all exported functions/types with godoc-style comments
|
||||
- **Structs**: Document fields with inline comments, especially for complex configurations
|
||||
- **HTTP handlers**: Return proper status codes; use `handleError()` helper for consistent error responses
|
||||
- **JSON**: Use struct tags for JSON marshaling; `DisallowUnknownFields()` for strict decoding
|
||||
306
CLAUDE.md
Normal file
306
CLAUDE.md
Normal file
@@ -0,0 +1,306 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with
|
||||
code in this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
ClusterCockpit is a job-specific performance monitoring framework for HPC
|
||||
clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a
|
||||
Svelte-based frontend, and manages job archives and metric data from various
|
||||
time-series databases.
|
||||
|
||||
## Build and Development Commands
|
||||
|
||||
### Building
|
||||
|
||||
```bash
|
||||
# Build everything (frontend + backend)
|
||||
make
|
||||
|
||||
# Build only the frontend
|
||||
make frontend
|
||||
|
||||
# Build only the backend (requires frontend to be built first)
|
||||
go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.5.0 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
make test
|
||||
|
||||
# Run tests with verbose output
|
||||
go test -v ./...
|
||||
|
||||
# Run tests for a specific package
|
||||
go test ./internal/repository
|
||||
```
|
||||
|
||||
### Code Generation
|
||||
|
||||
```bash
|
||||
# Regenerate GraphQL schema and resolvers (after modifying api/schema.graphqls)
|
||||
make graphql
|
||||
|
||||
# Regenerate Swagger/OpenAPI docs (after modifying API comments)
|
||||
make swagger
|
||||
```
|
||||
|
||||
### Frontend Development
|
||||
|
||||
```bash
|
||||
cd web/frontend
|
||||
|
||||
# Install dependencies
|
||||
npm install
|
||||
|
||||
# Build for production
|
||||
npm run build
|
||||
|
||||
# Development mode with watch
|
||||
npm run dev
|
||||
```
|
||||
|
||||
### Running
|
||||
|
||||
```bash
|
||||
# Initialize database and create admin user
|
||||
./cc-backend -init-db -add-user demo:admin:demo
|
||||
|
||||
# Start server in development mode (enables GraphQL Playground and Swagger UI)
|
||||
./cc-backend -server -dev -loglevel info
|
||||
|
||||
# Start demo with sample data
|
||||
./startDemo.sh
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
### Backend Structure
|
||||
|
||||
The backend follows a layered architecture with clear separation of concerns:
|
||||
|
||||
- **cmd/cc-backend**: Entry point, orchestrates initialization of all subsystems
|
||||
- **internal/repository**: Data access layer using repository pattern
|
||||
- Abstracts database operations (SQLite3 only)
|
||||
- Implements LRU caching for performance
|
||||
- Provides repositories for Job, User, Node, and Tag entities
|
||||
- Transaction support for batch operations
|
||||
- **internal/api**: REST API endpoints (Swagger/OpenAPI documented)
|
||||
- **internal/graph**: GraphQL API (uses gqlgen)
|
||||
- Schema in `api/schema.graphqls`
|
||||
- Generated code in `internal/graph/generated/`
|
||||
- Resolvers in `internal/graph/schema.resolvers.go`
|
||||
- **internal/auth**: Authentication layer
|
||||
- Supports local accounts, LDAP, OIDC, and JWT tokens
|
||||
- Implements rate limiting for login attempts
|
||||
- **pkg/metricstore**: Metric store with data loading API
|
||||
- In-memory metric storage with checkpointing
|
||||
- Query API for loading job metric data
|
||||
- **internal/archiver**: Job archiving to file-based archive
|
||||
- **internal/api/nats.go**: NATS-based API for job and node operations
|
||||
- Subscribes to NATS subjects for job events (start/stop)
|
||||
- Handles node state updates via NATS
|
||||
- Uses InfluxDB line protocol message format
|
||||
- **pkg/archive**: Job archive backend implementations
|
||||
- File system backend (default)
|
||||
- S3 backend
|
||||
- SQLite backend (experimental)
|
||||
- **parquet** sub-package: Parquet format support (schema, reader, writer, conversion)
|
||||
- **internal/metricstoreclient**: Client for cc-metric-store queries
|
||||
|
||||
### Frontend Structure
|
||||
|
||||
- **web/frontend**: Svelte 5 application
|
||||
- Uses Rollup for building
|
||||
- Components organized by feature (analysis, job, user, etc.)
|
||||
- GraphQL client using @urql/svelte
|
||||
- Bootstrap 5 + SvelteStrap for UI
|
||||
- uPlot for time-series visualization
|
||||
- **web/templates**: Server-side Go templates
|
||||
|
||||
### Key Concepts
|
||||
|
||||
**Job Archive**: Completed jobs are stored in a file-based archive following the
|
||||
[ClusterCockpit job-archive
|
||||
specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||
Each job has a `meta.json` file with metadata and metric data files.
|
||||
|
||||
**Metric Data Repositories**: Time-series metric data is stored separately from
|
||||
job metadata. The system supports multiple backends (cc-metric-store is
|
||||
recommended). Configuration is per-cluster in `config.json`.
|
||||
|
||||
**Authentication Flow**:
|
||||
|
||||
1. Multiple authenticators can be configured (local, LDAP, OIDC, JWT)
|
||||
2. Each authenticator's `CanLogin` method is called to determine if it should handle the request
|
||||
3. The first authenticator that returns true performs the actual `Login`
|
||||
4. JWT tokens are used for API authentication
|
||||
|
||||
**Database Migrations**: SQL migrations in `internal/repository/migrations/sqlite3/` are
|
||||
applied automatically on startup. Version tracking in `version` table.
|
||||
|
||||
**Scopes**: Metrics can be collected at different scopes:
|
||||
|
||||
- Node scope (always available)
|
||||
- Core scope (for jobs with ≤8 nodes)
|
||||
- Accelerator scope (for GPU/accelerator metrics)
|
||||
|
||||
## Configuration
|
||||
|
||||
- **config.json**: Main configuration (clusters, metric repositories, archive settings)
|
||||
- `main.apiSubjects`: NATS subject configuration (optional)
|
||||
- `subjectJobEvent`: Subject for job start/stop events (e.g., "cc.job.event")
|
||||
- `subjectNodeState`: Subject for node state updates (e.g., "cc.node.state")
|
||||
- `nats`: NATS client connection configuration (optional)
|
||||
- `address`: NATS server address (e.g., "nats://localhost:4222")
|
||||
- `username`: Authentication username (optional)
|
||||
- `password`: Authentication password (optional)
|
||||
- `creds-file-path`: Path to NATS credentials file (optional)
|
||||
- **.env**: Environment variables (secrets like JWT keys)
|
||||
- Copy from `configs/env-template.txt`
|
||||
- NEVER commit this file
|
||||
- **cluster.json**: Cluster topology and metric definitions (loaded from archive or config)
|
||||
|
||||
## Database
|
||||
|
||||
- Default: SQLite 3 (`./var/job.db`)
|
||||
- Connection managed by `internal/repository`
|
||||
- Schema version in `internal/repository/migration.go`
|
||||
|
||||
## Code Generation
|
||||
|
||||
**GraphQL** (gqlgen):
|
||||
|
||||
- Schema: `api/schema.graphqls`
|
||||
- Config: `gqlgen.yml`
|
||||
- Generated code: `internal/graph/generated/`
|
||||
- Custom resolvers: `internal/graph/schema.resolvers.go`
|
||||
- Run `make graphql` after schema changes
|
||||
|
||||
**Swagger/OpenAPI**:
|
||||
|
||||
- Annotations in `internal/api/*.go`
|
||||
- Generated docs: `internal/api/docs.go`, `api/swagger.yaml`
|
||||
- Run `make swagger` after API changes
|
||||
|
||||
## Testing Conventions
|
||||
|
||||
- Test files use `_test.go` suffix
|
||||
- Test data in `testdata/` subdirectories
|
||||
- Repository tests use in-memory SQLite
|
||||
- API tests use httptest
|
||||
|
||||
## Common Workflows
|
||||
|
||||
### Adding a new GraphQL field
|
||||
|
||||
1. Edit schema in `api/schema.graphqls`
|
||||
2. Run `make graphql`
|
||||
3. Implement resolver in `internal/graph/schema.resolvers.go`
|
||||
|
||||
### Adding a new REST endpoint
|
||||
|
||||
1. Add handler in `internal/api/*.go`
|
||||
2. Add route in `internal/api/rest.go`
|
||||
3. Add Swagger annotations
|
||||
4. Run `make swagger`
|
||||
|
||||
### Adding a new metric data backend
|
||||
|
||||
1. Implement metric loading functions in `pkg/metricstore/query.go`
|
||||
2. Add cluster configuration to metric store initialization
|
||||
3. Update config.json schema documentation
|
||||
|
||||
### Modifying database schema
|
||||
|
||||
1. Create new migration in `internal/repository/migrations/sqlite3/`
|
||||
2. Increment `repository.Version`
|
||||
3. Test with fresh database and existing database
|
||||
|
||||
## NATS API
|
||||
|
||||
The backend supports a NATS-based API as an alternative to the REST API for job and node operations.
|
||||
|
||||
### Setup
|
||||
|
||||
1. Configure NATS client connection in `config.json`:
|
||||
```json
|
||||
{
|
||||
"nats": {
|
||||
"address": "nats://localhost:4222",
|
||||
"username": "user",
|
||||
"password": "pass"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. Configure API subjects in `config.json` under `main`:
|
||||
```json
|
||||
{
|
||||
"main": {
|
||||
"apiSubjects": {
|
||||
"subjectJobEvent": "cc.job.event",
|
||||
"subjectNodeState": "cc.node.state"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Message Format
|
||||
|
||||
Messages use **InfluxDB line protocol** format with the following structure:
|
||||
|
||||
#### Job Events
|
||||
|
||||
**Start Job:**
|
||||
```
|
||||
job,function=start_job event="{\"jobId\":123,\"user\":\"alice\",\"cluster\":\"test\", ...}" 1234567890000000000
|
||||
```
|
||||
|
||||
**Stop Job:**
|
||||
```
|
||||
job,function=stop_job event="{\"jobId\":123,\"cluster\":\"test\",\"startTime\":1234567890,\"stopTime\":1234571490,\"jobState\":\"completed\"}" 1234571490000000000
|
||||
```
|
||||
|
||||
**Tags:**
|
||||
- `function`: Either `start_job` or `stop_job`
|
||||
|
||||
**Fields:**
|
||||
- `event`: JSON payload containing job data (see REST API documentation for schema)
|
||||
|
||||
#### Node State Updates
|
||||
|
||||
```json
|
||||
{
|
||||
"cluster": "testcluster",
|
||||
"nodes": [
|
||||
{
|
||||
"hostname": "node001",
|
||||
"states": ["allocated"],
|
||||
"cpusAllocated": 8,
|
||||
"memoryAllocated": 16384,
|
||||
"gpusAllocated": 0,
|
||||
"jobsRunning": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Implementation Notes
|
||||
|
||||
- NATS API mirrors REST API functionality but uses messaging
|
||||
- Job start/stop events are processed asynchronously
|
||||
- Duplicate job detection is handled (same as REST API)
|
||||
- All validation rules from REST API apply
|
||||
- Messages are logged; no responses are sent back to publishers
|
||||
- If NATS client is unavailable, API subscriptions are skipped (logged as warning)
|
||||
|
||||
## Dependencies
|
||||
|
||||
- Go 1.24.0+ (check go.mod for exact version)
|
||||
- Node.js (for frontend builds)
|
||||
- SQLite 3 (only supported database)
|
||||
- Optional: NATS server for NATS API integration
|
||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 NHR@FAU, University Erlangen-Nuremberg
|
||||
Copyright (c) NHR@FAU, University Erlangen-Nuremberg
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
140
Makefile
140
Makefile
@@ -1,11 +1,9 @@
|
||||
TARGET = ./cc-backend
|
||||
VAR = ./var
|
||||
CFG = config.json .env
|
||||
FRONTEND = ./web/frontend
|
||||
VERSION = 1.0.0
|
||||
VERSION = 1.5.0
|
||||
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
||||
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
|
||||
LD_FLAGS = '-s -X main.buildTime=${CURRENT_TIME} -X main.version=${VERSION} -X main.hash=${GIT_HASH}'
|
||||
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
|
||||
|
||||
EXECUTABLES = go npm
|
||||
K := $(foreach exec,$(EXECUTABLES),\
|
||||
@@ -24,18 +22,41 @@ SVELTE_COMPONENTS = status \
|
||||
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
|
||||
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
|
||||
$(wildcard $(FRONTEND)/src/analysis/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/*.js) \
|
||||
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/header/*.svelte) \
|
||||
$(wildcard $(FRONTEND)/src/job/*.svelte)
|
||||
|
||||
.PHONY: clean test tags $(TARGET)
|
||||
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
|
||||
|
||||
.NOTPARALLEL:
|
||||
|
||||
$(TARGET): $(VAR) $(CFG) $(SVELTE_TARGETS)
|
||||
$(TARGET): $(SVELTE_TARGETS)
|
||||
$(info ===> BUILD cc-backend)
|
||||
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
||||
|
||||
frontend:
|
||||
$(info ===> BUILD frontend)
|
||||
cd web/frontend && npm ci && npm run build
|
||||
|
||||
swagger:
|
||||
$(info ===> GENERATE swagger)
|
||||
@go tool github.com/swaggo/swag/cmd/swag init --parseDependency -d ./internal/api -g rest.go -o ./api
|
||||
@mv ./api/docs.go ./internal/api/docs.go
|
||||
|
||||
graphql:
|
||||
$(info ===> GENERATE graphql)
|
||||
@go tool github.com/99designs/gqlgen
|
||||
|
||||
clean:
|
||||
$(info ===> CLEAN)
|
||||
@go clean
|
||||
@@ -45,7 +66,7 @@ distclean:
|
||||
@$(MAKE) clean
|
||||
$(info ===> DISTCLEAN)
|
||||
@rm -rf $(FRONTEND)/node_modules
|
||||
@rm -rf $(VAR)
|
||||
@rm -rf ./var
|
||||
|
||||
test:
|
||||
$(info ===> TESTING)
|
||||
@@ -59,103 +80,8 @@ tags:
|
||||
@ctags -R
|
||||
|
||||
$(VAR):
|
||||
@mkdir $(VAR)
|
||||
|
||||
config.json:
|
||||
$(info ===> Initialize config.json file)
|
||||
@cp configs/config.json config.json
|
||||
|
||||
.env:
|
||||
$(info ===> Initialize .env file)
|
||||
@cp configs/env-template.txt .env
|
||||
@mkdir -p $(VAR)
|
||||
|
||||
$(SVELTE_TARGETS): $(SVELTE_SRC)
|
||||
$(info ===> BUILD frontend)
|
||||
cd web/frontend && npm install && npm run build
|
||||
|
||||
install: $(TARGET)
|
||||
@WORKSPACE=$(PREFIX)
|
||||
@if [ -z "$${WORKSPACE}" ]; then exit 1; fi
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
|
||||
@install -Dpm 755 $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
||||
@install -Dpm 600 configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
||||
|
||||
.ONESHELL:
|
||||
.PHONY: RPM
|
||||
RPM: build/package/cc-backend.spec
|
||||
@WORKSPACE="$${PWD}"
|
||||
@SPECFILE="$${WORKSPACE}/build/package/cc-backend.spec"
|
||||
# Setup RPM build tree
|
||||
@eval $$(rpm --eval "ARCH='%{_arch}' RPMDIR='%{_rpmdir}' SOURCEDIR='%{_sourcedir}' SPECDIR='%{_specdir}' SRPMDIR='%{_srcrpmdir}' BUILDDIR='%{_builddir}'")
|
||||
@mkdir --parents --verbose "$${RPMDIR}" "$${SOURCEDIR}" "$${SPECDIR}" "$${SRPMDIR}" "$${BUILDDIR}"
|
||||
# Create source tarball
|
||||
@COMMITISH="HEAD"
|
||||
@VERS=$$(git describe --tags $${COMMITISH})
|
||||
@VERS=$${VERS#v}
|
||||
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
||||
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
|
||||
@eval $$(rpmspec --query --queryformat "NAME='%{name}' VERSION='%{version}' RELEASE='%{release}' NVR='%{NVR}' NVRA='%{NVRA}'" --define="VERS $${VERS}" "$${SPECFILE}")
|
||||
@PREFIX="$${NAME}-$${VERSION}"
|
||||
@FORMAT="tar.gz"
|
||||
@SRCFILE="$${SOURCEDIR}/$${PREFIX}.$${FORMAT}"
|
||||
@git archive --verbose --format "$${FORMAT}" --prefix="$${PREFIX}/" --output="$${SRCFILE}" $${COMMITISH}
|
||||
# Build RPM and SRPM
|
||||
@rpmbuild -ba --define="VERS $${VERS}" --rmsource --clean "$${SPECFILE}"
|
||||
# Report RPMs and SRPMs when in GitHub Workflow
|
||||
@if [ "$${GITHUB_ACTIONS}" = true ]; then
|
||||
@ RPMFILE="$${RPMDIR}/$${ARCH}/$${NVRA}.rpm"
|
||||
@ SRPMFILE="$${SRPMDIR}/$${NVR}.src.rpm"
|
||||
@ echo "RPM: $${RPMFILE}"
|
||||
@ echo "SRPM: $${SRPMFILE}"
|
||||
@ echo "::set-output name=SRPM::$${SRPMFILE}"
|
||||
@ echo "::set-output name=RPM::$${RPMFILE}"
|
||||
@fi
|
||||
|
||||
.ONESHELL:
|
||||
.PHONY: DEB
|
||||
DEB: build/package/cc-backend.deb.control
|
||||
@BASEDIR=$${PWD}
|
||||
@WORKSPACE=$${PWD}/.dpkgbuild
|
||||
@DEBIANDIR=$${WORKSPACE}/debian
|
||||
@DEBIANBINDIR=$${WORKSPACE}/DEBIAN
|
||||
@mkdir --parents --verbose $$WORKSPACE $$DEBIANBINDIR
|
||||
#@mkdir --parents --verbose $$DEBIANDIR
|
||||
@CONTROLFILE="$${BASEDIR}/build/package/cc-backend.deb.control"
|
||||
@COMMITISH="HEAD"
|
||||
@VERS=$$(git describe --tags --abbrev=0 $${COMMITISH})
|
||||
@VERS=$${VERS#v}
|
||||
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
||||
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
|
||||
@ARCH=$$(uname -m)
|
||||
@ARCH=$$(echo $$ARCH | sed -e s+'_'+'-'+g)
|
||||
@if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi
|
||||
@PREFIX="$${NAME}-$${VERSION}_$${ARCH}"
|
||||
@SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$${WORKSPACE}"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//')
|
||||
@SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')"
|
||||
#@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control
|
||||
@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control
|
||||
@mkdir --parents --verbose "$${WORKSPACE}"/$(VAR)
|
||||
@touch "$${WORKSPACE}"/$(VAR)/job.db
|
||||
@cd web/frontend && yarn install && yarn build && cd -
|
||||
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
|
||||
@cp $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
||||
@chmod 0755 $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
||||
@mkdir --parents --verbose $${WORKSPACE}/etc/$(TARGET)
|
||||
@cp configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
||||
@chmod 0600 $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/systemd/system
|
||||
@cp build/package/$(TARGET).service $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
|
||||
@chmod 0644 $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
|
||||
@mkdir --parents --verbose $${WORKSPACE}/etc/default
|
||||
@cp build/package/$(TARGET).config $${WORKSPACE}/etc/default/$(TARGET)
|
||||
@chmod 0600 $${WORKSPACE}/etc/default/$(TARGET)
|
||||
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/sysusers.d
|
||||
@cp build/package/$(TARGET).sysusers $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
|
||||
@chmod 0644 $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
|
||||
@DEB_FILE="cc-metric-store_$${VERS}_$${ARCH}.deb"
|
||||
@dpkg-deb -b $${WORKSPACE} "$$DEB_FILE"
|
||||
@rm -r "$${WORKSPACE}"
|
||||
@if [ "$${GITHUB_ACTIONS}" = "true" ]; then
|
||||
@ echo "::set-output name=DEB::$${DEB_FILE}"
|
||||
@fi
|
||||
cd web/frontend && npm ci && npm run build
|
||||
|
||||
277
README.md
277
README.md
@@ -1,67 +1,108 @@
|
||||
# NOTE
|
||||
|
||||
While we do our best to keep the master branch in a usable state, there is no guarantee the master branch works.
|
||||
Please do not use it for production!
|
||||
|
||||
Please have a look at the [Release
|
||||
Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
|
||||
for breaking changes!
|
||||
|
||||
# ClusterCockpit REST and GraphQL API backend
|
||||
|
||||
[](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)
|
||||
|
||||
This is a Golang backend implementation for a REST and GraphQL API according to the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications).
|
||||
It also includes a web interface for ClusterCockpit.
|
||||
While there is a backend for the InfluxDB timeseries database, the only tested and supported setup is using cc-metric-store as a mtric data backend.
|
||||
We will add documentation how to integrate ClusterCockpit with other timeseries databases in the future.
|
||||
This implementation replaces the previous PHP Symfony based ClusterCockpit web-interface.
|
||||
[Here](https://github.com/ClusterCockpit/ClusterCockpit/wiki/Why-we-switched-from-PHP-Symfony-to-a-Golang-based-solution) is a discussion of the reasons why we switched from PHP Symfony to a Golang based solution.
|
||||
This is a Golang backend implementation for a REST and GraphQL API according to
|
||||
the [ClusterCockpit
|
||||
specifications](https://github.com/ClusterCockpit/cc-specifications). It also
|
||||
includes a web interface for ClusterCockpit. This implementation replaces the
|
||||
previous PHP Symfony based ClusterCockpit web interface. The reasons for
|
||||
switching from PHP Symfony to a Golang based solution are explained
|
||||
[here](https://github.com/ClusterCockpit/ClusterCockpit/wiki/Why-we-switched-from-PHP-Symfony-to-a-Golang-based-solution).
|
||||
|
||||
## Overview
|
||||
|
||||
This is a golang web backend for the ClusterCockpit job-specific performance monitoring framework.
|
||||
It provides a REST API for integrating ClusterCockpit with a HPC cluster batch system and external analysis scripts.
|
||||
Data exchange between the web frontend and backend is based on a GraphQL API.
|
||||
The web frontend is also served by the backend using [Svelte](https://svelte.dev/) components.
|
||||
Layout and styling is based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/).
|
||||
The backend uses [SQLite 3](https://sqlite.org/) as relational SQL database by default.
|
||||
It can optionally use a MySQL/MariaDB database server.
|
||||
Finished batch jobs are stored in a file-based job archive following [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||
The backend supports authentication using local accounts or an external LDAP directory.
|
||||
Authorization for APIs is implemented using [JWT](https://jwt.io/) tokens created with public/private key encryption.
|
||||
This is a Golang web backend for the ClusterCockpit job-specific performance
|
||||
monitoring framework. It provides a REST API and an optional NATS-based messaging
|
||||
API for integrating ClusterCockpit with an HPC cluster batch system and external
|
||||
analysis scripts. Data exchange between the web front-end and the back-end is
|
||||
based on a GraphQL API. The web frontend is also served by the backend using
|
||||
[Svelte](https://svelte.dev/) components. Layout and styling are based on
|
||||
[Bootstrap 5](https://getbootstrap.com/) using
|
||||
[Bootstrap Icons](https://icons.getbootstrap.com/).
|
||||
|
||||
You find more detailed information here:
|
||||
* `./configs/README.md`: Infos about configuration and setup of cc-backend.
|
||||
* `./init/README.md`: Infos on how to setup cc-backend as systemd service on Linux.
|
||||
* `./tools/README.md`: Infos on the JWT authorizatin token workflows in ClusterCockpit.
|
||||
* `./docs`: You can find further documentation here. There is also a Hands-on tutorial that is recommended to get familiar with the ClusterCockpit setup.
|
||||
The backend uses [SQLite 3](https://sqlite.org/) as the relational SQL database.
|
||||
While there are metric data backends for the InfluxDB and Prometheus time series
|
||||
databases, the only tested and supported setup is to use cc-metric-store as the
|
||||
metric data backend. Documentation on how to integrate ClusterCockpit with other
|
||||
time series databases will be added in the future.
|
||||
|
||||
**NOTICE**
|
||||
For real-time integration with HPC systems, the backend can subscribe to
|
||||
[NATS](https://nats.io/) subjects to receive job start/stop events and node
|
||||
state updates, providing an alternative to REST API polling.
|
||||
|
||||
ClusterCockpit requires a recent version of the golang toolchain and node.js.
|
||||
You can check in `go.mod` what is the current minimal golang version required.
|
||||
Homebrew and Archlinux usually have up to date golang versions. For other Linux
|
||||
distros this often means you have to install the golang compiler yourself.
|
||||
Fortunatly this is easy with golang. Since a lot of functionality is based on
|
||||
the go standard library it is crucial for security and performance to use a
|
||||
recent golang version. Also an old golang tool chain may restrict the supported
|
||||
versions of third party packages.
|
||||
Completed batch jobs are stored in a file-based job archive according to
|
||||
[this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||
The backend supports authentication via local accounts, an external LDAP
|
||||
directory, and JWT tokens. Authorization for APIs is implemented with
|
||||
[JWT](https://jwt.io/) tokens created with public/private key encryption.
|
||||
|
||||
## Demo Setup
|
||||
You find a detailed documentation on the [ClusterCockpit
|
||||
Webpage](https://clustercockpit.org).
|
||||
|
||||
We provide a shell skript that downloads demo data and automatically builds and
|
||||
starts cc-backend. You need `wget`, `go`, `node`, `npm` in your path to start
|
||||
the demo. The demo will download 32MB of data (223MB on disk).
|
||||
## Build requirements
|
||||
|
||||
ClusterCockpit requires a current version of the golang toolchain and node.js.
|
||||
You can check `go.mod` to see what is the current minimal golang version needed.
|
||||
Homebrew and Archlinux usually have current golang versions. For other Linux
|
||||
distros this often means that you have to install the golang compiler yourself.
|
||||
Fortunately, this is easy with golang. Since much of the functionality is based
|
||||
on the Go standard library, it is crucial for security and performance to use a
|
||||
current version of golang. In addition, an old golang toolchain may limit the supported
|
||||
versions of third-party packages.
|
||||
|
||||
## How to try ClusterCockpit with a demo setup
|
||||
|
||||
We provide a shell script that downloads demo data and automatically starts the
|
||||
cc-backend. You will need `wget`, `go`, `node`, `npm` in your path to
|
||||
start the demo. The demo downloads 32MB of data (223MB on disk).
|
||||
|
||||
```sh
|
||||
git clone https://github.com/ClusterCockpit/cc-backend.git
|
||||
cd ./cc-backend
|
||||
./startDemo.sh
|
||||
```
|
||||
You can access the web interface at http://localhost:8080.
|
||||
Credentials for login: `demo:demo`.
|
||||
Please note that some views do not work without a metric backend (e.g., the Systems and Status view).
|
||||
|
||||
## Howto Build and Run
|
||||
You can also try the demo using the latest release binary.
|
||||
Create a folder and put the release binary `cc-backend` into this folder.
|
||||
Execute the following steps:
|
||||
|
||||
There is a Makefile to automate the build of cc-backend. The Makefile supports the following targets:
|
||||
* `$ make`: Initialize `var` directory and build svelte frontend and backend binary. Please note that there is no proper prerequesite handling. Any change of frontend source files will trigger a complete rebuild.
|
||||
* `$ make clean`: Clean go build cache and remove binary
|
||||
* `$ make test`: Run the tests that are also run in the GitHub workflow setup.
|
||||
```shell
|
||||
./cc-backend -init
|
||||
vim config.json (Add a second cluster entry and name the clusters alex and fritz)
|
||||
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
|
||||
tar xf job-archive-demo.tar
|
||||
./cc-backend -init-db -add-user demo:admin:demo -loglevel info
|
||||
./cc-backend -server -dev -loglevel info
|
||||
```
|
||||
|
||||
You can access the web interface at [http://localhost:8080](http://localhost:8080).
|
||||
Credentials for login are `demo:demo`.
|
||||
Please note that some views do not work without a metric backend (e.g., the
|
||||
Analysis, Systems and Status views).
|
||||
|
||||
## How to build and run
|
||||
|
||||
There is a Makefile to automate the build of cc-backend. The Makefile supports
|
||||
the following targets:
|
||||
|
||||
- `make`: Initialize `var` directory and build svelte frontend and backend
|
||||
binary. Note that there is no proper prerequisite handling. Any change of
|
||||
frontend source files will result in a complete rebuild.
|
||||
- `make clean`: Clean go build cache and remove binary.
|
||||
- `make test`: Run the tests that are also run in the GitHub workflow setup.
|
||||
|
||||
A common workflow for setting up cc-backend from scratch is:
|
||||
|
||||
A common workflow to setup cc-backend fron scratch is:
|
||||
```sh
|
||||
git clone https://github.com/ClusterCockpit/cc-backend.git
|
||||
|
||||
@@ -72,87 +113,95 @@ make
|
||||
# EDIT THE .env FILE BEFORE YOU DEPLOY (Change the secrets)!
|
||||
# If authentication is disabled, it can be empty.
|
||||
cp configs/env-template.txt .env
|
||||
vim ./.env
|
||||
vim .env
|
||||
|
||||
cp configs/config.json ./
|
||||
vim ./config.json
|
||||
cp configs/config.json .
|
||||
vim config.json
|
||||
|
||||
#Optional: Link an existing job archive:
|
||||
ln -s <your-existing-job-archive> ./var/job-archive
|
||||
|
||||
# This will first initialize the job.db database by traversing all
|
||||
# `meta.json` files in the job-archive and add a new user. `--no-server` will cause the
|
||||
# executable to stop once it has done that instead of starting a server.
|
||||
./cc-backend --init-db --add-user <your-username>:admin:<your-password>
|
||||
# `meta.json` files in the job-archive and add a new user.
|
||||
./cc-backend -init-db -add-user <your-username>:admin:<your-password>
|
||||
|
||||
# Start a HTTP server (HTTPS can be enabled, the default port is 8080).
|
||||
# Start a HTTP server (HTTPS can be enabled in the configuration, the default port is 8080).
|
||||
# The --dev flag enables GraphQL Playground (http://localhost:8080/playground) and Swagger UI (http://localhost:8080/swagger).
|
||||
./cc-backend --server --dev
|
||||
./cc-backend -server -dev
|
||||
|
||||
# Show other options:
|
||||
./cc-backend --help
|
||||
./cc-backend -help
|
||||
```
|
||||
### Run as systemd daemon
|
||||
|
||||
In order to run this program as a daemon, cc-backend ships with an [example systemd setup](./init/README.md).
|
||||
## Project file structure
|
||||
|
||||
## Configuration and Setup
|
||||
|
||||
cc-backend can be used as a local web-interface for an existing job archive or as a general web-interface server for a live ClusterCockpit Monitoring framework.
|
||||
|
||||
Create your job-archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||
At least one cluster with a valid `cluster.json` file is required.
|
||||
Having no jobs in the job-archive at all is fine.
|
||||
|
||||
### Configuration
|
||||
|
||||
A config file in the JSON format has to be provided using `--config` to override the defaults.
|
||||
By default, if there is a `config.json` file in the current directory of the `cc-backend` process, it will be loaded even without the `--config` flag.
|
||||
You find documentation of all supported configuration and command line options [here](./configs/README.md).
|
||||
|
||||
## Database initialization and migration
|
||||
|
||||
Every cc-backend version supports a specific database version.
|
||||
On startup the version of the sqlite database is validated and cc-backend will terminate if the version does not match.
|
||||
cc-backend supports to migrate the database schema up to the required version using the `--migrate-db` command line option.
|
||||
In case the database file does not yet exist it is created and initialized by the `--migrate-db` command line option.
|
||||
In case you want to use a newer database version with an older version of cc-backend you can downgrade a database using the external [migrate](https://github.com/golang-migrate/migrate) tool.
|
||||
In this case you have to provide the path to the migration files in a recent source tree: `./internal/repository/migrations/`.
|
||||
|
||||
## Development
|
||||
In case the REST or GraphQL API is changed the according code generators have to be used.
|
||||
|
||||
### Update GraphQL schema
|
||||
|
||||
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API.
|
||||
The schema can be found in `./api/schema.graphqls`.
|
||||
After changing it, you need to run `go run github.com/99designs/gqlgen` which will update `./internal/graph/model`.
|
||||
In case new resolvers are needed, they will be inserted into `./internal/graph/schema.resolvers.go`, where you will need to implement them.
|
||||
If you start cc-backend with flag `--dev` the GraphQL Playground UI is available at http://localhost:8080/playground .
|
||||
|
||||
### Update Swagger UI
|
||||
|
||||
This project integrates [swagger ui](https://swagger.io/tools/swagger-ui/) to document and test its REST API.
|
||||
The swagger doc files can be found in `./api/`.
|
||||
You can generate the configuration of swagger-ui by running `go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api `.
|
||||
You need to move the generated `./api/doc.go` to `./internal/api/doc.go`.
|
||||
If you start cc-backend with flag `--dev` the Swagger UI is available at http://localhost:8080/swagger/ .
|
||||
You have to enter a JWT key for a user with role API.
|
||||
|
||||
**NOTICE** The user owning the JWT token must not be logged in the same browser (have a running session), otherwise Swagger requests will not work. It is recommended to create a separate user that has just the API role.
|
||||
|
||||
## Project Structure
|
||||
|
||||
- `api/` contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in [./api/openapi.yaml](./api/openapi.yaml).
|
||||
- `cmd/cc-backend` contains `main.go` for the main application.
|
||||
- `cmd/gen-keypair` contains is a small application to generate a compatible JWT keypair includin a README about JWT setup in ClusterCockpit.
|
||||
- `configs/` contains documentation about configuration and command line options and required environment variables. An example configuration file is provided.
|
||||
- `init/` contains an example systemd setup for production use.
|
||||
- `internal/` contains library source code that is not intended to be used by others.
|
||||
- `pkg/` contains go packages that can also be used by other projects.
|
||||
- `test/` Test apps and test data.
|
||||
- `web/` Server side templates and frontend related files:
|
||||
- `templates` Serverside go templates
|
||||
- `frontend` Svelte components and static assets for frontend UI
|
||||
- `gqlgen.yml` configures the behaviour and generation of [gqlgen](https://github.com/99designs/gqlgen).
|
||||
- `startDemo.sh` is a shell script that sets up demo data, and builds and starts cc-backend.
|
||||
- [`.github/`](https://github.com/ClusterCockpit/cc-backend/tree/master/.github)
|
||||
GitHub Actions workflows and dependabot configuration for CI/CD.
|
||||
- [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
|
||||
contains the API schema files for the REST and GraphQL APIs. The REST API is
|
||||
documented in the OpenAPI 3.0 format in
|
||||
[./api/swagger.yaml](./api/swagger.yaml). The GraphQL schema is in
|
||||
[./api/schema.graphqls](./api/schema.graphqls).
|
||||
- [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
|
||||
contains the main application entry point and CLI implementation.
|
||||
- [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
|
||||
contains documentation about configuration and command line options and required
|
||||
environment variables. Sample configuration files are provided.
|
||||
- [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
|
||||
contains an example of setting up systemd for production use.
|
||||
- [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
|
||||
contains library source code that is not intended for use by others.
|
||||
- [`api`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/api)
|
||||
REST API handlers and NATS integration
|
||||
- [`archiver`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/archiver)
|
||||
Job archiving functionality
|
||||
- [`auth`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/auth)
|
||||
Authentication (local, LDAP, OIDC) and JWT token handling
|
||||
- [`config`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/config)
|
||||
Configuration management and validation
|
||||
- [`graph`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/graph)
|
||||
GraphQL schema and resolvers
|
||||
- [`importer`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/importer)
|
||||
Job data import and database initialization
|
||||
- [`metricdispatch`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricdispatch)
|
||||
Dispatches metric data loading to appropriate backends
|
||||
- [`repository`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository)
|
||||
Database repository layer for jobs and metadata
|
||||
- [`routerConfig`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/routerConfig)
|
||||
HTTP router configuration and middleware
|
||||
- [`tagger`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/tagger)
|
||||
Job classification and application detection
|
||||
- [`taskmanager`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/taskmanager)
|
||||
Background task management and scheduled jobs
|
||||
- [`metricstoreclient`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/metricstoreclient)
|
||||
Client for cc-metric-store queries
|
||||
- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
|
||||
contains Go packages that can be used by other projects.
|
||||
- [`archive`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/archive)
|
||||
Job archive backend implementations (filesystem, S3, SQLite)
|
||||
- [`metricstore`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg/metricstore)
|
||||
In-memory metric data store with checkpointing and metric loading
|
||||
- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
|
||||
Additional command line helper tools.
|
||||
- [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
|
||||
Commands for getting infos about an existing job archive, importing jobs
|
||||
between archive backends, and converting archives between JSON and Parquet formats.
|
||||
- [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration)
|
||||
Tool for migrating job archives between formats.
|
||||
- [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
|
||||
Tool to convert external pubkey for use in `cc-backend`.
|
||||
- [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
|
||||
contains a small application to generate a compatible JWT keypair. You find
|
||||
documentation on how to use it
|
||||
[here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
|
||||
- [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web)
|
||||
Server-side templates and frontend-related files:
|
||||
- [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
|
||||
Svelte components and static assets for the frontend UI
|
||||
- [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
|
||||
Server-side Go templates, including monitoring views
|
||||
- [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
|
||||
Configures the behaviour and generation of
|
||||
[gqlgen](https://github.com/99designs/gqlgen).
|
||||
- [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh)
|
||||
is a shell script that sets up demo data, and builds and starts `cc-backend`.
|
||||
|
||||
290
ReleaseNotes.md
290
ReleaseNotes.md
@@ -1,27 +1,277 @@
|
||||
# `cc-backend` version 1.0.0
|
||||
# `cc-backend` version 1.5.0
|
||||
|
||||
Supports job archive version 1 and database version 4.
|
||||
Supports job archive version 3 and database version 10.
|
||||
|
||||
This is the initial release of `cc-backend`, the API backend and frontend
|
||||
This is a feature release of `cc-backend`, the API backend and frontend
|
||||
implementation of ClusterCockpit.
|
||||
For release specific notes visit the [ClusterCockpit Documentation](https://clusterockpit.org/docs/release/).
|
||||
|
||||
**Breaking changes**
|
||||
## Breaking changes
|
||||
|
||||
The aggregate job statistic core hours is now computed using the job table
|
||||
column `num_hwthreads`. In a the future release this column will be renamed to
|
||||
`num_cores`. For correct display of core hours `num_hwthreads` must be correctly
|
||||
filled on job start. If your existing jobs do not provide the correct value in
|
||||
this column then you can set this with one SQL INSERT statement. This only applies
|
||||
if you have exclusive jobs, only. Please be aware that we treat this column as
|
||||
it is the number of cores. In case you have SMT enabled and `num_hwthreads`
|
||||
is not the number of cores the core hours will be too high by a factor!
|
||||
### Configuration changes
|
||||
|
||||
**Features**
|
||||
* Supports user roles admin, support, manager, user, and api.
|
||||
* Unified search bar supports job id, job name, project id, user name, and name
|
||||
* Performance improvements for sqlite db backend
|
||||
* Extended REST api supports to query job metrics
|
||||
* Better support for shared jobs
|
||||
* More flexible metric list configuration
|
||||
* Versioning and migration for database and job archive
|
||||
- **JSON attribute naming**: All JSON configuration attributes now use `kebab-case`
|
||||
style consistently (e.g., `api-allowed-ips` instead of `apiAllowedIPs`).
|
||||
Update your `config.json` accordingly.
|
||||
- **Removed `disable-archive` option**: This obsolete configuration option has been removed.
|
||||
- **Removed `clusters` config section**: The separate clusters configuration section
|
||||
has been removed. Cluster information is now derived from the job archive.
|
||||
- **`apiAllowedIPs` is now optional**: If not specified, defaults to not
|
||||
restricted.
|
||||
|
||||
### Architecture changes
|
||||
|
||||
- **Web framework replaced**: Migrated from `gorilla/mux` to `chi` as the HTTP
|
||||
router. This should be transparent to users but affects how middleware and
|
||||
routes are composed. A proper 404 handler is now in place.
|
||||
- **MetricStore moved**: The `metricstore` package has been moved from `internal/`
|
||||
to `pkg/` as it is now part of the public API.
|
||||
- **MySQL/MariaDB support removed**: Only SQLite is now supported as the database backend.
|
||||
- **Archive to Cleanup renaming**: Archive-related functions have been refactored
|
||||
and renamed to "Cleanup" for clarity.
|
||||
- **`minRunningFor` filter removed**: This undocumented filter has been removed
|
||||
from the API and frontend.
|
||||
|
||||
### Dependency changes
|
||||
|
||||
- **cc-lib v2.5.1**: Switched to cc-lib version 2 with updated APIs (currently at v2.5.1)
|
||||
- **cclib NATS client**: Now using the cclib NATS client implementation
|
||||
- Removed obsolete `util.Float` usage from cclib
|
||||
|
||||
## Major new features
|
||||
|
||||
### NATS API Integration
|
||||
|
||||
- **Real-time job events**: Subscribe to job start/stop events via NATS
|
||||
- **Node state updates**: Receive real-time node state changes via NATS
|
||||
- **Configurable subjects**: NATS API subjects are now configurable via `api-subjects`
|
||||
- **Deadlock fixes**: Improved NATS client stability and graceful shutdown
|
||||
|
||||
### Public Dashboard
|
||||
|
||||
- **Public-facing interface**: New public dashboard route for external users
|
||||
- **DoubleMetricPlot component**: New visualization component for comparing metrics
|
||||
- **Improved layout**: Reviewed and optimized dashboard layouts for better readability
|
||||
|
||||
### Enhanced Node Management
|
||||
|
||||
- **Node state tracking**: New node table in database with timestamp tracking
|
||||
- **Node state filtering**: Filter jobs by node state in systems view
|
||||
- **Node list enhancements**: Improved paging, filtering, and continuous scroll support
|
||||
- **Nodestate retention and archiving**: Node state data is now subject to configurable
|
||||
retention policies and can be archived to Parquet format for long-term storage
|
||||
- **Faulty node metric tracking**: Faulty node state metric lists are persisted to the database
|
||||
|
||||
### Health Monitoring
|
||||
|
||||
- **Health status dashboard**: New dedicated "Health" tab in the status details view
|
||||
showing per-node metric health across the cluster
|
||||
- **CCMS health check**: Support for querying health status of external
|
||||
cc-metric-store (CCMS) instances via the API
|
||||
- **GraphQL health endpoints**: New GraphQL queries and resolvers for health data
|
||||
- **Cluster/subcluster filter**: Filter health status view by cluster or subcluster
|
||||
|
||||
### Log Viewer
|
||||
|
||||
- **Web-based log viewer**: New log viewer page in the admin interface for inspecting
|
||||
backend log output directly from the browser without shell access
|
||||
- **Accessible from header**: Quick access link from the navigation header
|
||||
|
||||
### MetricStore Improvements
|
||||
|
||||
- **Memory tracking worker**: New worker for CCMS memory usage tracking
|
||||
- **Dynamic retention**: Support for job specific dynamic retention times
|
||||
- **Improved compression**: Transparent compression for job archive imports
|
||||
- **Parallel processing**: Parallelized Iter function in all archive backends
|
||||
|
||||
### Job Tagging System
|
||||
|
||||
- **Job tagger option**: Enable automatic job tagging via configuration flag
|
||||
- **Application detection**: Automatic detection of applications (MATLAB, GROMACS, etc.)
|
||||
- **Job classification**: Automatic detection of pathological jobs
|
||||
- **omit-tagged**: Option to exclude tagged jobs from retention/cleanup operations (`none`, `all`, or `user`)
|
||||
- **Admin UI trigger**: Taggers can be run on-demand from the admin web interface
|
||||
without restarting the backend
|
||||
|
||||
### Archive Backends
|
||||
|
||||
- **Parquet archive format**: New Parquet file format for job archiving, providing
|
||||
columnar storage with efficient compression for analytical workloads
|
||||
- **S3 backend**: Full support for S3-compatible object storage
|
||||
- **SQLite backend**: Full support for SQLite backend using blobs
|
||||
- **Performance improvements**: Fixed performance bugs in archive backends
|
||||
- **Better error handling**: Improved error messages and fallback handling
|
||||
- **Zstd compression**: Parquet writers use zstd compression for better
|
||||
compression ratios compared to the previous snappy default
|
||||
- **Optimized sort order**: Job and nodestate Parquet files are sorted by
|
||||
cluster, subcluster, and start time for efficient range queries
|
||||
|
||||
### Unified Archive Retention and Format Conversion
|
||||
|
||||
- **Uniform retention policy**: Job archive retention now supports both JSON and
|
||||
Parquet as target formats under a single, consistent policy configuration
|
||||
- **Archive manager tool**: The `tools/archive-manager` utility now supports
|
||||
format conversion between JSON and Parquet job archives
|
||||
- **Parquet reader**: Full Parquet archive reader implementation for reading back
|
||||
archived job data
|
||||
|
||||
## New features and improvements
|
||||
|
||||
### Frontend
|
||||
|
||||
- **Loading indicators**: Added loading indicators to status detail and job lists
|
||||
- **Job info layout**: Reviewed and improved job info row layout
|
||||
- **Metric selection**: Enhanced metric selection with drag-and-drop fixes
|
||||
- **Filter presets**: Move list filter preset to URL for easy sharing
|
||||
- **Job comparison**: Improved job comparison views and plots
|
||||
- **Subcluster reactivity**: Job list now reacts to subcluster filter changes
|
||||
- **Short jobs quick selection**: New "Short jobs" quick-filter button in job lists
|
||||
replaces the removed undocumented `minRunningFor` filter
|
||||
- **Row plot cursor sync**: Cursor position is now synchronized across all metric
|
||||
plots in a job list row for easier cross-metric comparison
|
||||
- **Disabled metrics handling**: Improved handling and display of disabled metrics
|
||||
across job view, node view, and list rows
|
||||
- **"Not configured" info cards**: Informational cards shown when optional features
|
||||
are not yet configured
|
||||
- **Frontend dependencies**: Bumped frontend dependencies to latest versions
|
||||
- **Svelte 5 compatibility**: Fixed Svelte state warnings and compatibility issues
|
||||
|
||||
### Backend
|
||||
|
||||
- **Progress bars**: Import function now shows progress during long operations
|
||||
- **Better logging**: Improved logging with appropriate log levels throughout
|
||||
- **Graceful shutdown**: Fixed shutdown timeout bugs and hanging issues
|
||||
- **Configuration defaults**: Sensible defaults for most configuration options
|
||||
- **Documentation**: Extensive documentation improvements across packages
|
||||
- **Server flag in systemd unit**: Example systemd unit now includes the `-server` flag
|
||||
|
||||
### Security
|
||||
|
||||
- **LDAP security hardening**: Improved input validation, connection handling, and
|
||||
error reporting in the LDAP authenticator
|
||||
- **OIDC security hardening**: Stricter token validation and improved error handling
|
||||
in the OIDC authenticator
|
||||
- **Auth schema extensions**: Additional schema fields for improved auth configuration
|
||||
|
||||
### API improvements
|
||||
|
||||
- **Role-based metric visibility**: Metrics can now have role-based access control
|
||||
- **Job exclusivity filter**: New filter for exclusive vs. shared jobs
|
||||
- **Improved error messages**: Better error messages and documentation in REST API
|
||||
- **GraphQL enhancements**: Improved GraphQL queries and resolvers
|
||||
- **Stop job lookup order**: Reversed lookup order in stop job requests for
|
||||
more reliable job matching (cluster+jobId first, then jobId alone)
|
||||
|
||||
### Performance
|
||||
|
||||
- **Database indices**: Optimized SQLite indices for better query performance
|
||||
- **Job cache**: Introduced caching table for faster job inserts
|
||||
- **Parallel imports**: Archive imports now run in parallel where possible
|
||||
- **External tool integration**: Optimized use of external tools (fd) for better performance
|
||||
- **Node repository queries**: Reviewed and optimized node repository SQL queries
|
||||
- **Buffer pool**: Resized and pooled internal buffers for better memory reuse
|
||||
|
||||
### Developer experience
|
||||
|
||||
- **AI agent guidelines**: Added documentation for AI coding agents (AGENTS.md, CLAUDE.md)
|
||||
- **Example API payloads**: Added example JSON API payloads for testing
|
||||
- **Unit tests**: Added more unit tests for NATS API, node repository, and other components
|
||||
- **Test improvements**: Better test coverage; test DB is now copied before unit tests
|
||||
to avoid state pollution between test runs
|
||||
- **Parquet writer tests**: Comprehensive tests for Parquet archive writing and conversion
|
||||
|
||||
## Bug fixes
|
||||
|
||||
- Fixed nodelist paging issues
|
||||
- Fixed metric select drag and drop functionality
|
||||
- Fixed render race conditions in nodeList
|
||||
- Fixed tag count grouping including type
|
||||
- Fixed wrong metricstore schema (missing comma)
|
||||
- Fixed configuration issues causing shutdown hangs
|
||||
- Fixed deadlock when NATS is not configured
|
||||
- Fixed archive backend performance bugs
|
||||
- Fixed continuous scroll buildup on refresh
|
||||
- Improved footprint calculation logic
|
||||
- Fixed polar plot data query decoupling
|
||||
- Fixed missing resolution parameter handling
|
||||
- Fixed node table initialization fallback
|
||||
- Fixed reactivity key placement in nodeList
|
||||
- Fixed nodeList resolver data handling and increased nodestate filter cutoff
|
||||
- Fixed job always being transferred to main job table before archiving
|
||||
- Fixed AppTagger error handling and logging
|
||||
- Fixed log endpoint formatting and correctness
|
||||
- Fixed automatic refresh in metric status tab
|
||||
- Fixed NULL value handling in `health_state` and `health_metrics` columns
|
||||
- Fixed bugs related to `job_cache` IDs being used in the main job table
|
||||
- Fixed SyncJobs bug causing start job hooks to be called with wrong (cache) IDs
|
||||
- Fixed 404 handler route for sub-routers
|
||||
|
||||
## Configuration changes
|
||||
|
||||
### New configuration options
|
||||
|
||||
```json
|
||||
{
|
||||
"main": {
|
||||
"enable-job-taggers": true,
|
||||
"resampling": {
|
||||
"minimum-points": 600,
|
||||
"trigger": 180,
|
||||
"resolutions": [240, 60]
|
||||
},
|
||||
"api-subjects": {
|
||||
"subject-job-event": "cc.job.event",
|
||||
"subject-node-state": "cc.node.state"
|
||||
}
|
||||
},
|
||||
"nats": {
|
||||
"address": "nats://0.0.0.0:4222",
|
||||
"username": "root",
|
||||
"password": "root"
|
||||
},
|
||||
"cron": {
|
||||
"commit-job-worker": "1m",
|
||||
"duration-worker": "5m",
|
||||
"footprint-worker": "10m"
|
||||
},
|
||||
"metric-store": {
|
||||
"cleanup": {
|
||||
"mode": "archive",
|
||||
"interval": "48h",
|
||||
"directory": "./var/archive"
|
||||
}
|
||||
},
|
||||
"archive": {
|
||||
"retention": {
|
||||
"policy": "delete",
|
||||
"age": "6months",
|
||||
"target-format": "parquet"
|
||||
}
|
||||
},
|
||||
"nodestate": {
|
||||
"retention": {
|
||||
"policy": "archive",
|
||||
"age": "30d",
|
||||
"archive-path": "./var/nodestate-archive"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Migration notes
|
||||
|
||||
- Review and update your `config.json` to use kebab-case attribute names
|
||||
- If using NATS, configure the new `nats` and `api-subjects` sections
|
||||
- If using S3 archive backend, configure the new `archive` section options
|
||||
- Test the new public dashboard at `/public` route
|
||||
- Review cron worker configuration if you need different frequencies
|
||||
- If using the archive retention feature, configure the `target-format` option
|
||||
to choose between `json` (default) and `parquet` output formats
|
||||
- Consider enabling nodestate retention if you track node states over time
|
||||
|
||||
## Known issues
|
||||
|
||||
- Currently energy footprint metrics of type energy are ignored for calculating
|
||||
total energy.
|
||||
- With energy footprint metrics of type power the unit is ignored and it is
|
||||
assumed the metric has the unit Watt.
|
||||
|
||||
@@ -4,6 +4,34 @@ scalar Any
|
||||
scalar NullableFloat
|
||||
scalar MetricScope
|
||||
scalar JobState
|
||||
scalar SchedulerState
|
||||
scalar MonitoringState
|
||||
|
||||
type Node {
|
||||
id: ID!
|
||||
hostname: String!
|
||||
cluster: String!
|
||||
subCluster: String!
|
||||
jobsRunning: Int!
|
||||
cpusAllocated: Int
|
||||
memoryAllocated: Int
|
||||
gpusAllocated: Int
|
||||
schedulerState: SchedulerState!
|
||||
healthState: MonitoringState!
|
||||
metaData: Any
|
||||
healthData: Any
|
||||
}
|
||||
|
||||
type NodeStates {
|
||||
state: String!
|
||||
count: Int!
|
||||
}
|
||||
|
||||
type NodeStatesTimed {
|
||||
state: String!
|
||||
counts: [Int!]!
|
||||
times: [Int!]!
|
||||
}
|
||||
|
||||
type Job {
|
||||
id: ID!
|
||||
@@ -18,8 +46,9 @@ type Job {
|
||||
numNodes: Int!
|
||||
numHWThreads: Int!
|
||||
numAcc: Int!
|
||||
energy: Float!
|
||||
SMT: Int!
|
||||
exclusive: Int!
|
||||
shared: String!
|
||||
partition: String!
|
||||
arrayJobId: Int!
|
||||
monitoringStatus: Int!
|
||||
@@ -27,7 +56,8 @@ type Job {
|
||||
tags: [Tag!]!
|
||||
resources: [Resource!]!
|
||||
concurrentJobs: JobLinkResultList
|
||||
|
||||
footprint: [FootprintValue]
|
||||
energyFootprint: [EnergyFootprintValue]
|
||||
metaData: Any
|
||||
userData: User
|
||||
}
|
||||
@@ -40,7 +70,6 @@ type JobLink {
|
||||
type Cluster {
|
||||
name: String!
|
||||
partitions: [String!]! # Slurm partitions
|
||||
metricConfig: [MetricConfig!]!
|
||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||
}
|
||||
|
||||
@@ -56,9 +85,24 @@ type SubCluster {
|
||||
flopRateSimd: MetricValue!
|
||||
memoryBandwidth: MetricValue!
|
||||
topology: Topology!
|
||||
metricConfig: [MetricConfig!]!
|
||||
footprint: [String!]!
|
||||
}
|
||||
|
||||
type FootprintValue {
|
||||
name: String!
|
||||
stat: String!
|
||||
value: Float!
|
||||
}
|
||||
|
||||
type EnergyFootprintValue {
|
||||
hardware: String!
|
||||
metric: String!
|
||||
value: Float!
|
||||
}
|
||||
|
||||
type MetricValue {
|
||||
name: String
|
||||
unit: Unit!
|
||||
value: Float!
|
||||
}
|
||||
@@ -97,6 +141,7 @@ type MetricConfig {
|
||||
normal: Float
|
||||
caution: Float!
|
||||
alert: Float!
|
||||
lowerIsBetter: Boolean
|
||||
subClusters: [SubClusterConfig!]!
|
||||
}
|
||||
|
||||
@@ -104,6 +149,7 @@ type Tag {
|
||||
id: ID!
|
||||
type: String!
|
||||
name: String!
|
||||
scope: String!
|
||||
}
|
||||
|
||||
type Resource {
|
||||
@@ -119,6 +165,13 @@ type JobMetricWithName {
|
||||
metric: JobMetric!
|
||||
}
|
||||
|
||||
type ClusterMetricWithName {
|
||||
name: String!
|
||||
unit: Unit
|
||||
timestep: Int!
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type JobMetric {
|
||||
unit: Unit
|
||||
timestep: Int!
|
||||
@@ -133,6 +186,43 @@ type Series {
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type StatsSeries {
|
||||
mean: [NullableFloat!]!
|
||||
median: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type NamedStatsWithScope {
|
||||
name: String!
|
||||
scope: MetricScope!
|
||||
stats: [ScopedStats!]!
|
||||
}
|
||||
|
||||
type ScopedStats {
|
||||
hostname: String!
|
||||
id: String
|
||||
data: MetricStatistics!
|
||||
}
|
||||
|
||||
type JobStats {
|
||||
id: Int!
|
||||
jobId: String!
|
||||
startTime: Int!
|
||||
duration: Int!
|
||||
cluster: String!
|
||||
subCluster: String!
|
||||
numNodes: Int!
|
||||
numHWThreads: Int
|
||||
numAccelerators: Int
|
||||
stats: [NamedStats!]!
|
||||
}
|
||||
|
||||
type NamedStats {
|
||||
name: String!
|
||||
data: MetricStatistics!
|
||||
}
|
||||
|
||||
type Unit {
|
||||
base: String!
|
||||
prefix: String
|
||||
@@ -144,31 +234,74 @@ type MetricStatistics {
|
||||
max: Float!
|
||||
}
|
||||
|
||||
type StatsSeries {
|
||||
mean: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type MetricFootprints {
|
||||
metric: String!
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type Footprints {
|
||||
nodehours: [NullableFloat!]!
|
||||
timeWeights: TimeWeights!
|
||||
metrics: [MetricFootprints!]!
|
||||
}
|
||||
|
||||
enum Aggregate { USER, PROJECT, CLUSTER }
|
||||
enum Weights { NODE_COUNT, NODE_HOURS }
|
||||
type TimeWeights {
|
||||
nodeHours: [NullableFloat!]!
|
||||
accHours: [NullableFloat!]!
|
||||
coreHours: [NullableFloat!]!
|
||||
}
|
||||
|
||||
enum Aggregate {
|
||||
USER
|
||||
PROJECT
|
||||
CLUSTER
|
||||
SUBCLUSTER
|
||||
}
|
||||
enum SortByAggregate {
|
||||
TOTALWALLTIME
|
||||
TOTALJOBS
|
||||
TOTALUSERS
|
||||
TOTALNODES
|
||||
TOTALNODEHOURS
|
||||
TOTALCORES
|
||||
TOTALCOREHOURS
|
||||
TOTALACCS
|
||||
TOTALACCHOURS
|
||||
}
|
||||
|
||||
type NodeMetrics {
|
||||
host: String!
|
||||
state: String!
|
||||
subCluster: String!
|
||||
metrics: [JobMetricWithName!]!
|
||||
}
|
||||
|
||||
type ClusterMetrics {
|
||||
nodeCount: Int!
|
||||
metrics: [ClusterMetricWithName!]!
|
||||
}
|
||||
|
||||
type NodesResultList {
|
||||
items: [NodeMetrics!]!
|
||||
offset: Int
|
||||
limit: Int
|
||||
count: Int
|
||||
totalNodes: Int
|
||||
hasNextPage: Boolean
|
||||
}
|
||||
|
||||
type ClusterSupport {
|
||||
cluster: String!
|
||||
subClusters: [String!]!
|
||||
}
|
||||
|
||||
type GlobalMetricListItem {
|
||||
name: String!
|
||||
unit: Unit!
|
||||
scope: MetricScope!
|
||||
footprint: String
|
||||
availability: [ClusterSupport!]!
|
||||
}
|
||||
|
||||
type Count {
|
||||
name: String!
|
||||
count: Int!
|
||||
@@ -180,48 +313,143 @@ type User {
|
||||
email: String!
|
||||
}
|
||||
|
||||
input MetricStatItem {
|
||||
metricName: String!
|
||||
range: FloatRange!
|
||||
}
|
||||
|
||||
type Query {
|
||||
clusters: [Cluster!]! # List of all clusters
|
||||
tags: [Tag!]! # List of all tags
|
||||
globalMetrics: [GlobalMetricListItem!]!
|
||||
|
||||
user(username: String!): User
|
||||
allocatedNodes(cluster: String!): [Count!]!
|
||||
|
||||
## Node Queries New
|
||||
node(id: ID!): Node
|
||||
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
|
||||
nodesWithMeta(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
|
||||
nodeStates(filter: [NodeFilter!]): [NodeStates!]!
|
||||
nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!
|
||||
|
||||
job(id: ID!): Job
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||
jobMetrics(
|
||||
id: ID!
|
||||
metrics: [String!]
|
||||
scopes: [MetricScope!]
|
||||
resolution: Int
|
||||
): [JobMetricWithName!]!
|
||||
|
||||
jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
|
||||
|
||||
scopedJobStats(
|
||||
id: ID!
|
||||
metrics: [String!]
|
||||
scopes: [MetricScope!]
|
||||
): [NamedStatsWithScope!]!
|
||||
|
||||
jobs(
|
||||
filter: [JobFilter!]
|
||||
page: PageRequest
|
||||
order: OrderByInput
|
||||
): JobResultList!
|
||||
|
||||
jobsStatistics(
|
||||
filter: [JobFilter!]
|
||||
metrics: [String!]
|
||||
page: PageRequest
|
||||
sortBy: SortByAggregate
|
||||
groupBy: Aggregate
|
||||
numDurationBins: String
|
||||
numMetricBins: Int
|
||||
): [JobsStatistics!]!
|
||||
|
||||
jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]!
|
||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||
|
||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
|
||||
jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, weight: Weights, limit: Int): [Count!]!
|
||||
rooflineHeatmap(
|
||||
filter: [JobFilter!]!
|
||||
rows: Int!
|
||||
cols: Int!
|
||||
minX: Float!
|
||||
minY: Float!
|
||||
maxX: Float!
|
||||
maxY: Float!
|
||||
): [[Float!]!]!
|
||||
|
||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
||||
nodeMetrics(
|
||||
cluster: String!
|
||||
nodes: [String!]
|
||||
scopes: [MetricScope!]
|
||||
metrics: [String!]
|
||||
from: Time!
|
||||
to: Time!
|
||||
): [NodeMetrics!]!
|
||||
|
||||
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
||||
nodeMetricsList(
|
||||
cluster: String!
|
||||
subCluster: String!
|
||||
stateFilter: String!
|
||||
nodeFilter: String!
|
||||
scopes: [MetricScope!]
|
||||
metrics: [String!]
|
||||
from: Time!
|
||||
to: Time!
|
||||
page: PageRequest
|
||||
resolution: Int
|
||||
): NodesResultList!
|
||||
|
||||
clusterMetrics(
|
||||
cluster: String!
|
||||
metrics: [String!]
|
||||
from: Time!
|
||||
to: Time!
|
||||
): ClusterMetrics!
|
||||
}
|
||||
|
||||
type Mutation {
|
||||
createTag(type: String!, name: String!): Tag!
|
||||
createTag(type: String!, name: String!, scope: String!): Tag!
|
||||
deleteTag(id: ID!): ID!
|
||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
removeTagFromList(tagIds: [ID!]!): [Int!]!
|
||||
|
||||
updateConfiguration(name: String!, value: String!): String
|
||||
}
|
||||
|
||||
type IntRangeOutput { from: Int!, to: Int! }
|
||||
type TimeRangeOutput { from: Time!, to: Time! }
|
||||
type IntRangeOutput {
|
||||
from: Int!
|
||||
to: Int!
|
||||
}
|
||||
type TimeRangeOutput {
|
||||
range: String
|
||||
from: Time!
|
||||
to: Time!
|
||||
}
|
||||
|
||||
input NodeFilter {
|
||||
hostname: StringInput
|
||||
cluster: StringInput
|
||||
subCluster: StringInput
|
||||
schedulerState: SchedulerState
|
||||
healthState: MonitoringState
|
||||
timeStart: Int
|
||||
}
|
||||
|
||||
input JobFilter {
|
||||
tags: [ID!]
|
||||
dbId: [ID!]
|
||||
jobId: StringInput
|
||||
arrayJobId: Int
|
||||
user: StringInput
|
||||
project: StringInput
|
||||
jobName: StringInput
|
||||
cluster: StringInput
|
||||
subCluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
energy: FloatRange
|
||||
|
||||
minRunningFor: Int
|
||||
|
||||
@@ -231,20 +459,15 @@ input JobFilter {
|
||||
|
||||
startTime: TimeRange
|
||||
state: [JobState!]
|
||||
flopsAnyAvg: FloatRange
|
||||
memBwAvg: FloatRange
|
||||
loadAvg: FloatRange
|
||||
memUsedMax: FloatRange
|
||||
|
||||
exclusive: Int
|
||||
sharedNode: StringInput
|
||||
selfJobId: StringInput
|
||||
selfStartTime: Time
|
||||
selfDuration: Int
|
||||
metricStats: [MetricStatItem!]
|
||||
shared: String
|
||||
schedule: String
|
||||
node: StringInput
|
||||
}
|
||||
|
||||
input OrderByInput {
|
||||
field: String!
|
||||
type: String!
|
||||
order: SortDirectionEnum! = ASC
|
||||
}
|
||||
|
||||
@@ -262,18 +485,36 @@ input StringInput {
|
||||
in: [String!]
|
||||
}
|
||||
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
input FloatRange { from: Float!, to: Float! }
|
||||
input TimeRange { from: Time, to: Time }
|
||||
input IntRange {
|
||||
from: Int!
|
||||
to: Int!
|
||||
}
|
||||
input TimeRange {
|
||||
range: String
|
||||
from: Time
|
||||
to: Time
|
||||
}
|
||||
|
||||
input FloatRange {
|
||||
from: Float!
|
||||
to: Float!
|
||||
}
|
||||
|
||||
type NodeStateResultList {
|
||||
items: [Node!]!
|
||||
count: Int
|
||||
}
|
||||
|
||||
type JobResultList {
|
||||
items: [Job!]!
|
||||
offset: Int
|
||||
limit: Int
|
||||
count: Int
|
||||
hasNextPage: Boolean
|
||||
}
|
||||
|
||||
type JobLinkResultList {
|
||||
listQuery: String
|
||||
items: [JobLink!]!
|
||||
count: Int
|
||||
}
|
||||
@@ -283,18 +524,39 @@ type HistoPoint {
|
||||
value: Int!
|
||||
}
|
||||
|
||||
type MetricHistoPoints {
|
||||
metric: String!
|
||||
unit: String!
|
||||
stat: String
|
||||
data: [MetricHistoPoint!]
|
||||
}
|
||||
|
||||
type MetricHistoPoint {
|
||||
bin: Int
|
||||
count: Int!
|
||||
min: Int
|
||||
max: Int
|
||||
}
|
||||
|
||||
type JobsStatistics {
|
||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster/subcluster
|
||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||
totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs)
|
||||
totalJobs: Int! # Number of jobs
|
||||
runningJobs: Int! # Number of running jobs
|
||||
shortJobs: Int! # Number of jobs with a duration of less than duration
|
||||
shortJobs: Int! # Number of jobs with a duration of less than config'd ShortRunningJobsDuration
|
||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||
totalNodes: Int! # Sum of the nodes of all matched jobs
|
||||
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
||||
totalCores: Int! # Sum of the cores of all matched jobs
|
||||
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
||||
totalAccs: Int! # Sum of the accs of all matched jobs
|
||||
totalAccHours: Int! # Sum of the gpu hours of all matched jobs
|
||||
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
||||
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
||||
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
||||
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
|
||||
}
|
||||
|
||||
input PageRequest {
|
||||
|
||||
2272
api/swagger.json
2272
api/swagger.json
File diff suppressed because it is too large
Load Diff
1569
api/swagger.yaml
1569
api/swagger.yaml
File diff suppressed because it is too large
Load Diff
@@ -1,17 +0,0 @@
|
||||
CC_USER=clustercockpit
|
||||
|
||||
CC_GROUP=clustercockpit
|
||||
|
||||
CC_HOME=/tmp
|
||||
|
||||
LOG_DIR=/var/log
|
||||
|
||||
DATA_DIR=/var/run/cc-backend
|
||||
|
||||
MAX_OPEN_FILES=10000
|
||||
|
||||
CONF_DIR=/etc/cc-backend
|
||||
|
||||
CONF_FILE=/etc/cc-backend/cc-backend.json
|
||||
|
||||
RESTART_ON_UPGRADE=true
|
||||
@@ -1,12 +0,0 @@
|
||||
Package: cc-backend
|
||||
Version: {VERSION}
|
||||
Installed-Size: {INSTALLED_SIZE}
|
||||
Architecture: {ARCH}
|
||||
Maintainer: thomas.gruber@fau.de
|
||||
Depends: libc6 (>= 2.2.1)
|
||||
Build-Depends: debhelper-compat (= 13), git, golang-go, npm, yarn
|
||||
Description: ClusterCockpit backend and web frontend
|
||||
Homepage: https://github.com/ClusterCockpit/cc-backend
|
||||
Source: cc-backend
|
||||
Rules-Requires-Root: no
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
[Unit]
|
||||
Description=ClusterCockpit backend and web frontend (cc-backend)
|
||||
Documentation=https://github.com/ClusterCockpit/cc-backend
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
|
||||
[Service]
|
||||
EnvironmentFile=/etc/default/cc-backend
|
||||
Type=simple
|
||||
User=clustercockpit
|
||||
Group=clustercockpit
|
||||
Restart=on-failure
|
||||
TimeoutStopSec=100
|
||||
LimitNOFILE=infinity
|
||||
ExecStart=/usr/bin/cc-backend --config ${CONF_FILE}
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -1,70 +0,0 @@
|
||||
Name: cc-backend
|
||||
Version: %{VERS}
|
||||
Release: 1%{?dist}
|
||||
Summary: ClusterCockpit backend and web frontend
|
||||
|
||||
License: MIT
|
||||
Source0: %{name}-%{version}.tar.gz
|
||||
|
||||
#BuildRequires: go-toolset
|
||||
#BuildRequires: systemd-rpm-macros
|
||||
#BuildRequires: npm
|
||||
|
||||
Provides: %{name} = %{version}
|
||||
|
||||
%description
|
||||
ClusterCockpit backend and web frontend
|
||||
|
||||
%global debug_package %{nil}
|
||||
|
||||
%prep
|
||||
%autosetup
|
||||
|
||||
|
||||
%build
|
||||
#CURRENT_TIME=$(date +%Y-%m-%d:T%H:%M:\%S)
|
||||
#LD_FLAGS="-s -X main.buildTime=${CURRENT_TIME} -X main.version=%{VERS}"
|
||||
mkdir ./var
|
||||
touch ./var/job.db
|
||||
cd web/frontend && yarn install && yarn build && cd -
|
||||
go build -ldflags="-s -X main.version=%{VERS}" ./cmd/cc-backend
|
||||
|
||||
|
||||
%install
|
||||
# Install cc-backend
|
||||
#make PREFIX=%{buildroot} install
|
||||
install -Dpm 755 cc-backend %{buildroot}/%{_bindir}/%{name}
|
||||
install -Dpm 0600 configs/config.json %{buildroot}%{_sysconfdir}/%{name}/%{name}.json
|
||||
# Integrate into system
|
||||
install -Dpm 0644 build/package/%{name}.service %{buildroot}%{_unitdir}/%{name}.service
|
||||
install -Dpm 0600 build/package/%{name}.config %{buildroot}%{_sysconfdir}/default/%{name}
|
||||
install -Dpm 0644 build/package/%{name}.sysusers %{buildroot}%{_sysusersdir}/%{name}.conf
|
||||
|
||||
|
||||
%check
|
||||
# go test should be here... :)
|
||||
|
||||
%pre
|
||||
%sysusers_create_package scripts/%{name}.sysusers
|
||||
|
||||
%post
|
||||
%systemd_post %{name}.service
|
||||
|
||||
%preun
|
||||
%systemd_preun %{name}.service
|
||||
|
||||
%files
|
||||
# Binary
|
||||
%attr(-,clustercockpit,clustercockpit) %{_bindir}/%{name}
|
||||
# Config
|
||||
%dir %{_sysconfdir}/%{name}
|
||||
%attr(0600,clustercockpit,clustercockpit) %config(noreplace) %{_sysconfdir}/%{name}/%{name}.json
|
||||
# Systemd
|
||||
%{_unitdir}/%{name}.service
|
||||
%{_sysconfdir}/default/%{name}
|
||||
%{_sysusersdir}/%{name}.conf
|
||||
|
||||
%changelog
|
||||
* Mon Mar 07 2022 Thomas Gruber - 0.1
|
||||
- Initial metric store implementation
|
||||
|
||||
@@ -1,2 +0,0 @@
|
||||
#Type Name ID GECOS Home directory Shell
|
||||
u clustercockpit - "User for ClusterCockpit" /run/cc-backend /sbin/nologin
|
||||
38
cmd/cc-backend/cli.go
Normal file
38
cmd/cc-backend/cli.go
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// This file defines all command-line flags and their default values.
|
||||
package main
|
||||
|
||||
import "flag"
|
||||
|
||||
var (
|
||||
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB,
|
||||
flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags bool
|
||||
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
)
|
||||
|
||||
func cliInit() {
|
||||
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize sqlite database file, config.json and .env")
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
|
||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
||||
flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
|
||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: <username>:[admin,support,manager,api,user]:<password>")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove a existing user. Argument format: <username>")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug, info , warn (default), err, crit]`")
|
||||
flag.Parse()
|
||||
}
|
||||
94
cmd/cc-backend/init.go
Normal file
94
cmd/cc-backend/init.go
Normal file
@@ -0,0 +1,94 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// This file contains bootstrap logic for initializing the environment,
|
||||
// creating default configuration files, and setting up the database.
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/util"
|
||||
)
|
||||
|
||||
const envString = `
|
||||
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||
# You can generate your own keypair using the gen-keypair tool
|
||||
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||
|
||||
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||
`
|
||||
|
||||
const configString = `
|
||||
{
|
||||
"main": {
|
||||
"addr": "127.0.0.1:8080",
|
||||
"short-running-jobs-duration": 300,
|
||||
"resampling": {
|
||||
"minimum-points": 600,
|
||||
"trigger": 300,
|
||||
"resolutions": [
|
||||
240,
|
||||
60
|
||||
]
|
||||
},
|
||||
"api-allowed-ips": [
|
||||
"*"
|
||||
],
|
||||
"emission-constant": 317
|
||||
},
|
||||
"cron": {
|
||||
"commit-job-worker": "1m",
|
||||
"duration-worker": "5m",
|
||||
"footprint-worker": "10m"
|
||||
},
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"auth": {
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
}
|
||||
}
|
||||
}
|
||||
`
|
||||
|
||||
func initEnv() {
|
||||
if util.CheckFileExists("var") {
|
||||
cclog.Exit("Directory ./var already exists. Cautiously exiting application initialization.")
|
||||
}
|
||||
|
||||
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
|
||||
cclog.Abortf("Could not write default ./config.json with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
|
||||
cclog.Abortf("Could not write default ./.env file with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
if err := os.Mkdir("var", 0o777); err != nil {
|
||||
cclog.Abortf("Could not create default ./var folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
|
||||
err := repository.MigrateDB("./var/job.db")
|
||||
if err != nil {
|
||||
cclog.Abortf("Could not initialize default SQLite database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
if err := os.Mkdir("var/job-archive", 0o777); err != nil {
|
||||
cclog.Abortf("Could not create default ./var/job-archive folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
archiveCfg := "{\"kind\": \"file\",\"path\": \"./var/job-archive\"}"
|
||||
if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {
|
||||
cclog.Abortf("Could not initialize job-archive, exited.\nError: %s\n", err.Error())
|
||||
}
|
||||
}
|
||||
@@ -1,528 +1,518 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// It orchestrates initialization of all subsystems including configuration,
|
||||
// database, authentication, and the HTTP server.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql/handler"
|
||||
"github.com/99designs/gqlgen/graphql/playground"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/runtimeEnv"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/tagger"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/taskmanager"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
"github.com/go-co-op/gocron"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/runtime"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/util"
|
||||
"github.com/google/gops/agent"
|
||||
"github.com/gorilla/handlers"
|
||||
"github.com/gorilla/mux"
|
||||
httpSwagger "github.com/swaggo/http-swagger"
|
||||
"github.com/joho/godotenv"
|
||||
|
||||
_ "github.com/go-sql-driver/mysql"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
const logoString = `
|
||||
____ _ _ ____ _ _ _
|
||||
_____ _ _ ____ _ _ _
|
||||
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
|
||||
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
|
||||
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
|
||||
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
||||
\_____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|
||||
|_|
|
||||
`
|
||||
|
||||
// Environment variable names
|
||||
const (
|
||||
envGOGC = "GOGC"
|
||||
)
|
||||
|
||||
// Default configurations
|
||||
const (
|
||||
defaultArchiveConfig = `{"kind":"file","path":"./var/job-archive"}`
|
||||
)
|
||||
|
||||
var (
|
||||
buildTime string
|
||||
hash string
|
||||
date string
|
||||
commit string
|
||||
version string
|
||||
)
|
||||
|
||||
func main() {
|
||||
var flagReinitDB, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagDev, flagVersion, flagLogDateTime bool
|
||||
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
|
||||
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
|
||||
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
|
||||
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
|
||||
flag.Parse()
|
||||
|
||||
if flagVersion {
|
||||
func printVersion() {
|
||||
fmt.Print(logoString)
|
||||
fmt.Printf("Version:\t%s\n", version)
|
||||
fmt.Printf("Git hash:\t%s\n", hash)
|
||||
fmt.Printf("Build time:\t%s\n", buildTime)
|
||||
fmt.Printf("Git hash:\t%s\n", commit)
|
||||
fmt.Printf("Build time:\t%s\n", date)
|
||||
fmt.Printf("SQL db version:\t%d\n", repository.Version)
|
||||
fmt.Printf("Job archive version:\t%d\n", archive.Version)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
// Apply config flags for pkg/log
|
||||
log.Init(flagLogLevel, flagLogDateTime)
|
||||
func initGops() error {
|
||||
if !flagGops {
|
||||
return nil
|
||||
}
|
||||
|
||||
// See https://github.com/google/gops (Runtime overhead is almost zero)
|
||||
if flagGops {
|
||||
if err := agent.Listen(agent.Options{}); err != nil {
|
||||
log.Fatalf("gops/agent.Listen failed: %s", err.Error())
|
||||
return fmt.Errorf("starting gops agent: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := runtimeEnv.LoadEnv("./.env"); err != nil && !os.IsNotExist(err) {
|
||||
log.Fatalf("parsing './.env' file failed: %s", err.Error())
|
||||
func loadEnvironment() error {
|
||||
if err := godotenv.Load(); err != nil {
|
||||
return fmt.Errorf("loading .env file: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Initialize sub-modules and handle command line flags.
|
||||
// The order here is important!
|
||||
config.Init(flagConfigFile)
|
||||
func initConfiguration() error {
|
||||
ccconf.Init(flagConfigFile)
|
||||
|
||||
// As a special case for `db`, allow using an environment variable instead of the value
|
||||
// stored in the config. This can be done for people having security concerns about storing
|
||||
// the password for their mysql database in config.json.
|
||||
if strings.HasPrefix(config.Keys.DB, "env:") {
|
||||
envvar := strings.TrimPrefix(config.Keys.DB, "env:")
|
||||
config.Keys.DB = os.Getenv(envvar)
|
||||
cfg := ccconf.GetPackageConfig("main")
|
||||
if cfg == nil {
|
||||
return fmt.Errorf("main configuration must be present")
|
||||
}
|
||||
|
||||
config.Init(cfg)
|
||||
return nil
|
||||
}
|
||||
|
||||
func initDatabase() error {
|
||||
repository.Connect(config.Keys.DB)
|
||||
return nil
|
||||
}
|
||||
|
||||
func handleDatabaseCommands() error {
|
||||
if flagMigrateDB {
|
||||
err := repository.MigrateDB(config.Keys.DBDriver, config.Keys.DB)
|
||||
err := repository.MigrateDB(config.Keys.DB)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return fmt.Errorf("migrating database to version %d: %w", repository.Version, err)
|
||||
}
|
||||
os.Exit(0)
|
||||
cclog.Exitf("MigrateDB Success: Migrated SQLite database at '%s' to version %d.\n",
|
||||
config.Keys.DB, repository.Version)
|
||||
}
|
||||
|
||||
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
|
||||
db := repository.GetConnection()
|
||||
if flagRevertDB {
|
||||
err := repository.RevertDB(config.Keys.DB)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reverting database to version %d: %w", repository.Version-1, err)
|
||||
}
|
||||
cclog.Exitf("RevertDB Success: Reverted SQLite database at '%s' to version %d.\n",
|
||||
config.Keys.DB, repository.Version-1)
|
||||
}
|
||||
|
||||
if flagForceDB {
|
||||
err := repository.ForceDB(config.Keys.DB)
|
||||
if err != nil {
|
||||
return fmt.Errorf("forcing database to version %d: %w", repository.Version, err)
|
||||
}
|
||||
cclog.Exitf("ForceDB Success: Forced SQLite database at '%s' to version %d.\n",
|
||||
config.Keys.DB, repository.Version)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func handleUserCommands() error {
|
||||
if config.Keys.DisableAuthentication && (flagNewUser != "" || flagDelUser != "") {
|
||||
return fmt.Errorf("--add-user and --del-user can only be used if authentication is enabled")
|
||||
}
|
||||
|
||||
var authentication *auth.Authentication
|
||||
if !config.Keys.DisableAuthentication {
|
||||
var err error
|
||||
if authentication, err = auth.Init(db.DB, map[string]interface{}{
|
||||
"ldap": config.Keys.LdapConfig,
|
||||
"jwt": config.Keys.JwtConfig,
|
||||
}); err != nil {
|
||||
log.Fatalf("auth initialization failed: %v", err)
|
||||
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
|
||||
auth.Init(&cfg)
|
||||
} else {
|
||||
cclog.Warn("Authentication disabled due to missing configuration")
|
||||
auth.Init(nil)
|
||||
}
|
||||
|
||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
|
||||
authentication.SessionMaxAge = d
|
||||
}
|
||||
// Check for default security keys
|
||||
checkDefaultSecurityKeys()
|
||||
|
||||
if flagNewUser != "" {
|
||||
parts := strings.SplitN(flagNewUser, ":", 3)
|
||||
if len(parts) != 3 || len(parts[0]) == 0 {
|
||||
log.Fatal("invalid argument format for user creation")
|
||||
if err := addUser(flagNewUser); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := authentication.AddUser(&auth.User{
|
||||
Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","),
|
||||
}); err != nil {
|
||||
log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err)
|
||||
}
|
||||
}
|
||||
if flagDelUser != "" {
|
||||
if err := authentication.DelUser(flagDelUser); err != nil {
|
||||
log.Fatalf("deleting user failed: %v", err)
|
||||
if err := delUser(flagDelUser); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
if flagSyncLDAP {
|
||||
if authentication.LdapAuth == nil {
|
||||
log.Fatal("cannot sync: LDAP authentication is not configured")
|
||||
if err := syncLDAP(authHandle); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := authentication.LdapAuth.Sync(); err != nil {
|
||||
log.Fatalf("LDAP sync failed: %v", err)
|
||||
}
|
||||
log.Info("LDAP sync successfull")
|
||||
}
|
||||
|
||||
if flagGenJWT != "" {
|
||||
user, err := authentication.GetUser(flagGenJWT)
|
||||
if err := generateJWT(authHandle, flagGenJWT); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkDefaultSecurityKeys warns if default JWT keys are detected
|
||||
func checkDefaultSecurityKeys() {
|
||||
// Default JWT public key from init.go
|
||||
defaultJWTPublic := "kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
|
||||
if os.Getenv("JWT_PUBLIC_KEY") == defaultJWTPublic {
|
||||
cclog.Warn("Using default JWT keys - not recommended for production environments")
|
||||
}
|
||||
}
|
||||
|
||||
func addUser(userSpec string) error {
|
||||
parts := strings.SplitN(userSpec, ":", 3)
|
||||
if len(parts) != 3 || len(parts[0]) == 0 {
|
||||
return fmt.Errorf("invalid user format, want: <username>:[admin,support,manager,api,user]:<password>, have: %s", userSpec)
|
||||
}
|
||||
|
||||
ur := repository.GetUserRepository()
|
||||
if err := ur.AddUser(&schema.User{
|
||||
Username: parts[0],
|
||||
Projects: make([]string, 0),
|
||||
Password: parts[2],
|
||||
Roles: strings.Split(parts[1], ","),
|
||||
}); err != nil {
|
||||
return fmt.Errorf("adding user '%s' with roles '%s': %w", parts[0], parts[1], err)
|
||||
}
|
||||
|
||||
cclog.Infof("Add User: Added new user '%s' with roles '%s'", parts[0], parts[1])
|
||||
return nil
|
||||
}
|
||||
|
||||
func delUser(username string) error {
|
||||
ur := repository.GetUserRepository()
|
||||
if err := ur.DelUser(username); err != nil {
|
||||
return fmt.Errorf("deleting user '%s': %w", username, err)
|
||||
}
|
||||
cclog.Infof("Delete User: Deleted user '%s' from DB", username)
|
||||
return nil
|
||||
}
|
||||
|
||||
func syncLDAP(authHandle *auth.Authentication) error {
|
||||
if authHandle.LdapAuth == nil {
|
||||
return fmt.Errorf("LDAP authentication is not configured")
|
||||
}
|
||||
|
||||
if err := authHandle.LdapAuth.Sync(); err != nil {
|
||||
return fmt.Errorf("synchronizing LDAP: %w", err)
|
||||
}
|
||||
|
||||
cclog.Print("Sync LDAP: LDAP synchronization successfull.")
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateJWT(authHandle *auth.Authentication, username string) error {
|
||||
ur := repository.GetUserRepository()
|
||||
user, err := ur.GetUser(username)
|
||||
if err != nil {
|
||||
log.Fatalf("could not get user from JWT: %v", err)
|
||||
return fmt.Errorf("getting user '%s': %w", username, err)
|
||||
}
|
||||
|
||||
if !user.HasRole(auth.RoleApi) {
|
||||
log.Warnf("user '%s' does not have the API role", user.Username)
|
||||
if !user.HasRole(schema.RoleAPI) {
|
||||
cclog.Warnf("JWT: User '%s' does not have the role 'api'. REST API endpoints will return error!\n", user.Username)
|
||||
}
|
||||
|
||||
jwt, err := authentication.JwtAuth.ProvideJWT(user)
|
||||
jwt, err := authHandle.JwtAuth.ProvideJWT(user)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
|
||||
return fmt.Errorf("generating JWT for user '%s': %w", user.Username, err)
|
||||
}
|
||||
|
||||
fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
|
||||
}
|
||||
} else if flagNewUser != "" || flagDelUser != "" {
|
||||
log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
|
||||
cclog.Printf("JWT: Successfully generated JWT for user '%s': %s\n", user.Username, jwt)
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := archive.Init(config.Keys.Archive, config.Keys.DisableArchive); err != nil {
|
||||
log.Fatalf("failed to initialize archive: %s", err.Error())
|
||||
}
|
||||
|
||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
||||
log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
|
||||
func initSubsystems() error {
|
||||
// Initialize nats client
|
||||
natsConfig := ccconf.GetPackageConfig("nats")
|
||||
if err := nats.Init(natsConfig); err != nil {
|
||||
cclog.Warnf("initializing (optional) nats client: %s", err.Error())
|
||||
}
|
||||
nats.Connect()
|
||||
|
||||
// Initialize job archive
|
||||
archiveCfg := ccconf.GetPackageConfig("archive")
|
||||
if archiveCfg == nil {
|
||||
cclog.Debug("Archive configuration not found, using default archive configuration")
|
||||
archiveCfg = json.RawMessage(defaultArchiveConfig)
|
||||
}
|
||||
if err := archive.Init(archiveCfg); err != nil {
|
||||
return fmt.Errorf("initializing archive: %w", err)
|
||||
}
|
||||
|
||||
// Handle database re-initialization
|
||||
if flagReinitDB {
|
||||
if err := importer.InitDB(); err != nil {
|
||||
log.Fatalf("failed to re-initialize repository DB: %s", err.Error())
|
||||
return fmt.Errorf("re-initializing repository DB: %w", err)
|
||||
}
|
||||
cclog.Print("Init DB: Successfully re-initialized repository DB.")
|
||||
}
|
||||
|
||||
// Handle job import
|
||||
if flagImportJob != "" {
|
||||
if err := importer.HandleImportFlag(flagImportJob); err != nil {
|
||||
log.Fatalf("job import failed: %s", err.Error())
|
||||
return fmt.Errorf("importing job: %w", err)
|
||||
}
|
||||
cclog.Infof("Import Job: Imported Job '%s' into DB", flagImportJob)
|
||||
}
|
||||
|
||||
// Initialize taggers
|
||||
if config.Keys.EnableJobTaggers {
|
||||
tagger.Init()
|
||||
}
|
||||
|
||||
// Apply tags if requested
|
||||
if flagApplyTags {
|
||||
tagger.Init()
|
||||
|
||||
if err := tagger.RunTaggers(); err != nil {
|
||||
return fmt.Errorf("running job taggers: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if !flagServer {
|
||||
return
|
||||
return nil
|
||||
}
|
||||
|
||||
// Setup the http.Handler/Router used by the server
|
||||
jobRepo := repository.GetJobRepository()
|
||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
||||
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
if os.Getenv("DEBUG") != "1" {
|
||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
|
||||
switch e := err.(type) {
|
||||
case string:
|
||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||
case error:
|
||||
return fmt.Errorf("MAIN > Panic caused by: %w", e)
|
||||
}
|
||||
|
||||
return errors.New("MAIN > Internal server error (panic)")
|
||||
})
|
||||
}
|
||||
|
||||
api := &api.RestApi{
|
||||
JobRepository: jobRepo,
|
||||
Resolver: resolver,
|
||||
MachineStateDir: config.Keys.MachineStateDir,
|
||||
Authentication: authentication,
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
buildInfo := web.Build{Version: version, Hash: hash, Buildtime: buildTime}
|
||||
|
||||
r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo})
|
||||
}).Methods(http.MethodGet)
|
||||
r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, r, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||
})
|
||||
r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, r, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||
})
|
||||
|
||||
// Some routes, such as /login or /query, should only be accessible to a user that is logged in.
|
||||
// Those should be mounted to this subrouter. If authentication is enabled, a middleware will prevent
|
||||
// any unauthenticated accesses.
|
||||
secured := r.PathPrefix("/").Subrouter()
|
||||
if !config.Keys.DisableAuthentication {
|
||||
r.Handle("/login", authentication.Login(
|
||||
// On success:
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect),
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
Error: err.Error(),
|
||||
Build: buildInfo,
|
||||
})
|
||||
})).Methods(http.MethodPost)
|
||||
|
||||
r.Handle("/logout", authentication.Logout(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{
|
||||
Title: "Bye - ClusterCockpit",
|
||||
Info: "Logout sucessful",
|
||||
Build: buildInfo,
|
||||
})
|
||||
}))).Methods(http.MethodPost)
|
||||
|
||||
secured.Use(func(next http.Handler) http.Handler {
|
||||
return authentication.Auth(
|
||||
// On success;
|
||||
next,
|
||||
|
||||
// On failure:
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{
|
||||
Title: "Authentication failed - ClusterCockpit",
|
||||
Error: err.Error(),
|
||||
Build: buildInfo,
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
if flagDev {
|
||||
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||
}
|
||||
secured.Handle("/query", graphQLEndpoint)
|
||||
|
||||
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||
routerConfig.HandleSearchBar(rw, r, api)
|
||||
})
|
||||
|
||||
// Mount all /monitoring/... and /api/... routes.
|
||||
routerConfig.SetupRoutes(secured, version, hash, buildTime)
|
||||
api.MountRoutes(secured)
|
||||
|
||||
if config.Keys.EmbedStaticFiles {
|
||||
r.PathPrefix("/").Handler(web.ServeFiles())
|
||||
} else {
|
||||
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||
}
|
||||
|
||||
r.Use(handlers.CompressHandler)
|
||||
r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||
r.Use(handlers.CORS(
|
||||
handlers.AllowCredentials(),
|
||||
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||
handlers.AllowedOrigins([]string{"*"})))
|
||||
handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||
log.Infof("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
} else {
|
||||
log.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
params.Request.Method, params.URL.RequestURI(),
|
||||
params.StatusCode, float32(params.Size)/1024,
|
||||
time.Since(params.TimeStamp).Milliseconds())
|
||||
}
|
||||
})
|
||||
|
||||
func runServer(ctx context.Context) error {
|
||||
var wg sync.WaitGroup
|
||||
server := http.Server{
|
||||
ReadTimeout: 10 * time.Second,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
Handler: handler,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
log.Fatalf("starting http listener failed: %v", err)
|
||||
}
|
||||
// Initialize metric store if configuration is provided
|
||||
haveMetricstore := false
|
||||
mscfg := ccconf.GetPackageConfig("metric-store")
|
||||
if mscfg != nil {
|
||||
metrics := metricstore.BuildMetricList()
|
||||
metricstore.Init(mscfg, metrics, &wg)
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
|
||||
go func() {
|
||||
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
|
||||
}()
|
||||
}
|
||||
|
||||
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
|
||||
if err != nil {
|
||||
log.Fatalf("loading X509 keypair failed: %v", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
CipherSuites: []uint16{
|
||||
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
},
|
||||
MinVersion: tls.VersionTLS12,
|
||||
PreferServerCipherSuites: true,
|
||||
})
|
||||
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
|
||||
// Inject repository as NodeProvider to break import cycle
|
||||
ms := metricstore.GetMemoryStore()
|
||||
jobRepo := repository.GetJobRepository()
|
||||
ms.SetNodeProvider(jobRepo)
|
||||
metricstore.MetricStoreHandle = &metricstore.InternalMetricStore{}
|
||||
haveMetricstore = true
|
||||
} else {
|
||||
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
|
||||
metricstore.MetricStoreHandle = nil
|
||||
cclog.Debug("missing internal metricstore configuration")
|
||||
}
|
||||
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actual http server can be started.
|
||||
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
log.Fatalf("error while preparing server start: %s", err.Error())
|
||||
// Initialize external metric stores if configuration is provided
|
||||
mscfg = ccconf.GetPackageConfig("metric-store-external")
|
||||
if mscfg != nil {
|
||||
err := metricdispatch.Init(mscfg)
|
||||
|
||||
if err != nil {
|
||||
cclog.Debugf("initializing metricdispatch: %v", err)
|
||||
} else {
|
||||
haveMetricstore = true
|
||||
}
|
||||
}
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
if err := server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatalf("starting server failed: %v", err)
|
||||
if !haveMetricstore {
|
||||
return fmt.Errorf("missing metricstore configuration")
|
||||
}
|
||||
}()
|
||||
|
||||
// Start archiver and task manager
|
||||
archiver.Start(repository.GetJobRepository(), ctx)
|
||||
taskmanager.Start(ccconf.GetPackageConfig("cron"), ccconf.GetPackageConfig("archive"))
|
||||
|
||||
// Initialize web UI
|
||||
cfg := ccconf.GetPackageConfig("ui")
|
||||
if err := web.Init(cfg); err != nil {
|
||||
return fmt.Errorf("initializing web UI: %w", err)
|
||||
}
|
||||
|
||||
// Initialize HTTP server
|
||||
srv, err := NewServer(version, commit, date)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating server: %w", err)
|
||||
}
|
||||
|
||||
// Channel to collect errors from server
|
||||
errChan := make(chan error, 1)
|
||||
|
||||
// Start HTTP server
|
||||
wg.Go(func() {
|
||||
if err := srv.Start(ctx); err != nil {
|
||||
errChan <- err
|
||||
}
|
||||
})
|
||||
|
||||
// Handle shutdown signals
|
||||
wg.Add(1)
|
||||
sigs := make(chan os.Signal, 1)
|
||||
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
<-sigs
|
||||
runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
|
||||
select {
|
||||
case <-sigs:
|
||||
cclog.Info("Shutdown signal received")
|
||||
case <-ctx.Done():
|
||||
}
|
||||
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
server.Shutdown(context.Background())
|
||||
|
||||
// Then, wait for any async archivings still pending...
|
||||
api.JobRepository.WaitForArchiving()
|
||||
runtime.SystemdNotify(false, "Shutting down ...")
|
||||
srv.Shutdown(ctx)
|
||||
util.FsWatcherShutdown()
|
||||
taskmanager.Shutdown()
|
||||
}()
|
||||
|
||||
s := gocron.NewScheduler(time.Local)
|
||||
|
||||
if config.Keys.StopJobsExceedingWalltime > 0 {
|
||||
log.Info("Register undead jobs service")
|
||||
|
||||
s.Every(1).Day().At("3:00").Do(func() {
|
||||
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
|
||||
}
|
||||
runtime.GC()
|
||||
})
|
||||
// Set GC percent if not configured
|
||||
if os.Getenv(envGOGC) == "" {
|
||||
debug.SetGCPercent(15)
|
||||
}
|
||||
runtime.SystemdNotify(true, "running")
|
||||
|
||||
var cfg struct {
|
||||
Compression int `json:"compression"`
|
||||
Retention schema.Retention `json:"retention"`
|
||||
}
|
||||
|
||||
cfg.Retention.IncludeDB = true
|
||||
|
||||
if err := json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
|
||||
log.Warn("Error while unmarshaling raw config json")
|
||||
}
|
||||
|
||||
switch cfg.Retention.Policy {
|
||||
case "delete":
|
||||
log.Info("Register retention delete service")
|
||||
|
||||
s.Every(1).Day().At("4:00").Do(func() {
|
||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().CleanUp(jobs)
|
||||
|
||||
if cfg.Retention.IncludeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %s", err.Error())
|
||||
}
|
||||
}
|
||||
})
|
||||
case "move":
|
||||
log.Info("Register retention move service")
|
||||
|
||||
s.Every(1).Day().At("4:00").Do(func() {
|
||||
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
|
||||
jobs, err := jobRepo.FindJobsBetween(0, startTime)
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %s", err.Error())
|
||||
}
|
||||
archive.GetHandle().Move(jobs, cfg.Retention.Location)
|
||||
|
||||
if cfg.Retention.IncludeDB {
|
||||
cnt, err := jobRepo.DeleteJobsBefore(startTime)
|
||||
if err != nil {
|
||||
log.Errorf("Error while deleting retention jobs from db: %v", err)
|
||||
} else {
|
||||
log.Infof("Retention: Removed %d jobs from db", cnt)
|
||||
}
|
||||
if err = jobRepo.Optimize(); err != nil {
|
||||
log.Errorf("Error occured in db optimization: %v", err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
if cfg.Compression > 0 {
|
||||
log.Info("Register compression service")
|
||||
|
||||
s.Every(1).Day().At("5:00").Do(func() {
|
||||
var jobs []*schema.Job
|
||||
|
||||
ar := archive.GetHandle()
|
||||
startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
|
||||
lastTime := ar.CompressLast(startTime)
|
||||
if startTime == lastTime {
|
||||
log.Info("Compression Service - Complete archive run")
|
||||
jobs, err = jobRepo.FindJobsBetween(0, startTime)
|
||||
|
||||
} else {
|
||||
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
log.Warnf("Error while looking for retention jobs: %v", err)
|
||||
}
|
||||
ar.Compress(jobs)
|
||||
})
|
||||
}
|
||||
|
||||
s.StartAsync()
|
||||
|
||||
if os.Getenv("GOGC") == "" {
|
||||
debug.SetGCPercent(25)
|
||||
}
|
||||
runtimeEnv.SystemdNotifiy(true, "running")
|
||||
waitDone := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
log.Print("Gracefull shutdown completed!")
|
||||
close(waitDone)
|
||||
}()
|
||||
|
||||
go func() {
|
||||
<-waitDone
|
||||
close(errChan)
|
||||
}()
|
||||
|
||||
// Wait for either:
|
||||
// 1. An error from server startup
|
||||
// 2. Completion of all goroutines (normal shutdown or crash)
|
||||
select {
|
||||
case err := <-errChan:
|
||||
// errChan will be closed when waitDone is closed, which happens
|
||||
// when all goroutines complete (either from normal shutdown or error)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
// Give the server 100ms to start and report any immediate startup errors
|
||||
// After that, just wait for normal shutdown completion
|
||||
select {
|
||||
case err := <-errChan:
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
case <-waitDone:
|
||||
// Normal shutdown completed
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Print("Graceful shutdown completed!")
|
||||
return nil
|
||||
}
|
||||
|
||||
func run() error {
|
||||
cliInit()
|
||||
|
||||
if flagVersion {
|
||||
printVersion()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Initialize logger
|
||||
cclog.Init(flagLogLevel, flagLogDateTime)
|
||||
|
||||
// Handle init flag
|
||||
if flagInit {
|
||||
initEnv()
|
||||
cclog.Exit("Successfully setup environment!\n" +
|
||||
"Please review config.json and .env and adjust it to your needs.\n" +
|
||||
"Add your job-archive at ./var/job-archive.")
|
||||
}
|
||||
|
||||
// Initialize gops agent
|
||||
if err := initGops(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Initialize subsystems in dependency order:
|
||||
// 1. Load environment variables from .env file (contains sensitive configuration)
|
||||
// 2. Load configuration from config.json (may reference environment variables)
|
||||
// 3. Handle database migration commands if requested
|
||||
// 4. Initialize database connection (requires config for connection string)
|
||||
// 5. Handle user commands if requested (requires database and authentication config)
|
||||
// 6. Initialize subsystems like archive and metrics (require config and database)
|
||||
|
||||
// Load environment and configuration
|
||||
if err := loadEnvironment(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := initConfiguration(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Handle database migration (migrate, revert, force)
|
||||
if err := handleDatabaseCommands(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Initialize database
|
||||
if err := initDatabase(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Handle user commands (add, delete, sync, JWT)
|
||||
if err := handleUserCommands(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Initialize subsystems (archive, metrics, taggers)
|
||||
if err := initSubsystems(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Exit if start server is not requested
|
||||
if !flagServer {
|
||||
cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
|
||||
}
|
||||
|
||||
// Run server with context
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
return runServer(ctx)
|
||||
}
|
||||
|
||||
func main() {
|
||||
if err := run(); err != nil {
|
||||
cclog.Error(err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
433
cmd/cc-backend/server.go
Normal file
433
cmd/cc-backend/server.go
Normal file
@@ -0,0 +1,433 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||
// This file contains HTTP server setup, routing configuration, and
|
||||
// authentication middleware integration.
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql/handler"
|
||||
"github.com/99designs/gqlgen/graphql/handler/transport"
|
||||
"github.com/99designs/gqlgen/graphql/playground"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/runtime"
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/go-chi/chi/v5/middleware"
|
||||
"github.com/go-chi/cors"
|
||||
httpSwagger "github.com/swaggo/http-swagger"
|
||||
)
|
||||
|
||||
var buildInfo web.Build
|
||||
|
||||
// Environment variable names
|
||||
const (
|
||||
envDebug = "DEBUG"
|
||||
)
|
||||
|
||||
// Server encapsulates the HTTP server state and dependencies
|
||||
type Server struct {
|
||||
router chi.Router
|
||||
server *http.Server
|
||||
restAPIHandle *api.RestAPI
|
||||
natsAPIHandle *api.NatsAPI
|
||||
}
|
||||
|
||||
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
json.NewEncoder(rw).Encode(map[string]string{
|
||||
"status": http.StatusText(http.StatusUnauthorized),
|
||||
"error": err.Error(),
|
||||
})
|
||||
}
|
||||
|
||||
// NewServer creates and initializes a new Server instance
|
||||
func NewServer(version, commit, buildDate string) (*Server, error) {
|
||||
buildInfo = web.Build{Version: version, Hash: commit, Buildtime: buildDate}
|
||||
|
||||
s := &Server{
|
||||
router: chi.NewRouter(),
|
||||
}
|
||||
|
||||
if err := s.init(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (s *Server) init() error {
|
||||
// Setup the http.Handler/Router used by the server
|
||||
graph.Init()
|
||||
resolver := graph.GetResolverInstance()
|
||||
graphQLServer := handler.New(
|
||||
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||
|
||||
graphQLServer.AddTransport(transport.POST{})
|
||||
|
||||
if os.Getenv(envDebug) != "1" {
|
||||
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||
graphQLServer.SetRecoverFunc(func(ctx context.Context, err any) error {
|
||||
switch e := err.(type) {
|
||||
case string:
|
||||
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||
case error:
|
||||
return fmt.Errorf("MAIN > Panic caused by: %s", e.Error())
|
||||
}
|
||||
|
||||
return errors.New("MAIN > Internal server error (panic)")
|
||||
})
|
||||
}
|
||||
|
||||
authHandle := auth.GetAuthInstance()
|
||||
|
||||
// Middleware must be defined before routes in chi
|
||||
s.router.Use(func(next http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
start := time.Now()
|
||||
ww := middleware.NewWrapResponseWriter(rw, r.ProtoMajor)
|
||||
next.ServeHTTP(ww, r)
|
||||
cclog.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||
r.Method, r.URL.RequestURI(),
|
||||
ww.Status(), float32(ww.BytesWritten())/1024,
|
||||
time.Since(start).Milliseconds())
|
||||
})
|
||||
})
|
||||
s.router.Use(middleware.Compress(5))
|
||||
s.router.Use(middleware.Recoverer)
|
||||
s.router.Use(cors.Handler(cors.Options{
|
||||
AllowCredentials: true,
|
||||
AllowedHeaders: []string{"X-Requested-With", "Content-Type", "Authorization", "Origin"},
|
||||
AllowedMethods: []string{"GET", "POST", "HEAD", "OPTIONS"},
|
||||
AllowedOrigins: []string{"*"},
|
||||
}))
|
||||
|
||||
s.restAPIHandle = api.New()
|
||||
|
||||
info := map[string]any{}
|
||||
info["hasOpenIDConnect"] = false
|
||||
|
||||
if auth.Keys.OpenIDConfig != nil {
|
||||
openIDConnect := auth.NewOIDC(authHandle)
|
||||
openIDConnect.RegisterEndpoints(s.router)
|
||||
info["hasOpenIDConnect"] = true
|
||||
}
|
||||
|
||||
s.router.Get("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
cclog.Debugf("##%v##", info)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
||||
})
|
||||
s.router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||
})
|
||||
s.router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||
})
|
||||
|
||||
if !config.Keys.DisableAuthentication {
|
||||
// Create login failure handler (used by both /login and /jwt-login)
|
||||
loginFailureHandler := func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Login failed - ClusterCockpit",
|
||||
MsgType: "alert-warning",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
}
|
||||
|
||||
s.router.Post("/login", authHandle.Login(loginFailureHandler).ServeHTTP)
|
||||
s.router.HandleFunc("/jwt-login", authHandle.Login(loginFailureHandler).ServeHTTP)
|
||||
|
||||
s.router.Post("/logout", authHandle.Logout(
|
||||
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Bye - ClusterCockpit",
|
||||
MsgType: "alert-info",
|
||||
Message: "Logout successful",
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
})
|
||||
})).ServeHTTP)
|
||||
}
|
||||
|
||||
if flagDev {
|
||||
s.router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||
s.router.Get("/swagger/*", httpSwagger.Handler(
|
||||
httpSwagger.URL("http://"+config.Keys.Addr+"/swagger/doc.json")))
|
||||
}
|
||||
|
||||
// Secured routes (require authentication)
|
||||
s.router.Group(func(secured chi.Router) {
|
||||
if !config.Keys.DisableAuthentication {
|
||||
secured.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.Auth(
|
||||
next,
|
||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||
rw.WriteHeader(http.StatusUnauthorized)
|
||||
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||
Title: "Authentication failed - ClusterCockpit",
|
||||
MsgType: "alert-danger",
|
||||
Message: err.Error(),
|
||||
Build: buildInfo,
|
||||
Infos: info,
|
||||
Redirect: r.RequestURI,
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
secured.Handle("/query", graphQLServer)
|
||||
|
||||
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
||||
})
|
||||
|
||||
routerConfig.SetupRoutes(secured, buildInfo)
|
||||
})
|
||||
|
||||
// API routes (JWT token auth)
|
||||
s.router.Route("/api", func(apiRouter chi.Router) {
|
||||
// Main API routes with API auth
|
||||
apiRouter.Group(func(securedapi chi.Router) {
|
||||
if !config.Keys.DisableAuthentication {
|
||||
securedapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthAPI(next, onFailureResponse)
|
||||
})
|
||||
}
|
||||
s.restAPIHandle.MountAPIRoutes(securedapi)
|
||||
})
|
||||
|
||||
// Metric store API routes with separate auth
|
||||
apiRouter.Group(func(metricstoreapi chi.Router) {
|
||||
if !config.Keys.DisableAuthentication {
|
||||
metricstoreapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthMetricStoreAPI(next, onFailureResponse)
|
||||
})
|
||||
}
|
||||
s.restAPIHandle.MountMetricStoreAPIRoutes(metricstoreapi)
|
||||
})
|
||||
})
|
||||
|
||||
// User API routes
|
||||
s.router.Route("/userapi", func(userapi chi.Router) {
|
||||
if !config.Keys.DisableAuthentication {
|
||||
userapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthUserAPI(next, onFailureResponse)
|
||||
})
|
||||
}
|
||||
s.restAPIHandle.MountUserAPIRoutes(userapi)
|
||||
})
|
||||
|
||||
// Config API routes (uses Group with full paths to avoid shadowing
|
||||
// the /config page route that is registered in the secured group)
|
||||
s.router.Group(func(configapi chi.Router) {
|
||||
if !config.Keys.DisableAuthentication {
|
||||
configapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthConfigAPI(next, onFailureResponse)
|
||||
})
|
||||
}
|
||||
s.restAPIHandle.MountConfigAPIRoutes(configapi)
|
||||
})
|
||||
|
||||
// Frontend API routes
|
||||
s.router.Route("/frontend", func(frontendapi chi.Router) {
|
||||
if !config.Keys.DisableAuthentication {
|
||||
frontendapi.Use(func(next http.Handler) http.Handler {
|
||||
return authHandle.AuthFrontendAPI(next, onFailureResponse)
|
||||
})
|
||||
}
|
||||
s.restAPIHandle.MountFrontendAPIRoutes(frontendapi)
|
||||
})
|
||||
|
||||
if config.Keys.APISubjects != nil {
|
||||
s.natsAPIHandle = api.NewNatsAPI()
|
||||
if err := s.natsAPIHandle.StartSubscriptions(); err != nil {
|
||||
return fmt.Errorf("starting NATS subscriptions: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// 404 handler for pages and API routes
|
||||
notFoundHandler := func(rw http.ResponseWriter, r *http.Request) {
|
||||
if strings.HasPrefix(r.URL.Path, "/api/") || strings.HasPrefix(r.URL.Path, "/userapi/") ||
|
||||
strings.HasPrefix(r.URL.Path, "/frontend/") || strings.HasPrefix(r.URL.Path, "/config/") {
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
rw.WriteHeader(http.StatusNotFound)
|
||||
json.NewEncoder(rw).Encode(map[string]string{
|
||||
"status": "Resource not found",
|
||||
"error": "the requested endpoint does not exist",
|
||||
})
|
||||
return
|
||||
}
|
||||
rw.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
rw.WriteHeader(http.StatusNotFound)
|
||||
web.RenderTemplate(rw, "404.tmpl", &web.Page{
|
||||
Title: "Page Not Found",
|
||||
Build: buildInfo,
|
||||
})
|
||||
}
|
||||
|
||||
// Set NotFound on the router so chi uses it for all unmatched routes,
|
||||
// including those under subrouters like /api, /userapi, /frontend, etc.
|
||||
s.router.NotFound(notFoundHandler)
|
||||
|
||||
if config.Keys.EmbedStaticFiles {
|
||||
if i, err := os.Stat("./var/img"); err == nil {
|
||||
if i.IsDir() {
|
||||
cclog.Info("Use local directory for static images")
|
||||
s.router.Handle("/img/*", http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||
}
|
||||
}
|
||||
fileServer := http.StripPrefix("/", web.ServeFiles())
|
||||
s.router.Handle("/*", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
if web.StaticFileExists(r.URL.Path) {
|
||||
fileServer.ServeHTTP(rw, r)
|
||||
return
|
||||
}
|
||||
notFoundHandler(rw, r)
|
||||
}))
|
||||
} else {
|
||||
staticDir := http.Dir(config.Keys.StaticFiles)
|
||||
fileServer := http.FileServer(staticDir)
|
||||
s.router.Handle("/*", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
f, err := staticDir.Open(r.URL.Path)
|
||||
if err == nil {
|
||||
f.Close()
|
||||
fileServer.ServeHTTP(rw, r)
|
||||
return
|
||||
}
|
||||
notFoundHandler(rw, r)
|
||||
}))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Server timeout defaults (in seconds)
|
||||
const (
|
||||
defaultReadTimeout = 20
|
||||
defaultWriteTimeout = 20
|
||||
)
|
||||
|
||||
func (s *Server) Start(ctx context.Context) error {
|
||||
// Use configurable timeouts with defaults
|
||||
readTimeout := time.Duration(defaultReadTimeout) * time.Second
|
||||
writeTimeout := time.Duration(defaultWriteTimeout) * time.Second
|
||||
|
||||
s.server = &http.Server{
|
||||
ReadTimeout: readTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
Handler: s.router,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
// Start http or https server
|
||||
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("starting listener on '%s': %w", config.Keys.Addr, err)
|
||||
}
|
||||
|
||||
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHTTPTo != "" {
|
||||
go func() {
|
||||
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHTTPTo, http.StatusMovedPermanently))
|
||||
}()
|
||||
}
|
||||
|
||||
if config.Keys.HTTPSCertFile != "" && config.Keys.HTTPSKeyFile != "" {
|
||||
cert, err := tls.LoadX509KeyPair(
|
||||
config.Keys.HTTPSCertFile, config.Keys.HTTPSKeyFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("loading X509 keypair (check 'https-cert-file' and 'https-key-file' in config.json): %w", err)
|
||||
}
|
||||
listener = tls.NewListener(listener, &tls.Config{
|
||||
Certificates: []tls.Certificate{cert},
|
||||
CipherSuites: []uint16{
|
||||
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
},
|
||||
MinVersion: tls.VersionTLS12,
|
||||
PreferServerCipherSuites: true,
|
||||
})
|
||||
cclog.Infof("HTTPS server listening at %s...", config.Keys.Addr)
|
||||
} else {
|
||||
cclog.Infof("HTTP server listening at %s...", config.Keys.Addr)
|
||||
}
|
||||
//
|
||||
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||
// be established first, then the user can be changed, and after that,
|
||||
// the actual http server can be started.
|
||||
if err := runtime.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||
return fmt.Errorf("dropping privileges: %w", err)
|
||||
}
|
||||
|
||||
// Handle context cancellation for graceful shutdown
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||
cclog.Errorf("Server shutdown error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
if err = s.server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
return fmt.Errorf("server failed: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Server) Shutdown(ctx context.Context) {
|
||||
// Create a shutdown context with timeout
|
||||
shutdownCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
nc := nats.GetClient()
|
||||
if nc != nil {
|
||||
nc.Close()
|
||||
}
|
||||
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||
cclog.Errorf("Server shutdown error: %v", err)
|
||||
}
|
||||
|
||||
// Archive all the metric store data
|
||||
metricstore.Shutdown()
|
||||
|
||||
// Shutdown archiver with 10 second timeout for fast shutdown
|
||||
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -1,76 +0,0 @@
|
||||
## Intro
|
||||
|
||||
cc-backend requires a configuration file specifying the cluster systems to be used. Still many default
|
||||
options documented below are used. cc-backend tries to load a config.json from the working directory per default.
|
||||
To overwrite the default specify a json config file location using the command line option `--config <filepath>`.
|
||||
All security relevant configuration. e.g., keys and passwords, are set using environment variables.
|
||||
It is supported to specify these by means of an `.env` file located in the project root.
|
||||
|
||||
## Configuration Options
|
||||
|
||||
* `addr`: Type string. Address where the http (or https) server will listen on (for example: 'localhost:80'). Default `:8080`.
|
||||
* `user`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
|
||||
* `group`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
|
||||
* `disable-authentication`: Type bool. Disable authentication (for everything: API, Web-UI, ...). Default `false`.
|
||||
* `embed-static-files`: Type bool. If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not. Default `true`.
|
||||
* `static-files`: Type string. Folder where static assets can be found, if `embed-static-files` is `false`. No default.
|
||||
* `db-driver`: Type string. 'sqlite3' or 'mysql' (mysql will work for mariadb as well). Default `sqlite3`.
|
||||
* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
|
||||
* `job-archive`: Type string. Path to the job-archive. Default: `./var/job-archive`.
|
||||
* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
|
||||
* `validate`: Type bool. Validate all input json documents against json schema.
|
||||
* `"session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
|
||||
* `"jwt-max-age`: Type string. Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `0`.
|
||||
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
|
||||
* `redirect-http-to`: Type string. If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url.
|
||||
* `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail!
|
||||
* `"stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`.
|
||||
* `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
|
||||
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
|
||||
- `url`: Type string. URL of LDAP directory server.
|
||||
- `user_base`: Type string. Base DN of user tree root.
|
||||
- `search_dn`: Type string. DN for authenticating LDAP admin account with general read rights.
|
||||
- `user_bind`: Type string. Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
|
||||
- `user_filter`: Type string. Filter to extract users for syncing.
|
||||
- `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.
|
||||
- `sync_del_old_users`: Type bool. Delete obsolete users in database.
|
||||
* `clusters`: Type array of objects
|
||||
- `name`: Type string. The name of the cluster.
|
||||
- `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string)
|
||||
- `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example:
|
||||
```
|
||||
"filterRanges": {
|
||||
"numNodes": { "from": 1, "to": 64 },
|
||||
"duration": { "from": 0, "to": 86400 },
|
||||
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
|
||||
}
|
||||
```
|
||||
* `ui-defaults`: Type object. Default configuration for ui views. If overwritten, all options must be provided! Most options can be overwritten by the user via the web interface.
|
||||
- `analysis_view_histogramMetrics`: Type string array. Metrics to show as job count histograms in analysis view. Default `["flops_any", "mem_bw", "mem_used"]`.
|
||||
- `analysis_view_scatterPlotMetrics`: Type array of string array. Initial
|
||||
scatter plot configuration in analysis view. Default `[["flops_any", "mem_bw"], ["flops_any", "cpu_load"], ["cpu_load", "mem_bw"]]`.
|
||||
- `job_view_nodestats_selectedMetrics`: Type string array. Initial metrics shown in node statistics table of single job view. Default `["flops_any", "mem_bw", "mem_used"]`.
|
||||
- `job_view_polarPlotMetrics`: Type string array. Metrics shown in polar plot of single job view. Default `["flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"]`.
|
||||
- `job_view_selectedMetrics`: Type string array. Default `["flops_any", "mem_bw", "mem_used"]`.
|
||||
- `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`.
|
||||
- `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`.
|
||||
- `plot_general_lineWidth`: Type int. Initial linewidth. Default `3`.
|
||||
- `plot_list_jobsPerPage`: Type int. Jobs shown per page in job lists. Default `50`.
|
||||
- `plot_list_selectedMetrics`: Type string array. Initial metric plots shown in jobs lists. Default `"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"`.
|
||||
- `plot_view_plotsPerRow`: Type int. Number of plots per row in single job view. Default `3`.
|
||||
- `plot_view_showPolarplot`: Type bool. Option to toggle polar plot in single job view. Default `true`.
|
||||
- `plot_view_showRoofline`: Type bool. Option to toggle roofline plot in single job view. Default `true`.
|
||||
- `plot_view_showStatTable`: Type bool. Option to toggle the node statistic table in single job view. Default `true`.
|
||||
- `system_view_selectedMetric`: Type string. Initial metric shown in system view. Default `cpu_load`.
|
||||
|
||||
Some of the `ui-defaults` values can be appended by `:<clustername>` in order to have different settings depending on the current cluster. Those are notably `job_view_nodestats_selectedMetrics`, `job_view_polarPlotMetrics`, `job_view_selectedMetrics` and `plot_list_selectedMetrics`.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
An example env file is found in this directory. Copy it to `.env` in the project root and adapt it for your needs.
|
||||
|
||||
* `JWT_PUBLIC_KEY` and `JWT_PRIVATE_KEY`: Base64 encoded Ed25519 keys used for JSON Web Token (JWT) authentication. You can generate your own keypair using `go run ./cmd/gen-keypair/gen-keypair.go`. More information in [README_TOKENS.md](./README_TOKENS.md).
|
||||
* `SESSION_KEY`: Some random bytes used as secret for cookie-based sessions.
|
||||
* `LDAP_ADMIN_PASSWORD`: The LDAP admin user password (optional).
|
||||
* `CROSS_LOGIN_JWT_HS512_KEY`: Used for token based logins via another authentication service.
|
||||
* `LOGLEVEL`: Can be `err`, `warn`, `info` or `debug` (optional, `debug` by default). Can be used to reduce logging.
|
||||
@@ -1,51 +0,0 @@
|
||||
## Introduction
|
||||
|
||||
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs.
|
||||
JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.
|
||||
This information can be verified and trusted because it is digitally signed.
|
||||
In ClusterCockpit JWTs are signed using a public/private key pair using ECDSA.
|
||||
Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it.
|
||||
Expiration of the generated tokens as well as the max. length of a browser session can be configured in the `config.json` file described [here](./README.md).
|
||||
|
||||
The [Ed25519](https://ed25519.cr.yp.to/) algorithm for signatures was used because it is compatible with other tools that require authentication, such as NATS.io, and because these elliptic-curve methods provide simillar security with smaller keys compared to something like RSA. They are sligthly more expensive to validate, but that effect is negligible.
|
||||
|
||||
## JWT Payload
|
||||
|
||||
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
|
||||
Currently ClusterCockpit sets the following claims:
|
||||
* `iat`: Issued at claim. The “iat” claim is used to identify the the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
|
||||
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
|
||||
* `roles`: An array of strings specifying the roles set for the subject.
|
||||
* `exp`: Expiration date of the token (only if explicitly configured)
|
||||
|
||||
It is important to know that JWTs are not encrypted, only signed. This means that outsiders cannot create new JWTs or modify existing ones, but they are able to read out the username.
|
||||
|
||||
## Workflow
|
||||
|
||||
1. Create a new ECDSA Public/private keypair:
|
||||
```
|
||||
$ go build ./cmd/gen-keypair/
|
||||
$ ./gen-keypair
|
||||
```
|
||||
2. Add keypair in your `.env` file. A template can be found in `./configs`.
|
||||
|
||||
When a user logs in via the `/login` page using a browser, a session cookie (secured using the random bytes in the `SESSION_KEY` env. variable you shoud change as well) is used for all requests after the successfull login. The JWTs make it easier to use the APIs of ClusterCockpit using scripts or other external programs. The token is specified n the `Authorization` HTTP header using the [Bearer schema](https://datatracker.ietf.org/doc/html/rfc6750) (there is an example below). Tokens can be issued to users from the configuration view in the Web-UI or the command line. In order to use the token for API endpoints such as `/api/jobs/start_job/`, the user that executes it needs to have the `api` role. Regular users can only perform read-only queries and only look at data connected to jobs they started themselves.
|
||||
|
||||
## cc-metric-store
|
||||
|
||||
The [cc-metric-store](https://github.com/ClusterCockpit/cc-metric-store) also uses JWTs for authentication. As it does not issue new tokens, it does not need to kown the private key. The public key of the keypair that is used to generate the JWTs that grant access to the `cc-metric-store` can be specified in its `config.json`. When configuring the `metricDataRepository` object in the `cluster.json` file, you can put a token issued by ClusterCockpit itself.
|
||||
|
||||
## Setup user and JWT token for REST API authorization
|
||||
|
||||
1. Create user:
|
||||
```
|
||||
$ ./cc-backend --add-user <username>:api:<password> --no-server
|
||||
```
|
||||
2. Issue token for user:
|
||||
```
|
||||
$ ./cc-backend --jwt <username> --no-server
|
||||
```
|
||||
3. Use issued token token on client side:
|
||||
```
|
||||
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
|
||||
```
|
||||
@@ -1,53 +1,29 @@
|
||||
{
|
||||
"addr": "127.0.0.1:8080",
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
"main": {
|
||||
"addr": "127.0.0.1:8080"
|
||||
},
|
||||
"clusters": [
|
||||
"cron": {
|
||||
"commit-job-worker": "1m",
|
||||
"duration-worker": "3m",
|
||||
"footprint-worker": "5m"
|
||||
},
|
||||
"auth": {
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
}
|
||||
},
|
||||
"metric-store-external": [
|
||||
{
|
||||
"name": "fritz",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
"scope": "fritz",
|
||||
"url": "http://0.0.0.0:8082",
|
||||
"token": "eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzU3Nzg4NDQsImlhdCI6MTc2ODU3ODg0NCwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9._SDEW9WaUVXSBFmWqGhyIZXLoqoDU8F1hkfh4cXKIqF4yw7w50IUpfUBtwUFUOnoviFKoi563f6RAMC7XxeLDA"
|
||||
}
|
||||
],
|
||||
"metric-store": {
|
||||
"checkpoints": {
|
||||
"interval": "12h"
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
"retention-in-memory": "48h",
|
||||
"memory-cap": 100
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "alex",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": ""
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1,50 +1,99 @@
|
||||
{
|
||||
"main": {
|
||||
"addr": "0.0.0.0:443",
|
||||
"ldap": {
|
||||
"url": "ldaps://test",
|
||||
"user_base": "ou=people,ou=hpc,dc=test,dc=de",
|
||||
"search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
|
||||
"user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
|
||||
"user_filter": "(&(objectclass=posixAccount)(uid=*))"
|
||||
},
|
||||
"https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
|
||||
"https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
|
||||
"user": "clustercockpit",
|
||||
"group": "clustercockpit",
|
||||
"api-allowed-ips": ["*"],
|
||||
"short-running-jobs-duration": 300,
|
||||
"enable-job-taggers": true,
|
||||
"nodestate-retention": {
|
||||
"policy": "move",
|
||||
"target-kind": "file",
|
||||
"target-path": "./var/nodestate-archive"
|
||||
},
|
||||
"resampling": {
|
||||
"minimum-points": 600,
|
||||
"trigger": 180,
|
||||
"resolutions": [240, 60]
|
||||
},
|
||||
"api-subjects": {
|
||||
"subject-job-event": "cc.job.event",
|
||||
"subject-node-state": "cc.node.state"
|
||||
}
|
||||
},
|
||||
"nats": {
|
||||
"address": "nats://0.0.0.0:4222",
|
||||
"username": "root",
|
||||
"password": "root"
|
||||
},
|
||||
"auth": {
|
||||
"jwts": {
|
||||
"max-age": "2000h"
|
||||
}
|
||||
},
|
||||
"cron": {
|
||||
"commit-job-worker": "1m",
|
||||
"duration-worker": "5m",
|
||||
"footprint-worker": "10m"
|
||||
},
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
"kind": "s3",
|
||||
"endpoint": "http://x.x.x.x",
|
||||
"bucket": "jobarchive",
|
||||
"access-key": "xx",
|
||||
"secret-key": "xx",
|
||||
"retention": {
|
||||
"policy": "move",
|
||||
"age": 365,
|
||||
"location": "./var/archive"
|
||||
}
|
||||
},
|
||||
"validate": true,
|
||||
"clusters": [
|
||||
"metric-store-external": [
|
||||
{
|
||||
"name": "test",
|
||||
"metricDataRepository": {
|
||||
"kind": "cc-metric-store",
|
||||
"url": "http://localhost:8082",
|
||||
"token": "eyJhbGciOiJF-E-pQBQ"
|
||||
"scope": "*",
|
||||
"url": "http://x.x.x.x:8082",
|
||||
"token": "MySecret"
|
||||
},
|
||||
"filterRanges": {
|
||||
"numNodes": {
|
||||
"from": 1,
|
||||
"to": 64
|
||||
{
|
||||
"scope": "fritz",
|
||||
"url": "http://x.x.x.x:8084",
|
||||
"token": "MySecret"
|
||||
},
|
||||
"duration": {
|
||||
"from": 0,
|
||||
"to": 86400
|
||||
{
|
||||
"scope": "fritz-spr1tb",
|
||||
"url": "http://x.x.x.x:8083",
|
||||
"token": "MySecret"
|
||||
},
|
||||
"startTime": {
|
||||
"from": "2022-01-01T00:00:00Z",
|
||||
"to": null
|
||||
}
|
||||
}
|
||||
{
|
||||
"scope": "alex",
|
||||
"url": "http://x.x.x.x:8084",
|
||||
"token": "MySecret"
|
||||
}
|
||||
],
|
||||
"jwts": {
|
||||
"cookieName": "",
|
||||
"forceJWTValidationViaDatabase": false,
|
||||
"max-age": 0,
|
||||
"trustedExternalIssuer": ""
|
||||
"metric-store": {
|
||||
"checkpoints": {
|
||||
"interval": "12h",
|
||||
"directory": "./var/checkpoints"
|
||||
},
|
||||
"short-running-jobs-duration": 300
|
||||
"memory-cap": 100,
|
||||
"retention-in-memory": "48h",
|
||||
"cleanup": {
|
||||
"mode": "archive",
|
||||
"interval": "48h",
|
||||
"directory": "./var/archive"
|
||||
},
|
||||
"nats-subscriptions": [
|
||||
{
|
||||
"subscribe-to": "hpc-nats",
|
||||
"cluster-tag": "fritz"
|
||||
},
|
||||
{
|
||||
"subscribe-to": "hpc-nats",
|
||||
"cluster-tag": "alex"
|
||||
}
|
||||
]
|
||||
},
|
||||
"ui-file": "ui-config.json"
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||
# You can generate your own keypair using `go run utils/gen-keypair.go`
|
||||
# You can generate your own keypair using `go run tools/gen-keypair/main.go`
|
||||
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||
|
||||
# Base64 encoded Ed25519 public key for accepting externally generated JWTs
|
||||
# Keys in PEM format can be converted, see `tools/convert-pem-pubkey-for-cc/Readme.md`
|
||||
# Keys in PEM format can be converted, see `tools/convert-pem-pubkey/Readme.md`
|
||||
CROSS_LOGIN_JWT_PUBLIC_KEY=""
|
||||
|
||||
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||
|
||||
@@ -117,10 +117,12 @@ foreach my $ln (split("\n", $topo)) {
|
||||
|
||||
my $node;
|
||||
my @sockets;
|
||||
my @nodeCores;
|
||||
foreach my $socket ( @{$DOMAINS{socket}} ) {
|
||||
push @sockets, "[".join(",", @{$socket})."]";
|
||||
$node .= join(",", @{$socket})
|
||||
push @nodeCores, join(",", @{$socket});
|
||||
}
|
||||
$node = join(",", @nodeCores);
|
||||
$INFO{sockets} = join(",\n", @sockets);
|
||||
|
||||
my @memDomains;
|
||||
@@ -212,9 +214,27 @@ print <<"END";
|
||||
"socketsPerNode": $INFO{socketsPerNode},
|
||||
"coresPerSocket": $INFO{coresPerSocket},
|
||||
"threadsPerCore": $INFO{threadsPerCore},
|
||||
"flopRateScalar": $flopsScalar,
|
||||
"flopRateSimd": $flopsSimd,
|
||||
"memoryBandwidth": $memBw,
|
||||
"flopRateScalar": {
|
||||
"unit": {
|
||||
"base": "F/s",
|
||||
"prefix": "G"
|
||||
},
|
||||
"value": $flopsScalar
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"unit": {
|
||||
"base": "F/s",
|
||||
"prefix": "G"
|
||||
},
|
||||
"value": $flopsSimd
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"unit": {
|
||||
"base": "B/s",
|
||||
"prefix": "G"
|
||||
},
|
||||
"value": $memBw
|
||||
},
|
||||
"nodes": "<FILL IN NODE RANGES>",
|
||||
"topology": {
|
||||
"node": [$node],
|
||||
|
||||
22
configs/startJobPayload.json
Normal file
22
configs/startJobPayload.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"cluster": "fritz",
|
||||
"jobId": 123000,
|
||||
"jobState": "running",
|
||||
"numAcc": 0,
|
||||
"numHwthreads": 72,
|
||||
"numNodes": 1,
|
||||
"partition": "main",
|
||||
"requestedMemory": 128000,
|
||||
"resources": [{ "hostname": "f0726" }],
|
||||
"startTime": 1649723812,
|
||||
"subCluster": "main",
|
||||
"submitTime": 1649723812,
|
||||
"user": "k106eb10",
|
||||
"project": "k106eb",
|
||||
"walltime": 86400,
|
||||
"metaData": {
|
||||
"slurmInfo": "JobId=398759\nJobName=myJob\nUserId=dummyUser\nGroupId=dummyGroup\nAccount=dummyAccount\nQOS=normal Requeue=False Restarts=0 BatchFlag=True\nTimeLimit=1439'\nSubmitTime=2023-02-09T14:10:18\nPartition=singlenode\nNodeList=xx\nNumNodes=xx NumCPUs=72 NumTasks=72 CPUs/Task=1\nNTasksPerNode:Socket:Core=0:None:None\nTRES_req=cpu=72,mem=250000M,node=1,billing=72\nTRES_alloc=cpu=72,node=1,billing=72\nCommand=myCmd\nWorkDir=myDir\nStdErr=\nStdOut=\n",
|
||||
"jobScript": "#!/bin/bash -l\n#SBATCH --job-name=dummy_job\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/dummy/\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\n\n#This is a dummy job script\n./mybinary\n",
|
||||
"jobName": "ams_pipeline"
|
||||
}
|
||||
}
|
||||
7
configs/stopJobPayload.json
Normal file
7
configs/stopJobPayload.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"cluster": "fritz",
|
||||
"jobId": 123000,
|
||||
"jobState": "completed",
|
||||
"startTime": 1649723812,
|
||||
"stopTime": 1649763839
|
||||
}
|
||||
419
configs/tagger/README.md
Normal file
419
configs/tagger/README.md
Normal file
@@ -0,0 +1,419 @@
|
||||
# Job Tagging Configuration
|
||||
|
||||
ClusterCockpit provides automatic job tagging functionality to classify and
|
||||
categorize jobs based on configurable rules. The tagging system consists of two
|
||||
main components:
|
||||
|
||||
1. **Application Detection** - Identifies which application a job is running
|
||||
2. **Job Classification** - Analyzes job performance characteristics and applies classification tags
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
configs/tagger/
|
||||
├── apps/ # Application detection patterns
|
||||
│ ├── vasp.txt
|
||||
│ ├── gromacs.txt
|
||||
│ └── ...
|
||||
└── jobclasses/ # Job classification rules
|
||||
├── parameters.json
|
||||
├── lowUtilization.json
|
||||
├── highload.json
|
||||
└── ...
|
||||
```
|
||||
|
||||
## Activating Tagger Rules
|
||||
|
||||
### Step 1: Copy Configuration Files
|
||||
|
||||
To activate tagging, review, adapt, and copy the configuration files from
|
||||
`configs/tagger/` to `var/tagger/`:
|
||||
|
||||
```bash
|
||||
# From the cc-backend root directory
|
||||
mkdir -p var/tagger
|
||||
cp -r configs/tagger/apps var/tagger/
|
||||
cp -r configs/tagger/jobclasses var/tagger/
|
||||
```
|
||||
|
||||
### Step 2: Enable Tagging in Configuration
|
||||
|
||||
Add or set the following configuration key in the `main` section of your `config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"enable-job-taggers": true
|
||||
}
|
||||
```
|
||||
|
||||
**Important**: Automatic tagging is disabled by default. You must explicitly
|
||||
enable it by setting `enable-job-taggers: true` in the main configuration file.
|
||||
|
||||
### Step 3: Restart cc-backend
|
||||
|
||||
The tagger system automatically loads configuration from `./var/tagger/` at
|
||||
startup. After copying the files and enabling the feature, restart cc-backend:
|
||||
|
||||
```bash
|
||||
./cc-backend -server
|
||||
```
|
||||
|
||||
### Step 4: Verify Configuration Loaded
|
||||
|
||||
Check the logs for messages indicating successful configuration loading:
|
||||
|
||||
```
|
||||
[INFO] Setup file watch for ./var/tagger/apps
|
||||
[INFO] Setup file watch for ./var/tagger/jobclasses
|
||||
```
|
||||
|
||||
## How Tagging Works
|
||||
|
||||
### Automatic Tagging
|
||||
|
||||
When `enable-job-taggers` is set to `true` in the configuration, tags are
|
||||
automatically applied when:
|
||||
|
||||
- **Job Start**: Application detection runs immediately when a job starts
|
||||
- **Job Stop**: Job classification runs when a job completes
|
||||
|
||||
The system analyzes job metadata and metrics to determine appropriate tags.
|
||||
|
||||
**Note**: Automatic tagging only works for jobs that start or stop after the
|
||||
feature is enabled. Existing jobs are not automatically retagged.
|
||||
|
||||
### Manual Tagging (Retroactive)
|
||||
|
||||
To apply tags to existing jobs in the database, use the `-apply-tags` command
|
||||
line option:
|
||||
|
||||
```bash
|
||||
./cc-backend -apply-tags
|
||||
```
|
||||
|
||||
This processes all jobs in the database and applies current tagging rules. This
|
||||
is useful when:
|
||||
|
||||
- You have existing jobs that were created before tagging was enabled
|
||||
- You've added new tagging rules and want to apply them to historical data
|
||||
- You've modified existing rules and want to re-evaluate all jobs
|
||||
|
||||
### Hot Reload
|
||||
|
||||
The tagger system watches the configuration directories for changes. You can
|
||||
modify or add rules without restarting `cc-backend`:
|
||||
|
||||
- Changes to `var/tagger/apps/*` are detected automatically
|
||||
- Changes to `var/tagger/jobclasses/*` are detected automatically
|
||||
|
||||
## Application Detection
|
||||
|
||||
Application detection identifies which software a job is running by matching
|
||||
patterns in the job script.
|
||||
|
||||
### Configuration Format
|
||||
|
||||
Application patterns are stored in text files under `var/tagger/apps/`. Each
|
||||
file contains one or more regular expression patterns (one per line) that match
|
||||
against the job script.
|
||||
|
||||
**Example: `apps/vasp.txt`**
|
||||
|
||||
```
|
||||
vasp
|
||||
VASP
|
||||
```
|
||||
|
||||
### How It Works
|
||||
|
||||
1. When a job starts, the system retrieves the job script from metadata
|
||||
2. Each line in the app files is treated as a regex pattern
|
||||
3. Patterns are matched case-insensitively against the lowercased job script
|
||||
4. If a match is found, a tag of type `app` with the filename (without extension) is applied
|
||||
5. Only the first matching application is tagged
|
||||
|
||||
### Adding New Applications
|
||||
|
||||
1. Create a new file in `var/tagger/apps/` (e.g., `tensorflow.txt`)
|
||||
2. Add regex patterns, one per line:
|
||||
|
||||
```
|
||||
tensorflow
|
||||
tf\.keras
|
||||
import tensorflow
|
||||
```
|
||||
|
||||
3. The file is automatically detected and loaded
|
||||
|
||||
**Note**: The tag name will be the filename without the `.txt` extension (e.g., `tensorflow`).
|
||||
|
||||
## Job Classification
|
||||
|
||||
Job classification analyzes completed jobs based on their metrics and properties
|
||||
to identify performance issues or characteristics.
|
||||
|
||||
### Configuration Format
|
||||
|
||||
Job classification rules are defined in JSON files under
|
||||
`var/tagger/jobclasses/`. Each rule file defines:
|
||||
|
||||
- **Metrics required**: Which job metrics to analyze
|
||||
- **Requirements**: Pre-conditions that must be met
|
||||
- **Variables**: Computed values used in the rule
|
||||
- **Rule expression**: Boolean expression that determines if the rule matches
|
||||
- **Hint template**: Message displayed when the rule matches
|
||||
|
||||
### Parameters File
|
||||
|
||||
`jobclasses/parameters.json` defines shared threshold values used across multiple rules:
|
||||
|
||||
```json
|
||||
{
|
||||
"lowcpuload_threshold_factor": 0.9,
|
||||
"highmemoryusage_threshold_factor": 0.9,
|
||||
"job_min_duration_seconds": 600.0,
|
||||
"sampling_interval_seconds": 30.0
|
||||
}
|
||||
```
|
||||
|
||||
### Rule File Structure
|
||||
|
||||
**Example: `jobclasses/lowUtilization.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "Low resource utilization",
|
||||
"tag": "lowutilization",
|
||||
"parameters": ["job_min_duration_seconds"],
|
||||
"metrics": ["flops_any", "mem_bw"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "mem_bw_perc",
|
||||
"expr": "1.0 - (mem_bw.avg / mem_bw.limits.peak)"
|
||||
}
|
||||
],
|
||||
"rule": "flops_any.avg < flops_any.limits.alert",
|
||||
"hint": "Average flop rate {{.flops_any.avg}} falls below threshold {{.flops_any.limits.alert}}"
|
||||
}
|
||||
```
|
||||
|
||||
#### Field Descriptions
|
||||
|
||||
| Field | Description |
|
||||
| -------------- | ----------------------------------------------------------------------------- |
|
||||
| `name` | Human-readable description of the rule |
|
||||
| `tag` | Tag identifier applied when the rule matches |
|
||||
| `parameters` | List of parameter names from `parameters.json` to include in rule environment |
|
||||
| `metrics` | List of metrics required for evaluation (must be present in job data) |
|
||||
| `requirements` | Boolean expressions that must all be true for the rule to be evaluated |
|
||||
| `variables` | Named expressions computed before evaluating the main rule |
|
||||
| `rule` | Boolean expression that determines if the job matches this classification |
|
||||
| `hint` | Go template string for generating a user-visible message |
|
||||
|
||||
### Expression Environment
|
||||
|
||||
Expressions in `requirements`, `variables`, and `rule` have access to:
|
||||
|
||||
**Job Properties:**
|
||||
|
||||
- `job.shared` - Shared node allocation type
|
||||
- `job.duration` - Job runtime in seconds
|
||||
- `job.numCores` - Number of CPU cores
|
||||
- `job.numNodes` - Number of nodes
|
||||
- `job.jobState` - Job completion state
|
||||
- `job.numAcc` - Number of accelerators
|
||||
- `job.smt` - SMT setting
|
||||
|
||||
**Metric Statistics (for each metric in `metrics`):**
|
||||
|
||||
- `<metric>.min` - Minimum value
|
||||
- `<metric>.max` - Maximum value
|
||||
- `<metric>.avg` - Average value
|
||||
- `<metric>.limits.peak` - Peak limit from cluster config
|
||||
- `<metric>.limits.normal` - Normal threshold
|
||||
- `<metric>.limits.caution` - Caution threshold
|
||||
- `<metric>.limits.alert` - Alert threshold
|
||||
|
||||
**Parameters:**
|
||||
|
||||
- All parameters listed in the `parameters` field
|
||||
|
||||
**Variables:**
|
||||
|
||||
- All variables defined in the `variables` array
|
||||
|
||||
### Expression Language
|
||||
|
||||
Rules use the [expr](https://github.com/expr-lang/expr) language for expressions. Supported operations:
|
||||
|
||||
- **Arithmetic**: `+`, `-`, `*`, `/`, `%`, `^`
|
||||
- **Comparison**: `==`, `!=`, `<`, `<=`, `>`, `>=`
|
||||
- **Logical**: `&&`, `||`, `!`
|
||||
- **Functions**: Standard math functions (see expr documentation)
|
||||
|
||||
### Hint Templates
|
||||
|
||||
Hints use Go's `text/template` syntax. Variables from the evaluation environment are accessible:
|
||||
|
||||
```
|
||||
{{.flops_any.avg}} # Access metric average
|
||||
{{.job.duration}} # Access job property
|
||||
{{.my_variable}} # Access computed variable
|
||||
```
|
||||
|
||||
### Adding New Classification Rules
|
||||
|
||||
1. Create a new JSON file in `var/tagger/jobclasses/` (e.g., `memoryLeak.json`)
|
||||
2. Define the rule structure:
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "Memory Leak Detection",
|
||||
"tag": "memory_leak",
|
||||
"parameters": ["memory_leak_slope_threshold"],
|
||||
"metrics": ["mem_used"],
|
||||
"requirements": ["job.duration > 3600"],
|
||||
"variables": [
|
||||
{
|
||||
"name": "mem_growth",
|
||||
"expr": "(mem_used.max - mem_used.min) / job.duration"
|
||||
}
|
||||
],
|
||||
"rule": "mem_growth > memory_leak_slope_threshold",
|
||||
"hint": "Memory usage grew by {{.mem_growth}} per second"
|
||||
}
|
||||
```
|
||||
|
||||
3. Add any new parameters to `parameters.json`
|
||||
4. The file is automatically detected and loaded
|
||||
|
||||
## Configuration Paths
|
||||
|
||||
The tagger system reads from these paths (relative to cc-backend working directory):
|
||||
|
||||
- **Application patterns**: `./var/tagger/apps/`
|
||||
- **Job classification rules**: `./var/tagger/jobclasses/`
|
||||
|
||||
These paths are defined as constants in the source code and cannot be changed without recompiling.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Tags Not Applied
|
||||
|
||||
1. **Check tagging is enabled**: Verify `enable-job-taggers: true` is set in `config.json`
|
||||
|
||||
2. **Check configuration exists**:
|
||||
|
||||
```bash
|
||||
ls -la var/tagger/apps
|
||||
ls -la var/tagger/jobclasses
|
||||
```
|
||||
|
||||
3. **Check logs for errors**:
|
||||
|
||||
```bash
|
||||
./cc-backend -server -loglevel debug
|
||||
```
|
||||
|
||||
4. **Verify file permissions**: Ensure cc-backend can read the configuration files
|
||||
|
||||
5. **For existing jobs**: Use `./cc-backend -apply-tags` to retroactively tag jobs
|
||||
|
||||
### Rules Not Matching
|
||||
|
||||
1. **Enable debug logging**: Set `loglevel: debug` to see detailed rule evaluation
|
||||
2. **Check requirements**: Ensure all requirements in the rule are satisfied
|
||||
3. **Verify metrics exist**: Classification rules require job metrics to be available
|
||||
4. **Check metric names**: Ensure metric names match those in your cluster configuration
|
||||
|
||||
### File Watch Not Working
|
||||
|
||||
If changes to configuration files aren't detected:
|
||||
|
||||
1. Restart cc-backend to reload all configuration
|
||||
2. Check filesystem supports file watching (network filesystems may not)
|
||||
3. Check logs for file watch setup messages
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Start Simple**: Begin with basic rules and refine based on results
|
||||
2. **Use Requirements**: Filter out irrelevant jobs early with requirements
|
||||
3. **Test Incrementally**: Add one rule at a time and verify behavior
|
||||
4. **Document Rules**: Use descriptive names and clear hint messages
|
||||
5. **Share Parameters**: Define common thresholds in `parameters.json` for consistency
|
||||
6. **Version Control**: Keep your `var/tagger/` configuration in version control
|
||||
7. **Backup Before Changes**: Test new rules on a copy before deploying to production
|
||||
|
||||
## Examples
|
||||
|
||||
### Simple Application Detection
|
||||
|
||||
**File: `var/tagger/apps/python.txt`**
|
||||
|
||||
```
|
||||
python
|
||||
python3
|
||||
\.py
|
||||
```
|
||||
|
||||
This detects jobs running Python scripts.
|
||||
|
||||
### Complex Classification Rule
|
||||
|
||||
**File: `var/tagger/jobclasses/cpuImbalance.json`**
|
||||
|
||||
```json
|
||||
{
|
||||
"name": "CPU Load Imbalance",
|
||||
"tag": "cpu_imbalance",
|
||||
"parameters": ["core_load_imbalance_threshold_factor"],
|
||||
"metrics": ["cpu_load"],
|
||||
"requirements": ["job.numCores > 1", "job.duration > 600"],
|
||||
"variables": [
|
||||
{
|
||||
"name": "load_variance",
|
||||
"expr": "(cpu_load.max - cpu_load.min) / cpu_load.avg"
|
||||
}
|
||||
],
|
||||
"rule": "load_variance > core_load_imbalance_threshold_factor",
|
||||
"hint": "CPU load varies by {{printf \"%.1f%%\" (load_variance * 100)}} across cores"
|
||||
}
|
||||
```
|
||||
|
||||
This detects jobs where CPU load is unevenly distributed across cores.
|
||||
|
||||
## Reference
|
||||
|
||||
### Configuration Options
|
||||
|
||||
**Main Configuration (`config.json`)**:
|
||||
|
||||
- `enable-job-taggers` (boolean, default: `false`) - Enables automatic job tagging system
|
||||
- Must be set to `true` to activate automatic tagging on job start/stop events
|
||||
- Does not affect the `-apply-tags` command line option
|
||||
|
||||
**Command Line Options**:
|
||||
|
||||
- `-apply-tags` - Apply all tagging rules to existing jobs in the database
|
||||
- Works independently of `enable-job-taggers` configuration
|
||||
- Useful for retroactively tagging jobs or re-evaluating with updated rules
|
||||
|
||||
### Default Configuration Location
|
||||
|
||||
The example configurations are provided in:
|
||||
|
||||
- `configs/tagger/apps/` - Example application patterns (16 applications)
|
||||
- `configs/tagger/jobclasses/` - Example classification rules (3 rules)
|
||||
|
||||
Copy these to `var/tagger/` and customize for your environment.
|
||||
|
||||
### Tag Types
|
||||
|
||||
- `app` - Application tags (e.g., "vasp", "gromacs")
|
||||
- `jobClass` - Classification tags (e.g., "lowutilization", "highload")
|
||||
|
||||
Tags can be queried and filtered in the ClusterCockpit UI and API.
|
||||
1
configs/tagger/apps/alf.txt
Normal file
1
configs/tagger/apps/alf.txt
Normal file
@@ -0,0 +1 @@
|
||||
alf
|
||||
7
configs/tagger/apps/caracal.txt
Normal file
7
configs/tagger/apps/caracal.txt
Normal file
@@ -0,0 +1,7 @@
|
||||
calc_rate
|
||||
qmdffgen
|
||||
dynamic
|
||||
evbopt
|
||||
explore
|
||||
black_box
|
||||
poly_qmdff
|
||||
3
configs/tagger/apps/chroma.txt
Normal file
3
configs/tagger/apps/chroma.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
chroma
|
||||
qdp
|
||||
qmp
|
||||
1
configs/tagger/apps/cp2k.txt
Normal file
1
configs/tagger/apps/cp2k.txt
Normal file
@@ -0,0 +1 @@
|
||||
cp2k
|
||||
1
configs/tagger/apps/cpmd.txt
Normal file
1
configs/tagger/apps/cpmd.txt
Normal file
@@ -0,0 +1 @@
|
||||
cpmd
|
||||
1
configs/tagger/apps/flame.txt
Normal file
1
configs/tagger/apps/flame.txt
Normal file
@@ -0,0 +1 @@
|
||||
flame
|
||||
3
configs/tagger/apps/gromacs.txt
Normal file
3
configs/tagger/apps/gromacs.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
gromacs
|
||||
gmx
|
||||
mdrun
|
||||
1
configs/tagger/apps/julia.txt
Normal file
1
configs/tagger/apps/julia.txt
Normal file
@@ -0,0 +1 @@
|
||||
julia
|
||||
1
configs/tagger/apps/lammps.txt
Normal file
1
configs/tagger/apps/lammps.txt
Normal file
@@ -0,0 +1 @@
|
||||
lmp
|
||||
1
configs/tagger/apps/matlab.txt
Normal file
1
configs/tagger/apps/matlab.txt
Normal file
@@ -0,0 +1 @@
|
||||
matlab
|
||||
1
configs/tagger/apps/openfoam.txt
Normal file
1
configs/tagger/apps/openfoam.txt
Normal file
@@ -0,0 +1 @@
|
||||
openfoam
|
||||
1
configs/tagger/apps/orca.txt
Normal file
1
configs/tagger/apps/orca.txt
Normal file
@@ -0,0 +1 @@
|
||||
orca
|
||||
4
configs/tagger/apps/python.txt
Normal file
4
configs/tagger/apps/python.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
python
|
||||
pip
|
||||
anaconda
|
||||
conda
|
||||
2
configs/tagger/apps/starccm.txt
Normal file
2
configs/tagger/apps/starccm.txt
Normal file
@@ -0,0 +1,2 @@
|
||||
starccm+
|
||||
-podkey
|
||||
10
configs/tagger/apps/turbomole.txt
Normal file
10
configs/tagger/apps/turbomole.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
dscf
|
||||
grad
|
||||
ridft
|
||||
rdgrad
|
||||
ricc2
|
||||
statpt
|
||||
aoforce
|
||||
escf
|
||||
egrad
|
||||
odft
|
||||
3
configs/tagger/apps/vasp.txt
Normal file
3
configs/tagger/apps/vasp.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
vasp_gam
|
||||
vasp_ncl
|
||||
vasp_std
|
||||
21
configs/tagger/jobclasses/highMemoryUsage.json
Normal file
21
configs/tagger/jobclasses/highMemoryUsage.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "High memory usage",
|
||||
"tag": "highmemory",
|
||||
"parameters": [
|
||||
"highmemoryusage_threshold_factor",
|
||||
"job_min_duration_seconds"
|
||||
],
|
||||
"metrics": ["mem_used"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "memory_usage_pct",
|
||||
"expr": "mem_used.max / mem_used.limits.peak * 100.0"
|
||||
}
|
||||
],
|
||||
"rule": "mem_used.max > memory_used.limits.alert",
|
||||
"hint": "This job used high memory: peak memory usage {{.mem_used.max}} GB ({{.memory_usage_pct}}% of {{.mem_used.limits.peak}} GB node capacity), exceeding the {{.highmemoryusage_threshold_factor}} utilization threshold. Risk of out-of-memory conditions."
|
||||
}
|
||||
21
configs/tagger/jobclasses/highload.json
Normal file
21
configs/tagger/jobclasses/highload.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "Excessive CPU load",
|
||||
"tag": "excessiveload",
|
||||
"parameters": [
|
||||
"excessivecpuload_threshold_factor",
|
||||
"job_min_duration_seconds"
|
||||
],
|
||||
"metrics": ["cpu_load"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "load_threshold",
|
||||
"expr": "cpu_load.limits.peak * excessivecpuload_threshold_factor"
|
||||
}
|
||||
],
|
||||
"rule": "cpu_load.avg > load_threshold",
|
||||
"hint": "This job was detected as having excessive CPU load: average cpu load {{.cpu_load.avg}} exceeds the oversubscription threshold {{.load_threshold}} ({{.excessivecpuload_threshold_factor}} \u00d7 {{.cpu_load.limits.peak}} peak cores), indicating CPU contention."
|
||||
}
|
||||
22
configs/tagger/jobclasses/lowUtilization.json
Normal file
22
configs/tagger/jobclasses/lowUtilization.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "Low resource utilization",
|
||||
"tag": "lowutilization",
|
||||
"parameters": ["job_min_duration_seconds"],
|
||||
"metrics": ["flops_any", "mem_bw"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "mem_bw_pct",
|
||||
"expr": "mem_bw.avg / mem_bw.limits.peak * 100.0"
|
||||
},
|
||||
{
|
||||
"name": "flops_any_pct",
|
||||
"expr": "flops_any.avg / flops_any.limits.peak * 100.0"
|
||||
}
|
||||
],
|
||||
"rule": "flops_any.avg < flops_any.limits.alert && mem_bw.avg < mem_bw.limits.alert",
|
||||
"hint": "This job shows low resource utilization: FLOP rate {{.flops_any.avg}} GF/s ({{.flops_any_pct}}% of peak) and memory bandwidth {{.mem_bw.avg}} GB/s ({{.mem_bw_pct}}% of peak) are both below their alert thresholds."
|
||||
}
|
||||
18
configs/tagger/jobclasses/lowload.json
Normal file
18
configs/tagger/jobclasses/lowload.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "Low CPU load",
|
||||
"tag": "lowload",
|
||||
"parameters": ["lowcpuload_threshold_factor", "job_min_duration_seconds"],
|
||||
"metrics": ["cpu_load"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "load_threshold",
|
||||
"expr": "cpu_load.limits.peak * lowcpuload_threshold_factor"
|
||||
}
|
||||
],
|
||||
"rule": "cpu_load.avg < load_threshold",
|
||||
"hint": "This job was detected as low CPU load: average cpu load {{.cpu_load.avg}} is below the threshold {{.load_threshold}} ({{.lowcpuload_threshold_factor}})."
|
||||
}
|
||||
22
configs/tagger/jobclasses/memoryBound.json
Normal file
22
configs/tagger/jobclasses/memoryBound.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "Memory bandwidth bound",
|
||||
"tag": "memorybound",
|
||||
"parameters": ["membound_bw_threshold_factor", "job_min_duration_seconds"],
|
||||
"metrics": ["mem_bw"],
|
||||
"requirements": [
|
||||
"job.shared == \"none\"",
|
||||
"job.duration > job_min_duration_seconds"
|
||||
],
|
||||
"variables": [
|
||||
{
|
||||
"name": "mem_bw_threshold",
|
||||
"expr": "mem_bw.limits.peak * membound_bw_threshold_factor"
|
||||
},
|
||||
{
|
||||
"name": "mem_bw_pct",
|
||||
"expr": "mem_bw.avg / mem_bw.limits.peak * 100.0"
|
||||
}
|
||||
],
|
||||
"rule": "mem_bw.avg > mem_bw_threshold",
|
||||
"hint": "This job is memory bandwidth bound: memory bandwidth {{.mem_bw.avg}} GB/s ({{.mem_bw_pct}}% of peak) is within {{.membound_bw_threshold_factor}} of peak bandwidth. Consider improving data reuse or compute intensity."
|
||||
}
|
||||
15
configs/tagger/jobclasses/parameters.json
Normal file
15
configs/tagger/jobclasses/parameters.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"lowcpuload_threshold_factor": 0.85,
|
||||
"excessivecpuload_threshold_factor": 1.2,
|
||||
"highmemoryusage_threshold_factor": 0.9,
|
||||
"node_load_imbalance_threshold_factor": 0.1,
|
||||
"core_load_imbalance_threshold_factor": 0.1,
|
||||
"high_memory_load_threshold_factor": 0.9,
|
||||
"lowgpuload_threshold_factor": 0.7,
|
||||
"membound_bw_threshold_factor": 0.8,
|
||||
"memory_leak_slope_threshold": 0.1,
|
||||
"job_min_duration_seconds": 600.0,
|
||||
"sampling_interval_seconds": 30.0,
|
||||
"cpu_load_pre_cutoff_samples": 11.0,
|
||||
"cpu_load_core_pre_cutoff_samples": 6.0
|
||||
}
|
||||
45
configs/uiConfig.json
Normal file
45
configs/uiConfig.json
Normal file
@@ -0,0 +1,45 @@
|
||||
{
|
||||
"job-list": {
|
||||
"use-paging": false,
|
||||
"show-footprint":false
|
||||
},
|
||||
"job-view": {
|
||||
"show-polar-plot": true,
|
||||
"show-footprint": true,
|
||||
"show-roofline": true,
|
||||
"show-stat-table": true
|
||||
},
|
||||
"metric-config": {
|
||||
"job-list-metrics": ["mem_bw", "flops_dp"],
|
||||
"job-view-plot-metrics": ["mem_bw", "flops_dp"],
|
||||
"job-view-table-metrics": ["mem_bw", "flops_dp"],
|
||||
"clusters": [
|
||||
{
|
||||
"name": "test",
|
||||
"sub-clusters": [
|
||||
{
|
||||
"name": "one",
|
||||
"job-list-metrics": ["mem_used", "flops_sp"]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"node-list": {
|
||||
"use-paging": true
|
||||
},
|
||||
"plot-configuration": {
|
||||
"plots-per-row": 3,
|
||||
"color-background": true,
|
||||
"line-width": 3,
|
||||
"color-scheme": [
|
||||
"#00bfff",
|
||||
"#0000ff",
|
||||
"#ff00ff",
|
||||
"#ff0000",
|
||||
"#ff8000",
|
||||
"#ffff00",
|
||||
"#80ff00"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
# Release versioning
|
||||
|
||||
Releases are numbered with an integer ID, starting with 1.
|
||||
Each release embeds the following assets in the binary:
|
||||
* Web front-end with Javascript files and all static assets.
|
||||
* Golang template files for server-side rendering.
|
||||
* JSON schema files for validation.
|
||||
* Database migration files
|
||||
|
||||
The remaining external assets are:
|
||||
* The SQL database used
|
||||
* The job archive
|
||||
* The configuration file `config.json`
|
||||
|
||||
Both external assets are also versioned with integer IDs.
|
||||
This means that each release binary is bound to specific versions of the SQL
|
||||
database and the job archive.
|
||||
The configuration file is validated against the current schema on startup.
|
||||
The command line switch `-migrate-db` can be used to upgrade the SQL database
|
||||
to migrate from a previous version to the latest one.
|
||||
We offer a separate tool `archive-migration` to migrate an existing job archive
|
||||
archive from the previous to the latest version.
|
||||
|
||||
# Versioning of APIs
|
||||
|
||||
cc-backend provides two API backends:
|
||||
* A REST API for querying jobs
|
||||
* A GraphQL API for data exchange between web frontend and cc-backend
|
||||
|
||||
Both APIs will also be versioned. We still need to decide wether we will also support
|
||||
older REST API version by versioning the endpoint URLs.
|
||||
|
||||
# How to build
|
||||
|
||||
Please always build `cc-backend` with the supplied Makefile. This will ensure
|
||||
that the frontend is also built correctly and that the version in the binary file is coded
|
||||
in the binary.
|
||||
239
docs/Hands-on.md
239
docs/Hands-on.md
@@ -1,239 +0,0 @@
|
||||
# CC-HANDSON - Setup ClusterCockpit from scratch (w/o docker)
|
||||
|
||||
## Prerequisites
|
||||
* Perl
|
||||
* Yarn
|
||||
* Go
|
||||
* Optional: curl
|
||||
* Script migrateTimestamp.pl
|
||||
|
||||
## Documentation
|
||||
You find READMEs or api docs in
|
||||
* ./cc-backend/configs
|
||||
* ./cc-backend/init
|
||||
* ./cc-backend/api
|
||||
|
||||
## ClusterCockpit configuration files
|
||||
### cc-backend
|
||||
* `./.env` Passwords and Tokens set in the environment
|
||||
* `./config.json` Configuration options for cc-backend
|
||||
|
||||
### cc-metric-store
|
||||
* `./config.json` Optional to overwrite configuration options
|
||||
|
||||
### cc-metric-collector
|
||||
Not yet included in the hands-on setup.
|
||||
|
||||
## Setup Components
|
||||
Start by creating a base folder for all of the following steps.
|
||||
* `mkdir clustercockpit`
|
||||
* `cd clustercockpit`
|
||||
|
||||
### Setup cc-backend
|
||||
* Clone Repository
|
||||
- `git clone https://github.com/ClusterCockpit/cc-backend.git`
|
||||
- `cd cc-backend`
|
||||
* Setup Frontend
|
||||
- `cd ./web/frontend`
|
||||
- `yarn install`
|
||||
- `yarn build`
|
||||
- `cd ../..`
|
||||
* Build Go Executable
|
||||
- `go build ./cmd/cc-backend/`
|
||||
* Activate & Config environment for cc-backend
|
||||
- `cp configs/env-template.txt .env`
|
||||
- Optional: Have a look via `vim ./.env`
|
||||
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
|
||||
* Back to toplevel `clustercockpit`
|
||||
- `cd ..`
|
||||
* Prepare Datafolder and Database file
|
||||
- `mkdir var`
|
||||
- `./cc-backend --migrate-db`
|
||||
|
||||
### Setup cc-metric-store
|
||||
* Clone Repository
|
||||
- `git clone https://github.com/ClusterCockpit/cc-metric-store.git`
|
||||
- `cd cc-metric-store`
|
||||
* Build Go Executable
|
||||
- `go get`
|
||||
- `go build`
|
||||
* Prepare Datafolders
|
||||
- `mkdir -p var/checkpoints`
|
||||
- `mkdir -p var/archive`
|
||||
* Update Config
|
||||
- `vim config.json`
|
||||
- Exchange existing setting in `metrics` with the following:
|
||||
```
|
||||
"clock": { "frequency": 60, "aggregation": null },
|
||||
"cpi": { "frequency": 60, "aggregation": null },
|
||||
"cpu_load": { "frequency": 60, "aggregation": null },
|
||||
"flops_any": { "frequency": 60, "aggregation": null },
|
||||
"flops_dp": { "frequency": 60, "aggregation": null },
|
||||
"flops_sp": { "frequency": 60, "aggregation": null },
|
||||
"ib_bw": { "frequency": 60, "aggregation": null },
|
||||
"lustre_bw": { "frequency": 60, "aggregation": null },
|
||||
"mem_bw": { "frequency": 60, "aggregation": null },
|
||||
"mem_used": { "frequency": 60, "aggregation": null },
|
||||
"rapl_power": { "frequency": 60, "aggregation": null }
|
||||
```
|
||||
* Back to toplevel `clustercockpit`
|
||||
- `cd ..`
|
||||
|
||||
### Setup Demo Data
|
||||
* `mkdir source-data`
|
||||
* `cd source-data`
|
||||
* Download JobArchive-Source:
|
||||
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz`
|
||||
- `tar xJf job-archive-dev.tar.xz`
|
||||
- `mv ./job-archive ./job-archive-source`
|
||||
- `rm ./job-archive-dev.tar.xz`
|
||||
* Download CC-Metric-Store Checkpoints:
|
||||
- `mkdir -p cc-metric-store-source/checkpoints`
|
||||
- `cd cc-metric-store-source/checkpoints`
|
||||
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz`
|
||||
- `tar xf cc-metric-store-checkpoints.tar.xz`
|
||||
- `rm cc-metric-store-checkpoints.tar.xz`
|
||||
* Back to `source-data`
|
||||
- `cd ../..`
|
||||
* Run timestamp migration script. This may take tens of minutes!
|
||||
- `cp ../migrateTimestamps.pl .`
|
||||
- `./migrateTimestamps.pl`
|
||||
- Expected output:
|
||||
```
|
||||
Starting to update start- and stoptimes in job-archive for emmy
|
||||
Starting to update start- and stoptimes in job-archive for woody
|
||||
Done for job-archive
|
||||
Starting to update checkpoint filenames and data starttimes for emmy
|
||||
Starting to update checkpoint filenames and data starttimes for woody
|
||||
Done for checkpoints
|
||||
```
|
||||
* Copy `cluster.json` files from source to migrated folders
|
||||
- `cp source-data/job-archive-source/emmy/cluster.json cc-backend/var/job-archive/emmy/`
|
||||
- `cp source-data/job-archive-source/woody/cluster.json cc-backend/var/job-archive/woody/`
|
||||
* Initialize Job-Archive in SQLite3 job.db and add demo user
|
||||
- `cd cc-backend`
|
||||
- `./cc-backend --init-db --add-user demo:admin:AdminDev`
|
||||
- Expected output:
|
||||
```
|
||||
<6>[INFO] new user "demo" created (roles: ["admin"], auth-source: 0)
|
||||
<6>[INFO] Building job table...
|
||||
<6>[INFO] A total of 3936 jobs have been registered in 1.791 seconds.
|
||||
```
|
||||
* Back to toplevel `clustercockpit`
|
||||
- `cd ..`
|
||||
|
||||
### Startup both Apps
|
||||
* In cc-backend root: `$./cc-backend --server --dev`
|
||||
- Starts Clustercockpit at `http:localhost:8080`
|
||||
- Log: `<6>[INFO] HTTP server listening at :8080...`
|
||||
- Use local internet browser to access interface
|
||||
- You should see and be able to browse finished Jobs
|
||||
- Metadata is read from SQLite3 database
|
||||
- Metricdata is read from job-archive/JSON-Files
|
||||
- Create User in settings (top-right corner)
|
||||
- Name `apiuser`
|
||||
- Username `apiuser`
|
||||
- Role `API`
|
||||
- Submit & Refresh Page
|
||||
- Create JTW for `apiuser`
|
||||
- In Userlist, press `Gen. JTW` for `apiuser`
|
||||
- Save JWT for later use
|
||||
* In cc-metric-store root: `$./cc-metric-store`
|
||||
- Start the cc-metric-store on `http:localhost:8081`, Log:
|
||||
```
|
||||
2022/07/15 17:17:42 Loading checkpoints newer than 2022-07-13T17:17:42+02:00
|
||||
2022/07/15 17:17:45 Checkpoints loaded (5621 files, 319 MB, that took 3.034652s)
|
||||
2022/07/15 17:17:45 API http endpoint listening on '0.0.0.0:8081'
|
||||
```
|
||||
- Does *not* have a graphical interface
|
||||
- Otpional: Test function by executing:
|
||||
```
|
||||
$ curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -D - "http://localhost:8081/api/query" -d "{ \"cluster\": \"emmy\", \"from\": $(expr $(date +%s) - 60), \"to\": $(date +%s), \"queries\": [{
|
||||
\"metric\": \"flops_any\",
|
||||
\"host\": \"e1111\"
|
||||
}] }"
|
||||
|
||||
HTTP/1.1 200 OK
|
||||
Content-Type: application/json
|
||||
Date: Fri, 15 Jul 2022 13:57:22 GMT
|
||||
Content-Length: 119
|
||||
{"results":[[JSON-DATA-ARRAY]]}
|
||||
```
|
||||
|
||||
### Development API web interfaces
|
||||
The `--dev` flag enables web interfaces to document and test the apis:
|
||||
* http://localhost:8080/playground - A GraphQL playground. To use it you must have a authenticated session in the same browser.
|
||||
* http://localhost:8080/swagger - A Swagger UI. To use it you have to be logged out, so no user session in the same browser. Use the JWT token with role Api generate previously to authenticate via http header.
|
||||
|
||||
### Use cc-backend API to start job
|
||||
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
|
||||
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
|
||||
* Click the `/job/start_job` endpoint and click the Try it out button.
|
||||
* Enter the following json into the request body text area and fill in a recent start timestamp by executing `date +%s`.:
|
||||
```
|
||||
{
|
||||
"jobId": 100000,
|
||||
"arrayJobId": 0,
|
||||
"user": "ccdemouser",
|
||||
"subCluster": "main",
|
||||
"cluster": "emmy",
|
||||
"startTime": <date +%s>,
|
||||
"project": "ccdemoproject",
|
||||
"resources": [
|
||||
{"hostname": "e0601"},
|
||||
{"hostname": "e0823"},
|
||||
{"hostname": "e0337"},
|
||||
{"hostname": "e1111"}],
|
||||
"numNodes": 4,
|
||||
"numHwthreads": 80,
|
||||
"walltime": 86400
|
||||
}
|
||||
```
|
||||
* The response body should be the database id of the started job, for example:
|
||||
```
|
||||
{
|
||||
"id": 3937
|
||||
}
|
||||
```
|
||||
* Check in ClusterCockpit
|
||||
- User `ccdemouser` should appear in Users-Tab with one running job
|
||||
- It could take up to 5 Minutes until the Job is displayed with some current data (5 Min Short-Job Filter)
|
||||
- Job then is marked with a green `running` tag
|
||||
- Metricdata displayed is read from cc-metric-store!
|
||||
|
||||
|
||||
### Use cc-backend API to stop job
|
||||
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
|
||||
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
|
||||
* Click the `/job/stop_job/{id}` endpoint and click the Try it out button.
|
||||
* Enter the database id at id that was returned by `start_job` and copy the following into the request body. Replace the timestamp with a recent one:
|
||||
```
|
||||
{
|
||||
"cluster": "emmy",
|
||||
"jobState": "completed",
|
||||
"stopTime": <RECENT TS>
|
||||
}
|
||||
```
|
||||
* On success a json document with the job meta data is returned.
|
||||
|
||||
* Check in ClusterCockpit
|
||||
- User `ccdemouser` should appear in Users-Tab with one completed job
|
||||
- Job is no longer marked with a green `running` tag -> Completed!
|
||||
- Metricdata displayed is now read from job-archive!
|
||||
* Check in job-archive
|
||||
- `cd ./cc-backend/var/job-archive/emmy/100/000`
|
||||
- `cd $STARTTIME`
|
||||
- Inspect `meta.json` and `data.json`
|
||||
|
||||
## Helper scripts
|
||||
* In this tarball you can find the perl script `generate_subcluster.pl` that helps to generate the subcluster section for your system.
|
||||
Usage:
|
||||
* Log into an exclusive cluster node.
|
||||
* The LIKWID tools likwid-topology and likwid-bench must be in the PATH!
|
||||
* `$./generate_subcluster.pl` outputs the subcluster section on `stdout`
|
||||
|
||||
Please be aware that
|
||||
* You have to enter the name and node list for the subCluster manually.
|
||||
* GPU detection only works if LIKWID was build with Cuda avalable and you run likwid-topology also with Cuda loaded.
|
||||
* Do not blindly trust the measured peakflops values.
|
||||
* Because the script blindly relies on the CSV format output by likwid-topology this is a fragile undertaking!
|
||||
@@ -1,82 +0,0 @@
|
||||
## Introduction
|
||||
|
||||
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs.
|
||||
JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.
|
||||
This information can be verified and trusted because it is digitally signed.
|
||||
In ClusterCockpit JWTs are signed using a public/private key pair using ECDSA.
|
||||
Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it.
|
||||
Currently JWT tokens in ClusterCockpit not yet expire.
|
||||
|
||||
## JWT Payload
|
||||
|
||||
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
|
||||
Currently ClusterCockpit sets the following claims:
|
||||
* `iat`: Issued at claim. The “iat” claim is used to identify the the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
|
||||
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
|
||||
* `roles`: An array of strings specifying the roles set for the subject.
|
||||
|
||||
## Workflow
|
||||
|
||||
1. Create a new ECDSA Public/private keypair:
|
||||
```
|
||||
$ go build ./tools/gen-keypair.go
|
||||
$ ./gen-keypair
|
||||
```
|
||||
2. Add keypair in your `.env` file. A template can be found in `./configs`.
|
||||
|
||||
There are two usage scenarios:
|
||||
* The APIs are used during a browser session. In this case on login a JWT token is issued on login, that is used by the web frontend to authorize against the GraphQL and REST APIs.
|
||||
* The REST API is used outside a browser session, e.g. by scripts. In this case you have to issue a token manually. This possible from within the configuration view or on the command line. It is recommended to issue a JWT token in this case for a special user that only has the `api` role. By using different users for different purposes a fine grained access control and access revocation management is possible.
|
||||
|
||||
The token is commonly specified in the Authorization HTTP header using the Bearer schema.
|
||||
|
||||
## Setup user and JWT token for REST API authorization
|
||||
|
||||
1. Create user:
|
||||
```
|
||||
$ ./cc-backend --add-user <username>:api:<Password> --no-server
|
||||
```
|
||||
2. Issue token for user:
|
||||
```
|
||||
$ ./cc-backend -jwt <username> -no-server
|
||||
```
|
||||
3. Use issued token token on client side:
|
||||
```
|
||||
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
|
||||
```
|
||||
|
||||
## Accept externally generated JWTs provided via cookie
|
||||
If there is an external service like an AuthAPI that can generate JWTs and hand them over to ClusterCockpit via cookies, CC can be configured to accept them:
|
||||
|
||||
1. `.env`: CC needs a public ed25519 key to verify foreign JWT signatures. Public keys in PEM format can be converted with the instructions in [/tools/convert-pem-pubkey-for-cc](../tools/convert-pem-pubkey-for-cc/Readme.md) .
|
||||
|
||||
```
|
||||
CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc="
|
||||
```
|
||||
|
||||
2. `config.json`: Insert a name for the cookie (set by the external service) containing the JWT so that CC knows where to look at. Define a trusted issuer (JWT claim 'iss'), otherwise it will be rejected.
|
||||
If you want usernames and user roles from JWTs ('sub' and 'roles' claim) to be validated against CC's internal database, you need to enable it here. Unknown users will then be rejected and roles set via JWT will be ignored.
|
||||
|
||||
```json
|
||||
"jwts": {
|
||||
"cookieName": "access_cc",
|
||||
"forceJWTValidationViaDatabase": true,
|
||||
"trustedExternalIssuer": "auth.example.com"
|
||||
}
|
||||
```
|
||||
|
||||
3. Make sure your external service includes the same issuer (`iss`) in its JWTs. Example JWT payload:
|
||||
|
||||
```json
|
||||
{
|
||||
"iat": 1668161471,
|
||||
"nbf": 1668161471,
|
||||
"exp": 1668161531,
|
||||
"sub": "alice",
|
||||
"roles": [
|
||||
"user"
|
||||
],
|
||||
"jti": "a1b2c3d4-1234-5678-abcd-a1b2c3d4e5f6",
|
||||
"iss": "auth.example.com"
|
||||
}
|
||||
```
|
||||
@@ -1,78 +0,0 @@
|
||||
The job archive specifies an exchange format for job meta and performance metric
|
||||
data. It consists of two parts:
|
||||
* a [SQLite database schema](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#sqlite-database-schema) for job meta data and performance statistics
|
||||
* a [Json file format](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#json-file-format) together with a [Directory hierarchy specification](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#directory-hierarchy-specification)
|
||||
|
||||
By using an open, portable and simple specification based on files it is
|
||||
possible to exchange job performance data for research and analysis purposes as
|
||||
well as use it as a robust way for archiving job performance data to disk.
|
||||
|
||||
# SQLite database schema
|
||||
## Introduction
|
||||
|
||||
A SQLite 3 database schema is provided to standardize the job meta data
|
||||
information in a portable way. The schema also includes optional columns for job
|
||||
performance statistics (called a job performance footprint). The database acts
|
||||
as a front end to filter and select subsets of job IDs, that are the keys to get
|
||||
the full job performance data in the job performance tree hierarchy.
|
||||
|
||||
## Database schema
|
||||
|
||||
The schema includes 3 tables: the job table, a tag table and a jobtag table
|
||||
representing the MANY-TO-MANY relation between jobs and tags. The SQL schema is
|
||||
specified
|
||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/schemas/jobs-sqlite.sql).
|
||||
Explanation of the various columns including the JSON datatypes is documented
|
||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/job-meta.schema.json).
|
||||
|
||||
# Directory hierarchy specification
|
||||
|
||||
## Specification
|
||||
|
||||
To manage the number of directories within a single directory a tree approach is
|
||||
used splitting the integer job ID. The job id is split in junks of 1000 each.
|
||||
Usually 2 layers of directories is sufficient but the concept can be used for an
|
||||
arbitrary number of layers.
|
||||
|
||||
For a 2 layer schema this can be achieved with (code example in Perl):
|
||||
``` perl
|
||||
$level1 = $jobID/1000;
|
||||
$level2 = $jobID%1000;
|
||||
$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2);
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
For the job ID 1034871 the directory path is `./1034/871/`.
|
||||
|
||||
# Json file format
|
||||
## Overview
|
||||
|
||||
Every cluster must be configured in a `cluster.json` file.
|
||||
|
||||
The job data consists of two files:
|
||||
* `meta.json`: Contains job meta information and job statistics.
|
||||
* `data.json`: Contains complete job data with time series
|
||||
|
||||
The description of the json format specification is available as [[json
|
||||
schema|https://json-schema.org/]] format file. The latest version of the json
|
||||
schema is part of the `cc-backend` source tree. For external reference it is
|
||||
also available in a separate repository.
|
||||
|
||||
## Specification `cluster.json`
|
||||
|
||||
The json schema specification is available
|
||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/cluster.schema.json).
|
||||
|
||||
## Specification `meta.json`
|
||||
|
||||
The json schema specification is available
|
||||
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-meta.schema.json).
|
||||
|
||||
## Specification `data.json`
|
||||
|
||||
The json schema specification is available
|
||||
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-data.schema.json).
|
||||
Metric time series data is stored for a fixed time step. The time step is set
|
||||
per metric. If no value is available for a metric time series data timestamp
|
||||
`null` is entered.
|
||||
@@ -1,13 +0,0 @@
|
||||
# Overview
|
||||
|
||||
Customizing `cc-backend` means changing the logo and certain legal texts
|
||||
instead of the placeholders. To change the logo displayed in the navigation bar, the
|
||||
file `web/frontend/public/img/logo.png` in the source tree must be replaced
|
||||
and cc-backend must be rebuild.
|
||||
|
||||
# Replace legal texts
|
||||
|
||||
To replace the `imprint.tmpl` and `privacy.tmpl` legal texts, you can place your
|
||||
version in `./var/`. At startup `cc-backend` will check if `./var/imprint.tmpl` and/or
|
||||
`./var/privacy.tmpl` exist and use them instead of the built-in placeholders.
|
||||
You can use the placeholders in `web/templates` as a blueprint.
|
||||
@@ -1,78 +0,0 @@
|
||||
In general, an upgrade is nothing more than a replacement of the binary file.
|
||||
All the necessary files, except the database file, the configuration file and
|
||||
the job archive, are embedded in the binary file. It is recommended to use a
|
||||
directory where the file names of the binary files are named with a version
|
||||
indicator. This can be, for example, the date or the Unix epoch time. A symbolic
|
||||
link points to the version to be used. This makes it easier to switch to earlier
|
||||
versions.
|
||||
|
||||
The database and the job archive are versioned. Each release binary supports
|
||||
specific versions of the database and job archive. If a version mismatch is
|
||||
detected, the application is terminated and migration is required.
|
||||
|
||||
**IMPORTANT NOTE**
|
||||
|
||||
It is recommended to make a backup copy of the database before each update. This
|
||||
is mandatory in case the database needs to be migrated. In the case of sqlite,
|
||||
this means to stopping `cc-backend` and copying the sqlite database file
|
||||
somewhere.
|
||||
|
||||
# Migrating the database
|
||||
|
||||
After you have backed up the database, run the following command to migrate the
|
||||
database to the latest version:
|
||||
```
|
||||
$ ./cc-backend -migrate-db
|
||||
```
|
||||
|
||||
The migration files are embedded in the binary and can also be viewed in the cc
|
||||
backend [source tree](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository/migrations).
|
||||
There are separate migration files for both supported
|
||||
database backends.
|
||||
We use the [migrate library](https://github.com/golang-migrate/migrate).
|
||||
|
||||
If something goes wrong, you can check the status and get the current schema
|
||||
(here for sqlite):
|
||||
```
|
||||
$ sqlite3 var/job.db
|
||||
```
|
||||
In the sqlite console execute:
|
||||
```
|
||||
.schema
|
||||
```
|
||||
to get the current databse schema.
|
||||
You can query the current version and whether the migration failed with:
|
||||
```
|
||||
SELECT * FROM schema_migrations;
|
||||
```
|
||||
The first column indicates the current database version and the second column is
|
||||
a dirty flag indicating whether the migration was successful.
|
||||
|
||||
# Migrating the job archive
|
||||
|
||||
Job archive migration requires a separate tool (`archive-migration`), which is
|
||||
part of the cc-backend source tree (build with `go build ./tools/archive-migration`)
|
||||
and is also provided as part of the releases.
|
||||
|
||||
Migration is supported only between two successive releases. The migration tool
|
||||
migrates the existing job archive to a new job archive. This means that there
|
||||
must be enough disk space for two complete job archives. If the tool is called
|
||||
without options:
|
||||
```
|
||||
$ ./archive-migration
|
||||
```
|
||||
|
||||
it is assumed that a job archive exists in `./var/job-archive`. The new job
|
||||
archive is written to `./var/job-archive-new`. Since execution is threaded in case
|
||||
of a fatal error, it is impossible to determine in which job the error occurred.
|
||||
In this case, you can run the tool in debug mode (with the `-debug` flag). In
|
||||
debug mode, threading is disabled and the job ID of each migrated job is output.
|
||||
Jobs with empty files will be skipped. Between multiple runs of the tools, the
|
||||
`job-archive-new` directory must be moved or deleted.
|
||||
|
||||
The `cluster.json` files in `job-archive-new` must be checked for errors, especially
|
||||
whether the aggregation attribute is set correctly for all metrics.
|
||||
|
||||
Migration takes several hours for relatively large job archives (several hundred
|
||||
GB). A versioned job archive contains a version.txt file in the root directory
|
||||
of the job archive. This file contains the version as an unsigned integer.
|
||||
@@ -1,33 +0,0 @@
|
||||
## Tips for frontend development
|
||||
|
||||
The frontend assets including the Svelte js files are per default embedded in
|
||||
the bgo binary. To enable a quick turnaround cycle for web development of the
|
||||
frontend disable embedding of static assets in `config.json`:
|
||||
```
|
||||
"embed-static-files": false,
|
||||
"static-files": "./web/frontend/public/",
|
||||
|
||||
```
|
||||
|
||||
Start the node build process (in directory `./web/frontend`) in development mode:
|
||||
```
|
||||
$ npm run dev
|
||||
```
|
||||
|
||||
This will start the build process in listen mode. Whenever you change a source
|
||||
files the depending javascript targets will be automatically rebuild.
|
||||
In case the javascript files are minified you may need to set the production
|
||||
flag by hand to false in `./web/frontend/rollup.config.mjs`:
|
||||
```
|
||||
const production = false
|
||||
```
|
||||
|
||||
Usually this should work automatically.
|
||||
|
||||
Because the files are still served by ./cc-backend you have to reload the view
|
||||
explicitly in your browser.
|
||||
|
||||
A common setup is to have three terminals open:
|
||||
* One running cc-backend (working directory repository root): `./cc-backend -server -dev`
|
||||
* Another running npm in developer mode (working directory `./web/frontend`): `npm run dev`
|
||||
* And the last one editing the frontend source files
|
||||
@@ -1,34 +0,0 @@
|
||||
## Overview
|
||||
|
||||
We use the standard golang testing environment.
|
||||
|
||||
The following conventions are used:
|
||||
|
||||
* *White box unit tests*: Tests for internal functionality are placed in files
|
||||
* *Black box unit tests*: Tests for public interfaces are placed in files
|
||||
with `<package name>_test.go` and belong to the package `<package_name>_test`.
|
||||
There only exists one package test file per package.
|
||||
* *Integration tests*: Tests that use multiple componenents are placed in a
|
||||
package test file. These are named `<package name>_test.go` and belong to the
|
||||
package `<package_name>_test`.
|
||||
* *Test assets*: Any required files are placed in a directory `./testdata`
|
||||
within each package directory.
|
||||
|
||||
## Executing tests
|
||||
|
||||
Visual Studio Code has a very good golang test integration.
|
||||
For debugging a test this is the recommended solution.
|
||||
|
||||
The Makefile provided by us has a `test` target that executes:
|
||||
```
|
||||
$ go clean -testcache
|
||||
$ go build ./...
|
||||
$ go vet ./...
|
||||
$ go test ./...
|
||||
```
|
||||
|
||||
Of course the commands can also be used on the command line.
|
||||
For details about golang testing refer to the standard documentation:
|
||||
|
||||
* [Testing package](https://pkg.go.dev/testing)
|
||||
* [go test command](https://pkg.go.dev/cmd/go#hdr-Test_packages)
|
||||
@@ -1,229 +0,0 @@
|
||||
#!/usr/bin/env perl
|
||||
use strict;
|
||||
use warnings;
|
||||
use utf8;
|
||||
|
||||
use JSON::PP; # from Perl default install
|
||||
use Time::Local qw( timelocal ); # from Perl default install
|
||||
use Time::Piece; # from Perl default install
|
||||
|
||||
### JSON
|
||||
my $json = JSON::PP->new->allow_nonref;
|
||||
|
||||
### TIME AND DATE
|
||||
# now
|
||||
my $localtime = localtime;
|
||||
my $epochtime = $localtime->epoch;
|
||||
# 5 days ago: Via epoch due to possible reverse month borders
|
||||
my $epochlessfive = $epochtime - (86400 * 5);
|
||||
my $locallessfive = localtime($epochlessfive);
|
||||
# Calc like `date --date 'TZ="Europe/Berlin" 0:00 5 days ago' +%s`)
|
||||
my ($day, $month, $year) = ($locallessfive->mday, $locallessfive->_mon, $locallessfive->year);
|
||||
my $checkpointStart = timelocal(0, 0, 0, $day, $month, $year);
|
||||
# for checkpoints
|
||||
my $halfday = 43200;
|
||||
|
||||
### JOB-ARCHIVE
|
||||
my $archiveTarget = './cc-backend/var/job-archive';
|
||||
my $archiveSrc = './source-data/job-archive-source';
|
||||
my @ArchiveClusters;
|
||||
|
||||
# Gen folder
|
||||
if ( not -d $archiveTarget ){
|
||||
mkdir( $archiveTarget ) or die "Couldn't create $archiveTarget directory, $!";
|
||||
}
|
||||
|
||||
# Get clusters by job-archive/$subfolder
|
||||
opendir my $dh, $archiveSrc or die "can't open directory: $!";
|
||||
while ( readdir $dh ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
|
||||
my $cluster = $_;
|
||||
push @ArchiveClusters, $cluster;
|
||||
}
|
||||
|
||||
# start for jobarchive
|
||||
foreach my $cluster ( @ArchiveClusters ) {
|
||||
print "Starting to update start- and stoptimes in job-archive for $cluster\n";
|
||||
|
||||
my $clusterTarget = "$archiveTarget/$cluster";
|
||||
|
||||
if ( not -d $clusterTarget ){
|
||||
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
|
||||
}
|
||||
|
||||
opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
|
||||
while ( readdir $dhLevel1 ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
my $level1 = $_;
|
||||
|
||||
if ( -d "$archiveSrc/$cluster/$level1" ) {
|
||||
opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
|
||||
while ( readdir $dhLevel2 ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
my $level2 = $_;
|
||||
my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
|
||||
my $jobOrigin = "$jobSource";
|
||||
my $jobTargetL1 = "$clusterTarget/$level1";
|
||||
my $jobTargetL2 = "$jobTargetL1/$level2";
|
||||
|
||||
# check if files are directly accessible (old format) else get subfolders as file and update path
|
||||
if ( ! -e "$jobSource/meta.json") {
|
||||
opendir(D, "$jobSource") || die "Can't open directory $jobSource: $!\n";
|
||||
my @folders = readdir(D);
|
||||
closedir(D);
|
||||
if (!@folders) {
|
||||
next;
|
||||
}
|
||||
|
||||
foreach my $folder ( @folders ) {
|
||||
next if $folder eq '.' or $folder eq '..';
|
||||
$jobSource = "$jobSource/".$folder;
|
||||
}
|
||||
}
|
||||
# check if subfolder contains file, else skip
|
||||
if ( ! -e "$jobSource/meta.json") {
|
||||
print "$jobSource skipped\n";
|
||||
next;
|
||||
}
|
||||
|
||||
open my $metafh, '<', "$jobSource/meta.json" or die "Can't open file $!";
|
||||
my $rawstr = do { local $/; <$metafh> };
|
||||
close($metafh);
|
||||
my $metadata = $json->decode($rawstr);
|
||||
|
||||
# NOTE Start meta.json iteration here
|
||||
# my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT;
|
||||
# Set new startTime: Between 5 days and 1 day before now
|
||||
|
||||
# Remove id from attributes
|
||||
$metadata->{startTime} = $epochtime - (int(rand(432000)) + 86400);
|
||||
$metadata->{stopTime} = $metadata->{startTime} + $metadata->{duration};
|
||||
|
||||
# Add starttime subfolder to target path
|
||||
my $jobTargetL3 = "$jobTargetL2/".$metadata->{startTime};
|
||||
|
||||
if ( not -d $jobTargetL1 ){
|
||||
mkdir( $jobTargetL1 ) or die "Couldn't create $jobTargetL1 directory, $!";
|
||||
}
|
||||
|
||||
if ( not -d $jobTargetL2 ){
|
||||
mkdir( $jobTargetL2 ) or die "Couldn't create $jobTargetL2 directory, $!";
|
||||
}
|
||||
|
||||
# target is not directory
|
||||
if ( not -d $jobTargetL3 ){
|
||||
mkdir( $jobTargetL3 ) or die "Couldn't create $jobTargetL3 directory, $!";
|
||||
|
||||
my $outstr = $json->encode($metadata);
|
||||
open my $metaout, '>', "$jobTargetL3/meta.json" or die "Can't write to file $!";
|
||||
print $metaout $outstr;
|
||||
close($metaout);
|
||||
|
||||
open my $datafh, '<', "$jobSource/data.json" or die "Can't open file $!";
|
||||
my $datastr = do { local $/; <$datafh> };
|
||||
close($datafh);
|
||||
|
||||
open my $dataout, '>', "$jobTargetL3/data.json" or die "Can't write to file $!";
|
||||
print $dataout $datastr;
|
||||
close($dataout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
print "Done for job-archive\n";
|
||||
sleep(1);
|
||||
exit;
|
||||
|
||||
## CHECKPOINTS
|
||||
my $checkpTarget = './cc-metric-store/var/checkpoints';
|
||||
my $checkpSource = './source-data/cc-metric-store-source/checkpoints';
|
||||
my @CheckpClusters;
|
||||
|
||||
# Gen folder
|
||||
if ( not -d $checkpTarget ){
|
||||
mkdir( $checkpTarget ) or die "Couldn't create $checkpTarget directory, $!";
|
||||
}
|
||||
|
||||
# Get clusters by cc-metric-store/$subfolder
|
||||
opendir my $dhc, $checkpSource or die "can't open directory: $!";
|
||||
while ( readdir $dhc ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
|
||||
my $cluster = $_;
|
||||
push @CheckpClusters, $cluster;
|
||||
}
|
||||
closedir($dhc);
|
||||
|
||||
# start for checkpoints
|
||||
foreach my $cluster ( @CheckpClusters ) {
|
||||
print "Starting to update checkpoint filenames and data starttimes for $cluster\n";
|
||||
|
||||
my $clusterTarget = "$checkpTarget/$cluster";
|
||||
|
||||
if ( not -d $clusterTarget ){
|
||||
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
|
||||
}
|
||||
|
||||
opendir my $dhLevel1, "$checkpSource/$cluster" or die "can't open directory: $!";
|
||||
while ( readdir $dhLevel1 ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
# Nodename as level1-folder
|
||||
my $level1 = $_;
|
||||
|
||||
if ( -d "$checkpSource/$cluster/$level1" ) {
|
||||
|
||||
my $nodeSource = "$checkpSource/$cluster/$level1/";
|
||||
my $nodeOrigin = "$nodeSource";
|
||||
my $nodeTarget = "$clusterTarget/$level1";
|
||||
my @files;
|
||||
|
||||
if ( -e "$nodeSource/1609459200.json") { # 1609459200 == First Checkpoint time in latest dump
|
||||
opendir(D, "$nodeSource") || die "Can't open directory $nodeSource: $!\n";
|
||||
while ( readdir D ) {
|
||||
chomp; next if $_ eq '.' or $_ eq '..';
|
||||
my $nodeFile = $_;
|
||||
push @files, $nodeFile;
|
||||
}
|
||||
closedir(D);
|
||||
my $length = @files;
|
||||
if (!@files || $length != 14) { # needs 14 files == 7 days worth of data
|
||||
next;
|
||||
}
|
||||
} else {
|
||||
next;
|
||||
}
|
||||
|
||||
# sort for integer timestamp-filename-part (moduleless): Guarantees start with index == 0 == 1609459200.json
|
||||
my @sortedFiles = sort { ($a =~ /^([0-9]{10}).json$/)[0] <=> ($b =~ /^([0-9]{10}).json$/)[0] } @files;
|
||||
|
||||
if ( not -d $nodeTarget ){
|
||||
mkdir( $nodeTarget ) or die "Couldn't create $nodeTarget directory, $!";
|
||||
|
||||
while (my ($index, $file) = each(@sortedFiles)) {
|
||||
open my $checkfh, '<', "$nodeSource/$file" or die "Can't open file $!";
|
||||
my $rawstr = do { local $/; <$checkfh> };
|
||||
close($checkfh);
|
||||
my $checkpdata = $json->decode($rawstr);
|
||||
|
||||
my $newTimestamp = $checkpointStart + ($index * $halfday);
|
||||
# Get Diff from old Timestamp
|
||||
my $timeDiff = $newTimestamp - $checkpdata->{from};
|
||||
# Set new timestamp
|
||||
$checkpdata->{from} = $newTimestamp;
|
||||
|
||||
foreach my $metric (keys %{$checkpdata->{metrics}}) {
|
||||
$checkpdata->{metrics}->{$metric}->{start} += $timeDiff;
|
||||
}
|
||||
|
||||
my $outstr = $json->encode($checkpdata);
|
||||
|
||||
open my $checkout, '>', "$nodeTarget/$newTimestamp.json" or die "Can't write to file $!";
|
||||
print $checkout $outstr;
|
||||
close($checkout);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
closedir($dhLevel1);
|
||||
}
|
||||
print "Done for checkpoints\n";
|
||||
@@ -1,35 +0,0 @@
|
||||
# Docs for ClusterCockpit Searchbar
|
||||
|
||||
## Usage
|
||||
|
||||
* Searchtags are implemented as `type:<query>` search-string
|
||||
* Types `jobId, jobName, projectId, username, name` for roles `admin` and `support`
|
||||
* `jobName` is jobName as persisted in `job.meta_data` table-column
|
||||
* `username` is actual account identifier as persisted in `job.user` table-column
|
||||
* `name` is account owners name as persisted in `user.name` table-column
|
||||
* Types `jobId, jobName, projectId` for role `user`
|
||||
* Examples:
|
||||
* `jobName:myJob12`
|
||||
* `jobId:123456`
|
||||
* `username:abcd100`
|
||||
* `name:Paul`
|
||||
* If no searchTag used: Best guess search with the following hierarchy
|
||||
* `jobId -> username -> name -> projectId -> jobName`
|
||||
* Destinations:
|
||||
* JobId: Job-Table (Allows multiple identical matches, e.g. JobIds from different clusters)
|
||||
* JobName: Job-Table (Allows multiple identical matches, e.g. JobNames from different clusters)
|
||||
* ProjectId: Job-Table
|
||||
* Username: Users-Table
|
||||
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table.
|
||||
* Name: Users-Table
|
||||
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table.
|
||||
* Best guess search always redirects to Job-Table or `/monitoring/user/$USER` (first username match)
|
||||
* Unprocessable queries will redirect to `/monitoring/jobs/?`
|
||||
* Spaces trimmed (both for searchTag and queryString)
|
||||
* ` job12` == `job12`
|
||||
* `projectID : abcd ` == `projectId:abcd`
|
||||
* `jobName`- and `name-`queries work with a part of the target-string
|
||||
* `jobName:myjob` for jobName "myjob_cluster1"
|
||||
* `name:Paul` for name "Paul Atreides"
|
||||
|
||||
* JobName GQL Query is resolved as matching the query as a part of the whole metaData-JSON in the SQL DB.
|
||||
185
go.mod
185
go.mod
@@ -1,91 +1,124 @@
|
||||
module github.com/ClusterCockpit/cc-backend
|
||||
|
||||
go 1.18
|
||||
go 1.25.0
|
||||
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.17.24
|
||||
github.com/ClusterCockpit/cc-units v0.4.0
|
||||
github.com/Masterminds/squirrel v1.5.3
|
||||
github.com/go-ldap/ldap/v3 v3.4.4
|
||||
github.com/go-sql-driver/mysql v1.7.0
|
||||
github.com/golang-jwt/jwt/v4 v4.5.0
|
||||
github.com/golang-migrate/migrate/v4 v4.15.2
|
||||
github.com/google/gops v0.3.27
|
||||
github.com/gorilla/handlers v1.5.1
|
||||
github.com/gorilla/mux v1.8.0
|
||||
github.com/gorilla/sessions v1.2.1
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.12.2
|
||||
github.com/jmoiron/sqlx v1.3.5
|
||||
github.com/mattn/go-sqlite3 v1.14.16
|
||||
github.com/prometheus/client_golang v1.14.0
|
||||
github.com/prometheus/common v0.40.0
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
|
||||
github.com/swaggo/http-swagger v1.3.3
|
||||
github.com/swaggo/swag v1.8.10
|
||||
github.com/vektah/gqlparser/v2 v2.5.1
|
||||
golang.org/x/crypto v0.6.0
|
||||
tool (
|
||||
github.com/99designs/gqlgen
|
||||
github.com/swaggo/swag/cmd/swag
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||
github.com/99designs/gqlgen v0.17.86
|
||||
github.com/ClusterCockpit/cc-lib/v2 v2.7.0
|
||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0
|
||||
github.com/Masterminds/squirrel v1.5.4
|
||||
github.com/aws/aws-sdk-go-v2 v1.41.1
|
||||
github.com/aws/aws-sdk-go-v2/config v1.32.8
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.19.8
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.96.0
|
||||
github.com/coreos/go-oidc/v3 v3.17.0
|
||||
github.com/expr-lang/expr v1.17.8
|
||||
github.com/go-chi/chi/v5 v5.2.5
|
||||
github.com/go-chi/cors v1.2.2
|
||||
github.com/go-co-op/gocron/v2 v2.19.1
|
||||
github.com/go-ldap/ldap/v3 v3.4.12
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1
|
||||
github.com/golang-migrate/migrate/v4 v4.19.1
|
||||
github.com/google/gops v0.3.29
|
||||
github.com/gorilla/sessions v1.4.0
|
||||
github.com/jmoiron/sqlx v1.4.0
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/mattn/go-sqlite3 v1.14.34
|
||||
github.com/parquet-go/parquet-go v0.27.0
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
|
||||
github.com/stretchr/testify v1.11.1
|
||||
github.com/swaggo/http-swagger v1.3.4
|
||||
github.com/swaggo/swag v1.16.6
|
||||
github.com/vektah/gqlparser/v2 v2.5.31
|
||||
golang.org/x/crypto v0.48.0
|
||||
golang.org/x/oauth2 v0.35.0
|
||||
golang.org/x/time v0.14.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Azure/go-ntlmssp v0.1.0 // indirect
|
||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||
github.com/agnivade/levenshtein v1.1.1 // indirect
|
||||
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||
github.com/andybalholm/brotli v1.2.0 // indirect
|
||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/containerd/containerd v1.6.18 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
||||
github.com/deepmap/oapi-codegen v1.12.4 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.3 // indirect
|
||||
github.com/ghodss/yaml v1.0.0 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
|
||||
github.com/go-co-op/gocron v1.25.0 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.6 // indirect
|
||||
github.com/go-openapi/jsonreference v0.20.2 // indirect
|
||||
github.com/go-openapi/spec v0.20.8 // indirect
|
||||
github.com/go-openapi/swag v0.22.3 // indirect
|
||||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.1 // indirect
|
||||
github.com/gorilla/websocket v1.5.0 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/golang-lru v0.5.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.8 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.17 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/signin v1.0.5 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.9 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.14 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.41.6 // indirect
|
||||
github.com/aws/smithy-go v1.24.0 // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.1.3 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.22.4 // indirect
|
||||
github.com/go-openapi/jsonreference v0.21.4 // indirect
|
||||
github.com/go-openapi/spec v0.22.3 // indirect
|
||||
github.com/go-openapi/swag/conv v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/jsonname v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/jsonutils v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/loading v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/stringutils v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/typeutils v0.25.4 // indirect
|
||||
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
|
||||
github.com/goccy/go-yaml v1.19.2 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0 // indirect
|
||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/jpillora/backoff v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/kr/pretty v0.3.0 // indirect
|
||||
github.com/jonboulle/clockwork v0.5.0 // indirect
|
||||
github.com/klauspost/compress v1.18.4 // indirect
|
||||
github.com/kr/pretty v0.3.1 // indirect
|
||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
||||
github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/prometheus/client_model v0.3.0 // indirect
|
||||
github.com/prometheus/procfs v0.9.0 // indirect
|
||||
github.com/nats-io/nats.go v1.49.0 // indirect
|
||||
github.com/nats-io/nkeys v0.4.15 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/oapi-codegen/runtime v1.2.0 // indirect
|
||||
github.com/parquet-go/bitpack v1.0.0 // indirect
|
||||
github.com/parquet-go/jsonlite v1.4.0 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.25 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||
github.com/rogpeppe/go-internal v1.8.1 // indirect
|
||||
github.com/rogpeppe/go-internal v1.10.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/stretchr/testify v1.8.2 // indirect
|
||||
github.com/swaggo/files v1.0.0 // indirect
|
||||
github.com/urfave/cli/v2 v2.24.4 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
||||
go.uber.org/atomic v1.10.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea // indirect
|
||||
golang.org/x/mod v0.8.0 // indirect
|
||||
golang.org/x/net v0.7.0 // indirect
|
||||
golang.org/x/oauth2 v0.5.0 // indirect
|
||||
golang.org/x/sys v0.5.0 // indirect
|
||||
golang.org/x/text v0.7.0 // indirect
|
||||
golang.org/x/tools v0.6.0 // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/protobuf v1.28.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
github.com/sosodev/duration v1.3.1 // indirect
|
||||
github.com/stmcginnis/gofish v0.21.3 // indirect
|
||||
github.com/stretchr/objx v0.5.2 // indirect
|
||||
github.com/swaggo/files v1.0.1 // indirect
|
||||
github.com/twpayne/go-geom v1.6.1 // indirect
|
||||
github.com/urfave/cli/v2 v2.27.7 // indirect
|
||||
github.com/urfave/cli/v3 v3.6.1 // indirect
|
||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa // indirect
|
||||
golang.org/x/mod v0.33.0 // indirect
|
||||
golang.org/x/net v0.51.0 // indirect
|
||||
golang.org/x/sync v0.19.0 // indirect
|
||||
golang.org/x/sys v0.41.0 // indirect
|
||||
golang.org/x/text v0.34.0 // indirect
|
||||
golang.org/x/tools v0.42.0 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
sigs.k8s.io/yaml v1.6.0 // indirect
|
||||
)
|
||||
|
||||
63
gqlgen.yml
63
gqlgen.yml
@@ -30,7 +30,9 @@ resolver:
|
||||
# gqlgen will search for any type names in the schema in these go packages
|
||||
# if they match it will use them, otherwise it will generate them.
|
||||
autobind:
|
||||
- "github.com/99designs/gqlgen/graphql/introspection"
|
||||
- "github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
- "github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
|
||||
# This section declares type mapping between the GraphQL and go type systems
|
||||
#
|
||||
@@ -50,34 +52,51 @@ models:
|
||||
- github.com/99designs/gqlgen/graphql.Int64
|
||||
- github.com/99designs/gqlgen/graphql.Int32
|
||||
Job:
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Job"
|
||||
model: "github.com/ClusterCockpit/cc-lib/v2/schema.Job"
|
||||
fields:
|
||||
tags:
|
||||
resolver: true
|
||||
metaData:
|
||||
resolver: true
|
||||
Cluster:
|
||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Cluster"
|
||||
model: "github.com/ClusterCockpit/cc-lib/v2/schema.Cluster"
|
||||
fields:
|
||||
partitions:
|
||||
resolver: true
|
||||
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
||||
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
||||
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
||||
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
||||
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
|
||||
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
||||
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
||||
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
||||
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
||||
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
||||
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
|
||||
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
|
||||
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
||||
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
|
||||
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
||||
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
||||
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
||||
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
||||
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
||||
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
|
||||
# Node:
|
||||
# model: "github.com/ClusterCockpit/cc-lib/v2/schema.Node"
|
||||
# fields:
|
||||
# metaData:
|
||||
# resolver: true
|
||||
NullableFloat: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Float" }
|
||||
MetricScope: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricScope" }
|
||||
MetricValue: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricValue" }
|
||||
JobStatistics:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobStatistics" }
|
||||
GlobalMetricListItem:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.GlobalMetricListItem" }
|
||||
ClusterSupport:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.ClusterSupport" }
|
||||
Tag: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Tag" }
|
||||
Resource: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Resource" }
|
||||
JobState: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobState" }
|
||||
Node: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Node" }
|
||||
SchedulerState:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.SchedulerState" }
|
||||
HealthState:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.MonitoringState" }
|
||||
JobMetric: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.JobMetric" }
|
||||
Series: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Series" }
|
||||
MetricStatistics:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricStatistics" }
|
||||
MetricConfig:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.MetricConfig" }
|
||||
SubClusterConfig:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.SubClusterConfig" }
|
||||
Accelerator: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Accelerator" }
|
||||
Topology: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Topology" }
|
||||
FilterRanges:
|
||||
{ model: "github.com/ClusterCockpit/cc-lib/v2/schema.FilterRanges" }
|
||||
SubCluster: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.SubCluster" }
|
||||
StatsSeries: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.StatsSeries" }
|
||||
Unit: { model: "github.com/ClusterCockpit/cc-lib/v2/schema.Unit" }
|
||||
|
||||
102
init/README.md
102
init/README.md
@@ -1,71 +1,79 @@
|
||||
# How to run this as a systemd service
|
||||
# How to run `cc-backend` as a systemd service.
|
||||
|
||||
The files in this directory assume that you install ClusterCockpit to `/opt/monitoring`.
|
||||
Of course you can choose any other location, but make sure to replace all paths that begin with `/opt/monitoring` in the `clustercockpit.service` file!
|
||||
The files in this directory assume that you install ClusterCockpit to
|
||||
`/opt/monitoring/cc-backend`.
|
||||
Of course you can choose any other location, but make sure you replace all paths
|
||||
starting with `/opt/monitoring/cc-backend` in the `clustercockpit.service` file!
|
||||
|
||||
If you have not installed [yarn](https://yarnpkg.com/getting-started/install) and [go](https://go.dev/doc/install) already, do that (Golang is available in most package managers).
|
||||
It is recommended and easy to install the most recent stable version of Golang as every version also improves the Golang standard library.
|
||||
|
||||
The `config.json` can have the optional fields *user* and *group*.
|
||||
If provided, the application will call [setuid](https://man7.org/linux/man-pages/man2/setuid.2.html) and [setgid](https://man7.org/linux/man-pages/man2/setgid.2.html) after having read the config file and having bound to a TCP port (so that it can take a privileged port), but before it starts accepting any connections.
|
||||
This is good for security, but means that the directories `web/frontend/public`, `var/` and `web/templates/` must be readable by that user and `var/` writable as well (All paths relative to the repos root).
|
||||
The `.env` and `config.json` files might contain secrets and should not be readable by that user.
|
||||
If those files are changed, the server has to be restarted.
|
||||
The `config.json` may contain the optional fields *user* and *group*. If
|
||||
specified, the application will call
|
||||
[setuid](https://man7.org/linux/man-pages/man2/setuid.2.html) and
|
||||
[setgid](https://man7.org/linux/man-pages/man2/setgid.2.html) after reading the
|
||||
config file and binding to a TCP port (so it can take a privileged port), but
|
||||
before it starts accepting any connections. This is good for security, but also
|
||||
means that the `var/` directory must be readable and writeable by this user.
|
||||
The `.env` and `config.json` files may contain secrets and should not be
|
||||
readable by this user. If these files are changed, the server must be restarted.
|
||||
|
||||
```sh
|
||||
# 1.: Clone this repository to /opt/monitoring
|
||||
git clone git@github.com:ClusterCockpit/cc-backend.git /opt/monitoring
|
||||
# 1. Clone this repository somewhere in your home
|
||||
git clone git@github.com:ClusterCockpit/cc-backend.git <DSTDIR>
|
||||
|
||||
# 2.: Install all dependencies and build everything
|
||||
cd /mnt/monitoring
|
||||
go get && go build cmd/cc-backend && (cd ./web/frontend && yarn install && yarn build)
|
||||
# 2. (Optional) Install dependencies and build. In general it is recommended to use the provided release binaries.
|
||||
cd <DSTDIR>
|
||||
make
|
||||
sudo mkdir -p /opt/monitoring/cc-backend/
|
||||
cp ./cc-backend /opt/monitoring/cc-backend/
|
||||
|
||||
# 3.: Modify the `./config.json` and env-template.txt file from the configs directory to your liking and put it in the repo root
|
||||
cp ./configs/config.json ./config.json
|
||||
cp ./configs/env-template.txt ./.env
|
||||
vim ./config.json # do your thing...
|
||||
vim ./.env # do your thing...
|
||||
# 3. Modify the `./config.json` and env-template.txt file from the configs directory to your liking and put it in the target directory
|
||||
cp ./configs/config.json /opt/monitoring/config.json
|
||||
cp ./configs/env-template.txt /opt/monitoring/.env
|
||||
vim /opt/monitoring/config.json # do your thing...
|
||||
vim /opt/monitoring/.env # do your thing...
|
||||
|
||||
# 4.: Add the systemd service unit file (in case /opt/ is mounted on another file system it may be better to copy the file to /etc)
|
||||
sudo ln -s /mnt/monitoring/init/clustercockpit.service /etc/systemd/system/clustercockpit.service
|
||||
# 4. (Optional) Customization: Add your versions of the login view, legal texts, and logo image.
|
||||
# You may use the templates in `./web/templates` as blueprint. Every overwrite separate.
|
||||
cp login.tmpl /opt/monitoring/cc-backend/var/
|
||||
cp imprint.tmpl /opt/monitoring/cc-backend/var/
|
||||
cp privacy.tmpl /opt/monitoring/cc-backend/var/
|
||||
# Ensure your logo, and any images you use in your login template has a suitable size.
|
||||
cp -R img /opt/monitoring/cc-backend/img
|
||||
|
||||
# 5.: Enable and start the server
|
||||
# 5. Copy the systemd service unit file. You may adopt it to your needs.
|
||||
sudo cp ./init/clustercockpit.service /etc/systemd/system/clustercockpit.service
|
||||
|
||||
# 6. Enable and start the server
|
||||
sudo systemctl enable clustercockpit.service # optional (if done, (re-)starts automatically)
|
||||
sudo systemctl start clustercockpit.service
|
||||
|
||||
# Check whats going on:
|
||||
sudo systemctl status clustercockpit.service
|
||||
sudo journalctl -u clustercockpit.service
|
||||
```
|
||||
|
||||
# Recommended deployment workflow
|
||||
# Recommended workflow for deployment
|
||||
|
||||
It is recommended to install all ClusterCockpit components in a common durectory, this can be something like `/opt/monitoring`, `var/monitoring` or `var/clustercockpit`.
|
||||
In the following we are using `/opt/monitoring`.
|
||||
It is recommended to install all ClusterCockpit components in a common directory, e.g. `/opt/monitoring`, `var/monitoring` or `var/clustercockpit`.
|
||||
In the following we use `/opt/monitoring`.
|
||||
|
||||
Two systemd services are running on the central monitoring server:
|
||||
Two systemd services run on the central monitoring server:
|
||||
* clustercockpit : binary cc-backend in `/opt/monitoring/cc-backend`.
|
||||
* cc-metric-store : Binary cc-metric-store in `/opt/monitoring/cc-metric-store`.
|
||||
|
||||
clustercockpit : Binary cc-backend in `/opt/monitoring/cc-backend`
|
||||
cc-metric-store: Binary cc-metric-store in `/opt/monitoring/cc-metric-store`
|
||||
|
||||
ClusterCockpit is deployed as a single file binary that embeds all static assets.
|
||||
We recommend to keep all binaries in a folder `archive` and link the currently active from cc-backend root.
|
||||
This allows to easily roll-back in case something breaks.
|
||||
ClusterCockpit is deployed as a single binary that embeds all static assets.
|
||||
We recommend keeping all `cc-backend` binary versions in a folder `archive` and
|
||||
linking the currently active one from the `cc-backend` root.
|
||||
This allows for easy roll-back in case something doesn't work.
|
||||
|
||||
## Workflow to deploy new version
|
||||
|
||||
This example assumes the DB and job archive did not change.
|
||||
This example assumes the DB and job archive versions did not change.
|
||||
* Stop systemd service: `$ sudo systemctl stop clustercockpit.service`
|
||||
* Backup the sqlite DB file and Job archive directory tree!
|
||||
* Clone cc-backend source tree (e.g. in your home directory)
|
||||
* Copy the adapted legal text files into the git source tree (./web/templates).
|
||||
* Build cc-backend:
|
||||
```
|
||||
$ cd web/frontend
|
||||
$ yarn && yarn build
|
||||
$ cd ../../
|
||||
$ go build ./cmd/cc-backend
|
||||
```
|
||||
* Copy `cc-backend` binary to `/opt/monitoring/cc-backend/archive`
|
||||
* Link from cc-backend root to recent version
|
||||
* Restart systemd service: `$ sudo systemctl restart clustercockpit.service`
|
||||
* Copy `cc-backend` binary to `/opt/monitoring/cc-backend/archive` (Tip: Use a
|
||||
date tag like `YYYYMMDD-cc-backend`)
|
||||
* Link from cc-backend root to current version
|
||||
* Start systemd service: `$ sudo systemctl start clustercockpit.service`
|
||||
* Check if everything is ok: `$ sudo systemctl status clustercockpit.service`
|
||||
* Check log for issues: `$ sudo journalctl -u clustercockpit.service`
|
||||
* Check the ClusterCockpit web frontend and your Slurm adapters if anything is broken!
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
[Unit]
|
||||
Description=ClusterCockpit Web Server (Go edition)
|
||||
Description=ClusterCockpit Web Server
|
||||
Documentation=https://github.com/ClusterCockpit/cc-backend
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
After=mariadb.service mysql.service
|
||||
# Database is file-based SQLite - no service dependency required
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=/opt/monitoring/cc-backend
|
||||
@@ -12,7 +12,7 @@ NotifyAccess=all
|
||||
Restart=on-failure
|
||||
RestartSec=30
|
||||
TimeoutStopSec=100
|
||||
ExecStart=/opt/monitoring/cc-backend/cc-backend --config ./config.json
|
||||
ExecStart=/opt/monitoring/cc-backend/cc-backend --config ./config.json --server
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package api_test
|
||||
@@ -14,44 +14,56 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func setup(t *testing.T) *api.RestApi {
|
||||
func setup(t *testing.T) *api.RestAPI {
|
||||
repository.ResetConnection()
|
||||
|
||||
const testconfig = `{
|
||||
"main": {
|
||||
"addr": "0.0.0.0:8080",
|
||||
"validate": false,
|
||||
"api-allowed-ips": [
|
||||
"*"
|
||||
]
|
||||
},
|
||||
"metric-store": {
|
||||
"checkpoints": {
|
||||
"interval": "12h"
|
||||
},
|
||||
"retention-in-memory": "48h",
|
||||
"memory-cap": 100
|
||||
},
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"clusters": [
|
||||
{
|
||||
"name": "testcluster",
|
||||
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
|
||||
"filterRanges": {
|
||||
"numNodes": { "from": 1, "to": 64 },
|
||||
"duration": { "from": 0, "to": 86400 },
|
||||
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
|
||||
"auth": {
|
||||
"jwts": {
|
||||
"max-age": "2m"
|
||||
}
|
||||
}
|
||||
]
|
||||
}`
|
||||
const testclusterJson = `{
|
||||
const testclusterJSON = `{
|
||||
"name": "testcluster",
|
||||
"subClusters": [
|
||||
{
|
||||
@@ -107,61 +119,73 @@ func setup(t *testing.T) *api.RestApi {
|
||||
]
|
||||
}`
|
||||
|
||||
log.Init("info", true)
|
||||
cclog.Init("info", true)
|
||||
tmpdir := t.TempDir()
|
||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
||||
if err := os.Mkdir(jobarchive, 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0777); err != nil {
|
||||
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJson), 0666); err != nil {
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dbfilepath := filepath.Join(tmpdir, "test.db")
|
||||
err := repository.MigrateDB("sqlite3", dbfilepath)
|
||||
err := repository.MigrateDB(dbfilepath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
config.Init(cfgFilePath)
|
||||
ccconf.Init(cfgFilePath)
|
||||
metricstore.MetricStoreHandle = &metricstore.InternalMetricStore{}
|
||||
|
||||
// Load and check main configuration
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
config.Init(cfg)
|
||||
} else {
|
||||
cclog.Abort("Main configuration must be present")
|
||||
}
|
||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||
|
||||
repository.Connect("sqlite3", dbfilepath)
|
||||
db := repository.GetConnection()
|
||||
repository.Connect(dbfilepath)
|
||||
|
||||
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||
if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
||||
t.Fatal(err)
|
||||
// metricstore initialization removed - it's initialized via callback in tests
|
||||
|
||||
archiver.Start(repository.GetJobRepository(), context.Background())
|
||||
|
||||
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
|
||||
auth.Init(&cfg)
|
||||
} else {
|
||||
cclog.Warn("Authentication disabled due to missing configuration")
|
||||
auth.Init(nil)
|
||||
}
|
||||
|
||||
jobRepo := repository.GetJobRepository()
|
||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
||||
graph.Init()
|
||||
|
||||
return &api.RestApi{
|
||||
JobRepository: resolver.Repo,
|
||||
Resolver: resolver,
|
||||
}
|
||||
return api.New()
|
||||
}
|
||||
|
||||
func cleanup() {
|
||||
// TODO: Clear all caches, reset all modules, etc...
|
||||
if err := archiver.Shutdown(5 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -172,7 +196,6 @@ func cleanup() {
|
||||
func TestRestApi(t *testing.T) {
|
||||
restapi := setup(t)
|
||||
t.Cleanup(cleanup)
|
||||
|
||||
testData := schema.JobData{
|
||||
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
||||
schema.MetricScopeNode: {
|
||||
@@ -189,12 +212,16 @@ func TestRestApi(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
||||
metricstore.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||
return testData, nil
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
restapi.MountRoutes(r)
|
||||
r := chi.NewRouter()
|
||||
restapi.MountAPIRoutes(r)
|
||||
|
||||
var TestJobID int64 = 123
|
||||
TestClusterName := "testcluster"
|
||||
var TestStartTime int64 = 123456789
|
||||
|
||||
const startJobBody string = `{
|
||||
"jobId": 123,
|
||||
@@ -207,10 +234,9 @@ func TestRestApi(t *testing.T) {
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"exclusive": 1,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"tags": [{ "type": "testTagType", "name": "testTagName" }],
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
@@ -221,31 +247,37 @@ func TestRestApi(t *testing.T) {
|
||||
"startTime": 123456789
|
||||
}`
|
||||
|
||||
var dbid int64
|
||||
const contextUserKey repository.ContextKey = "user"
|
||||
contextUserValue := &schema.User{
|
||||
Username: "testuser",
|
||||
Projects: make([]string, 0),
|
||||
Roles: []string{"user"},
|
||||
AuthType: 0,
|
||||
AuthSource: 2,
|
||||
}
|
||||
|
||||
if ok := t.Run("StartJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusCreated {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
var res api.StartJobApiResponse
|
||||
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
|
||||
// resolver := graph.GetResolverInstance()
|
||||
restapi.JobRepository.SyncJobs()
|
||||
job, err := restapi.JobRepository.Find(&TestJobID, &TestClusterName, &TestStartTime)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
// job.Tags, err = resolver.Job().Tags(ctx, job)
|
||||
// if err != nil {
|
||||
// t.Fatal(err)
|
||||
// }
|
||||
|
||||
if job.JobID != 123 ||
|
||||
job.User != "testuser" ||
|
||||
@@ -254,23 +286,20 @@ func TestRestApi(t *testing.T) {
|
||||
job.SubCluster != "sc1" ||
|
||||
job.Partition != "default" ||
|
||||
job.Walltime != 3600 ||
|
||||
job.ArrayJobId != 0 ||
|
||||
job.ArrayJobID != 0 ||
|
||||
job.NumNodes != 1 ||
|
||||
job.NumHWThreads != 8 ||
|
||||
job.NumAcc != 0 ||
|
||||
job.Exclusive != 1 ||
|
||||
job.MonitoringStatus != 1 ||
|
||||
job.SMT != 1 ||
|
||||
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
|
||||
job.StartTime.Unix() != 123456789 {
|
||||
job.StartTime != 123456789 {
|
||||
t.Fatalf("unexpected job properties: %#v", job)
|
||||
}
|
||||
|
||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
|
||||
t.Fatalf("unexpected tags: %#v", job.Tags)
|
||||
}
|
||||
|
||||
dbid = res.DBID
|
||||
// if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
|
||||
// t.Fatalf("unexpected tags: %#v", job.Tags)
|
||||
// }
|
||||
}); !ok {
|
||||
return
|
||||
}
|
||||
@@ -286,17 +315,19 @@ func TestRestApi(t *testing.T) {
|
||||
|
||||
var stoppedJob *schema.Job
|
||||
if ok := t.Run("StopJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
restapi.JobRepository.WaitForArchiving()
|
||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
|
||||
// Archiving happens asynchronously, will be completed in cleanup
|
||||
job, err := restapi.JobRepository.Find(&TestJobID, &TestClusterName, &TestStartTime)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -324,7 +355,7 @@ func TestRestApi(t *testing.T) {
|
||||
}
|
||||
|
||||
t.Run("CheckArchive", func(t *testing.T) {
|
||||
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
|
||||
data, err := metricdispatch.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -336,12 +367,14 @@ func TestRestApi(t *testing.T) {
|
||||
|
||||
t.Run("CheckDoubleStart", func(t *testing.T) {
|
||||
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
|
||||
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
|
||||
body := strings.ReplaceAll(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusUnprocessableEntity {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
@@ -356,7 +389,7 @@ func TestRestApi(t *testing.T) {
|
||||
"partition": "default",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"exclusive": 1,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
@@ -368,10 +401,12 @@ func TestRestApi(t *testing.T) {
|
||||
}`
|
||||
|
||||
ok := t.Run("StartJobFailed", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusCreated {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
@@ -381,6 +416,9 @@ func TestRestApi(t *testing.T) {
|
||||
t.Fatal("subtest failed")
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Second)
|
||||
restapi.JobRepository.SyncJobs()
|
||||
|
||||
const stopJobBodyFailed string = `{
|
||||
"jobId": 12345,
|
||||
"cluster": "testcluster",
|
||||
@@ -390,16 +428,18 @@ func TestRestApi(t *testing.T) {
|
||||
}`
|
||||
|
||||
ok = t.Run("StopJobFailed", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
r.ServeHTTP(recorder, req)
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
restapi.JobRepository.WaitForArchiving()
|
||||
// Archiving happens asynchronously, will be completed in cleanup
|
||||
jobid, cluster := int64(12345), "testcluster"
|
||||
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
||||
if err != nil {
|
||||
@@ -413,4 +453,198 @@ func TestRestApi(t *testing.T) {
|
||||
if !ok {
|
||||
t.Fatal("subtest failed")
|
||||
}
|
||||
|
||||
t.Run("GetUsedNodesNoRunning", func(t *testing.T) {
|
||||
contextUserValue := &schema.User{
|
||||
Username: "testuser",
|
||||
Projects: make([]string, 0),
|
||||
Roles: []string{"api"},
|
||||
AuthType: 0,
|
||||
AuthSource: 2,
|
||||
}
|
||||
|
||||
req := httptest.NewRequest(http.MethodGet, "/jobs/used_nodes?ts=123456790", nil)
|
||||
recorder := httptest.NewRecorder()
|
||||
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
response := recorder.Result()
|
||||
if response.StatusCode != http.StatusOK {
|
||||
t.Fatal(response.Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
var result api.GetUsedNodesAPIResponse
|
||||
if err := json.NewDecoder(response.Body).Decode(&result); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if result.UsedNodes == nil {
|
||||
t.Fatal("expected usedNodes to be non-nil")
|
||||
}
|
||||
|
||||
if len(result.UsedNodes) != 0 {
|
||||
t.Fatalf("expected no used nodes for stopped jobs, got: %v", result.UsedNodes)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestStopJobWithReusedJobId verifies that stopping a recently started job works
|
||||
// even when an older job with the same jobId exists in the job table (e.g. with
|
||||
// state "failed"). This is a regression test for the bug where Find() on the job
|
||||
// table would match the old job instead of the new one still in job_cache.
|
||||
func TestStopJobWithReusedJobId(t *testing.T) {
|
||||
restapi := setup(t)
|
||||
t.Cleanup(cleanup)
|
||||
|
||||
testData := schema.JobData{
|
||||
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
||||
schema.MetricScopeNode: {
|
||||
Unit: schema.Unit{Base: "load"},
|
||||
Timestep: 60,
|
||||
Series: []schema.Series{
|
||||
{
|
||||
Hostname: "host123",
|
||||
Statistics: schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3},
|
||||
Data: []schema.Float{0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
metricstore.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||
return testData, nil
|
||||
}
|
||||
|
||||
r := chi.NewRouter()
|
||||
restapi.MountAPIRoutes(r)
|
||||
|
||||
const contextUserKey repository.ContextKey = "user"
|
||||
contextUserValue := &schema.User{
|
||||
Username: "testuser",
|
||||
Projects: make([]string, 0),
|
||||
Roles: []string{"user"},
|
||||
AuthType: 0,
|
||||
AuthSource: 2,
|
||||
}
|
||||
|
||||
// Step 1: Start the first job (jobId=999)
|
||||
const startJobBody1 string = `{
|
||||
"jobId": 999,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "default",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [{"hostname": "host123", "hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]}],
|
||||
"startTime": 200000000
|
||||
}`
|
||||
|
||||
if ok := t.Run("StartFirstJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody1)))
|
||||
recorder := httptest.NewRecorder()
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
if recorder.Result().StatusCode != http.StatusCreated {
|
||||
t.Fatal(recorder.Result().Status, recorder.Body.String())
|
||||
}
|
||||
}); !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// Step 2: Sync to move job from cache to job table, then stop it as "failed"
|
||||
time.Sleep(1 * time.Second)
|
||||
restapi.JobRepository.SyncJobs()
|
||||
|
||||
const stopJobBody1 string = `{
|
||||
"jobId": 999,
|
||||
"startTime": 200000000,
|
||||
"cluster": "testcluster",
|
||||
"jobState": "failed",
|
||||
"stopTime": 200001000
|
||||
}`
|
||||
|
||||
if ok := t.Run("StopFirstJobAsFailed", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody1)))
|
||||
recorder := httptest.NewRecorder()
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
if recorder.Result().StatusCode != http.StatusOK {
|
||||
t.Fatal(recorder.Result().Status, recorder.Body.String())
|
||||
}
|
||||
|
||||
jobid, cluster := int64(999), "testcluster"
|
||||
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if job.State != schema.JobStateFailed {
|
||||
t.Fatalf("expected first job to be failed, got: %s", job.State)
|
||||
}
|
||||
}); !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// Wait for archiving to complete
|
||||
time.Sleep(1 * time.Second)
|
||||
|
||||
// Step 3: Start a NEW job with the same jobId=999 but different startTime.
|
||||
// This job will sit in job_cache (not yet synced).
|
||||
const startJobBody2 string = `{
|
||||
"jobId": 999,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "default",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [{"hostname": "host123", "hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]}],
|
||||
"startTime": 300000000
|
||||
}`
|
||||
|
||||
if ok := t.Run("StartSecondJob", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody2)))
|
||||
recorder := httptest.NewRecorder()
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
if recorder.Result().StatusCode != http.StatusCreated {
|
||||
t.Fatal(recorder.Result().Status, recorder.Body.String())
|
||||
}
|
||||
}); !ok {
|
||||
return
|
||||
}
|
||||
|
||||
// Step 4: Stop the second job WITHOUT syncing first.
|
||||
// Before the fix, this would fail because Find() on the job table would
|
||||
// match the old failed job (jobId=999) and reject with "already stopped".
|
||||
const stopJobBody2 string = `{
|
||||
"jobId": 999,
|
||||
"startTime": 300000000,
|
||||
"cluster": "testcluster",
|
||||
"jobState": "completed",
|
||||
"stopTime": 300001000
|
||||
}`
|
||||
|
||||
t.Run("StopSecondJobBeforeSync", func(t *testing.T) {
|
||||
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody2)))
|
||||
recorder := httptest.NewRecorder()
|
||||
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||
if recorder.Result().StatusCode != http.StatusOK {
|
||||
t.Fatalf("expected stop to succeed for cached job, got: %s %s",
|
||||
recorder.Result().Status, recorder.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
71
internal/api/cluster.go
Normal file
71
internal/api/cluster.go
Normal file
@@ -0,0 +1,71 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
// GetClustersAPIResponse model
|
||||
type GetClustersAPIResponse struct {
|
||||
Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
|
||||
}
|
||||
|
||||
// getClusters godoc
|
||||
// @summary Lists all cluster configs
|
||||
// @tags Cluster query
|
||||
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
|
||||
// @produce json
|
||||
// @param cluster query string false "Job Cluster"
|
||||
// @success 200 {object} api.GetClustersAPIResponse "Array of clusters"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/clusters/ [get]
|
||||
func (api *RestAPI) getClusters(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleAPI) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleAPI)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
bw := bufio.NewWriter(rw)
|
||||
defer bw.Flush()
|
||||
|
||||
var clusters []*schema.Cluster
|
||||
|
||||
if r.URL.Query().Has("cluster") {
|
||||
name := r.URL.Query().Get("cluster")
|
||||
cluster := archive.GetCluster(name)
|
||||
if cluster == nil {
|
||||
handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
clusters = append(clusters, cluster)
|
||||
} else {
|
||||
clusters = archive.Clusters
|
||||
}
|
||||
|
||||
payload := GetClustersAPIResponse{
|
||||
Clusters: clusters,
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
2277
internal/api/docs.go
2277
internal/api/docs.go
File diff suppressed because it is too large
Load Diff
1154
internal/api/job.go
Normal file
1154
internal/api/job.go
Normal file
File diff suppressed because it is too large
Load Diff
165
internal/api/log.go
Normal file
165
internal/api/log.go
Normal file
@@ -0,0 +1,165 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package api
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
type LogEntry struct {
|
||||
Timestamp string `json:"timestamp"`
|
||||
Priority int `json:"priority"`
|
||||
Message string `json:"message"`
|
||||
Unit string `json:"unit"`
|
||||
}
|
||||
|
||||
var safePattern = regexp.MustCompile(`^[a-zA-Z0-9 :\-\.]+$`)
|
||||
|
||||
func (api *RestAPI) getJournalLog(rw http.ResponseWriter, r *http.Request) {
|
||||
user := repository.GetUserFromContext(r.Context())
|
||||
if !user.HasRole(schema.RoleAdmin) {
|
||||
handleError(fmt.Errorf("only admins are allowed to view logs"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
since := r.URL.Query().Get("since")
|
||||
if since == "" {
|
||||
since = "1 hour ago"
|
||||
}
|
||||
if !safePattern.MatchString(since) {
|
||||
handleError(fmt.Errorf("invalid 'since' parameter"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
lines := 200
|
||||
if l := r.URL.Query().Get("lines"); l != "" {
|
||||
n, err := strconv.Atoi(l)
|
||||
if err != nil || n < 1 {
|
||||
handleError(fmt.Errorf("invalid 'lines' parameter"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
if n > 1000 {
|
||||
n = 1000
|
||||
}
|
||||
lines = n
|
||||
}
|
||||
|
||||
unit := config.Keys.SystemdUnit
|
||||
if unit == "" {
|
||||
unit = "clustercockpit.service"
|
||||
}
|
||||
|
||||
args := []string{
|
||||
"--output=json",
|
||||
"--no-pager",
|
||||
"-n", fmt.Sprintf("%d", lines),
|
||||
"--since", since,
|
||||
"-u", unit,
|
||||
}
|
||||
|
||||
if level := r.URL.Query().Get("level"); level != "" {
|
||||
n, err := strconv.Atoi(level)
|
||||
if err != nil || n < 0 || n > 7 {
|
||||
handleError(fmt.Errorf("invalid 'level' parameter (must be 0-7)"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
args = append(args, "--priority", fmt.Sprintf("%d", n))
|
||||
}
|
||||
|
||||
if search := r.URL.Query().Get("search"); search != "" {
|
||||
if !safePattern.MatchString(search) {
|
||||
handleError(fmt.Errorf("invalid 'search' parameter"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
args = append(args, "--grep", search)
|
||||
}
|
||||
|
||||
cclog.Debugf("calling journalctl with %s", strings.Join(args, " "))
|
||||
cmd := exec.CommandContext(r.Context(), "journalctl", args...)
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("failed to create pipe: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
handleError(fmt.Errorf("failed to start journalctl: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
entries := make([]LogEntry, 0, lines)
|
||||
scanner := bufio.NewScanner(stdout)
|
||||
for scanner.Scan() {
|
||||
var raw map[string]any
|
||||
if err := json.Unmarshal(scanner.Bytes(), &raw); err != nil {
|
||||
cclog.Debugf("error unmarshal log output: %v", err)
|
||||
continue
|
||||
}
|
||||
|
||||
priority := 6 // default info
|
||||
if p, ok := raw["PRIORITY"]; ok {
|
||||
switch v := p.(type) {
|
||||
case string:
|
||||
if n, err := strconv.Atoi(v); err == nil {
|
||||
priority = n
|
||||
}
|
||||
case float64:
|
||||
priority = int(v)
|
||||
}
|
||||
}
|
||||
|
||||
msg := ""
|
||||
if m, ok := raw["MESSAGE"]; ok {
|
||||
if s, ok := m.(string); ok {
|
||||
msg = s
|
||||
}
|
||||
}
|
||||
|
||||
ts := ""
|
||||
if t, ok := raw["__REALTIME_TIMESTAMP"]; ok {
|
||||
if s, ok := t.(string); ok {
|
||||
ts = s
|
||||
}
|
||||
}
|
||||
|
||||
unitName := ""
|
||||
if u, ok := raw["_SYSTEMD_UNIT"]; ok {
|
||||
if s, ok := u.(string); ok {
|
||||
unitName = s
|
||||
}
|
||||
}
|
||||
|
||||
entries = append(entries, LogEntry{
|
||||
Timestamp: ts,
|
||||
Priority: priority,
|
||||
Message: msg,
|
||||
Unit: unitName,
|
||||
})
|
||||
}
|
||||
|
||||
if err := cmd.Wait(); err != nil {
|
||||
// journalctl returns exit code 1 when --grep matches nothing
|
||||
if len(entries) == 0 {
|
||||
cclog.Debugf("journalctl exited with: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(rw).Encode(entries); err != nil {
|
||||
cclog.Errorf("Failed to encode log entries: %v", err)
|
||||
}
|
||||
}
|
||||
151
internal/api/metricstore.go
Normal file
151
internal/api/metricstore.go
Normal file
@@ -0,0 +1,151 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
|
||||
"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
|
||||
)
|
||||
|
||||
// handleFree godoc
|
||||
// @summary
|
||||
// @tags free
|
||||
// @description This endpoint allows the users to free the Buffers from the
|
||||
// metric store. This endpoint offers the users to remove then systematically
|
||||
// and also allows then to prune the data under node, if they do not want to
|
||||
// remove the whole node.
|
||||
// @produce json
|
||||
// @param to query string false "up to timestamp"
|
||||
// @success 200 {string} string "ok"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /free/ [post]
|
||||
func freeMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
rawTo := r.URL.Query().Get("to")
|
||||
if rawTo == "" {
|
||||
handleError(errors.New("'to' is a required query parameter"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
to, err := strconv.ParseInt(rawTo, 10, 64)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
bodyDec := json.NewDecoder(r.Body)
|
||||
var selectors [][]string
|
||||
err = bodyDec.Decode(&selectors)
|
||||
if err != nil {
|
||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
ms := metricstore.GetMemoryStore()
|
||||
n := 0
|
||||
for _, sel := range selectors {
|
||||
bn, err := ms.Free(sel, to)
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
n += bn
|
||||
}
|
||||
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
fmt.Fprintf(rw, "buffers freed: %d\n", n)
|
||||
}
|
||||
|
||||
// handleWrite godoc
|
||||
// @summary Receive metrics in InfluxDB line-protocol
|
||||
// @tags write
|
||||
// @description Write data to the in-memory store in the InfluxDB line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)
|
||||
|
||||
// @accept plain
|
||||
// @produce json
|
||||
// @param cluster query string false "If the lines in the body do not have a cluster tag, use this value instead."
|
||||
// @success 200 {string} string "ok"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /write/ [post]
|
||||
func writeMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
|
||||
// Extract the "cluster" query parameter without allocating a url.Values map.
|
||||
cluster := queryParam(r.URL.RawQuery, "cluster")
|
||||
|
||||
// Stream directly from the request body instead of copying it into a
|
||||
// temporary buffer via io.ReadAll. The line-protocol decoder supports
|
||||
// io.Reader natively, so this avoids the largest heap allocation.
|
||||
ms := metricstore.GetMemoryStore()
|
||||
dec := lineprotocol.NewDecoder(r.Body)
|
||||
if err := metricstore.DecodeLine(dec, ms, cluster); err != nil {
|
||||
cclog.Errorf("/api/write error: %s", err.Error())
|
||||
handleError(err, http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
}
|
||||
|
||||
// queryParam extracts a single query-parameter value from a raw query string
|
||||
// without allocating a url.Values map. Returns "" if the key is not present.
|
||||
func queryParam(raw, key string) string {
|
||||
for raw != "" {
|
||||
var kv string
|
||||
kv, raw, _ = strings.Cut(raw, "&")
|
||||
k, v, _ := strings.Cut(kv, "=")
|
||||
if k == key {
|
||||
return v
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// handleDebug godoc
|
||||
// @summary Debug endpoint
|
||||
// @tags debug
|
||||
// @description This endpoint allows the users to print the content of
|
||||
// nodes/clusters/metrics to review the state of the data.
|
||||
// @produce json
|
||||
// @param selector query string false "Selector"
|
||||
// @success 200 {string} string "Debug dump"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /debug/ [post]
|
||||
func debugMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
raw := r.URL.Query().Get("selector")
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
selector := []string{}
|
||||
if len(raw) != 0 {
|
||||
selector = strings.Split(raw, ":")
|
||||
}
|
||||
|
||||
ms := metricstore.GetMemoryStore()
|
||||
if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil {
|
||||
handleError(err, http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
400
internal/api/nats.go
Normal file
400
internal/api/nats.go
Normal file
@@ -0,0 +1,400 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/receivers"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
influx "github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
|
||||
)
|
||||
|
||||
// NatsAPI provides NATS subscription-based handlers for Job and Node operations.
|
||||
// It mirrors the functionality of the REST API but uses NATS messaging with
|
||||
// InfluxDB line protocol as the message format.
|
||||
//
|
||||
// # Message Format
|
||||
//
|
||||
// All NATS messages use InfluxDB line protocol format (https://docs.influxdata.com/influxdb/v2.0/reference/syntax/line-protocol/)
|
||||
// with the following structure:
|
||||
//
|
||||
// measurement,tag1=value1,tag2=value2 field1=value1,field2=value2 timestamp
|
||||
//
|
||||
// # Job Events
|
||||
//
|
||||
// Job start/stop events use the "job" measurement with a "function" tag to distinguish operations:
|
||||
//
|
||||
// job,function=start_job event="{...JSON payload...}" <timestamp>
|
||||
// job,function=stop_job event="{...JSON payload...}" <timestamp>
|
||||
//
|
||||
// The JSON payload in the "event" field follows the schema.Job or StopJobAPIRequest structure.
|
||||
//
|
||||
// Example job start message:
|
||||
//
|
||||
// job,function=start_job event="{\"jobId\":1001,\"user\":\"testuser\",\"cluster\":\"testcluster\",...}" 1234567890000000000
|
||||
//
|
||||
// # Node State Events
|
||||
//
|
||||
// Node state updates use the "nodestate" measurement with cluster information:
|
||||
//
|
||||
// nodestate event="{...JSON payload...}" <timestamp>
|
||||
//
|
||||
// The JSON payload follows the UpdateNodeStatesRequest structure.
|
||||
//
|
||||
// Example node state message:
|
||||
//
|
||||
// nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"node01\",\"states\":[\"idle\"]}]}" 1234567890000000000
|
||||
type NatsAPI struct {
|
||||
JobRepository *repository.JobRepository
|
||||
// RepositoryMutex protects job creation operations from race conditions
|
||||
// when checking for duplicate jobs during startJob calls.
|
||||
RepositoryMutex sync.Mutex
|
||||
}
|
||||
|
||||
// NewNatsAPI creates a new NatsAPI instance with default dependencies.
|
||||
func NewNatsAPI() *NatsAPI {
|
||||
return &NatsAPI{
|
||||
JobRepository: repository.GetJobRepository(),
|
||||
}
|
||||
}
|
||||
|
||||
// StartSubscriptions registers all NATS subscriptions for Job and Node APIs.
|
||||
// Returns an error if the NATS client is not available or subscription fails.
|
||||
func (api *NatsAPI) StartSubscriptions() error {
|
||||
client := nats.GetClient()
|
||||
if client == nil {
|
||||
cclog.Warn("NATS client not available, skipping API subscriptions")
|
||||
return nil
|
||||
}
|
||||
|
||||
if config.Keys.APISubjects != nil {
|
||||
|
||||
s := config.Keys.APISubjects
|
||||
|
||||
if err := client.Subscribe(s.SubjectJobEvent, api.handleJobEvent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := client.Subscribe(s.SubjectNodeState, api.handleNodeState); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cclog.Info("NATS API subscriptions started")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// processJobEvent routes job event messages to the appropriate handler based on the "function" tag.
|
||||
// Validates that required tags and fields are present before processing.
|
||||
func (api *NatsAPI) processJobEvent(msg lp.CCMessage) {
|
||||
function, ok := msg.GetTag("function")
|
||||
if !ok {
|
||||
cclog.Errorf("Job event is missing required tag 'function': measurement=%s", msg.Name())
|
||||
return
|
||||
}
|
||||
|
||||
switch function {
|
||||
case "start_job":
|
||||
v, ok := msg.GetEventValue()
|
||||
if !ok {
|
||||
cclog.Errorf("Job start event is missing event field with JSON payload")
|
||||
return
|
||||
}
|
||||
api.handleStartJob(v)
|
||||
|
||||
case "stop_job":
|
||||
v, ok := msg.GetEventValue()
|
||||
if !ok {
|
||||
cclog.Errorf("Job stop event is missing event field with JSON payload")
|
||||
return
|
||||
}
|
||||
api.handleStopJob(v)
|
||||
|
||||
default:
|
||||
cclog.Warnf("Unknown job event function '%s', expected 'start_job' or 'stop_job'", function)
|
||||
}
|
||||
}
|
||||
|
||||
// handleJobEvent processes job-related messages received via NATS using InfluxDB line protocol.
|
||||
// The message must be in line protocol format with measurement="job" and include:
|
||||
// - tag "function" with value "start_job" or "stop_job"
|
||||
// - field "event" containing JSON payload (schema.Job or StopJobAPIRequest)
|
||||
//
|
||||
// Example: job,function=start_job event="{\"jobId\":1001,...}" 1234567890000000000
|
||||
func (api *NatsAPI) handleJobEvent(subject string, data []byte) {
|
||||
if len(data) == 0 {
|
||||
cclog.Warnf("NATS %s: received empty message", subject)
|
||||
return
|
||||
}
|
||||
|
||||
d := influx.NewDecoderWithBytes(data)
|
||||
|
||||
for d.Next() {
|
||||
m, err := receivers.DecodeInfluxMessage(d)
|
||||
if err != nil {
|
||||
cclog.Errorf("NATS %s: failed to decode InfluxDB line protocol message: %v", subject, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !m.IsEvent() {
|
||||
cclog.Debugf("NATS %s: received non-event message, skipping", subject)
|
||||
continue
|
||||
}
|
||||
|
||||
if m.Name() == "job" {
|
||||
api.processJobEvent(m)
|
||||
} else {
|
||||
cclog.Debugf("NATS %s: unexpected measurement name '%s', expected 'job'", subject, m.Name())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleStartJob processes job start messages received via NATS.
|
||||
// The payload parameter contains JSON following the schema.Job structure.
|
||||
// Jobs are validated, checked for duplicates, and inserted into the database.
|
||||
func (api *NatsAPI) handleStartJob(payload string) {
|
||||
if payload == "" {
|
||||
cclog.Error("NATS start job: payload is empty")
|
||||
return
|
||||
}
|
||||
req := schema.Job{
|
||||
Shared: "none",
|
||||
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||
}
|
||||
|
||||
dec := json.NewDecoder(strings.NewReader(payload))
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
cclog.Errorf("NATS start job: parsing request failed: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Debugf("NATS start job: %s", req.GoString())
|
||||
req.State = schema.JobStateRunning
|
||||
|
||||
if err := importer.SanityChecks(&req); err != nil {
|
||||
cclog.Errorf("NATS start job: sanity check failed: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
var unlockOnce sync.Once
|
||||
api.RepositoryMutex.Lock()
|
||||
defer unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||
|
||||
jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("NATS start job: checking for duplicate failed: %v", err)
|
||||
return
|
||||
}
|
||||
if err == nil {
|
||||
for _, job := range jobs {
|
||||
if (req.StartTime - job.StartTime) < secondsPerDay {
|
||||
cclog.Errorf("NATS start job: job with jobId %d, cluster %s already exists (dbid: %d)",
|
||||
req.JobID, req.Cluster, job.ID)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// When tags are present, insert directly into the job table so that the
|
||||
// returned ID can be used with AddTagOrCreate (which queries the job table).
|
||||
var id int64
|
||||
if len(req.Tags) > 0 {
|
||||
id, err = api.JobRepository.StartDirect(&req)
|
||||
} else {
|
||||
id, err = api.JobRepository.Start(&req)
|
||||
}
|
||||
if err != nil {
|
||||
cclog.Errorf("NATS start job: insert into database failed: %v", err)
|
||||
return
|
||||
}
|
||||
unlockOnce.Do(api.RepositoryMutex.Unlock)
|
||||
|
||||
for _, tag := range req.Tags {
|
||||
if _, err := api.JobRepository.AddTagOrCreate(nil, id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||
cclog.Errorf("NATS start job: adding tag to new job %d failed: %v", id, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Infof("NATS: new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d",
|
||||
id, req.Cluster, req.JobID, req.User, req.StartTime)
|
||||
}
|
||||
|
||||
// handleStopJob processes job stop messages received via NATS.
|
||||
// The payload parameter contains JSON following the StopJobAPIRequest structure.
|
||||
// The job is marked as stopped in the database and archiving is triggered if monitoring is enabled.
|
||||
func (api *NatsAPI) handleStopJob(payload string) {
|
||||
if payload == "" {
|
||||
cclog.Error("NATS stop job: payload is empty")
|
||||
return
|
||||
}
|
||||
var req StopJobAPIRequest
|
||||
|
||||
dec := json.NewDecoder(strings.NewReader(payload))
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
cclog.Errorf("NATS job stop: parsing request failed: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if req.JobID == nil {
|
||||
cclog.Errorf("NATS job stop: the field 'jobId' is required")
|
||||
return
|
||||
}
|
||||
|
||||
isCached := false
|
||||
job, err := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime)
|
||||
if err != nil {
|
||||
// Not in cache, try main job table
|
||||
job, err = api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
|
||||
if err != nil {
|
||||
cclog.Errorf("NATS job stop: finding job failed: %v", err)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
isCached = true
|
||||
}
|
||||
|
||||
if job.State != schema.JobStateRunning {
|
||||
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: job has already been stopped (state is: %s)",
|
||||
job.JobID, job.ID, job.Cluster, job.State)
|
||||
return
|
||||
}
|
||||
|
||||
if job.StartTime > req.StopTime {
|
||||
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: stopTime %d must be >= startTime %d",
|
||||
job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime)
|
||||
return
|
||||
}
|
||||
|
||||
if req.State != "" && !req.State.Valid() {
|
||||
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: invalid job state: %#v",
|
||||
job.JobID, job.ID, job.Cluster, req.State)
|
||||
return
|
||||
} else if req.State == "" {
|
||||
req.State = schema.JobStateCompleted
|
||||
}
|
||||
|
||||
job.Duration = int32(req.StopTime - job.StartTime)
|
||||
job.State = req.State
|
||||
api.JobRepository.Mutex.Lock()
|
||||
defer api.JobRepository.Mutex.Unlock()
|
||||
|
||||
// If the job is still in job_cache, transfer it to the job table first
|
||||
if isCached {
|
||||
newID, err := api.JobRepository.TransferCachedJobToMain(*job.ID)
|
||||
if err != nil {
|
||||
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: transferring cached job failed: %v",
|
||||
job.JobID, *job.ID, job.Cluster, err)
|
||||
return
|
||||
}
|
||||
cclog.Infof("NATS: transferred cached job to main table: old id %d -> new id %d (jobId=%d)", *job.ID, newID, job.JobID)
|
||||
job.ID = &newID
|
||||
}
|
||||
|
||||
if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
|
||||
cclog.Errorf("NATS job stop: jobId %d (id %d) on %s: marking job as '%s' failed: %v",
|
||||
job.JobID, *job.ID, job.Cluster, job.State, err)
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Infof("NATS: archiving job (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s",
|
||||
*job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)
|
||||
|
||||
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
|
||||
return
|
||||
}
|
||||
|
||||
archiver.TriggerArchiving(job)
|
||||
}
|
||||
|
||||
// processNodestateEvent extracts and processes node state data from the InfluxDB message.
|
||||
// Updates node states in the repository for all nodes in the payload.
|
||||
func (api *NatsAPI) processNodestateEvent(msg lp.CCMessage) {
|
||||
v, ok := msg.GetEventValue()
|
||||
if !ok {
|
||||
cclog.Errorf("Nodestate event is missing event field with JSON payload")
|
||||
return
|
||||
}
|
||||
|
||||
var req UpdateNodeStatesRequest
|
||||
|
||||
dec := json.NewDecoder(strings.NewReader(v))
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
cclog.Errorf("NATS nodestate: parsing request failed: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
repo := repository.GetNodeRepository()
|
||||
requestReceived := time.Now().Unix()
|
||||
|
||||
for _, node := range req.Nodes {
|
||||
state := determineState(node.States)
|
||||
nodeState := schema.NodeStateDB{
|
||||
TimeStamp: requestReceived,
|
||||
NodeState: state,
|
||||
CpusAllocated: node.CpusAllocated,
|
||||
MemoryAllocated: node.MemoryAllocated,
|
||||
GpusAllocated: node.GpusAllocated,
|
||||
HealthState: schema.MonitoringStateFull,
|
||||
JobsRunning: node.JobsRunning,
|
||||
}
|
||||
|
||||
if err := repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState); err != nil {
|
||||
cclog.Errorf("NATS nodestate: updating node state for %s on %s failed: %v",
|
||||
node.Hostname, req.Cluster, err)
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Debugf("NATS nodestate: updated %d node states for cluster %s", len(req.Nodes), req.Cluster)
|
||||
}
|
||||
|
||||
// handleNodeState processes node state update messages received via NATS using InfluxDB line protocol.
|
||||
// The message must be in line protocol format with measurement="nodestate" and include:
|
||||
// - field "event" containing JSON payload (UpdateNodeStatesRequest)
|
||||
//
|
||||
// Example: nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[...]}" 1234567890000000000
|
||||
func (api *NatsAPI) handleNodeState(subject string, data []byte) {
|
||||
if len(data) == 0 {
|
||||
cclog.Warnf("NATS %s: received empty message", subject)
|
||||
return
|
||||
}
|
||||
|
||||
d := influx.NewDecoderWithBytes(data)
|
||||
|
||||
for d.Next() {
|
||||
m, err := receivers.DecodeInfluxMessage(d)
|
||||
if err != nil {
|
||||
cclog.Errorf("NATS %s: failed to decode InfluxDB line protocol message: %v", subject, err)
|
||||
return
|
||||
}
|
||||
|
||||
if !m.IsEvent() {
|
||||
cclog.Warnf("NATS %s: received non-event message, skipping", subject)
|
||||
continue
|
||||
}
|
||||
|
||||
if m.Name() == "nodestate" {
|
||||
api.processNodestateEvent(m)
|
||||
} else {
|
||||
cclog.Warnf("NATS %s: unexpected measurement name '%s', expected 'nodestate'", subject, m.Name())
|
||||
}
|
||||
}
|
||||
}
|
||||
947
internal/api/nats_test.go
Normal file
947
internal/api/nats_test.go
Normal file
@@ -0,0 +1,947 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package api
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
lp "github.com/ClusterCockpit/cc-lib/v2/ccMessage"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func setupNatsTest(t *testing.T) *NatsAPI {
|
||||
repository.ResetConnection()
|
||||
|
||||
const testconfig = `{
|
||||
"main": {
|
||||
"addr": "0.0.0.0:8080",
|
||||
"validate": false,
|
||||
"api-allowed-ips": [
|
||||
"*"
|
||||
]
|
||||
},
|
||||
"archive": {
|
||||
"kind": "file",
|
||||
"path": "./var/job-archive"
|
||||
},
|
||||
"auth": {
|
||||
"jwts": {
|
||||
"max-age": "2m"
|
||||
}
|
||||
}
|
||||
}`
|
||||
const testclusterJSON = `{
|
||||
"name": "testcluster",
|
||||
"subClusters": [
|
||||
{
|
||||
"name": "sc1",
|
||||
"nodes": "host123,host124,host125",
|
||||
"processorType": "Intel Core i7-4770",
|
||||
"socketsPerNode": 1,
|
||||
"coresPerSocket": 4,
|
||||
"threadsPerCore": 2,
|
||||
"flopRateScalar": {
|
||||
"unit": {
|
||||
"prefix": "G",
|
||||
"base": "F/s"
|
||||
},
|
||||
"value": 14
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"unit": {
|
||||
"prefix": "G",
|
||||
"base": "F/s"
|
||||
},
|
||||
"value": 112
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"unit": {
|
||||
"prefix": "G",
|
||||
"base": "B/s"
|
||||
},
|
||||
"value": 24
|
||||
},
|
||||
"numberOfNodes": 70,
|
||||
"topology": {
|
||||
"node": [0, 1, 2, 3, 4, 5, 6, 7],
|
||||
"socket": [[0, 1, 2, 3, 4, 5, 6, 7]],
|
||||
"memoryDomain": [[0, 1, 2, 3, 4, 5, 6, 7]],
|
||||
"die": [[0, 1, 2, 3, 4, 5, 6, 7]],
|
||||
"core": [[0], [1], [2], [3], [4], [5], [6], [7]]
|
||||
}
|
||||
}
|
||||
],
|
||||
"metricConfig": [
|
||||
{
|
||||
"name": "load_one",
|
||||
"unit": { "base": ""},
|
||||
"scope": "node",
|
||||
"timestep": 60,
|
||||
"aggregation": "avg",
|
||||
"peak": 8,
|
||||
"normal": 0,
|
||||
"caution": 0,
|
||||
"alert": 0
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
cclog.Init("info", true)
|
||||
tmpdir := t.TempDir()
|
||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||
if err := os.Mkdir(jobarchive, 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0o777); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
dbfilepath := filepath.Join(tmpdir, "test.db")
|
||||
err := repository.MigrateDB(dbfilepath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
ccconf.Init(cfgFilePath)
|
||||
|
||||
// Load and check main configuration
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
config.Init(cfg)
|
||||
} else {
|
||||
cclog.Abort("Main configuration must be present")
|
||||
}
|
||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||
|
||||
repository.Connect(dbfilepath)
|
||||
|
||||
if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// metricstore initialization removed - it's initialized via callback in tests
|
||||
|
||||
archiver.Start(repository.GetJobRepository(), context.Background())
|
||||
|
||||
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
|
||||
auth.Init(&cfg)
|
||||
} else {
|
||||
cclog.Warn("Authentication disabled due to missing configuration")
|
||||
auth.Init(nil)
|
||||
}
|
||||
|
||||
graph.Init()
|
||||
|
||||
return NewNatsAPI()
|
||||
}
|
||||
|
||||
func cleanupNatsTest() {
|
||||
if err := archiver.Shutdown(5 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsHandleStartJob(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
payload string
|
||||
expectError bool
|
||||
validateJob func(t *testing.T, job *schema.Job)
|
||||
shouldFindJob bool
|
||||
}{
|
||||
{
|
||||
name: "valid job start",
|
||||
payload: `{
|
||||
"jobId": 1001,
|
||||
"user": "testuser1",
|
||||
"project": "testproj1",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 7200,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567890
|
||||
}`,
|
||||
expectError: false,
|
||||
shouldFindJob: true,
|
||||
validateJob: func(t *testing.T, job *schema.Job) {
|
||||
if job.JobID != 1001 {
|
||||
t.Errorf("expected JobID 1001, got %d", job.JobID)
|
||||
}
|
||||
if job.User != "testuser1" {
|
||||
t.Errorf("expected user testuser1, got %s", job.User)
|
||||
}
|
||||
if job.State != schema.JobStateRunning {
|
||||
t.Errorf("expected state running, got %s", job.State)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid JSON",
|
||||
payload: `{
|
||||
"jobId": "not a number",
|
||||
"user": "testuser2"
|
||||
}`,
|
||||
expectError: true,
|
||||
shouldFindJob: false,
|
||||
},
|
||||
{
|
||||
name: "missing required fields",
|
||||
payload: `{
|
||||
"jobId": 1002
|
||||
}`,
|
||||
expectError: true,
|
||||
shouldFindJob: false,
|
||||
},
|
||||
{
|
||||
name: "job with unknown fields (should fail due to DisallowUnknownFields)",
|
||||
payload: `{
|
||||
"jobId": 1003,
|
||||
"user": "testuser3",
|
||||
"project": "testproj3",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"unknownField": "should cause error",
|
||||
"startTime": 1234567900
|
||||
}`,
|
||||
expectError: true,
|
||||
shouldFindJob: false,
|
||||
},
|
||||
{
|
||||
name: "job with tags",
|
||||
payload: `{
|
||||
"jobId": 1004,
|
||||
"user": "testuser4",
|
||||
"project": "testproj4",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1, 2, 3]
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
{
|
||||
"type": "test",
|
||||
"name": "testtag",
|
||||
"scope": "testuser4"
|
||||
}
|
||||
],
|
||||
"startTime": 1234567910
|
||||
}`,
|
||||
expectError: false,
|
||||
shouldFindJob: true,
|
||||
validateJob: func(t *testing.T, job *schema.Job) {
|
||||
if job.JobID != 1004 {
|
||||
t.Errorf("expected JobID 1004, got %d", job.JobID)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
natsAPI.handleStartJob(tt.payload)
|
||||
natsAPI.JobRepository.SyncJobs()
|
||||
|
||||
// Allow some time for async operations
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
if tt.shouldFindJob {
|
||||
// Extract jobId from payload
|
||||
var payloadMap map[string]any
|
||||
json.Unmarshal([]byte(tt.payload), &payloadMap)
|
||||
jobID := int64(payloadMap["jobId"].(float64))
|
||||
cluster := payloadMap["cluster"].(string)
|
||||
startTime := int64(payloadMap["startTime"].(float64))
|
||||
|
||||
job, err := natsAPI.JobRepository.Find(&jobID, &cluster, &startTime)
|
||||
if err != nil {
|
||||
if !tt.expectError {
|
||||
t.Fatalf("expected to find job, but got error: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if tt.validateJob != nil {
|
||||
tt.validateJob(t, job)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsHandleStopJob(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
// First, create a running job
|
||||
startPayload := `{
|
||||
"jobId": 2001,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567890
|
||||
}`
|
||||
|
||||
natsAPI.handleStartJob(startPayload)
|
||||
natsAPI.JobRepository.SyncJobs()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
payload string
|
||||
expectError bool
|
||||
validateJob func(t *testing.T, job *schema.Job)
|
||||
setupJobFunc func() // Optional: create specific test job
|
||||
}{
|
||||
{
|
||||
name: "valid job stop - completed",
|
||||
payload: `{
|
||||
"jobId": 2001,
|
||||
"cluster": "testcluster",
|
||||
"startTime": 1234567890,
|
||||
"jobState": "completed",
|
||||
"stopTime": 1234571490
|
||||
}`,
|
||||
expectError: false,
|
||||
validateJob: func(t *testing.T, job *schema.Job) {
|
||||
if job.State != schema.JobStateCompleted {
|
||||
t.Errorf("expected state completed, got %s", job.State)
|
||||
}
|
||||
expectedDuration := int32(1234571490 - 1234567890)
|
||||
if job.Duration != expectedDuration {
|
||||
t.Errorf("expected duration %d, got %d", expectedDuration, job.Duration)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "valid job stop - failed",
|
||||
setupJobFunc: func() {
|
||||
startPayloadFailed := `{
|
||||
"jobId": 2002,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1, 2, 3]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567900
|
||||
}`
|
||||
natsAPI.handleStartJob(startPayloadFailed)
|
||||
natsAPI.JobRepository.SyncJobs()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
},
|
||||
payload: `{
|
||||
"jobId": 2002,
|
||||
"cluster": "testcluster",
|
||||
"startTime": 1234567900,
|
||||
"jobState": "failed",
|
||||
"stopTime": 1234569900
|
||||
}`,
|
||||
expectError: false,
|
||||
validateJob: func(t *testing.T, job *schema.Job) {
|
||||
if job.State != schema.JobStateFailed {
|
||||
t.Errorf("expected state failed, got %s", job.State)
|
||||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "invalid JSON",
|
||||
payload: `{
|
||||
"jobId": "not a number"
|
||||
}`,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "missing jobId",
|
||||
payload: `{
|
||||
"cluster": "testcluster",
|
||||
"jobState": "completed",
|
||||
"stopTime": 1234571490
|
||||
}`,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "invalid job state",
|
||||
setupJobFunc: func() {
|
||||
startPayloadInvalid := `{
|
||||
"jobId": 2003,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567910
|
||||
}`
|
||||
natsAPI.handleStartJob(startPayloadInvalid)
|
||||
natsAPI.JobRepository.SyncJobs()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
},
|
||||
payload: `{
|
||||
"jobId": 2003,
|
||||
"cluster": "testcluster",
|
||||
"startTime": 1234567910,
|
||||
"jobState": "invalid_state",
|
||||
"stopTime": 1234571510
|
||||
}`,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "stopTime before startTime",
|
||||
setupJobFunc: func() {
|
||||
startPayloadTime := `{
|
||||
"jobId": 2004,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567920
|
||||
}`
|
||||
natsAPI.handleStartJob(startPayloadTime)
|
||||
natsAPI.JobRepository.SyncJobs()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
},
|
||||
payload: `{
|
||||
"jobId": 2004,
|
||||
"cluster": "testcluster",
|
||||
"startTime": 1234567920,
|
||||
"jobState": "completed",
|
||||
"stopTime": 1234567900
|
||||
}`,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "job not found",
|
||||
payload: `{
|
||||
"jobId": 99999,
|
||||
"cluster": "testcluster",
|
||||
"startTime": 1234567890,
|
||||
"jobState": "completed",
|
||||
"stopTime": 1234571490
|
||||
}`,
|
||||
expectError: true,
|
||||
},
|
||||
}
|
||||
|
||||
testData := schema.JobData{
|
||||
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
||||
schema.MetricScopeNode: {
|
||||
Unit: schema.Unit{Base: "load"},
|
||||
Timestep: 60,
|
||||
Series: []schema.Series{
|
||||
{
|
||||
Hostname: "host123",
|
||||
Statistics: schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3},
|
||||
Data: []schema.Float{0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
metricstore.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
|
||||
return testData, nil
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if tt.setupJobFunc != nil {
|
||||
tt.setupJobFunc()
|
||||
}
|
||||
|
||||
natsAPI.handleStopJob(tt.payload)
|
||||
|
||||
// Allow some time for async operations
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
if !tt.expectError && tt.validateJob != nil {
|
||||
// Extract job details from payload
|
||||
var payloadMap map[string]any
|
||||
json.Unmarshal([]byte(tt.payload), &payloadMap)
|
||||
jobID := int64(payloadMap["jobId"].(float64))
|
||||
cluster := payloadMap["cluster"].(string)
|
||||
|
||||
var startTime *int64
|
||||
if st, ok := payloadMap["startTime"]; ok {
|
||||
t := int64(st.(float64))
|
||||
startTime = &t
|
||||
}
|
||||
|
||||
job, err := natsAPI.JobRepository.Find(&jobID, &cluster, startTime)
|
||||
if err != nil {
|
||||
t.Fatalf("expected to find job, but got error: %v", err)
|
||||
}
|
||||
|
||||
tt.validateJob(t, job)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsHandleNodeState(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
expectError bool
|
||||
validateFn func(t *testing.T)
|
||||
}{
|
||||
{
|
||||
name: "valid node state update",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host123\",\"states\":[\"allocated\"],\"cpusAllocated\":8,\"memoryAllocated\":16384,\"gpusAllocated\":0,\"jobsRunning\":1}]}" 1234567890000000000`),
|
||||
expectError: false,
|
||||
validateFn: func(t *testing.T) {
|
||||
// In a full test, we would verify the node state was updated in the database
|
||||
// For now, just ensure no error occurred
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple nodes",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host123\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"host124\",\"states\":[\"allocated\"],\"cpusAllocated\":4,\"memoryAllocated\":8192,\"gpusAllocated\":1,\"jobsRunning\":1}]}" 1234567890000000000`),
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "invalid JSON in event field",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":\"not an array\"}" 1234567890000000000`),
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "empty nodes array",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[]}" 1234567890000000000`),
|
||||
expectError: false, // Empty array should not cause error
|
||||
},
|
||||
{
|
||||
name: "invalid line protocol format",
|
||||
data: []byte(`invalid line protocol format`),
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "empty data",
|
||||
data: []byte(``),
|
||||
expectError: false, // Should be handled gracefully with warning
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
natsAPI.handleNodeState("test.subject", tt.data)
|
||||
|
||||
// Allow some time for async operations
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
if tt.validateFn != nil {
|
||||
tt.validateFn(t)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsProcessJobEvent(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
msgStartJob, err := lp.NewMessage(
|
||||
"job",
|
||||
map[string]string{"function": "start_job"},
|
||||
nil,
|
||||
map[string]any{
|
||||
"event": `{
|
||||
"jobId": 3001,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1, 2, 3]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567890
|
||||
}`,
|
||||
},
|
||||
time.Now(),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test message: %v", err)
|
||||
}
|
||||
|
||||
msgMissingTag, err := lp.NewMessage(
|
||||
"job",
|
||||
map[string]string{},
|
||||
nil,
|
||||
map[string]any{
|
||||
"event": `{}`,
|
||||
},
|
||||
time.Now(),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test message: %v", err)
|
||||
}
|
||||
|
||||
msgUnknownFunc, err := lp.NewMessage(
|
||||
"job",
|
||||
map[string]string{"function": "unknown_function"},
|
||||
nil,
|
||||
map[string]any{
|
||||
"event": `{}`,
|
||||
},
|
||||
time.Now(),
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test message: %v", err)
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
message lp.CCMessage
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "start_job function",
|
||||
message: msgStartJob,
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "missing function tag",
|
||||
message: msgMissingTag,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "unknown function",
|
||||
message: msgUnknownFunc,
|
||||
expectError: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
natsAPI.processJobEvent(tt.message)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsHandleJobEvent(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "valid influx line protocol",
|
||||
data: []byte(`job,function=start_job event="{\"jobId\":4001,\"user\":\"testuser\",\"project\":\"testproj\",\"cluster\":\"testcluster\",\"partition\":\"main\",\"walltime\":3600,\"numNodes\":1,\"numHwthreads\":8,\"numAcc\":0,\"shared\":\"none\",\"monitoringStatus\":1,\"smt\":1,\"resources\":[{\"hostname\":\"host123\",\"hwthreads\":[0,1,2,3]}],\"startTime\":1234567890}" 1234567890000000000`),
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "invalid influx line protocol",
|
||||
data: []byte(`invalid line protocol format`),
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "empty data",
|
||||
data: []byte(``),
|
||||
expectError: false, // Decoder should handle empty input gracefully
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// HandleJobEvent doesn't return errors, it logs them
|
||||
// We're just ensuring it doesn't panic
|
||||
natsAPI.handleJobEvent("test.subject", tt.data)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsHandleJobEventEdgeCases(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
expectError bool
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "non-event message (metric data)",
|
||||
data: []byte(`job,function=start_job value=123.45 1234567890000000000`),
|
||||
expectError: false,
|
||||
description: "Should skip non-event messages gracefully",
|
||||
},
|
||||
{
|
||||
name: "wrong measurement name",
|
||||
data: []byte(`wrongmeasurement,function=start_job event="{}" 1234567890000000000`),
|
||||
expectError: false,
|
||||
description: "Should warn about unexpected measurement but not fail",
|
||||
},
|
||||
{
|
||||
name: "missing event field",
|
||||
data: []byte(`job,function=start_job other_field="value" 1234567890000000000`),
|
||||
expectError: true,
|
||||
description: "Should error when event field is missing",
|
||||
},
|
||||
{
|
||||
name: "multiple measurements in one message",
|
||||
data: []byte("job,function=start_job event=\"{}\" 1234567890000000000\njob,function=stop_job event=\"{}\" 1234567890000000000"),
|
||||
expectError: false,
|
||||
description: "Should process multiple lines",
|
||||
},
|
||||
{
|
||||
name: "escaped quotes in JSON payload",
|
||||
data: []byte(`job,function=start_job event="{\"jobId\":6001,\"user\":\"test\\\"user\",\"cluster\":\"test\"}" 1234567890000000000`),
|
||||
expectError: true,
|
||||
description: "Should handle escaped quotes (though JSON parsing may fail)",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
natsAPI.handleJobEvent("test.subject", tt.data)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsHandleNodeStateEdgeCases(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
data []byte
|
||||
expectError bool
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "missing cluster field in JSON",
|
||||
data: []byte(`nodestate event="{\"nodes\":[]}" 1234567890000000000`),
|
||||
expectError: true,
|
||||
description: "Should fail when cluster is missing",
|
||||
},
|
||||
{
|
||||
name: "malformed JSON with unescaped quotes",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"test"cluster\",\"nodes\":[]}" 1234567890000000000`),
|
||||
expectError: true,
|
||||
description: "Should fail on malformed JSON",
|
||||
},
|
||||
{
|
||||
name: "unicode characters in hostname",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"host-ñ123\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0}]}" 1234567890000000000`),
|
||||
expectError: false,
|
||||
description: "Should handle unicode characters",
|
||||
},
|
||||
{
|
||||
name: "very large node count",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[{\"hostname\":\"node1\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"node2\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0},{\"hostname\":\"node3\",\"states\":[\"idle\"],\"cpusAllocated\":0,\"memoryAllocated\":0,\"gpusAllocated\":0,\"jobsRunning\":0}]}" 1234567890000000000`),
|
||||
expectError: false,
|
||||
description: "Should handle multiple nodes efficiently",
|
||||
},
|
||||
{
|
||||
name: "timestamp in past",
|
||||
data: []byte(`nodestate event="{\"cluster\":\"testcluster\",\"nodes\":[]}" 1000000000000000000`),
|
||||
expectError: false,
|
||||
description: "Should accept any valid timestamp",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
natsAPI.handleNodeState("test.subject", tt.data)
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNatsHandleStartJobDuplicatePrevention(t *testing.T) {
|
||||
natsAPI := setupNatsTest(t)
|
||||
t.Cleanup(cleanupNatsTest)
|
||||
|
||||
// Start a job
|
||||
payload := `{
|
||||
"jobId": 5001,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1, 2, 3]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567890
|
||||
}`
|
||||
|
||||
natsAPI.handleStartJob(payload)
|
||||
natsAPI.JobRepository.SyncJobs()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// Try to start the same job again (within 24 hours)
|
||||
duplicatePayload := `{
|
||||
"jobId": 5001,
|
||||
"user": "testuser",
|
||||
"project": "testproj",
|
||||
"cluster": "testcluster",
|
||||
"partition": "main",
|
||||
"walltime": 3600,
|
||||
"numNodes": 1,
|
||||
"numHwthreads": 8,
|
||||
"numAcc": 0,
|
||||
"shared": "none",
|
||||
"monitoringStatus": 1,
|
||||
"smt": 1,
|
||||
"resources": [
|
||||
{
|
||||
"hostname": "host123",
|
||||
"hwthreads": [0, 1, 2, 3]
|
||||
}
|
||||
],
|
||||
"startTime": 1234567900
|
||||
}`
|
||||
|
||||
natsAPI.handleStartJob(duplicatePayload)
|
||||
natsAPI.JobRepository.SyncJobs()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
// Verify only one job exists
|
||||
jobID := int64(5001)
|
||||
cluster := "testcluster"
|
||||
jobs, err := natsAPI.JobRepository.FindAll(&jobID, &cluster, nil)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if len(jobs) != 1 {
|
||||
t.Errorf("expected 1 job, got %d", len(jobs))
|
||||
}
|
||||
}
|
||||
145
internal/api/node.go
Normal file
145
internal/api/node.go
Normal file
@@ -0,0 +1,145 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"maps"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
type UpdateNodeStatesRequest struct {
|
||||
Nodes []schema.NodePayload `json:"nodes"`
|
||||
Cluster string `json:"cluster" example:"fritz"`
|
||||
}
|
||||
|
||||
// metricListToNames converts a map of metric configurations to a list of metric names
|
||||
func metricListToNames(metricList map[string]*schema.Metric) []string {
|
||||
names := make([]string, 0, len(metricList))
|
||||
for name := range metricList {
|
||||
names = append(names, name)
|
||||
}
|
||||
return names
|
||||
}
|
||||
|
||||
// this routine assumes that only one of them exists per node
|
||||
func determineState(states []string) schema.SchedulerState {
|
||||
for _, state := range states {
|
||||
switch strings.ToLower(state) {
|
||||
case "allocated":
|
||||
return schema.NodeStateAllocated
|
||||
case "reserved":
|
||||
return schema.NodeStateReserved
|
||||
case "idle":
|
||||
return schema.NodeStateIdle
|
||||
case "down":
|
||||
return schema.NodeStateDown
|
||||
case "mixed":
|
||||
return schema.NodeStateMixed
|
||||
}
|
||||
}
|
||||
|
||||
return schema.NodeStateUnknown
|
||||
}
|
||||
|
||||
// updateNodeStates godoc
|
||||
// @summary Deliver updated Slurm node states
|
||||
// @tags Nodestates
|
||||
// @description Returns a JSON-encoded list of users.
|
||||
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
|
||||
// @produce json
|
||||
// @param request body UpdateNodeStatesRequest true "Request body containing nodes and their states"
|
||||
// @success 200 {object} api.DefaultAPIResponse "Success message"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/nodestats/ [post]
|
||||
func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
||||
// Parse request body
|
||||
req := UpdateNodeStatesRequest{}
|
||||
if err := decode(r.Body, &req); err != nil {
|
||||
handleError(fmt.Errorf("parsing request body failed: %w", err),
|
||||
http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
requestReceived := time.Now().Unix()
|
||||
repo := repository.GetNodeRepository()
|
||||
|
||||
m := make(map[string][]string)
|
||||
metricNames := make(map[string][]string)
|
||||
healthResults := make(map[string]metricstore.HealthCheckResult)
|
||||
|
||||
startMs := time.Now()
|
||||
|
||||
// Step 1: Build nodeList and metricList per subcluster
|
||||
for _, node := range req.Nodes {
|
||||
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
|
||||
m[sc] = append(m[sc], node.Hostname)
|
||||
}
|
||||
}
|
||||
|
||||
for sc := range m {
|
||||
if sc != "" {
|
||||
metricList := archive.GetMetricConfigSubCluster(req.Cluster, sc)
|
||||
metricNames[sc] = metricListToNames(metricList)
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Determine which metric store to query and perform health check
|
||||
healthRepo, err := metricdispatch.GetHealthCheckRepo(req.Cluster)
|
||||
if err != nil {
|
||||
cclog.Warnf("updateNodeStates: no metric store for cluster %s, skipping health check: %v", req.Cluster, err)
|
||||
} else {
|
||||
for sc, nl := range m {
|
||||
if sc != "" {
|
||||
if results, err := healthRepo.HealthCheck(req.Cluster, nl, metricNames[sc]); err == nil {
|
||||
maps.Copy(healthResults, results)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Debugf("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs))
|
||||
startDB := time.Now()
|
||||
|
||||
for _, node := range req.Nodes {
|
||||
state := determineState(node.States)
|
||||
healthState := schema.MonitoringStateFailed
|
||||
var healthMetrics string
|
||||
if result, ok := healthResults[node.Hostname]; ok {
|
||||
healthState = result.State
|
||||
healthMetrics = result.HealthMetrics
|
||||
}
|
||||
nodeState := schema.NodeStateDB{
|
||||
TimeStamp: requestReceived,
|
||||
NodeState: state,
|
||||
CpusAllocated: node.CpusAllocated,
|
||||
MemoryAllocated: node.MemoryAllocated,
|
||||
GpusAllocated: node.GpusAllocated,
|
||||
HealthState: healthState,
|
||||
HealthMetrics: healthMetrics,
|
||||
JobsRunning: node.JobsRunning,
|
||||
}
|
||||
|
||||
if err := repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState); err != nil {
|
||||
cclog.Errorf("updateNodeStates: updating node state for %s on %s failed: %v",
|
||||
node.Hostname, req.Cluster, err)
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Debugf("Timer updateNodeStates, SQLite Inserts: %s", time.Since(startDB))
|
||||
}
|
||||
1241
internal/api/rest.go
1241
internal/api/rest.go
File diff suppressed because it is too large
Load Diff
221
internal/api/user.go
Normal file
221
internal/api/user.go
Normal file
@@ -0,0 +1,221 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/go-chi/chi/v5"
|
||||
)
|
||||
|
||||
type APIReturnedUser struct {
|
||||
Username string `json:"username"`
|
||||
Name string `json:"name"`
|
||||
Roles []string `json:"roles"`
|
||||
Email string `json:"email"`
|
||||
Projects []string `json:"projects"`
|
||||
}
|
||||
|
||||
// getUsers godoc
|
||||
// @summary Returns a list of users
|
||||
// @tags User
|
||||
// @description Returns a JSON-encoded list of users.
|
||||
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
|
||||
// @produce json
|
||||
// @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
|
||||
// @success 200 {array} api.APIReturnedUser "List of users returned successfully"
|
||||
// @failure 400 {string} string "Bad Request"
|
||||
// @failure 401 {string} string "Unauthorized"
|
||||
// @failure 403 {string} string "Forbidden"
|
||||
// @failure 500 {string} string "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/users/ [get]
|
||||
func (api *RestAPI) getUsers(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
handleError(fmt.Errorf("only admins are allowed to fetch a list of users"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
users, err := repository.GetUserRepository().ListUsers(r.URL.Query().Get("not-just-user") == "true")
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("listing users failed: %w", err), http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
if err := json.NewEncoder(rw).Encode(users); err != nil {
|
||||
cclog.Errorf("Failed to encode users response: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// updateUser godoc
|
||||
// @summary Update user roles and projects
|
||||
// @tags User
|
||||
// @description Allows admins to add/remove roles and projects for a user
|
||||
// @produce plain
|
||||
// @param id path string true "Username"
|
||||
// @param add-role formData string false "Role to add"
|
||||
// @param remove-role formData string false "Role to remove"
|
||||
// @param add-project formData string false "Project to add"
|
||||
// @param remove-project formData string false "Project to remove"
|
||||
// @success 200 {string} string "Success message"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/user/{id} [post]
|
||||
func (api *RestAPI) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
handleError(fmt.Errorf("only admins are allowed to update a user"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Get Values
|
||||
newrole := r.FormValue("add-role")
|
||||
delrole := r.FormValue("remove-role")
|
||||
newproj := r.FormValue("add-project")
|
||||
delproj := r.FormValue("remove-project")
|
||||
|
||||
rw.Header().Set("Content-Type", "application/json")
|
||||
|
||||
// Handle role updates
|
||||
if newrole != "" {
|
||||
if err := repository.GetUserRepository().AddRole(r.Context(), chi.URLParam(r, "id"), newrole); err != nil {
|
||||
handleError(fmt.Errorf("adding role failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Add Role Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else if delrole != "" {
|
||||
if err := repository.GetUserRepository().RemoveRole(r.Context(), chi.URLParam(r, "id"), delrole); err != nil {
|
||||
handleError(fmt.Errorf("removing role failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Remove Role Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else if newproj != "" {
|
||||
if err := repository.GetUserRepository().AddProject(r.Context(), chi.URLParam(r, "id"), newproj); err != nil {
|
||||
handleError(fmt.Errorf("adding project failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Add Project Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else if delproj != "" {
|
||||
if err := repository.GetUserRepository().RemoveProject(r.Context(), chi.URLParam(r, "id"), delproj); err != nil {
|
||||
handleError(fmt.Errorf("removing project failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Remove Project Success"}); err != nil {
|
||||
cclog.Errorf("Failed to encode response: %v", err)
|
||||
}
|
||||
} else {
|
||||
handleError(fmt.Errorf("no operation specified: must provide add-role, remove-role, add-project, or remove-project"), http.StatusBadRequest, rw)
|
||||
}
|
||||
}
|
||||
|
||||
// createUser godoc
|
||||
// @summary Create a new user
|
||||
// @tags User
|
||||
// @description Creates a new user with specified credentials and role
|
||||
// @produce plain
|
||||
// @param username formData string true "Username"
|
||||
// @param password formData string false "Password (not required for API users)"
|
||||
// @param role formData string true "User role"
|
||||
// @param name formData string false "Full name"
|
||||
// @param email formData string false "Email address"
|
||||
// @param project formData string false "Project (required for managers)"
|
||||
// @success 200 {string} string "Success message"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/users/ [post]
|
||||
func (api *RestAPI) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
rw.Header().Set("Content-Type", "text/plain")
|
||||
me := repository.GetUserFromContext(r.Context())
|
||||
if !me.HasRole(schema.RoleAdmin) {
|
||||
handleError(fmt.Errorf("only admins are allowed to create new users"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
username, password, role, name, email, project := r.FormValue("username"),
|
||||
r.FormValue("password"), r.FormValue("role"), r.FormValue("name"),
|
||||
r.FormValue("email"), r.FormValue("project")
|
||||
|
||||
// Validate username length
|
||||
if len(username) == 0 || len(username) > 100 {
|
||||
handleError(fmt.Errorf("username must be between 1 and 100 characters"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if len(password) == 0 && role != schema.GetRoleString(schema.RoleAPI) {
|
||||
handleError(fmt.Errorf("only API users are allowed to have a blank password (login will be impossible)"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if len(project) != 0 && role != schema.GetRoleString(schema.RoleManager) {
|
||||
handleError(fmt.Errorf("only managers require a project (can be changed later)"), http.StatusBadRequest, rw)
|
||||
return
|
||||
} else if len(project) == 0 && role == schema.GetRoleString(schema.RoleManager) {
|
||||
handleError(fmt.Errorf("managers require a project to manage (can be changed later)"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
if err := repository.GetUserRepository().AddUser(&schema.User{
|
||||
Username: username,
|
||||
Name: name,
|
||||
Password: password,
|
||||
Email: email,
|
||||
Projects: []string{project},
|
||||
Roles: []string{role},
|
||||
}); err != nil {
|
||||
handleError(fmt.Errorf("adding user failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(rw, "User %v successfully created!\n", username)
|
||||
}
|
||||
|
||||
// deleteUser godoc
|
||||
// @summary Delete a user
|
||||
// @tags User
|
||||
// @description Deletes a user from the system
|
||||
// @produce plain
|
||||
// @param username formData string true "Username to delete"
|
||||
// @success 200 {string} string "Success"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||
// @security ApiKeyAuth
|
||||
// @router /api/users/ [delete]
|
||||
func (api *RestAPI) deleteUser(rw http.ResponseWriter, r *http.Request) {
|
||||
// SecuredCheck() only worked with TokenAuth: Removed
|
||||
|
||||
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||
handleError(fmt.Errorf("only admins are allowed to delete a user"), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
username := r.FormValue("username")
|
||||
if err := repository.GetUserRepository().DelUser(username); err != nil {
|
||||
handleError(fmt.Errorf("deleting user failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.WriteHeader(http.StatusOK)
|
||||
}
|
||||
189
internal/archiver/README.md
Normal file
189
internal/archiver/README.md
Normal file
@@ -0,0 +1,189 @@
|
||||
# Archiver Package
|
||||
|
||||
The `archiver` package provides asynchronous job archiving functionality for ClusterCockpit. When jobs complete, their metric data is archived from the metric store to a persistent archive backend (filesystem, S3, SQLite, etc.).
|
||||
|
||||
## Architecture
|
||||
|
||||
### Producer-Consumer Pattern
|
||||
|
||||
```
|
||||
┌──────────────┐ TriggerArchiving() ┌───────────────┐
|
||||
│ API Handler │ ───────────────────────▶ │ archiveChannel│
|
||||
│ (Job Stop) │ │ (buffer: 128)│
|
||||
└──────────────┘ └───────┬───────┘
|
||||
│
|
||||
┌─────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────┐
|
||||
│ archivingWorker() │
|
||||
│ (goroutine) │
|
||||
└──────────┬───────────┘
|
||||
│
|
||||
▼
|
||||
1. Fetch job metadata
|
||||
2. Load metric data
|
||||
3. Calculate statistics
|
||||
4. Archive to backend
|
||||
5. Update database
|
||||
6. Call hooks
|
||||
```
|
||||
|
||||
### Components
|
||||
|
||||
- **archiveChannel**: Buffered channel (128 jobs) for async communication
|
||||
- **archivePending**: WaitGroup tracking in-flight archiving operations
|
||||
- **archivingWorker**: Background goroutine processing archiving requests
|
||||
- **shutdownCtx**: Context for graceful cancellation during shutdown
|
||||
|
||||
## Usage
|
||||
|
||||
### Initialization
|
||||
|
||||
```go
|
||||
// Start archiver with context for shutdown control
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
archiver.Start(jobRepository, ctx)
|
||||
```
|
||||
|
||||
### Archiving a Job
|
||||
|
||||
```go
|
||||
// Called automatically when a job completes
|
||||
archiver.TriggerArchiving(job)
|
||||
```
|
||||
|
||||
The function returns immediately. Actual archiving happens in the background.
|
||||
|
||||
### Graceful Shutdown
|
||||
|
||||
```go
|
||||
// Shutdown with 10 second timeout
|
||||
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
log.Printf("Archiver shutdown timeout: %v", err)
|
||||
}
|
||||
```
|
||||
|
||||
**Shutdown process:**
|
||||
1. Closes channel (rejects new jobs)
|
||||
2. Waits for pending jobs (up to timeout)
|
||||
3. Cancels context if timeout exceeded
|
||||
4. Waits for worker to exit cleanly
|
||||
|
||||
## Configuration
|
||||
|
||||
### Channel Buffer Size
|
||||
|
||||
The archiving channel has a buffer of 128 jobs. If more than 128 jobs are queued simultaneously, `TriggerArchiving()` will block until space is available.
|
||||
|
||||
To adjust:
|
||||
```go
|
||||
// In archiveWorker.go Start() function
|
||||
archiveChannel = make(chan *schema.Job, 256) // Increase buffer
|
||||
```
|
||||
|
||||
### Scope Selection
|
||||
|
||||
Archive data scopes are automatically selected based on job size:
|
||||
|
||||
- **Node scope**: Always included
|
||||
- **Core scope**: Included for jobs with ≤8 nodes (reduces data volume for large jobs)
|
||||
- **Accelerator scope**: Included if job used accelerators (`NumAcc > 0`)
|
||||
|
||||
To adjust the node threshold:
|
||||
```go
|
||||
// In archiver.go ArchiveJob() function
|
||||
if job.NumNodes <= 16 { // Change from 8 to 16
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
```
|
||||
|
||||
### Resolution
|
||||
|
||||
Data is archived at the highest available resolution (typically 60s intervals). To change:
|
||||
|
||||
```go
|
||||
// In archiver.go ArchiveJob() function
|
||||
jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 300)
|
||||
// 0 = highest resolution
|
||||
// 300 = 5-minute resolution
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Automatic Retry
|
||||
|
||||
The archiver does **not** automatically retry failed archiving operations. If archiving fails:
|
||||
|
||||
1. Error is logged
|
||||
2. Job is marked as `MonitoringStatusArchivingFailed` in database
|
||||
3. Worker continues processing other jobs
|
||||
|
||||
### Manual Retry
|
||||
|
||||
To re-archive failed jobs, query for jobs with `MonitoringStatusArchivingFailed` and call `TriggerArchiving()` again.
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Single Worker Thread
|
||||
|
||||
The archiver uses a single worker goroutine. For high-throughput systems:
|
||||
|
||||
- Large channel buffer (128) prevents blocking
|
||||
- Archiving is typically I/O bound (writing to storage)
|
||||
- Single worker prevents overwhelming storage backend
|
||||
|
||||
### Shutdown Timeout
|
||||
|
||||
Recommended timeout values:
|
||||
- **Development**: 5-10 seconds
|
||||
- **Production**: 10-30 seconds
|
||||
- **High-load**: 30-60 seconds
|
||||
|
||||
Choose based on:
|
||||
- Average archiving time per job
|
||||
- Storage backend latency
|
||||
- Acceptable shutdown delay
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Logging
|
||||
|
||||
The archiver logs:
|
||||
- **Info**: Startup, shutdown, successful completions
|
||||
- **Debug**: Individual job archiving times
|
||||
- **Error**: Archiving failures with job ID and reason
|
||||
- **Warn**: Shutdown timeout exceeded
|
||||
|
||||
### Metrics
|
||||
|
||||
Monitor these signals for archiver health:
|
||||
- Jobs with `MonitoringStatusArchivingFailed`
|
||||
- Time from job stop to successful archive
|
||||
- Shutdown timeout occurrences
|
||||
|
||||
## Thread Safety
|
||||
|
||||
All exported functions are safe for concurrent use:
|
||||
- `Start()` - Safe to call once
|
||||
- `TriggerArchiving()` - Safe from multiple goroutines
|
||||
- `Shutdown()` - Safe to call once
|
||||
|
||||
Internal state is protected by:
|
||||
- Channel synchronization (`archiveChannel`)
|
||||
- WaitGroup for pending count (`archivePending`)
|
||||
- Context for cancellation (`shutdownCtx`)
|
||||
|
||||
## Files
|
||||
|
||||
- **archiveWorker.go**: Worker lifecycle, channel management, shutdown logic
|
||||
- **archiver.go**: Core archiving logic, metric loading, statistics calculation
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `internal/repository`: Database operations for job metadata
|
||||
- `internal/metricdispatch`: Loading metric data from various backends
|
||||
- `pkg/archive`: Archive backend abstraction (filesystem, S3, SQLite)
|
||||
- `cc-lib/schema`: Job and metric data structures
|
||||
250
internal/archiver/archiveWorker.go
Normal file
250
internal/archiver/archiveWorker.go
Normal file
@@ -0,0 +1,250 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package archiver provides asynchronous job archiving functionality for ClusterCockpit.
|
||||
//
|
||||
// The archiver runs a background worker goroutine that processes job archiving requests
|
||||
// from a buffered channel. When jobs complete, their metric data is archived from the
|
||||
// metric store to the configured archive backend (filesystem, S3, etc.).
|
||||
//
|
||||
// # Architecture
|
||||
//
|
||||
// The archiver uses a producer-consumer pattern:
|
||||
// - Producer: TriggerArchiving() sends jobs to archiveChannel
|
||||
// - Consumer: archivingWorker() processes jobs from the channel
|
||||
// - Coordination: sync.WaitGroup tracks pending archive operations
|
||||
//
|
||||
// # Lifecycle
|
||||
//
|
||||
// 1. Start(repo, ctx) - Initialize worker with context for cancellation
|
||||
// 2. TriggerArchiving(job) - Queue job for archiving (called when job stops)
|
||||
// 3. archivingWorker() - Background goroutine processes jobs
|
||||
// 4. Shutdown(timeout) - Graceful shutdown with timeout
|
||||
//
|
||||
// # Graceful Shutdown
|
||||
//
|
||||
// The archiver supports graceful shutdown with configurable timeout:
|
||||
// - Closes channel to reject new jobs
|
||||
// - Waits for pending jobs to complete (up to timeout)
|
||||
// - Cancels context if timeout exceeded
|
||||
// - Ensures worker goroutine exits cleanly
|
||||
//
|
||||
// # Example Usage
|
||||
//
|
||||
// // Initialize archiver
|
||||
// ctx, cancel := context.WithCancel(context.Background())
|
||||
// defer cancel()
|
||||
// archiver.Start(jobRepository, ctx)
|
||||
//
|
||||
// // Trigger archiving when job completes
|
||||
// archiver.TriggerArchiving(job)
|
||||
//
|
||||
// // Graceful shutdown with 10 second timeout
|
||||
// if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
// log.Printf("Archiver shutdown timeout: %v", err)
|
||||
// }
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
)
|
||||
|
||||
var (
|
||||
archivePending sync.WaitGroup
|
||||
archiveChannel chan *schema.Job
|
||||
jobRepo *repository.JobRepository
|
||||
shutdownCtx context.Context
|
||||
shutdownCancel context.CancelFunc
|
||||
workerDone chan struct{}
|
||||
)
|
||||
|
||||
// Start initializes the archiver and starts the background worker goroutine.
|
||||
//
|
||||
// The archiver processes job archiving requests asynchronously via a buffered channel.
|
||||
// Jobs are sent to the channel using TriggerArchiving() and processed by the worker.
|
||||
//
|
||||
// Parameters:
|
||||
// - r: JobRepository instance for database operations
|
||||
// - ctx: Context for cancellation (shutdown signal propagation)
|
||||
//
|
||||
// The worker goroutine will run until:
|
||||
// - ctx is cancelled (via parent shutdown)
|
||||
// - archiveChannel is closed (via Shutdown())
|
||||
//
|
||||
// Must be called before TriggerArchiving(). Safe to call only once.
|
||||
func Start(r *repository.JobRepository, ctx context.Context) {
|
||||
shutdownCtx, shutdownCancel = context.WithCancel(ctx)
|
||||
archiveChannel = make(chan *schema.Job, 128)
|
||||
workerDone = make(chan struct{})
|
||||
jobRepo = r
|
||||
|
||||
go archivingWorker()
|
||||
}
|
||||
|
||||
// archivingWorker is the background goroutine that processes job archiving requests.
|
||||
//
|
||||
// The worker loop:
|
||||
// 1. Blocks waiting for jobs on archiveChannel or shutdown signal
|
||||
// 2. Fetches job metadata from repository
|
||||
// 3. Archives job data to configured backend (calls ArchiveJob)
|
||||
// 4. Updates job footprint and energy metrics in database
|
||||
// 5. Marks job as successfully archived
|
||||
// 6. Calls job stop hooks
|
||||
//
|
||||
// The worker exits when:
|
||||
// - shutdownCtx is cancelled (timeout during shutdown)
|
||||
// - archiveChannel is closed (normal shutdown)
|
||||
//
|
||||
// Errors during archiving are logged and the job is marked as failed,
|
||||
// but the worker continues processing other jobs.
|
||||
func archivingWorker() {
|
||||
defer close(workerDone)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-shutdownCtx.Done():
|
||||
cclog.Info("Archive worker received shutdown signal")
|
||||
return
|
||||
|
||||
case job, ok := <-archiveChannel:
|
||||
if !ok {
|
||||
cclog.Info("Archive channel closed, worker exiting")
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
// not using meta data, called to load JobMeta into Cache?
|
||||
// will fail if job meta not in repository
|
||||
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", *job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
|
||||
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||
// Use shutdown context to allow cancellation
|
||||
jobMeta, err := ArchiveJob(job, shutdownCtx)
|
||||
if err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", *job.ID, err.Error())
|
||||
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
|
||||
stmt := sq.Update("job").Where("job.id = ?", job.ID)
|
||||
|
||||
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", *job.ID, err.Error())
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", *job.ID, err.Error())
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
// Update the jobs database entry one last time:
|
||||
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
|
||||
if err := jobRepo.Execute(stmt); err != nil {
|
||||
cclog.Errorf("archiving job (dbid: %d) failed at db execute: %s", *job.ID, err.Error())
|
||||
archivePending.Done()
|
||||
continue
|
||||
}
|
||||
cclog.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||
cclog.Infof("archiving job (dbid: %d) successful", *job.ID)
|
||||
|
||||
repository.CallJobStopHooks(job)
|
||||
archivePending.Done()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TriggerArchiving queues a job for asynchronous archiving.
|
||||
//
|
||||
// This function should be called when a job completes (stops) to archive its
|
||||
// metric data from the metric store to the configured archive backend.
|
||||
//
|
||||
// The function:
|
||||
// 1. Increments the pending job counter (WaitGroup)
|
||||
// 2. Sends the job to the archiving channel (buffered, capacity 128)
|
||||
// 3. Returns immediately (non-blocking unless channel is full)
|
||||
//
|
||||
// The actual archiving is performed asynchronously by the worker goroutine.
|
||||
// Upon completion, the worker will decrement the pending counter.
|
||||
//
|
||||
// Panics if Start() has not been called first.
|
||||
func TriggerArchiving(job *schema.Job) {
|
||||
if archiveChannel == nil {
|
||||
cclog.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
|
||||
}
|
||||
|
||||
archivePending.Add(1)
|
||||
archiveChannel <- job
|
||||
}
|
||||
|
||||
// Shutdown performs a graceful shutdown of the archiver with a configurable timeout.
|
||||
//
|
||||
// The shutdown process:
|
||||
// 1. Closes archiveChannel - no new jobs will be accepted
|
||||
// 2. Waits for pending jobs to complete (up to timeout duration)
|
||||
// 3. If timeout is exceeded:
|
||||
// - Cancels shutdownCtx to interrupt ongoing ArchiveJob operations
|
||||
// - Returns error indicating timeout
|
||||
// 4. Waits for worker goroutine to exit cleanly
|
||||
//
|
||||
// Parameters:
|
||||
// - timeout: Maximum duration to wait for pending jobs to complete
|
||||
// (recommended: 10-30 seconds for production)
|
||||
//
|
||||
// Returns:
|
||||
// - nil if all jobs completed within timeout
|
||||
// - error if timeout was exceeded (some jobs may not have been archived)
|
||||
//
|
||||
// Jobs that don't complete within the timeout will be marked as failed.
|
||||
// The function always ensures the worker goroutine exits before returning.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
// log.Printf("Some jobs did not complete: %v", err)
|
||||
// }
|
||||
func Shutdown(timeout time.Duration) error {
|
||||
cclog.Info("Initiating archiver shutdown...")
|
||||
|
||||
// Close channel to signal no more jobs will be accepted
|
||||
close(archiveChannel)
|
||||
|
||||
// Create a channel to signal when all jobs are done
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
archivePending.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
// Wait for jobs to complete or timeout
|
||||
select {
|
||||
case <-done:
|
||||
cclog.Info("All archive jobs completed successfully")
|
||||
// Wait for worker to exit
|
||||
<-workerDone
|
||||
return nil
|
||||
case <-time.After(timeout):
|
||||
cclog.Warn("Archiver shutdown timeout exceeded, cancelling remaining operations")
|
||||
// Cancel any ongoing operations
|
||||
shutdownCancel()
|
||||
// Wait for worker to exit
|
||||
<-workerDone
|
||||
return fmt.Errorf("archiver shutdown timeout after %v", timeout)
|
||||
}
|
||||
}
|
||||
97
internal/archiver/archiver.go
Normal file
97
internal/archiver/archiver.go
Normal file
@@ -0,0 +1,97 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package archiver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"math"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
// ArchiveJob archives a completed job's metric data to the configured archive backend.
|
||||
//
|
||||
// This function performs the following operations:
|
||||
// 1. Loads all metric data for the job from the metric data repository
|
||||
// 2. Calculates job-level statistics (avg, min, max) for each metric
|
||||
// 3. Stores the job metadata and metric data to the archive backend
|
||||
//
|
||||
// Metric data is retrieved at the highest available resolution (typically 60s)
|
||||
// for the following scopes:
|
||||
// - Node scope (always)
|
||||
// - Core scope (for jobs with ≤8 nodes, to reduce data volume)
|
||||
// - Accelerator scope (if job used accelerators)
|
||||
//
|
||||
// The function respects context cancellation. If ctx is cancelled (e.g., during
|
||||
// shutdown timeout), the operation will be interrupted and return an error.
|
||||
//
|
||||
// Parameters:
|
||||
// - job: The job to archive (must be a completed job)
|
||||
// - ctx: Context for cancellation and timeout control
|
||||
//
|
||||
// Returns:
|
||||
// - *schema.Job with populated Statistics field
|
||||
// - error if data loading or archiving fails
|
||||
//
|
||||
// If config.Keys.DisableArchive is true, only job statistics are calculated
|
||||
// and returned (no data is written to archive backend).
|
||||
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||
allMetrics := make([]string, 0)
|
||||
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||
for _, mc := range metricConfigs {
|
||||
allMetrics = append(allMetrics, mc.Name)
|
||||
}
|
||||
|
||||
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||
// FIXME: Add a config option for this
|
||||
if job.NumNodes <= 8 {
|
||||
// This will add the native scope if core scope is not available
|
||||
scopes = append(scopes, schema.MetricScopeCore)
|
||||
}
|
||||
|
||||
if job.NumAcc > 0 {
|
||||
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||
}
|
||||
|
||||
jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
|
||||
if err != nil {
|
||||
cclog.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
job.Statistics = make(map[string]schema.JobStatistics)
|
||||
|
||||
for metric, data := range jobData {
|
||||
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||
nodeData, ok := data["node"]
|
||||
if !ok {
|
||||
// This should never happen ?
|
||||
continue
|
||||
}
|
||||
|
||||
for _, series := range nodeData.Series {
|
||||
avg += series.Statistics.Avg
|
||||
min = math.Min(min, series.Statistics.Min)
|
||||
max = math.Max(max, series.Statistics.Max)
|
||||
}
|
||||
|
||||
// Round AVG Result to 2 Digits
|
||||
job.Statistics[metric] = schema.JobStatistics{
|
||||
Unit: schema.Unit{
|
||||
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||
},
|
||||
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
|
||||
Min: min,
|
||||
Max: max,
|
||||
}
|
||||
}
|
||||
|
||||
return job, archive.GetHandle().ImportJob(job, &jobData)
|
||||
}
|
||||
@@ -1,298 +1,137 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package auth implements various authentication methods
|
||||
package auth
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/util"
|
||||
"github.com/gorilla/sessions"
|
||||
"github.com/jmoiron/sqlx"
|
||||
)
|
||||
|
||||
type AuthSource int
|
||||
|
||||
const (
|
||||
AuthViaLocalPassword AuthSource = iota
|
||||
AuthViaLDAP
|
||||
AuthViaToken
|
||||
)
|
||||
|
||||
type User struct {
|
||||
Username string `json:"username"`
|
||||
Password string `json:"-"`
|
||||
Name string `json:"name"`
|
||||
Roles []string `json:"roles"`
|
||||
AuthSource AuthSource `json:"via"`
|
||||
Email string `json:"email"`
|
||||
Projects []string `json:"projects"`
|
||||
Expiration time.Time
|
||||
}
|
||||
|
||||
type Role int
|
||||
|
||||
const (
|
||||
RoleAnonymous Role = iota
|
||||
RoleApi
|
||||
RoleUser
|
||||
RoleManager
|
||||
RoleSupport
|
||||
RoleAdmin
|
||||
RoleError
|
||||
)
|
||||
|
||||
func GetRoleString(roleInt Role) string {
|
||||
return [6]string{"anonymous", "api", "user", "manager", "support", "admin"}[roleInt]
|
||||
}
|
||||
|
||||
func getRoleEnum(roleStr string) Role {
|
||||
switch strings.ToLower(roleStr) {
|
||||
case "admin":
|
||||
return RoleAdmin
|
||||
case "support":
|
||||
return RoleSupport
|
||||
case "manager":
|
||||
return RoleManager
|
||||
case "user":
|
||||
return RoleUser
|
||||
case "api":
|
||||
return RoleApi
|
||||
case "anonymous":
|
||||
return RoleAnonymous
|
||||
default:
|
||||
return RoleError
|
||||
}
|
||||
}
|
||||
|
||||
func isValidRole(role string) bool {
|
||||
if getRoleEnum(role) == RoleError {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (u *User) HasValidRole(role string) (hasRole bool, isValid bool) {
|
||||
if isValidRole(role) {
|
||||
for _, r := range u.Roles {
|
||||
if r == role {
|
||||
return true, true
|
||||
}
|
||||
}
|
||||
return false, true
|
||||
}
|
||||
return false, false
|
||||
}
|
||||
|
||||
func (u *User) HasRole(role Role) bool {
|
||||
for _, r := range u.Roles {
|
||||
if r == GetRoleString(role) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Role-Arrays are short: performance not impacted by nested loop
|
||||
func (u *User) HasAnyRole(queryroles []Role) bool {
|
||||
for _, ur := range u.Roles {
|
||||
for _, qr := range queryroles {
|
||||
if ur == GetRoleString(qr) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Role-Arrays are short: performance not impacted by nested loop
|
||||
func (u *User) HasAllRoles(queryroles []Role) bool {
|
||||
target := len(queryroles)
|
||||
matches := 0
|
||||
for _, ur := range u.Roles {
|
||||
for _, qr := range queryroles {
|
||||
if ur == GetRoleString(qr) {
|
||||
matches += 1
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if matches == target {
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Role-Arrays are short: performance not impacted by nested loop
|
||||
func (u *User) HasNotRoles(queryroles []Role) bool {
|
||||
matches := 0
|
||||
for _, ur := range u.Roles {
|
||||
for _, qr := range queryroles {
|
||||
if ur == GetRoleString(qr) {
|
||||
matches += 1
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if matches == 0 {
|
||||
return true
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Called by API endpoint '/roles/' from frontend: Only required for admin config -> Check Admin Role
|
||||
func GetValidRoles(user *User) ([]string, error) {
|
||||
var vals []string
|
||||
if user.HasRole(RoleAdmin) {
|
||||
for i := RoleApi; i < RoleError; i++ {
|
||||
vals = append(vals, GetRoleString(i))
|
||||
}
|
||||
return vals, nil
|
||||
}
|
||||
|
||||
return vals, fmt.Errorf("%s: only admins are allowed to fetch a list of roles", user.Username)
|
||||
}
|
||||
|
||||
// Called by routerConfig web.page setup in backend: Only requires known user and/or not API user
|
||||
func GetValidRolesMap(user *User) (map[string]Role, error) {
|
||||
named := make(map[string]Role)
|
||||
if user.HasNotRoles([]Role{RoleApi, RoleAnonymous}) {
|
||||
for i := RoleApi; i < RoleError; i++ {
|
||||
named[GetRoleString(i)] = i
|
||||
}
|
||||
return named, nil
|
||||
}
|
||||
return named, fmt.Errorf("Only known users are allowed to fetch a list of roles")
|
||||
}
|
||||
|
||||
// Find highest role
|
||||
func (u *User) GetAuthLevel() Role {
|
||||
if u.HasRole(RoleAdmin) {
|
||||
return RoleAdmin
|
||||
} else if u.HasRole(RoleSupport) {
|
||||
return RoleSupport
|
||||
} else if u.HasRole(RoleManager) {
|
||||
return RoleManager
|
||||
} else if u.HasRole(RoleUser) {
|
||||
return RoleUser
|
||||
} else if u.HasRole(RoleApi) {
|
||||
return RoleApi
|
||||
} else if u.HasRole(RoleAnonymous) {
|
||||
return RoleAnonymous
|
||||
} else {
|
||||
return RoleError
|
||||
}
|
||||
}
|
||||
|
||||
func (u *User) HasProject(project string) bool {
|
||||
for _, p := range u.Projects {
|
||||
if p == project {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func GetUser(ctx context.Context) *User {
|
||||
x := ctx.Value(ContextUserKey)
|
||||
if x == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return x.(*User)
|
||||
}
|
||||
|
||||
// Authenticator is the interface for all authentication methods.
|
||||
// Each authenticator determines if it can handle a login request (CanLogin)
|
||||
// and performs the actual authentication (Login).
|
||||
type Authenticator interface {
|
||||
Init(auth *Authentication, config interface{}) error
|
||||
CanLogin(user *User, rw http.ResponseWriter, r *http.Request) bool
|
||||
Login(user *User, rw http.ResponseWriter, r *http.Request) (*User, error)
|
||||
Auth(rw http.ResponseWriter, r *http.Request) (*User, error)
|
||||
// CanLogin determines if this authenticator can handle the login request.
|
||||
// It returns the user object if available and a boolean indicating if this
|
||||
// authenticator should attempt the login. This method should not perform
|
||||
// expensive operations or actual authentication.
|
||||
CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)
|
||||
|
||||
// Login performs the actually authentication for the user.
|
||||
// It returns the authenticated user or an error if authentication fails.
|
||||
// The user parameter may be nil if the user doesn't exist in the database yet.
|
||||
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
||||
}
|
||||
|
||||
type ContextKey string
|
||||
var (
|
||||
initOnce sync.Once
|
||||
authInstance *Authentication
|
||||
)
|
||||
|
||||
const ContextUserKey ContextKey = "user"
|
||||
// rateLimiterEntry tracks a rate limiter and its last use time for cleanup
|
||||
type rateLimiterEntry struct {
|
||||
limiter *rate.Limiter
|
||||
lastUsed time.Time
|
||||
}
|
||||
|
||||
var ipUserLimiters sync.Map
|
||||
|
||||
// getIPUserLimiter returns a rate limiter for the given IP and username combination.
|
||||
// Rate limiters are created on demand and track 5 attempts per 15 minutes.
|
||||
func getIPUserLimiter(ip, username string) *rate.Limiter {
|
||||
key := ip + ":" + username
|
||||
now := time.Now()
|
||||
|
||||
if entry, ok := ipUserLimiters.Load(key); ok {
|
||||
rle := entry.(*rateLimiterEntry)
|
||||
rle.lastUsed = now
|
||||
return rle.limiter
|
||||
}
|
||||
|
||||
// More aggressive rate limiting: 5 attempts per 15 minutes
|
||||
newLimiter := rate.NewLimiter(rate.Every(15*time.Minute/5), 5)
|
||||
ipUserLimiters.Store(key, &rateLimiterEntry{
|
||||
limiter: newLimiter,
|
||||
lastUsed: now,
|
||||
})
|
||||
return newLimiter
|
||||
}
|
||||
|
||||
// cleanupOldRateLimiters removes rate limiters that haven't been used recently
|
||||
func cleanupOldRateLimiters(olderThan time.Time) {
|
||||
ipUserLimiters.Range(func(key, value any) bool {
|
||||
entry := value.(*rateLimiterEntry)
|
||||
if entry.lastUsed.Before(olderThan) {
|
||||
ipUserLimiters.Delete(key)
|
||||
cclog.Debugf("Cleaned up rate limiter for %v", key)
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
|
||||
// startRateLimiterCleanup starts a background goroutine to clean up old rate limiters
|
||||
func startRateLimiterCleanup() {
|
||||
go func() {
|
||||
ticker := time.NewTicker(1 * time.Hour)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
// Clean up limiters not used in the last 24 hours
|
||||
cleanupOldRateLimiters(time.Now().Add(-24 * time.Hour))
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// AuthConfig contains configuration for all authentication methods
|
||||
type AuthConfig struct {
|
||||
LdapConfig *LdapConfig `json:"ldap"`
|
||||
JwtConfig *JWTAuthConfig `json:"jwts"`
|
||||
OpenIDConfig *OpenIDConfig `json:"oidc"`
|
||||
}
|
||||
|
||||
// Keys holds the global authentication configuration
|
||||
var Keys AuthConfig
|
||||
|
||||
// Authentication manages all authentication methods and session handling
|
||||
type Authentication struct {
|
||||
db *sqlx.DB
|
||||
sessionStore *sessions.CookieStore
|
||||
SessionMaxAge time.Duration
|
||||
|
||||
authenticators []Authenticator
|
||||
LdapAuth *LdapAuthenticator
|
||||
JwtAuth *JWTAuthenticator
|
||||
LocalAuth *LocalAuthenticator
|
||||
}
|
||||
|
||||
func Init(db *sqlx.DB,
|
||||
configs map[string]interface{}) (*Authentication, error) {
|
||||
auth := &Authentication{}
|
||||
auth.db = db
|
||||
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
log.Error("Error while initializing authentication -> decoding session key failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
||||
}
|
||||
|
||||
auth.LocalAuth = &LocalAuthenticator{}
|
||||
if err := auth.LocalAuth.Init(auth, nil); err != nil {
|
||||
log.Error("Error while initializing authentication -> localAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
|
||||
|
||||
auth.JwtAuth = &JWTAuthenticator{}
|
||||
if err := auth.JwtAuth.Init(auth, configs["jwt"]); err != nil {
|
||||
log.Error("Error while initializing authentication -> jwtAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.JwtAuth)
|
||||
|
||||
if config, ok := configs["ldap"]; ok {
|
||||
auth.LdapAuth = &LdapAuthenticator{}
|
||||
if err := auth.LdapAuth.Init(auth, config); err != nil {
|
||||
log.Error("Error while initializing authentication -> ldapAuth init failed")
|
||||
return nil, err
|
||||
}
|
||||
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
|
||||
}
|
||||
|
||||
return auth, nil
|
||||
authenticators []Authenticator
|
||||
SessionMaxAge time.Duration
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthViaSession(
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
session, err := auth.sessionStore.Get(r, "session")
|
||||
if err != nil {
|
||||
log.Error("Error while getting session store")
|
||||
cclog.Error("Error while getting session store")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -300,108 +139,451 @@ func (auth *Authentication) AuthViaSession(
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
username, _ := session.Values["username"].(string)
|
||||
projects, _ := session.Values["projects"].([]string)
|
||||
roles, _ := session.Values["roles"].([]string)
|
||||
return &User{
|
||||
// Validate session data with proper type checking
|
||||
username, ok := session.Values["username"].(string)
|
||||
if !ok || username == "" {
|
||||
cclog.Warn("Invalid session: missing or invalid username")
|
||||
// Invalidate the corrupted session
|
||||
session.Options.MaxAge = -1
|
||||
_ = auth.sessionStore.Save(r, rw, session)
|
||||
return nil, errors.New("invalid session data")
|
||||
}
|
||||
|
||||
projects, ok := session.Values["projects"].([]string)
|
||||
if !ok {
|
||||
cclog.Warn("Invalid session: projects not found or invalid type, using empty list")
|
||||
projects = []string{}
|
||||
}
|
||||
|
||||
roles, ok := session.Values["roles"].([]string)
|
||||
if !ok || len(roles) == 0 {
|
||||
cclog.Warn("Invalid session: missing or invalid roles")
|
||||
// Invalidate the corrupted session
|
||||
session.Options.MaxAge = -1
|
||||
_ = auth.sessionStore.Save(r, rw, session)
|
||||
return nil, errors.New("invalid session data")
|
||||
}
|
||||
|
||||
return &schema.User{
|
||||
Username: username,
|
||||
Projects: projects,
|
||||
Roles: roles,
|
||||
AuthType: schema.AuthSession,
|
||||
AuthSource: -1,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Handle a POST request that should log the user in, starting a new session.
|
||||
func (auth *Authentication) Login(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error)) http.Handler {
|
||||
func Init(authCfg *json.RawMessage) {
|
||||
initOnce.Do(func() {
|
||||
authInstance = &Authentication{}
|
||||
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
err := errors.New("no authenticator applied")
|
||||
username := r.FormValue("username")
|
||||
user := (*User)(nil)
|
||||
if username != "" {
|
||||
if user, _ = auth.GetUser(username); err != nil {
|
||||
// log.Warnf("login of unkown user %v", username)
|
||||
_ = err
|
||||
}
|
||||
}
|
||||
// Start background cleanup of rate limiters
|
||||
startRateLimiterCleanup()
|
||||
|
||||
for _, authenticator := range auth.authenticators {
|
||||
if !authenticator.CanLogin(user, rw, r) {
|
||||
continue
|
||||
sessKey := os.Getenv("SESSION_KEY")
|
||||
if sessKey == "" {
|
||||
cclog.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||
bytes := make([]byte, 32)
|
||||
if _, err := rand.Read(bytes); err != nil {
|
||||
cclog.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||
}
|
||||
|
||||
user, err = authenticator.Login(user, rw, r)
|
||||
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||
if err != nil {
|
||||
log.Warnf("user login failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
cclog.Fatal("Error while initializing authentication -> decoding session key failed")
|
||||
}
|
||||
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||
}
|
||||
|
||||
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err == nil {
|
||||
authInstance.SessionMaxAge = d
|
||||
}
|
||||
|
||||
if authCfg == nil {
|
||||
return
|
||||
}
|
||||
|
||||
config.Validate(configSchema, *authCfg)
|
||||
dec := json.NewDecoder(bytes.NewReader(*authCfg))
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(&Keys); err != nil {
|
||||
cclog.Errorf("error while decoding ldap config: %v", err)
|
||||
}
|
||||
|
||||
if Keys.LdapConfig != nil {
|
||||
ldapAuth := &LdapAuthenticator{}
|
||||
if err := ldapAuth.Init(); err != nil {
|
||||
cclog.Warn("Error while initializing authentication -> ldapAuth init failed")
|
||||
} else {
|
||||
authInstance.LdapAuth = ldapAuth
|
||||
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
|
||||
}
|
||||
} else {
|
||||
cclog.Info("Missing LDAP configuration: No LDAP support!")
|
||||
}
|
||||
|
||||
if Keys.JwtConfig != nil {
|
||||
authInstance.JwtAuth = &JWTAuthenticator{}
|
||||
if err := authInstance.JwtAuth.Init(); err != nil {
|
||||
cclog.Fatal("Error while initializing authentication -> jwtAuth init failed")
|
||||
}
|
||||
|
||||
jwtSessionAuth := &JWTSessionAuthenticator{}
|
||||
if err := jwtSessionAuth.Init(); err != nil {
|
||||
cclog.Info("jwtSessionAuth init failed: No JWT login support!")
|
||||
} else {
|
||||
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
|
||||
}
|
||||
|
||||
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
||||
if err := jwtCookieSessionAuth.Init(); err != nil {
|
||||
cclog.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
||||
} else {
|
||||
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
|
||||
}
|
||||
} else {
|
||||
cclog.Info("Missing JWT configuration: No JWT token support!")
|
||||
}
|
||||
|
||||
authInstance.LocalAuth = &LocalAuthenticator{}
|
||||
if err := authInstance.LocalAuth.Init(); err != nil {
|
||||
cclog.Fatal("Error while initializing authentication -> localAuth init failed")
|
||||
}
|
||||
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
|
||||
})
|
||||
}
|
||||
|
||||
func GetAuthInstance() *Authentication {
|
||||
if authInstance == nil {
|
||||
cclog.Fatal("Authentication module not initialized!")
|
||||
}
|
||||
|
||||
return authInstance
|
||||
}
|
||||
|
||||
// handleUserSync syncs or updates a user in the database based on configuration.
|
||||
// This is used for LDAP, JWT and OIDC authentications when syncUserOnLogin or updateUserOnLogin is enabled.
|
||||
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
|
||||
r := repository.GetUserRepository()
|
||||
dbUser, err := r.GetUser(user.Username)
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%s': %v", user.Username, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err == sql.ErrNoRows && syncUserOnLogin { // Add new user
|
||||
if err := r.AddUser(user); err != nil {
|
||||
cclog.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
|
||||
}
|
||||
} else if err == nil && updateUserOnLogin { // Update existing user
|
||||
if err := r.UpdateUser(dbUser, user); err != nil {
|
||||
cclog.Errorf("Error while updating user '%s' in DB: %v", dbUser.Username, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleTokenUser syncs JWT token user with database
|
||||
func handleTokenUser(tokenUser *schema.User) {
|
||||
handleUserSync(tokenUser, Keys.JwtConfig.SyncUserOnLogin, Keys.JwtConfig.UpdateUserOnLogin)
|
||||
}
|
||||
|
||||
// handleOIDCUser syncs OIDC user with database
|
||||
func handleOIDCUser(OIDCUser *schema.User) {
|
||||
handleUserSync(OIDCUser, Keys.OpenIDConfig.SyncUserOnLogin, Keys.OpenIDConfig.UpdateUserOnLogin)
|
||||
}
|
||||
|
||||
// handleLdapUser syncs LDAP user with database
|
||||
func handleLdapUser(ldapUser *schema.User) {
|
||||
handleUserSync(ldapUser, Keys.LdapConfig.SyncUserOnLogin, Keys.LdapConfig.UpdateUserOnLogin)
|
||||
}
|
||||
|
||||
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
|
||||
session, err := auth.sessionStore.New(r, "session")
|
||||
if err != nil {
|
||||
log.Errorf("session creation failed: %s", err.Error())
|
||||
cclog.Errorf("session creation failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
return err
|
||||
}
|
||||
|
||||
if auth.SessionMaxAge != 0 {
|
||||
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
||||
}
|
||||
if r.TLS == nil && r.Header.Get("X-Forwarded-Proto") != "https" {
|
||||
// If neither TLS or an encrypted reverse proxy are used, do not mark cookies as secure.
|
||||
cclog.Warn("Authenticating with unencrypted request. Session cookies will not have Secure flag set (insecure for production)")
|
||||
if r.Header.Get("X-Forwarded-Proto") == "" {
|
||||
// This warning will not be printed if e.g. X-Forwarded-Proto == http
|
||||
cclog.Warn("If you are using a reverse proxy, make sure X-Forwarded-Proto is set")
|
||||
}
|
||||
session.Options.Secure = false
|
||||
}
|
||||
session.Options.SameSite = http.SameSiteStrictMode
|
||||
session.Values["username"] = user.Username
|
||||
session.Values["projects"] = user.Projects
|
||||
session.Values["roles"] = user.Roles
|
||||
if err := auth.sessionStore.Save(r, rw, session); err != nil {
|
||||
log.Warnf("session save failed: %s", err.Error())
|
||||
cclog.Warnf("session save failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) Login(
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
ip, _, err := net.SplitHostPort(r.RemoteAddr)
|
||||
if err != nil {
|
||||
ip = r.RemoteAddr
|
||||
}
|
||||
|
||||
username := r.FormValue("username")
|
||||
|
||||
limiter := getIPUserLimiter(ip, username)
|
||||
if !limiter.Allow() {
|
||||
cclog.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
|
||||
onfailure(rw, r, errors.New("too many login attempts, try again in a few minutes"))
|
||||
return
|
||||
}
|
||||
|
||||
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||
ctx := context.WithValue(r.Context(), ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
var dbUser *schema.User
|
||||
if username != "" {
|
||||
var err error
|
||||
dbUser, err = repository.GetUserRepository().GetUser(username)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%v'", username)
|
||||
}
|
||||
}
|
||||
|
||||
log.Warn("login failed: no authenticator applied")
|
||||
for _, authenticator := range auth.authenticators {
|
||||
var ok bool
|
||||
var user *schema.User
|
||||
if user, ok = authenticator.CanLogin(dbUser, username, rw, r); !ok {
|
||||
continue
|
||||
} else {
|
||||
cclog.Debugf("Can login with user %v", user)
|
||||
}
|
||||
|
||||
user, err := authenticator.Login(user, rw, r)
|
||||
if err != nil {
|
||||
cclog.Warnf("user login failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := auth.SaveSession(rw, r, user); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
|
||||
if r.FormValue("redirect") != "" {
|
||||
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
cclog.Debugf("login failed: no authenticator applied")
|
||||
onfailure(rw, r, errors.New("no authenticator applied"))
|
||||
})
|
||||
}
|
||||
|
||||
// Authenticate the user and put a User object in the
|
||||
// context of the request. If authentication fails,
|
||||
// do not continue but send client to the login screen.
|
||||
func (auth *Authentication) Auth(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error)) http.Handler {
|
||||
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
for _, authenticator := range auth.authenticators {
|
||||
user, err := authenticator.Auth(rw, r)
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
log.Warnf("authentication failed: %s", err.Error())
|
||||
cclog.Infof("auth -> authentication failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
if user == nil {
|
||||
continue
|
||||
user, err = auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
cclog.Infof("auth -> authentication failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
ctx := context.WithValue(r.Context(), ContextUserKey, user)
|
||||
}
|
||||
if user != nil {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
|
||||
log.Warnf("authentication failed: %s", "no authenticator applied")
|
||||
// http.Error(rw, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized)
|
||||
onfailure(rw, r, errors.New("unauthorized (login first or use a token)"))
|
||||
cclog.Info("auth -> authentication failed")
|
||||
onfailure(rw, r, errors.New("unauthorized (please login first)"))
|
||||
})
|
||||
}
|
||||
|
||||
// Clears the session cookie
|
||||
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
||||
func (auth *Authentication) AuthAPI(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
cclog.Infof("auth api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
|
||||
ipErr := securedCheck(user, r)
|
||||
if ipErr != nil {
|
||||
cclog.Infof("auth api -> secured check failed: %s", ipErr.Error())
|
||||
onfailure(rw, r, ipErr)
|
||||
return
|
||||
}
|
||||
|
||||
if user != nil {
|
||||
switch {
|
||||
case len(user.Roles) == 1:
|
||||
if user.HasRole(schema.RoleAPI) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
case len(user.Roles) >= 2:
|
||||
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleAPI}) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
default:
|
||||
cclog.Info("auth api -> authentication failed: missing role")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
}
|
||||
}
|
||||
cclog.Info("auth api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthUserAPI(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
cclog.Infof("auth user api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
|
||||
if user != nil {
|
||||
switch {
|
||||
case len(user.Roles) == 1:
|
||||
if user.HasRole(schema.RoleAPI) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
case len(user.Roles) >= 2:
|
||||
if user.HasRole(schema.RoleAPI) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleSupport, schema.RoleAdmin}) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
default:
|
||||
cclog.Info("auth user api -> authentication failed: missing role")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
}
|
||||
}
|
||||
cclog.Info("auth user api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthMetricStoreAPI(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||
if err != nil {
|
||||
cclog.Infof("auth metricstore api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
|
||||
if user != nil {
|
||||
switch {
|
||||
case len(user.Roles) == 1:
|
||||
if user.HasRole(schema.RoleAPI) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
case len(user.Roles) >= 2:
|
||||
if user.HasRole(schema.RoleAPI) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
default:
|
||||
cclog.Info("auth metricstore api -> authentication failed: missing role")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
}
|
||||
}
|
||||
cclog.Info("auth metricstore api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthConfigAPI(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
cclog.Infof("auth config api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil && user.HasRole(schema.RoleAdmin) {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
cclog.Info("auth config api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) AuthFrontendAPI(
|
||||
onsuccess http.Handler,
|
||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||
) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
user, err := auth.AuthViaSession(rw, r)
|
||||
if err != nil {
|
||||
cclog.Infof("auth frontend api -> authentication failed: %s", err.Error())
|
||||
onfailure(rw, r, err)
|
||||
return
|
||||
}
|
||||
if user != nil {
|
||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||
return
|
||||
}
|
||||
cclog.Info("auth frontend api -> authentication failed: no auth")
|
||||
onfailure(rw, r, errors.New("unauthorized"))
|
||||
})
|
||||
}
|
||||
|
||||
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||
session, err := auth.sessionStore.Get(r, "session")
|
||||
if err != nil {
|
||||
@@ -420,3 +602,42 @@ func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
||||
onsuccess.ServeHTTP(rw, r)
|
||||
})
|
||||
}
|
||||
|
||||
// Helper Moved To MiddleWare Auth Handlers
|
||||
func securedCheck(user *schema.User, r *http.Request) error {
|
||||
if user == nil {
|
||||
return fmt.Errorf("no user for secured check")
|
||||
}
|
||||
|
||||
// extract IP address for checking
|
||||
IPAddress := r.Header.Get("X-Real-Ip")
|
||||
if IPAddress == "" {
|
||||
IPAddress = r.Header.Get("X-Forwarded-For")
|
||||
}
|
||||
if IPAddress == "" {
|
||||
IPAddress = r.RemoteAddr
|
||||
}
|
||||
|
||||
// Handle both IPv4 and IPv6 addresses properly
|
||||
// For IPv6, this will strip the port and brackets
|
||||
// For IPv4, this will strip the port
|
||||
if host, _, err := net.SplitHostPort(IPAddress); err == nil {
|
||||
IPAddress = host
|
||||
}
|
||||
// If SplitHostPort fails, IPAddress is already just a host (no port)
|
||||
|
||||
// If nothing declared in config: Continue
|
||||
if len(config.Keys.APIAllowedIPs) == 0 {
|
||||
return nil
|
||||
}
|
||||
// If wildcard declared in config: Continue
|
||||
if config.Keys.APIAllowedIPs[0] == "*" {
|
||||
return nil
|
||||
}
|
||||
// check if IP is allowed
|
||||
if !util.Contains(config.Keys.APIAllowedIPs, IPAddress) {
|
||||
return fmt.Errorf("unknown ip: %v", IPAddress)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,129 +1,176 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestHasValidRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user"}}
|
||||
// TestGetIPUserLimiter tests the rate limiter creation and retrieval
|
||||
func TestGetIPUserLimiter(t *testing.T) {
|
||||
ip := "192.168.1.1"
|
||||
username := "testuser"
|
||||
|
||||
exists, _ := u.HasValidRole("user")
|
||||
// Get limiter for the first time
|
||||
limiter1 := getIPUserLimiter(ip, username)
|
||||
if limiter1 == nil {
|
||||
t.Fatal("Expected limiter to be created")
|
||||
}
|
||||
|
||||
if !exists {
|
||||
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("user"): EXISTS = %v, expected 'true'.`, exists)
|
||||
// Get the same limiter again
|
||||
limiter2 := getIPUserLimiter(ip, username)
|
||||
if limiter1 != limiter2 {
|
||||
t.Error("Expected to get the same limiter instance")
|
||||
}
|
||||
|
||||
// Get a different limiter for different user
|
||||
limiter3 := getIPUserLimiter(ip, "otheruser")
|
||||
if limiter1 == limiter3 {
|
||||
t.Error("Expected different limiter for different user")
|
||||
}
|
||||
|
||||
// Get a different limiter for different IP
|
||||
limiter4 := getIPUserLimiter("192.168.1.2", username)
|
||||
if limiter1 == limiter4 {
|
||||
t.Error("Expected different limiter for different IP")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasNotValidRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user"}}
|
||||
// TestRateLimiterBehavior tests that rate limiting works correctly
|
||||
func TestRateLimiterBehavior(t *testing.T) {
|
||||
ip := "10.0.0.1"
|
||||
username := "ratelimituser"
|
||||
|
||||
exists, _ := u.HasValidRole("manager")
|
||||
limiter := getIPUserLimiter(ip, username)
|
||||
|
||||
if exists {
|
||||
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("manager"): EXISTS = %v, expected 'false'.`, exists)
|
||||
// Should allow first 5 attempts
|
||||
for i := range 5 {
|
||||
if !limiter.Allow() {
|
||||
t.Errorf("Request %d should be allowed within rate limit", i+1)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasInvalidRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user"}}
|
||||
|
||||
_, valid := u.HasValidRole("invalid")
|
||||
|
||||
if valid {
|
||||
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("invalid"): VALID = %v, expected 'false'.`, valid)
|
||||
// 6th attempt should be blocked
|
||||
if limiter.Allow() {
|
||||
t.Error("Request 6 should be blocked by rate limiter")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasNotInvalidRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user"}}
|
||||
// TestCleanupOldRateLimiters tests the cleanup function
|
||||
func TestCleanupOldRateLimiters(t *testing.T) {
|
||||
// Clear all existing limiters first to avoid interference from other tests
|
||||
cleanupOldRateLimiters(time.Now().Add(24 * time.Hour))
|
||||
|
||||
_, valid := u.HasValidRole("user")
|
||||
// Create some new rate limiters
|
||||
limiter1 := getIPUserLimiter("1.1.1.1", "user1")
|
||||
limiter2 := getIPUserLimiter("2.2.2.2", "user2")
|
||||
|
||||
if !valid {
|
||||
t.Fatalf(`User{Roles: ["user"]} -> HasValidRole("user"): VALID = %v, expected 'true'.`, valid)
|
||||
if limiter1 == nil || limiter2 == nil {
|
||||
t.Fatal("Failed to create test limiters")
|
||||
}
|
||||
|
||||
// Cleanup limiters older than 1 second from now (should keep both)
|
||||
time.Sleep(10 * time.Millisecond) // Small delay to ensure timestamp difference
|
||||
cleanupOldRateLimiters(time.Now().Add(-1 * time.Second))
|
||||
|
||||
// Verify they still exist (should get same instance)
|
||||
if getIPUserLimiter("1.1.1.1", "user1") != limiter1 {
|
||||
t.Error("Limiter 1 was incorrectly cleaned up")
|
||||
}
|
||||
if getIPUserLimiter("2.2.2.2", "user2") != limiter2 {
|
||||
t.Error("Limiter 2 was incorrectly cleaned up")
|
||||
}
|
||||
|
||||
// Cleanup limiters older than 1 hour from now (should remove both)
|
||||
cleanupOldRateLimiters(time.Now().Add(2 * time.Hour))
|
||||
|
||||
// Getting them again should create new instances
|
||||
newLimiter1 := getIPUserLimiter("1.1.1.1", "user1")
|
||||
if newLimiter1 == limiter1 {
|
||||
t.Error("Old limiter should have been cleaned up")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user"}}
|
||||
// TestIPv4Extraction tests extracting IPv4 addresses
|
||||
func TestIPv4Extraction(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"IPv4 with port", "192.168.1.1:8080", "192.168.1.1"},
|
||||
{"IPv4 without port", "192.168.1.1", "192.168.1.1"},
|
||||
{"Localhost with port", "127.0.0.1:3000", "127.0.0.1"},
|
||||
}
|
||||
|
||||
exists := u.HasRole(RoleUser)
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := tt.input
|
||||
if host, _, err := net.SplitHostPort(result); err == nil {
|
||||
result = host
|
||||
}
|
||||
|
||||
if !exists {
|
||||
t.Fatalf(`User{Roles: ["user"]} -> HasRole(RoleUser): EXISTS = %v, expected 'true'.`, exists)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasNotRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user"}}
|
||||
// TestIPv6Extraction tests extracting IPv6 addresses
|
||||
func TestIPv6Extraction(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"IPv6 with port", "[2001:db8::1]:8080", "2001:db8::1"},
|
||||
{"IPv6 localhost with port", "[::1]:3000", "::1"},
|
||||
{"IPv6 without port", "2001:db8::1", "2001:db8::1"},
|
||||
{"IPv6 localhost", "::1", "::1"},
|
||||
}
|
||||
|
||||
exists := u.HasRole(RoleManager)
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := tt.input
|
||||
if host, _, err := net.SplitHostPort(result); err == nil {
|
||||
result = host
|
||||
}
|
||||
|
||||
if exists {
|
||||
t.Fatalf(`User{Roles: ["user"]} -> HasRole(RoleManager): EXISTS = %v, expected 'false'.`, exists)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasAnyRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
|
||||
|
||||
result := u.HasAnyRole([]Role{RoleManager, RoleSupport, RoleAdmin})
|
||||
|
||||
if !result {
|
||||
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasAnyRole([]Role{RoleManager, RoleSupport, RoleAdmin}): RESULT = %v, expected 'true'.`, result)
|
||||
}
|
||||
// TestIPExtractionEdgeCases tests edge cases for IP extraction
|
||||
func TestIPExtractionEdgeCases(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"Hostname without port", "example.com", "example.com"},
|
||||
{"Empty string", "", ""},
|
||||
{"Just port", ":8080", ""},
|
||||
}
|
||||
|
||||
func TestHasNotAnyRole(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
|
||||
|
||||
result := u.HasAnyRole([]Role{RoleSupport, RoleAdmin})
|
||||
|
||||
if result {
|
||||
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasAllRoles([]Role{RoleSupport, RoleAdmin}): RESULT = %v, expected 'false'.`, result)
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := tt.input
|
||||
if host, _, err := net.SplitHostPort(result); err == nil {
|
||||
result = host
|
||||
}
|
||||
|
||||
func TestHasAllRoles(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user", "manager", "support"}}
|
||||
|
||||
result := u.HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport})
|
||||
|
||||
if !result {
|
||||
t.Fatalf(`User{Roles: ["user", "manager", "support"]} -> HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport}): RESULT = %v, expected 'true'.`, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasNotAllRoles(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
|
||||
|
||||
result := u.HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport})
|
||||
|
||||
if result {
|
||||
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasAllRoles([]Role{RoleUser, RoleManager, RoleSupport}): RESULT = %v, expected 'false'.`, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasNotRoles(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
|
||||
|
||||
result := u.HasNotRoles([]Role{RoleSupport, RoleAdmin})
|
||||
|
||||
if !result {
|
||||
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasNotRoles([]Role{RoleSupport, RoleAdmin}): RESULT = %v, expected 'true'.`, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHasAllNotRoles(t *testing.T) {
|
||||
u := User{Username: "testuser", Roles: []string{"user", "manager"}}
|
||||
|
||||
result := u.HasNotRoles([]Role{RoleUser, RoleManager})
|
||||
|
||||
if result {
|
||||
t.Fatalf(`User{Roles: ["user", "manager"]} -> HasNotRoles([]Role{RoleUser, RoleManager}): RESULT = %v, expected 'false'.`, result)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,321 +1,125 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/golang-jwt/jwt/v4"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
type JWTAuthenticator struct {
|
||||
auth *Authentication
|
||||
type JWTAuthConfig struct {
|
||||
// Specifies for how long a JWT token shall be valid
|
||||
// as a string parsable by time.ParseDuration().
|
||||
MaxAge string `json:"max-age"`
|
||||
|
||||
publicKey ed25519.PublicKey
|
||||
privateKey ed25519.PrivateKey
|
||||
publicKeyCrossLogin ed25519.PublicKey // For accepting externally generated JWTs
|
||||
// Specifies which cookie should be checked for a JWT token (if no authorization header is present)
|
||||
CookieName string `json:"cookie-name"`
|
||||
|
||||
loginTokenKey []byte // HS256 key
|
||||
// Deny login for users not in database (but defined in JWT).
|
||||
// Ignore user roles defined in JWTs ('roles' claim), get them from db.
|
||||
ValidateUser bool `json:"validate-user"`
|
||||
|
||||
config *schema.JWTAuthConfig
|
||||
// Specifies which issuer should be accepted when validating external JWTs ('iss' claim)
|
||||
TrustedIssuer string `json:"trusted-issuer"`
|
||||
|
||||
// Should an non-existent user be added to the DB based on the information in the token
|
||||
SyncUserOnLogin bool `json:"sync-user-on-login"`
|
||||
|
||||
// Should an existent user be updated in the DB based on the information in the token
|
||||
UpdateUserOnLogin bool `json:"update-user-on-login"`
|
||||
}
|
||||
|
||||
var _ Authenticator = (*JWTAuthenticator)(nil)
|
||||
|
||||
func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
|
||||
|
||||
ja.auth = auth
|
||||
ja.config = conf.(*schema.JWTAuthConfig)
|
||||
type JWTAuthenticator struct {
|
||||
publicKey ed25519.PublicKey
|
||||
privateKey ed25519.PrivateKey
|
||||
}
|
||||
|
||||
func (ja *JWTAuthenticator) Init() error {
|
||||
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
||||
if pubKey == "" || privKey == "" {
|
||||
log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||
cclog.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode JWT public key")
|
||||
cclog.Warn("Could not decode JWT public key")
|
||||
return err
|
||||
}
|
||||
ja.publicKey = ed25519.PublicKey(bytes)
|
||||
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode JWT private key")
|
||||
cclog.Warn("Could not decode JWT private key")
|
||||
return err
|
||||
}
|
||||
ja.privateKey = ed25519.PrivateKey(bytes)
|
||||
}
|
||||
|
||||
if pubKey = os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode cross login JWT HS512 key")
|
||||
return err
|
||||
}
|
||||
ja.loginTokenKey = bytes
|
||||
}
|
||||
|
||||
// Look for external public keys
|
||||
pubKeyCrossLogin, keyFound := os.LookupEnv("CROSS_LOGIN_JWT_PUBLIC_KEY")
|
||||
if keyFound && pubKeyCrossLogin != "" {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
|
||||
if err != nil {
|
||||
log.Warn("Could not decode cross login JWT public key")
|
||||
return err
|
||||
}
|
||||
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
|
||||
|
||||
// Warn if other necessary settings are not configured
|
||||
if ja.config != nil {
|
||||
if ja.config.CookieName == "" {
|
||||
log.Warn("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
}
|
||||
if !ja.config.ForceJWTValidationViaDatabase {
|
||||
log.Warn("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
|
||||
}
|
||||
if ja.config.TrustedExternalIssuer == "" {
|
||||
log.Warn("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
}
|
||||
} else {
|
||||
log.Warn("cookieName and trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
}
|
||||
} else {
|
||||
ja.publicKeyCrossLogin = nil
|
||||
log.Warn("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ja *JWTAuthenticator) CanLogin(
|
||||
user *User,
|
||||
func (ja *JWTAuthenticator) AuthViaJWT(
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) bool {
|
||||
|
||||
return (user != nil && user.AuthSource == AuthViaToken) || r.Header.Get("Authorization") != "" || r.URL.Query().Get("login-token") != ""
|
||||
}
|
||||
|
||||
func (ja *JWTAuthenticator) Login(
|
||||
user *User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
rawtoken := r.Header.Get("X-Auth-Token")
|
||||
if rawtoken == "" {
|
||||
rawtoken = r.Header.Get("Authorization")
|
||||
rawtoken = strings.TrimPrefix(rawtoken, "Bearer ")
|
||||
if rawtoken == "" {
|
||||
rawtoken = r.URL.Query().Get("login-token")
|
||||
}
|
||||
}
|
||||
|
||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
|
||||
if t.Method == jwt.SigningMethodEdDSA {
|
||||
return ja.publicKey, nil
|
||||
}
|
||||
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
|
||||
return ja.loginTokenKey, nil
|
||||
}
|
||||
return nil, fmt.Errorf("AUTH/JWT > unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
|
||||
})
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing jwt token")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := token.Claims.Valid(); err != nil {
|
||||
log.Warn("jwt token claims are not valid")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
sub, _ := claims["sub"].(string)
|
||||
exp, _ := claims["exp"].(float64)
|
||||
var roles []string
|
||||
if rawroles, ok := claims["roles"].([]interface{}); ok {
|
||||
for _, rr := range rawroles {
|
||||
if r, ok := rr.(string); ok {
|
||||
if isValidRole(r) {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if rawrole, ok := claims["roles"].(string); ok {
|
||||
if isValidRole(rawrole) {
|
||||
roles = append(roles, rawrole)
|
||||
}
|
||||
}
|
||||
|
||||
if user == nil {
|
||||
user, err = ja.auth.GetUser(sub)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
log.Errorf("Error while loading user '%v'", sub)
|
||||
return nil, err
|
||||
} else if user == nil {
|
||||
user = &User{
|
||||
Username: sub,
|
||||
Roles: roles,
|
||||
AuthSource: AuthViaToken,
|
||||
}
|
||||
if err := ja.auth.AddUser(user); err != nil {
|
||||
log.Errorf("Error while adding user '%v' to auth from token", user.Username)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
user.Expiration = time.Unix(int64(exp), 0)
|
||||
return user, nil
|
||||
}
|
||||
|
||||
func (ja *JWTAuthenticator) Auth(
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
rawtoken := r.Header.Get("X-Auth-Token")
|
||||
if rawtoken == "" {
|
||||
rawtoken = r.Header.Get("Authorization")
|
||||
rawtoken = strings.TrimPrefix(rawtoken, "Bearer ")
|
||||
}
|
||||
|
||||
// If no auth header was found, check for a certain cookie containing a JWT
|
||||
cookieName := ""
|
||||
cookieFound := false
|
||||
if ja.config != nil && ja.config.CookieName != "" {
|
||||
cookieName = ja.config.CookieName
|
||||
}
|
||||
|
||||
// Try to read the JWT cookie
|
||||
if rawtoken == "" && cookieName != "" {
|
||||
jwtCookie, err := r.Cookie(cookieName)
|
||||
|
||||
if err == nil && jwtCookie.Value != "" {
|
||||
rawtoken = jwtCookie.Value
|
||||
cookieFound = true
|
||||
}
|
||||
}
|
||||
|
||||
// Because a user can also log in via a token, the
|
||||
// session cookie must be checked here as well:
|
||||
// there is no token
|
||||
if rawtoken == "" {
|
||||
return ja.auth.AuthViaSession(rw, r)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Try to parse JWT
|
||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
|
||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
|
||||
if t.Method != jwt.SigningMethodEdDSA {
|
||||
return nil, errors.New("only Ed25519/EdDSA supported")
|
||||
}
|
||||
|
||||
// Is there more than one public key?
|
||||
if ja.publicKeyCrossLogin != nil && ja.config != nil && ja.config.TrustedExternalIssuer != "" {
|
||||
// Determine whether to use the external public key
|
||||
unvalidatedIssuer, success := t.Claims.(jwt.MapClaims)["iss"].(string)
|
||||
if success && unvalidatedIssuer == ja.config.TrustedExternalIssuer {
|
||||
// The (unvalidated) issuer seems to be the expected one,
|
||||
// use public cross login key from config
|
||||
return ja.publicKeyCrossLogin, nil
|
||||
}
|
||||
}
|
||||
|
||||
// No cross login key configured or issuer not expected
|
||||
// Try own key
|
||||
return ja.publicKey, nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Warn("Error while parsing token")
|
||||
cclog.Warn("Error while parsing JWT token")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check token validity
|
||||
if err := token.Claims.Valid(); err != nil {
|
||||
log.Warn("jwt token claims are not valid")
|
||||
return nil, err
|
||||
if !token.Valid {
|
||||
cclog.Warn("jwt token claims are not valid")
|
||||
return nil, errors.New("jwt token claims are not valid")
|
||||
}
|
||||
|
||||
// Token is valid, extract payload
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
sub, _ := claims["sub"].(string)
|
||||
|
||||
var roles []string
|
||||
|
||||
// Validate user + roles from JWT against database?
|
||||
if ja.config != nil && ja.config.ForceJWTValidationViaDatabase {
|
||||
user, err := ja.auth.GetUser(sub)
|
||||
|
||||
// Deny any logins for unknown usernames
|
||||
// Use shared helper to get user from JWT claims
|
||||
var user *schema.User
|
||||
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthToken, -1)
|
||||
if err != nil {
|
||||
log.Warn("Could not find user from JWT in internal database.")
|
||||
return nil, errors.New("unknown user")
|
||||
}
|
||||
|
||||
// Take user roles from database instead of trusting the JWT
|
||||
roles = user.Roles
|
||||
} else {
|
||||
// Extract roles from JWT (if present)
|
||||
if rawroles, ok := claims["roles"].([]interface{}); ok {
|
||||
for _, rr := range rawroles {
|
||||
if r, ok := rr.(string); ok {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if cookieFound {
|
||||
// Create a session so that we no longer need the JTW Cookie
|
||||
session, err := ja.auth.sessionStore.New(r, "session")
|
||||
if err != nil {
|
||||
log.Errorf("session creation failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if ja.auth.SessionMaxAge != 0 {
|
||||
session.Options.MaxAge = int(ja.auth.SessionMaxAge.Seconds())
|
||||
}
|
||||
session.Values["username"] = sub
|
||||
session.Values["roles"] = roles
|
||||
|
||||
if err := ja.auth.sessionStore.Save(r, rw, session); err != nil {
|
||||
log.Warnf("session save failed: %s", err.Error())
|
||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||
return nil, err
|
||||
// If not validating user, we only get roles from JWT (no projects for this auth method)
|
||||
if !Keys.JwtConfig.ValidateUser {
|
||||
user.Roles = extractRolesFromClaims(claims, false)
|
||||
user.Projects = nil // Standard JWT auth doesn't include projects
|
||||
}
|
||||
|
||||
// (Ask browser to) Delete JWT cookie
|
||||
deletedCookie := &http.Cookie{
|
||||
Name: cookieName,
|
||||
Value: "",
|
||||
Path: "/",
|
||||
MaxAge: -1,
|
||||
HttpOnly: true,
|
||||
}
|
||||
http.SetCookie(rw, deletedCookie)
|
||||
return user, nil
|
||||
}
|
||||
|
||||
return &User{
|
||||
Username: sub,
|
||||
Roles: roles,
|
||||
AuthSource: AuthViaToken,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Generate a new JWT that can be used for authentication
|
||||
func (ja *JWTAuthenticator) ProvideJWT(user *User) (string, error) {
|
||||
|
||||
// ProvideJWT generates a new JWT that can be used for authentication
|
||||
func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
|
||||
if ja.privateKey == nil {
|
||||
return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
|
||||
}
|
||||
@@ -326,8 +130,12 @@ func (ja *JWTAuthenticator) ProvideJWT(user *User) (string, error) {
|
||||
"roles": user.Roles,
|
||||
"iat": now.Unix(),
|
||||
}
|
||||
if ja.config != nil && ja.config.MaxAge != 0 {
|
||||
claims["exp"] = now.Add(time.Duration(ja.config.MaxAge)).Unix()
|
||||
if Keys.JwtConfig.MaxAge != "" {
|
||||
d, err := time.ParseDuration(Keys.JwtConfig.MaxAge)
|
||||
if err != nil {
|
||||
return "", errors.New("cannot parse max-age config key")
|
||||
}
|
||||
claims["exp"] = now.Add(d).Unix()
|
||||
}
|
||||
|
||||
return jwt.NewWithClaims(jwt.SigningMethodEdDSA, claims).SignedString(ja.privateKey)
|
||||
|
||||
172
internal/auth/jwtCookieSession.go
Normal file
172
internal/auth/jwtCookieSession.go
Normal file
@@ -0,0 +1,172 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"crypto/ed25519"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
type JWTCookieSessionAuthenticator struct {
|
||||
publicKey ed25519.PublicKey
|
||||
privateKey ed25519.PrivateKey
|
||||
publicKeyCrossLogin ed25519.PublicKey // For accepting externally generated JWTs
|
||||
}
|
||||
|
||||
var _ Authenticator = (*JWTCookieSessionAuthenticator)(nil)
|
||||
|
||||
func (ja *JWTCookieSessionAuthenticator) Init() error {
|
||||
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
||||
if pubKey == "" || privKey == "" {
|
||||
cclog.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||
return errors.New("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||
} else {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||
if err != nil {
|
||||
cclog.Warn("Could not decode JWT public key")
|
||||
return err
|
||||
}
|
||||
ja.publicKey = ed25519.PublicKey(bytes)
|
||||
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
||||
if err != nil {
|
||||
cclog.Warn("Could not decode JWT private key")
|
||||
return err
|
||||
}
|
||||
ja.privateKey = ed25519.PrivateKey(bytes)
|
||||
}
|
||||
|
||||
// Look for external public keys
|
||||
pubKeyCrossLogin, keyFound := os.LookupEnv("CROSS_LOGIN_JWT_PUBLIC_KEY")
|
||||
if keyFound && pubKeyCrossLogin != "" {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
|
||||
if err != nil {
|
||||
cclog.Warn("Could not decode cross login JWT public key")
|
||||
return err
|
||||
}
|
||||
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
|
||||
} else {
|
||||
ja.publicKeyCrossLogin = nil
|
||||
cclog.Debug("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
|
||||
return errors.New("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
|
||||
}
|
||||
|
||||
// Warn if other necessary settings are not configured
|
||||
if Keys.JwtConfig != nil {
|
||||
if Keys.JwtConfig.CookieName == "" {
|
||||
cclog.Info("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
return errors.New("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
}
|
||||
if !Keys.JwtConfig.ValidateUser {
|
||||
cclog.Info("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
|
||||
}
|
||||
if Keys.JwtConfig.TrustedIssuer == "" {
|
||||
cclog.Info("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
return errors.New("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
}
|
||||
} else {
|
||||
cclog.Warn("config for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
return errors.New("config for JWTs not configured (cross login via JWT cookie will fail)")
|
||||
}
|
||||
|
||||
cclog.Info("JWT Cookie Session authenticator successfully registered")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ja *JWTCookieSessionAuthenticator) CanLogin(
|
||||
user *schema.User,
|
||||
username string,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request,
|
||||
) (*schema.User, bool) {
|
||||
jc := Keys.JwtConfig
|
||||
cookieName := ""
|
||||
if jc.CookieName != "" {
|
||||
cookieName = jc.CookieName
|
||||
}
|
||||
|
||||
// Try to read the JWT cookie
|
||||
if cookieName != "" {
|
||||
jwtCookie, err := r.Cookie(cookieName)
|
||||
|
||||
if err == nil && jwtCookie.Value != "" {
|
||||
return user, true
|
||||
}
|
||||
}
|
||||
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (ja *JWTCookieSessionAuthenticator) Login(
|
||||
user *schema.User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
jc := Keys.JwtConfig
|
||||
jwtCookie, err := r.Cookie(jc.CookieName)
|
||||
var rawtoken string
|
||||
|
||||
if err == nil && jwtCookie.Value != "" {
|
||||
rawtoken = jwtCookie.Value
|
||||
}
|
||||
|
||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
|
||||
if t.Method != jwt.SigningMethodEdDSA {
|
||||
return nil, errors.New("only Ed25519/EdDSA supported")
|
||||
}
|
||||
|
||||
unvalidatedIssuer, success := t.Claims.(jwt.MapClaims)["iss"].(string)
|
||||
if success && unvalidatedIssuer == jc.TrustedIssuer {
|
||||
// The (unvalidated) issuer seems to be the expected one,
|
||||
// use public cross login key from config
|
||||
return ja.publicKeyCrossLogin, nil
|
||||
}
|
||||
|
||||
// No cross login key configured or issuer not expected
|
||||
// Try own key
|
||||
return ja.publicKey, nil
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Warn("JWT cookie session: error while parsing token")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !token.Valid {
|
||||
cclog.Warn("jwt token claims are not valid")
|
||||
return nil, errors.New("jwt token claims are not valid")
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
|
||||
// Use shared helper to get user from JWT claims
|
||||
user, err = getUserFromJWT(claims, jc.ValidateUser, schema.AuthSession, schema.AuthViaToken)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Sync or update user if configured
|
||||
if !jc.ValidateUser && (jc.SyncUserOnLogin || jc.UpdateUserOnLogin) {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
|
||||
// (Ask browser to) Delete JWT cookie
|
||||
deletedCookie := &http.Cookie{
|
||||
Name: jc.CookieName,
|
||||
Value: "",
|
||||
Path: "/",
|
||||
MaxAge: -1,
|
||||
HttpOnly: true,
|
||||
}
|
||||
http.SetCookie(rw, deletedCookie)
|
||||
|
||||
return user, nil
|
||||
}
|
||||
138
internal/auth/jwtHelpers.go
Normal file
138
internal/auth/jwtHelpers.go
Normal file
@@ -0,0 +1,138 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
// extractStringFromClaims extracts a string value from JWT claims
|
||||
func extractStringFromClaims(claims jwt.MapClaims, key string) string {
|
||||
if val, ok := claims[key].(string); ok {
|
||||
return val
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractRolesFromClaims extracts roles from JWT claims
|
||||
// If validateRoles is true, only valid roles are returned
|
||||
func extractRolesFromClaims(claims jwt.MapClaims, validateRoles bool) []string {
|
||||
var roles []string
|
||||
|
||||
if rawroles, ok := claims["roles"].([]any); ok {
|
||||
for _, rr := range rawroles {
|
||||
if r, ok := rr.(string); ok {
|
||||
if validateRoles {
|
||||
if schema.IsValidRole(r) {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
} else {
|
||||
roles = append(roles, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return roles
|
||||
}
|
||||
|
||||
// extractProjectsFromClaims extracts projects from JWT claims
|
||||
func extractProjectsFromClaims(claims jwt.MapClaims) []string {
|
||||
projects := make([]string, 0)
|
||||
|
||||
if rawprojs, ok := claims["projects"].([]any); ok {
|
||||
for _, pp := range rawprojs {
|
||||
if p, ok := pp.(string); ok {
|
||||
projects = append(projects, p)
|
||||
}
|
||||
}
|
||||
} else if rawprojs, ok := claims["projects"]; ok {
|
||||
if projSlice, ok := rawprojs.([]string); ok {
|
||||
projects = append(projects, projSlice...)
|
||||
}
|
||||
}
|
||||
|
||||
return projects
|
||||
}
|
||||
|
||||
// extractNameFromClaims extracts name from JWT claims
|
||||
// Handles both simple string and complex nested structure
|
||||
func extractNameFromClaims(claims jwt.MapClaims) string {
|
||||
// Try simple string first
|
||||
if name, ok := claims["name"].(string); ok {
|
||||
return name
|
||||
}
|
||||
|
||||
// Try nested structure: {name: {values: [...]}}
|
||||
if wrap, ok := claims["name"].(map[string]any); ok {
|
||||
if vals, ok := wrap["values"].([]any); ok {
|
||||
if len(vals) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
var name strings.Builder
|
||||
name.WriteString(fmt.Sprintf("%v", vals[0]))
|
||||
for i := 1; i < len(vals); i++ {
|
||||
name.WriteString(fmt.Sprintf(" %v", vals[i]))
|
||||
}
|
||||
return name.String()
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// getUserFromJWT creates or retrieves a user based on JWT claims
|
||||
// If validateUser is true, the user must exist in the database
|
||||
// Otherwise, a new user object is created from claims
|
||||
// authSource should be a schema.AuthSource constant (like schema.AuthViaToken)
|
||||
func getUserFromJWT(claims jwt.MapClaims, validateUser bool, authType schema.AuthType, authSource schema.AuthSource) (*schema.User, error) {
|
||||
sub := extractStringFromClaims(claims, "sub")
|
||||
if sub == "" {
|
||||
return nil, errors.New("missing 'sub' claim in JWT")
|
||||
}
|
||||
|
||||
if validateUser {
|
||||
// Validate user against database
|
||||
ur := repository.GetUserRepository()
|
||||
user, err := ur.GetUser(sub)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
cclog.Errorf("Error while loading user '%v': %v", sub, err)
|
||||
return nil, fmt.Errorf("database error: %w", err)
|
||||
}
|
||||
|
||||
// Deny any logins for unknown usernames
|
||||
if user == nil || err == sql.ErrNoRows {
|
||||
cclog.Warn("Could not find user from JWT in internal database.")
|
||||
return nil, errors.New("unknown user")
|
||||
}
|
||||
|
||||
// Return database user (with database roles)
|
||||
return user, nil
|
||||
}
|
||||
|
||||
// Create user from JWT claims
|
||||
name := extractNameFromClaims(claims)
|
||||
roles := extractRolesFromClaims(claims, true) // Validate roles
|
||||
projects := extractProjectsFromClaims(claims)
|
||||
|
||||
return &schema.User{
|
||||
Username: sub,
|
||||
Name: name,
|
||||
Roles: roles,
|
||||
Projects: projects,
|
||||
AuthType: authType,
|
||||
AuthSource: authSource,
|
||||
}, nil
|
||||
}
|
||||
280
internal/auth/jwtHelpers_test.go
Normal file
280
internal/auth/jwtHelpers_test.go
Normal file
@@ -0,0 +1,280 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
// TestExtractStringFromClaims tests extracting string values from JWT claims
|
||||
func TestExtractStringFromClaims(t *testing.T) {
|
||||
claims := jwt.MapClaims{
|
||||
"sub": "testuser",
|
||||
"email": "test@example.com",
|
||||
"age": 25, // not a string
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
key string
|
||||
expected string
|
||||
}{
|
||||
{"Existing string", "sub", "testuser"},
|
||||
{"Another string", "email", "test@example.com"},
|
||||
{"Non-existent key", "missing", ""},
|
||||
{"Non-string value", "age", ""},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractStringFromClaims(claims, tt.key)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractRolesFromClaims tests role extraction and validation
|
||||
func TestExtractRolesFromClaims(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
claims jwt.MapClaims
|
||||
validateRoles bool
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "Valid roles without validation",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": []any{"admin", "user", "invalid_role"},
|
||||
},
|
||||
validateRoles: false,
|
||||
expected: []string{"admin", "user", "invalid_role"},
|
||||
},
|
||||
{
|
||||
name: "Valid roles with validation",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": []any{"admin", "user", "api"},
|
||||
},
|
||||
validateRoles: true,
|
||||
expected: []string{"admin", "user", "api"},
|
||||
},
|
||||
{
|
||||
name: "Invalid roles with validation",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": []any{"invalid_role", "fake_role"},
|
||||
},
|
||||
validateRoles: true,
|
||||
expected: []string{}, // Should filter out invalid roles
|
||||
},
|
||||
{
|
||||
name: "No roles claim",
|
||||
claims: jwt.MapClaims{},
|
||||
validateRoles: false,
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Non-array roles",
|
||||
claims: jwt.MapClaims{
|
||||
"roles": "admin",
|
||||
},
|
||||
validateRoles: false,
|
||||
expected: []string{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractRolesFromClaims(tt.claims, tt.validateRoles)
|
||||
|
||||
if len(result) != len(tt.expected) {
|
||||
t.Errorf("Expected %d roles, got %d", len(tt.expected), len(result))
|
||||
return
|
||||
}
|
||||
|
||||
for i, role := range result {
|
||||
if i >= len(tt.expected) || role != tt.expected[i] {
|
||||
t.Errorf("Expected role %s at position %d, got %s", tt.expected[i], i, role)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractProjectsFromClaims tests project extraction from claims
|
||||
func TestExtractProjectsFromClaims(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
claims jwt.MapClaims
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "Projects as array of interfaces",
|
||||
claims: jwt.MapClaims{
|
||||
"projects": []any{"project1", "project2", "project3"},
|
||||
},
|
||||
expected: []string{"project1", "project2", "project3"},
|
||||
},
|
||||
{
|
||||
name: "Projects as string array",
|
||||
claims: jwt.MapClaims{
|
||||
"projects": []string{"projectA", "projectB"},
|
||||
},
|
||||
expected: []string{"projectA", "projectB"},
|
||||
},
|
||||
{
|
||||
name: "No projects claim",
|
||||
claims: jwt.MapClaims{},
|
||||
expected: []string{},
|
||||
},
|
||||
{
|
||||
name: "Mixed types in projects array",
|
||||
claims: jwt.MapClaims{
|
||||
"projects": []any{"project1", 123, "project2"},
|
||||
},
|
||||
expected: []string{"project1", "project2"}, // Should skip non-strings
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractProjectsFromClaims(tt.claims)
|
||||
|
||||
if len(result) != len(tt.expected) {
|
||||
t.Errorf("Expected %d projects, got %d", len(tt.expected), len(result))
|
||||
return
|
||||
}
|
||||
|
||||
for i, project := range result {
|
||||
if i >= len(tt.expected) || project != tt.expected[i] {
|
||||
t.Errorf("Expected project %s at position %d, got %s", tt.expected[i], i, project)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractNameFromClaims tests name extraction from various formats
|
||||
func TestExtractNameFromClaims(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
claims jwt.MapClaims
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "Simple string name",
|
||||
claims: jwt.MapClaims{
|
||||
"name": "John Doe",
|
||||
},
|
||||
expected: "John Doe",
|
||||
},
|
||||
{
|
||||
name: "Nested name structure",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{"John", "Doe"},
|
||||
},
|
||||
},
|
||||
expected: "John Doe",
|
||||
},
|
||||
{
|
||||
name: "Nested name with single value",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{"Alice"},
|
||||
},
|
||||
},
|
||||
expected: "Alice",
|
||||
},
|
||||
{
|
||||
name: "No name claim",
|
||||
claims: jwt.MapClaims{},
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "Empty nested values",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{},
|
||||
},
|
||||
},
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "Nested with non-string values",
|
||||
claims: jwt.MapClaims{
|
||||
"name": map[string]any{
|
||||
"values": []any{123, "Smith"},
|
||||
},
|
||||
},
|
||||
expected: "123 Smith", // Should convert to string
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractNameFromClaims(tt.claims)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected '%s', got '%s'", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetUserFromJWT_NoValidation tests getUserFromJWT without database validation
|
||||
func TestGetUserFromJWT_NoValidation(t *testing.T) {
|
||||
claims := jwt.MapClaims{
|
||||
"sub": "testuser",
|
||||
"name": "Test User",
|
||||
"roles": []any{"user", "admin"},
|
||||
"projects": []any{"project1", "project2"},
|
||||
}
|
||||
|
||||
user, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
|
||||
if err != nil {
|
||||
t.Fatalf("Unexpected error: %v", err)
|
||||
}
|
||||
|
||||
if user.Username != "testuser" {
|
||||
t.Errorf("Expected username 'testuser', got '%s'", user.Username)
|
||||
}
|
||||
|
||||
if user.Name != "Test User" {
|
||||
t.Errorf("Expected name 'Test User', got '%s'", user.Name)
|
||||
}
|
||||
|
||||
if len(user.Roles) != 2 {
|
||||
t.Errorf("Expected 2 roles, got %d", len(user.Roles))
|
||||
}
|
||||
|
||||
if len(user.Projects) != 2 {
|
||||
t.Errorf("Expected 2 projects, got %d", len(user.Projects))
|
||||
}
|
||||
|
||||
if user.AuthType != schema.AuthToken {
|
||||
t.Errorf("Expected AuthType %v, got %v", schema.AuthToken, user.AuthType)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetUserFromJWT_MissingSub tests error when sub claim is missing
|
||||
func TestGetUserFromJWT_MissingSub(t *testing.T) {
|
||||
claims := jwt.MapClaims{
|
||||
"name": "Test User",
|
||||
}
|
||||
|
||||
_, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
|
||||
|
||||
if err == nil {
|
||||
t.Error("Expected error for missing sub claim")
|
||||
}
|
||||
|
||||
if err.Error() != "missing 'sub' claim in JWT" {
|
||||
t.Errorf("Expected specific error message, got: %v", err)
|
||||
}
|
||||
}
|
||||
91
internal/auth/jwtSession.go
Normal file
91
internal/auth/jwtSession.go
Normal file
@@ -0,0 +1,91 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/golang-jwt/jwt/v5"
|
||||
)
|
||||
|
||||
type JWTSessionAuthenticator struct {
|
||||
loginTokenKey []byte // HS256 key
|
||||
}
|
||||
|
||||
var _ Authenticator = (*JWTSessionAuthenticator)(nil)
|
||||
|
||||
func (ja *JWTSessionAuthenticator) Init() error {
|
||||
if pubKey := os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
|
||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||
if err != nil {
|
||||
cclog.Warn("Could not decode cross login JWT HS512 key")
|
||||
return err
|
||||
}
|
||||
ja.loginTokenKey = bytes
|
||||
}
|
||||
|
||||
cclog.Info("JWT Session authenticator successfully registered")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (ja *JWTSessionAuthenticator) CanLogin(
|
||||
user *schema.User,
|
||||
username string,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request,
|
||||
) (*schema.User, bool) {
|
||||
return user, r.Header.Get("Authorization") != "" ||
|
||||
r.URL.Query().Get("login-token") != ""
|
||||
}
|
||||
|
||||
func (ja *JWTSessionAuthenticator) Login(
|
||||
user *schema.User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
rawtoken := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")
|
||||
if rawtoken == "" {
|
||||
rawtoken = r.URL.Query().Get("login-token")
|
||||
}
|
||||
|
||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
|
||||
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
|
||||
return ja.loginTokenKey, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
|
||||
})
|
||||
if err != nil {
|
||||
cclog.Warn("Error while parsing jwt token")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if !token.Valid {
|
||||
cclog.Warn("jwt token claims are not valid")
|
||||
return nil, errors.New("jwt token claims are not valid")
|
||||
}
|
||||
|
||||
claims := token.Claims.(jwt.MapClaims)
|
||||
|
||||
// Use shared helper to get user from JWT claims
|
||||
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthSession, schema.AuthViaToken)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Sync or update user if configured
|
||||
if !Keys.JwtConfig.ValidateUser && (Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin) {
|
||||
handleTokenUser(user)
|
||||
}
|
||||
|
||||
return user, nil
|
||||
}
|
||||
@@ -1,171 +1,217 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/go-ldap/ldap/v3"
|
||||
)
|
||||
|
||||
type LdapConfig struct {
|
||||
URL string `json:"url"`
|
||||
UserBase string `json:"user-base"`
|
||||
SearchDN string `json:"search-dn"`
|
||||
UserBind string `json:"user-bind"`
|
||||
UserFilter string `json:"user-filter"`
|
||||
UserAttr string `json:"username-attr"`
|
||||
UIDAttr string `json:"uid-attr"`
|
||||
SyncInterval string `json:"sync-interval"` // Parsed using time.ParseDuration.
|
||||
SyncDelOldUsers bool `json:"sync-del-old-users"`
|
||||
|
||||
// Should a non-existent user be added to the DB if user exists in ldap directory
|
||||
SyncUserOnLogin bool `json:"sync-user-on-login"`
|
||||
UpdateUserOnLogin bool `json:"update-user-on-login"`
|
||||
}
|
||||
|
||||
type LdapAuthenticator struct {
|
||||
auth *Authentication
|
||||
config *schema.LdapConfig
|
||||
syncPassword string
|
||||
UserAttr string
|
||||
UIDAttr string
|
||||
}
|
||||
|
||||
var _ Authenticator = (*LdapAuthenticator)(nil)
|
||||
|
||||
func (la *LdapAuthenticator) Init(
|
||||
auth *Authentication,
|
||||
conf interface{}) error {
|
||||
|
||||
la.auth = auth
|
||||
la.config = conf.(*schema.LdapConfig)
|
||||
|
||||
func (la *LdapAuthenticator) Init() error {
|
||||
la.syncPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
|
||||
if la.syncPassword == "" {
|
||||
log.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)")
|
||||
cclog.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)")
|
||||
}
|
||||
|
||||
if la.config != nil && la.config.SyncInterval != "" {
|
||||
interval, err := time.ParseDuration(la.config.SyncInterval)
|
||||
if err != nil {
|
||||
log.Warnf("Could not parse duration for sync interval: %v", la.config.SyncInterval)
|
||||
return err
|
||||
if Keys.LdapConfig.UserAttr != "" {
|
||||
la.UserAttr = Keys.LdapConfig.UserAttr
|
||||
} else {
|
||||
la.UserAttr = "gecos"
|
||||
}
|
||||
|
||||
if interval == 0 {
|
||||
log.Info("Sync interval is zero")
|
||||
return nil
|
||||
}
|
||||
|
||||
go func() {
|
||||
ticker := time.NewTicker(interval)
|
||||
for t := range ticker.C {
|
||||
log.Printf("sync started at %s", t.Format(time.RFC3339))
|
||||
if err := la.Sync(); err != nil {
|
||||
log.Errorf("sync failed: %s", err.Error())
|
||||
}
|
||||
log.Print("sync done")
|
||||
}
|
||||
}()
|
||||
if Keys.LdapConfig.UIDAttr != "" {
|
||||
la.UIDAttr = Keys.LdapConfig.UIDAttr
|
||||
} else {
|
||||
la.UIDAttr = "uid"
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (la *LdapAuthenticator) CanLogin(
|
||||
user *User,
|
||||
user *schema.User,
|
||||
username string,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) bool {
|
||||
r *http.Request,
|
||||
) (*schema.User, bool) {
|
||||
lc := Keys.LdapConfig
|
||||
|
||||
return user != nil && user.AuthSource == AuthViaLDAP
|
||||
if user != nil {
|
||||
if user.AuthSource == schema.AuthViaLDAP {
|
||||
return user, true
|
||||
}
|
||||
} else if lc.SyncUserOnLogin {
|
||||
l, err := la.getLdapConnection(true)
|
||||
if err != nil {
|
||||
cclog.Error("LDAP connection error")
|
||||
return nil, false
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
// Search for the given username
|
||||
searchRequest := ldap.NewSearchRequest(
|
||||
lc.UserBase,
|
||||
ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
|
||||
fmt.Sprintf("(&%s(%s=%s))", lc.UserFilter, la.UIDAttr, ldap.EscapeFilter(username)),
|
||||
[]string{"dn", la.UIDAttr, la.UserAttr}, nil)
|
||||
|
||||
sr, err := l.Search(searchRequest)
|
||||
if err != nil {
|
||||
cclog.Warn(err)
|
||||
return nil, false
|
||||
}
|
||||
|
||||
if len(sr.Entries) != 1 {
|
||||
cclog.Warn("LDAP: User does not exist or too many entries returned")
|
||||
return nil, false
|
||||
}
|
||||
|
||||
entry := sr.Entries[0]
|
||||
user = &schema.User{
|
||||
Username: username,
|
||||
Name: entry.GetAttributeValue(la.UserAttr),
|
||||
Roles: []string{schema.GetRoleString(schema.RoleUser)},
|
||||
Projects: make([]string, 0),
|
||||
AuthType: schema.AuthSession,
|
||||
AuthSource: schema.AuthViaLDAP,
|
||||
}
|
||||
|
||||
handleLdapUser(user)
|
||||
return user, true
|
||||
}
|
||||
|
||||
return nil, false
|
||||
}
|
||||
|
||||
func (la *LdapAuthenticator) Login(
|
||||
user *User,
|
||||
user *schema.User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
l, err := la.getLdapConnection(false)
|
||||
if err != nil {
|
||||
log.Warn("Error while getting ldap connection")
|
||||
cclog.Warn("Error while getting ldap connection")
|
||||
return nil, err
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
userDn := strings.Replace(la.config.UserBind, "{username}", user.Username, -1)
|
||||
userDn := strings.ReplaceAll(Keys.LdapConfig.UserBind, "{username}", ldap.EscapeDN(user.Username))
|
||||
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
|
||||
log.Error("Error while binding to ldap connection")
|
||||
return nil, err
|
||||
cclog.Errorf("AUTH/LDAP > Authentication for user %s failed: %v",
|
||||
user.Username, err)
|
||||
return nil, fmt.Errorf("Authentication failed")
|
||||
}
|
||||
|
||||
return user, nil
|
||||
}
|
||||
|
||||
func (la *LdapAuthenticator) Auth(
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
return la.auth.AuthViaSession(rw, r)
|
||||
}
|
||||
|
||||
func (la *LdapAuthenticator) Sync() error {
|
||||
|
||||
const IN_DB int = 1
|
||||
const IN_LDAP int = 2
|
||||
const IN_BOTH int = 3
|
||||
const InDB int = 1
|
||||
const InLdap int = 2
|
||||
const InBoth int = 3
|
||||
ur := repository.GetUserRepository()
|
||||
lc := Keys.LdapConfig
|
||||
|
||||
users := map[string]int{}
|
||||
rows, err := la.auth.db.Query(`SELECT username FROM user WHERE user.ldap = 1`)
|
||||
usernames, err := ur.GetLdapUsernames()
|
||||
if err != nil {
|
||||
log.Warn("Error while querying LDAP users")
|
||||
return err
|
||||
}
|
||||
|
||||
for rows.Next() {
|
||||
var username string
|
||||
if err := rows.Scan(&username); err != nil {
|
||||
log.Warnf("Error while scanning for user '%s'", username)
|
||||
return err
|
||||
}
|
||||
|
||||
users[username] = IN_DB
|
||||
for _, username := range usernames {
|
||||
users[username] = InDB
|
||||
}
|
||||
|
||||
l, err := la.getLdapConnection(true)
|
||||
if err != nil {
|
||||
log.Error("LDAP connection error")
|
||||
cclog.Error("LDAP connection error")
|
||||
return err
|
||||
}
|
||||
defer l.Close()
|
||||
|
||||
ldapResults, err := l.Search(ldap.NewSearchRequest(
|
||||
la.config.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
|
||||
la.config.UserFilter, []string{"dn", "uid", "gecos"}, nil))
|
||||
lc.UserBase,
|
||||
ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
|
||||
lc.UserFilter,
|
||||
[]string{"dn", la.UIDAttr, la.UserAttr}, nil))
|
||||
if err != nil {
|
||||
log.Warn("LDAP search error")
|
||||
cclog.Warn("LDAP search error")
|
||||
return err
|
||||
}
|
||||
|
||||
newnames := map[string]string{}
|
||||
for _, entry := range ldapResults.Entries {
|
||||
username := entry.GetAttributeValue("uid")
|
||||
username := entry.GetAttributeValue(la.UIDAttr)
|
||||
if username == "" {
|
||||
return errors.New("no attribute 'uid'")
|
||||
return fmt.Errorf("no attribute '%s'", la.UIDAttr)
|
||||
}
|
||||
|
||||
_, ok := users[username]
|
||||
if !ok {
|
||||
users[username] = IN_LDAP
|
||||
newnames[username] = entry.GetAttributeValue("gecos")
|
||||
users[username] = InLdap
|
||||
newnames[username] = entry.GetAttributeValue(la.UserAttr)
|
||||
} else {
|
||||
users[username] = IN_BOTH
|
||||
users[username] = InBoth
|
||||
}
|
||||
}
|
||||
|
||||
for username, where := range users {
|
||||
if where == IN_DB && la.config.SyncDelOldUsers {
|
||||
log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
|
||||
if _, err := la.auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
|
||||
log.Errorf("User '%s' not in LDAP anymore: Delete from DB failed", username)
|
||||
if where == InDB && lc.SyncDelOldUsers {
|
||||
if err := ur.DelUser(username); err != nil {
|
||||
cclog.Errorf("User '%s' LDAP: Delete from DB failed: %v", username, err)
|
||||
return err
|
||||
}
|
||||
} else if where == IN_LDAP {
|
||||
cclog.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
|
||||
} else if where == InLdap {
|
||||
name := newnames[username]
|
||||
log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
|
||||
if _, err := la.auth.db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`,
|
||||
username, 1, name, "[\""+GetRoleString(RoleUser)+"\"]"); err != nil {
|
||||
log.Errorf("User '%s' new in LDAP: Insert into DB failed", username)
|
||||
|
||||
user := &schema.User{
|
||||
Username: username,
|
||||
Name: name,
|
||||
Roles: []string{schema.GetRoleString(schema.RoleUser)},
|
||||
Projects: make([]string, 0),
|
||||
AuthSource: schema.AuthViaLDAP,
|
||||
}
|
||||
|
||||
cclog.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
|
||||
if err := ur.AddUser(user); err != nil {
|
||||
cclog.Errorf("User '%s' LDAP: Insert into DB failed", username)
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -174,20 +220,20 @@ func (la *LdapAuthenticator) Sync() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// TODO: Add a connection pool or something like
|
||||
// that so that connections can be reused/cached.
|
||||
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
|
||||
|
||||
conn, err := ldap.DialURL(la.config.Url)
|
||||
lc := Keys.LdapConfig
|
||||
conn, err := ldap.DialURL(lc.URL,
|
||||
ldap.DialWithDialer(&net.Dialer{Timeout: 10 * time.Second}))
|
||||
if err != nil {
|
||||
log.Warn("LDAP URL dial failed")
|
||||
cclog.Warn("LDAP URL dial failed")
|
||||
return nil, err
|
||||
}
|
||||
conn.SetTimeout(30 * time.Second)
|
||||
|
||||
if admin {
|
||||
if err := conn.Bind(la.config.SearchDN, la.syncPassword); err != nil {
|
||||
if err := conn.Bind(lc.SearchDN, la.syncPassword); err != nil {
|
||||
conn.Close()
|
||||
log.Warn("LDAP connection bind failed")
|
||||
cclog.Warn("LDAP connection bind failed")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
)
|
||||
|
||||
@@ -17,37 +20,29 @@ type LocalAuthenticator struct {
|
||||
|
||||
var _ Authenticator = (*LocalAuthenticator)(nil)
|
||||
|
||||
func (la *LocalAuthenticator) Init(
|
||||
auth *Authentication,
|
||||
_ interface{}) error {
|
||||
|
||||
la.auth = auth
|
||||
func (la *LocalAuthenticator) Init() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (la *LocalAuthenticator) CanLogin(
|
||||
user *User,
|
||||
user *schema.User,
|
||||
username string,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) bool {
|
||||
|
||||
return user != nil && user.AuthSource == AuthViaLocalPassword
|
||||
r *http.Request,
|
||||
) (*schema.User, bool) {
|
||||
return user, user != nil && user.AuthSource == schema.AuthViaLocalPassword
|
||||
}
|
||||
|
||||
func (la *LocalAuthenticator) Login(
|
||||
user *User,
|
||||
user *schema.User,
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
|
||||
return nil, fmt.Errorf("AUTH/LOCAL > user '%s' provided the wrong password (%w)", user.Username, e)
|
||||
r *http.Request,
|
||||
) (*schema.User, error) {
|
||||
if e := bcrypt.CompareHashAndPassword([]byte(user.Password),
|
||||
[]byte(r.FormValue("password"))); e != nil {
|
||||
cclog.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
|
||||
return nil, fmt.Errorf("Authentication failed")
|
||||
}
|
||||
|
||||
return user, nil
|
||||
}
|
||||
|
||||
func (la *LocalAuthenticator) Auth(
|
||||
rw http.ResponseWriter,
|
||||
r *http.Request) (*User, error) {
|
||||
|
||||
return la.auth.AuthViaSession(rw, r)
|
||||
}
|
||||
|
||||
273
internal/auth/oidc.go
Normal file
273
internal/auth/oidc.go
Normal file
@@ -0,0 +1,273 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/coreos/go-oidc/v3/oidc"
|
||||
"github.com/go-chi/chi/v5"
|
||||
"golang.org/x/oauth2"
|
||||
)
|
||||
|
||||
type OpenIDConfig struct {
|
||||
Provider string `json:"provider"`
|
||||
SyncUserOnLogin bool `json:"sync-user-on-login"`
|
||||
UpdateUserOnLogin bool `json:"update-user-on-login"`
|
||||
}
|
||||
|
||||
type OIDC struct {
|
||||
client *oauth2.Config
|
||||
provider *oidc.Provider
|
||||
authentication *Authentication
|
||||
clientID string
|
||||
}
|
||||
|
||||
func randString(nByte int) (string, error) {
|
||||
b := make([]byte, nByte)
|
||||
if _, err := io.ReadFull(rand.Reader, b); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return base64.RawURLEncoding.EncodeToString(b), nil
|
||||
}
|
||||
|
||||
func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value string) {
|
||||
c := &http.Cookie{
|
||||
Name: name,
|
||||
Value: value,
|
||||
MaxAge: int(time.Hour.Seconds()),
|
||||
Secure: r.TLS != nil,
|
||||
HttpOnly: true,
|
||||
SameSite: http.SameSiteLaxMode,
|
||||
}
|
||||
http.SetCookie(w, c)
|
||||
}
|
||||
|
||||
// NewOIDC creates a new OIDC authenticator with the configured provider
|
||||
func NewOIDC(a *Authentication) *OIDC {
|
||||
// Use context with timeout for provider initialization
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
provider, err := oidc.NewProvider(ctx, Keys.OpenIDConfig.Provider)
|
||||
if err != nil {
|
||||
cclog.Fatal(err)
|
||||
}
|
||||
clientID := os.Getenv("OID_CLIENT_ID")
|
||||
if clientID == "" {
|
||||
cclog.Warn("environment variable 'OID_CLIENT_ID' not set (Open ID connect auth will not work)")
|
||||
}
|
||||
clientSecret := os.Getenv("OID_CLIENT_SECRET")
|
||||
if clientSecret == "" {
|
||||
cclog.Warn("environment variable 'OID_CLIENT_SECRET' not set (Open ID connect auth will not work)")
|
||||
}
|
||||
|
||||
client := &oauth2.Config{
|
||||
ClientID: clientID,
|
||||
ClientSecret: clientSecret,
|
||||
Endpoint: provider.Endpoint(),
|
||||
Scopes: []string{oidc.ScopeOpenID, "profile"},
|
||||
}
|
||||
|
||||
oa := &OIDC{provider: provider, client: client, clientID: clientID, authentication: a}
|
||||
|
||||
return oa
|
||||
}
|
||||
|
||||
func (oa *OIDC) RegisterEndpoints(r chi.Router) {
|
||||
r.HandleFunc("/oidc-login", oa.OAuth2Login)
|
||||
r.HandleFunc("/oidc-callback", oa.OAuth2Callback)
|
||||
}
|
||||
|
||||
func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
|
||||
c, err := r.Cookie("state")
|
||||
if err != nil {
|
||||
http.Error(rw, "state cookie not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
state := c.Value
|
||||
|
||||
c, err = r.Cookie("verifier")
|
||||
if err != nil {
|
||||
http.Error(rw, "verifier cookie not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
codeVerifier := c.Value
|
||||
|
||||
_ = r.ParseForm()
|
||||
if r.Form.Get("state") != state {
|
||||
http.Error(rw, "State invalid", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
code := r.Form.Get("code")
|
||||
if code == "" {
|
||||
http.Error(rw, "Code not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
// Exchange authorization code for token with timeout
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier))
|
||||
if err != nil {
|
||||
cclog.Errorf("token exchange failed: %s", err.Error())
|
||||
http.Error(rw, "Authentication failed during token exchange", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Get user info from OIDC provider with same timeout
|
||||
userInfo, err := oa.provider.UserInfo(ctx, oauth2.StaticTokenSource(token))
|
||||
if err != nil {
|
||||
cclog.Errorf("failed to get userinfo: %s", err.Error())
|
||||
http.Error(rw, "Failed to retrieve user information", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Verify ID token and nonce to prevent replay attacks
|
||||
rawIDToken, ok := token.Extra("id_token").(string)
|
||||
if !ok {
|
||||
http.Error(rw, "ID token not found in response", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
nonceCookie, err := r.Cookie("nonce")
|
||||
if err != nil {
|
||||
http.Error(rw, "nonce cookie not found", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID})
|
||||
idToken, err := verifier.Verify(ctx, rawIDToken)
|
||||
if err != nil {
|
||||
cclog.Errorf("ID token verification failed: %s", err.Error())
|
||||
http.Error(rw, "ID token verification failed", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if idToken.Nonce != nonceCookie.Value {
|
||||
http.Error(rw, "Nonce mismatch", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
projects := make([]string, 0)
|
||||
|
||||
// Extract custom claims from userinfo
|
||||
var claims struct {
|
||||
Username string `json:"preferred_username"`
|
||||
Name string `json:"name"`
|
||||
// Keycloak realm-level roles
|
||||
RealmAccess struct {
|
||||
Roles []string `json:"roles"`
|
||||
} `json:"realm_access"`
|
||||
// Keycloak client-level roles
|
||||
ResourceAccess struct {
|
||||
Client struct {
|
||||
Roles []string `json:"roles"`
|
||||
} `json:"clustercockpit"`
|
||||
} `json:"resource_access"`
|
||||
}
|
||||
if err := userInfo.Claims(&claims); err != nil {
|
||||
cclog.Errorf("failed to extract claims: %s", err.Error())
|
||||
http.Error(rw, "Failed to extract user claims", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
if claims.Username == "" {
|
||||
http.Error(rw, "Username claim missing from OIDC provider", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Merge roles from both client-level and realm-level access
|
||||
oidcRoles := append(claims.ResourceAccess.Client.Roles, claims.RealmAccess.Roles...)
|
||||
|
||||
roleSet := make(map[string]bool)
|
||||
for _, r := range oidcRoles {
|
||||
switch r {
|
||||
case "user":
|
||||
roleSet[schema.GetRoleString(schema.RoleUser)] = true
|
||||
case "admin":
|
||||
roleSet[schema.GetRoleString(schema.RoleAdmin)] = true
|
||||
case "manager":
|
||||
roleSet[schema.GetRoleString(schema.RoleManager)] = true
|
||||
case "support":
|
||||
roleSet[schema.GetRoleString(schema.RoleSupport)] = true
|
||||
}
|
||||
}
|
||||
|
||||
var roles []string
|
||||
for role := range roleSet {
|
||||
roles = append(roles, role)
|
||||
}
|
||||
|
||||
if len(roles) == 0 {
|
||||
roles = append(roles, schema.GetRoleString(schema.RoleUser))
|
||||
}
|
||||
|
||||
user := &schema.User{
|
||||
Username: claims.Username,
|
||||
Name: claims.Name,
|
||||
Roles: roles,
|
||||
Projects: projects,
|
||||
AuthSource: schema.AuthViaOIDC,
|
||||
}
|
||||
|
||||
if Keys.OpenIDConfig.SyncUserOnLogin || Keys.OpenIDConfig.UpdateUserOnLogin {
|
||||
handleOIDCUser(user)
|
||||
}
|
||||
|
||||
if err := oa.authentication.SaveSession(rw, r, user); err != nil {
|
||||
cclog.Errorf("session save failed for user %q: %s", user.Username, err.Error())
|
||||
http.Error(rw, "Failed to create session", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
cclog.Infof("login successful: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||
userCtx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(userCtx))
|
||||
}
|
||||
|
||||
func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
|
||||
state, err := randString(16)
|
||||
if err != nil {
|
||||
http.Error(rw, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
setCallbackCookie(rw, r, "state", state)
|
||||
|
||||
// use PKCE to protect against CSRF attacks
|
||||
codeVerifier := oauth2.GenerateVerifier()
|
||||
setCallbackCookie(rw, r, "verifier", codeVerifier)
|
||||
|
||||
// Generate nonce for ID token replay protection
|
||||
nonce, err := randString(16)
|
||||
if err != nil {
|
||||
http.Error(rw, "Internal error", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
setCallbackCookie(rw, r, "nonce", nonce)
|
||||
|
||||
// Build redirect URL from the incoming request
|
||||
scheme := "https"
|
||||
if r.TLS == nil && r.Header.Get("X-Forwarded-Proto") != "https" {
|
||||
scheme = "http"
|
||||
}
|
||||
oa.client.RedirectURL = fmt.Sprintf("%s://%s/oidc-callback", scheme, r.Host)
|
||||
|
||||
// Redirect user to consent page to ask for permission
|
||||
url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline,
|
||||
oauth2.S256ChallengeOption(codeVerifier),
|
||||
oidc.Nonce(nonce))
|
||||
http.Redirect(rw, r, url, http.StatusFound)
|
||||
}
|
||||
111
internal/auth/schema.go
Normal file
111
internal/auth/schema.go
Normal file
@@ -0,0 +1,111 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package auth
|
||||
|
||||
var configSchema = `
|
||||
{
|
||||
"jwts": {
|
||||
"description": "For JWT token authentication.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"max-age": {
|
||||
"description": "Configure how long a token is valid. As string parsable by time.ParseDuration()",
|
||||
"type": "string"
|
||||
},
|
||||
"cookie-name": {
|
||||
"description": "Cookie that should be checked for a JWT token.",
|
||||
"type": "string"
|
||||
},
|
||||
"validate-user": {
|
||||
"description": "Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"trusted-issuer": {
|
||||
"description": "Issuer that should be accepted when validating external JWTs ",
|
||||
"type": "string"
|
||||
},
|
||||
"sync-user-on-login": {
|
||||
"description": "Add non-existent user to DB at login attempt with values provided in JWT.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"update-user-on-login": {
|
||||
"description": "Should an existent user attributes in the DB be updated at login attempt with values provided in JWT.",
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": ["max-age"]
|
||||
},
|
||||
"oidc": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"provider": {
|
||||
"description": "OpenID Connect provider URL.",
|
||||
"type": "string"
|
||||
},
|
||||
"sync-user-on-login": {
|
||||
"description": "Add non-existent user to DB at login attempt with values provided.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"update-user-on-login": {
|
||||
"description": "Should an existent user attributes in the DB be updated at login attempt with values provided.",
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": ["provider"]
|
||||
},
|
||||
"ldap": {
|
||||
"description": "For LDAP Authentication and user synchronisation.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"description": "URL of LDAP directory server.",
|
||||
"type": "string"
|
||||
},
|
||||
"user-base": {
|
||||
"description": "Base DN of user tree root.",
|
||||
"type": "string"
|
||||
},
|
||||
"search-dn": {
|
||||
"description": "DN for authenticating LDAP admin account with general read rights.",
|
||||
"type": "string"
|
||||
},
|
||||
"user-bind": {
|
||||
"description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.",
|
||||
"type": "string"
|
||||
},
|
||||
"user-filter": {
|
||||
"description": "Filter to extract users for syncing.",
|
||||
"type": "string"
|
||||
},
|
||||
"username-attr": {
|
||||
"description": "Attribute with full username. Default: gecos",
|
||||
"type": "string"
|
||||
},
|
||||
"sync-interval": {
|
||||
"description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.",
|
||||
"type": "string"
|
||||
},
|
||||
"sync-del-old-users": {
|
||||
"description": "Delete obsolete users in database.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"uid-attr": {
|
||||
"description": "LDAP attribute used as login username. Default: uid",
|
||||
"type": "string"
|
||||
},
|
||||
"sync-user-on-login": {
|
||||
"description": "Add non-existent user to DB at login attempt if user exists in Ldap directory",
|
||||
"type": "boolean"
|
||||
},
|
||||
"update-user-on-login": {
|
||||
"description": "Should an existent user attributes in the DB be updated at login attempt with values from LDAP.",
|
||||
"type": "boolean"
|
||||
}
|
||||
},
|
||||
"required": ["url", "user-base", "search-dn", "user-bind", "user-filter"]
|
||||
},
|
||||
"required": ["jwts"]
|
||||
}`
|
||||
@@ -1,289 +0,0 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package auth
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
sq "github.com/Masterminds/squirrel"
|
||||
"github.com/jmoiron/sqlx"
|
||||
"golang.org/x/crypto/bcrypt"
|
||||
)
|
||||
|
||||
func (auth *Authentication) GetUser(username string) (*User, error) {
|
||||
|
||||
user := &User{Username: username}
|
||||
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
|
||||
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
|
||||
Where("user.username = ?", username).RunWith(auth.db).
|
||||
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
|
||||
log.Warnf("Error while querying user '%v' from database", username)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
user.Password = hashedPassword.String
|
||||
user.Name = name.String
|
||||
user.Email = email.String
|
||||
if rawRoles.Valid {
|
||||
if err := json.Unmarshal([]byte(rawRoles.String), &user.Roles); err != nil {
|
||||
log.Warn("Error while unmarshaling raw roles from DB")
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if rawProjects.Valid {
|
||||
if err := json.Unmarshal([]byte(rawProjects.String), &user.Projects); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return user, nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) AddUser(user *User) error {
|
||||
|
||||
rolesJson, _ := json.Marshal(user.Roles)
|
||||
projectsJson, _ := json.Marshal(user.Projects)
|
||||
|
||||
cols := []string{"username", "roles", "projects"}
|
||||
vals := []interface{}{user.Username, string(rolesJson), string(projectsJson)}
|
||||
|
||||
if user.Name != "" {
|
||||
cols = append(cols, "name")
|
||||
vals = append(vals, user.Name)
|
||||
}
|
||||
if user.Email != "" {
|
||||
cols = append(cols, "email")
|
||||
vals = append(vals, user.Email)
|
||||
}
|
||||
if user.Password != "" {
|
||||
password, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost)
|
||||
if err != nil {
|
||||
log.Error("Error while encrypting new user password")
|
||||
return err
|
||||
}
|
||||
cols = append(cols, "password")
|
||||
vals = append(vals, string(password))
|
||||
}
|
||||
|
||||
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(auth.db).Exec(); err != nil {
|
||||
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
|
||||
return err
|
||||
}
|
||||
|
||||
log.Infof("new user %#v created (roles: %s, auth-source: %d, projects: %s)", user.Username, rolesJson, user.AuthSource, projectsJson)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) DelUser(username string) error {
|
||||
|
||||
_, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
|
||||
log.Errorf("Error while deleting user '%s' from DB", username)
|
||||
return err
|
||||
}
|
||||
|
||||
func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
|
||||
|
||||
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
|
||||
if specialsOnly {
|
||||
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
|
||||
}
|
||||
|
||||
rows, err := q.RunWith(auth.db).Query()
|
||||
if err != nil {
|
||||
log.Warn("Error while querying user list")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
users := make([]*User, 0)
|
||||
defer rows.Close()
|
||||
for rows.Next() {
|
||||
rawroles := ""
|
||||
rawprojects := ""
|
||||
user := &User{}
|
||||
var name, email sql.NullString
|
||||
if err := rows.Scan(&user.Username, &name, &email, &rawroles, &rawprojects); err != nil {
|
||||
log.Warn("Error while scanning user list")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal([]byte(rawroles), &user.Roles); err != nil {
|
||||
log.Warn("Error while unmarshaling raw role list")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal([]byte(rawprojects), &user.Projects); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
user.Name = name.String
|
||||
user.Email = email.String
|
||||
users = append(users, user)
|
||||
}
|
||||
return users, nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) AddRole(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
queryrole string) error {
|
||||
|
||||
newRole := strings.ToLower(queryrole)
|
||||
user, err := auth.GetUser(username)
|
||||
if err != nil {
|
||||
log.Warnf("Could not load user '%s'", username)
|
||||
return err
|
||||
}
|
||||
|
||||
exists, valid := user.HasValidRole(newRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
|
||||
}
|
||||
if exists {
|
||||
return fmt.Errorf("User %v already has role %v", username, newRole)
|
||||
}
|
||||
|
||||
roles, _ := json.Marshal(append(user.Roles, newRole))
|
||||
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
|
||||
log.Errorf("Error while adding new role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) RemoveRole(ctx context.Context, username string, queryrole string) error {
|
||||
oldRole := strings.ToLower(queryrole)
|
||||
user, err := auth.GetUser(username)
|
||||
if err != nil {
|
||||
log.Warnf("Could not load user '%s'", username)
|
||||
return err
|
||||
}
|
||||
|
||||
exists, valid := user.HasValidRole(oldRole)
|
||||
|
||||
if !valid {
|
||||
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
|
||||
}
|
||||
if !exists {
|
||||
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
|
||||
}
|
||||
|
||||
if oldRole == GetRoleString(RoleManager) && len(user.Projects) != 0 {
|
||||
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
|
||||
}
|
||||
|
||||
var newroles []string
|
||||
for _, r := range user.Roles {
|
||||
if r != oldRole {
|
||||
newroles = append(newroles, r) // Append all roles not matching requested to be deleted role
|
||||
}
|
||||
}
|
||||
|
||||
var mroles, _ = json.Marshal(newroles)
|
||||
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
|
||||
log.Errorf("Error while removing role for user '%s'", user.Username)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) AddProject(
|
||||
ctx context.Context,
|
||||
username string,
|
||||
project string) error {
|
||||
|
||||
user, err := auth.GetUser(username)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !user.HasRole(RoleManager) {
|
||||
return fmt.Errorf("user '%s' is not a manager!", username)
|
||||
}
|
||||
|
||||
if user.HasProject(project) {
|
||||
return fmt.Errorf("user '%s' already manages project '%s'", username, project)
|
||||
}
|
||||
|
||||
projects, _ := json.Marshal(append(user.Projects, project))
|
||||
if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (auth *Authentication) RemoveProject(ctx context.Context, username string, project string) error {
|
||||
user, err := auth.GetUser(username)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !user.HasRole(RoleManager) {
|
||||
return fmt.Errorf("user '%#v' is not a manager!", username)
|
||||
}
|
||||
|
||||
if !user.HasProject(project) {
|
||||
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
|
||||
}
|
||||
|
||||
var exists bool
|
||||
var newprojects []string
|
||||
for _, p := range user.Projects {
|
||||
if p != project {
|
||||
newprojects = append(newprojects, p) // Append all projects not matching requested to be deleted project
|
||||
} else {
|
||||
exists = true
|
||||
}
|
||||
}
|
||||
|
||||
if exists == true {
|
||||
var result interface{}
|
||||
if len(newprojects) == 0 {
|
||||
result = "[]"
|
||||
} else {
|
||||
result, _ = json.Marshal(newprojects)
|
||||
}
|
||||
if _, err := sq.Update("user").Set("projects", result).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
} else {
|
||||
return fmt.Errorf("user %s already does not manage project %s", username, project)
|
||||
}
|
||||
}
|
||||
|
||||
func FetchUser(ctx context.Context, db *sqlx.DB, username string) (*model.User, error) {
|
||||
me := GetUser(ctx)
|
||||
if me != nil && me.Username != username && me.HasNotRoles([]Role{RoleAdmin, RoleSupport, RoleManager}) {
|
||||
return nil, errors.New("forbidden")
|
||||
}
|
||||
|
||||
user := &model.User{Username: username}
|
||||
var name, email sql.NullString
|
||||
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
|
||||
RunWith(db).QueryRow().Scan(&name, &email); err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
|
||||
/* since FetchUser will be called to retrieve full name and mail for every job in query/list */
|
||||
// log.Warnf("User '%s' Not found in DB", username)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
log.Warnf("Error while fetching user '%s'", username)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
user.Name = name.String
|
||||
user.Email = email.String
|
||||
return user, nil
|
||||
}
|
||||
@@ -1,68 +1,147 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package config implements the program configuration data structures, validation and parsing
|
||||
package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/resampler"
|
||||
)
|
||||
|
||||
var Keys schema.ProgramConfig = schema.ProgramConfig{
|
||||
type ProgramConfig struct {
|
||||
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||
Addr string `json:"addr"`
|
||||
|
||||
// Addresses from which secured admin API endpoints can be reached, can be wildcard "*"
|
||||
APIAllowedIPs []string `json:"api-allowed-ips"`
|
||||
|
||||
APISubjects *NATSConfig `json:"api-subjects"`
|
||||
|
||||
// Drop root permissions once .env was read and the port was taken.
|
||||
User string `json:"user"`
|
||||
Group string `json:"group"`
|
||||
|
||||
// Disable authentication (for everything: API, Web-UI, ...)
|
||||
DisableAuthentication bool `json:"disable-authentication"`
|
||||
|
||||
// If `embed-static-files` is true (default), the frontend files are directly
|
||||
// embeded into the go binary and expected to be in web/frontend. Only if
|
||||
// it is false the files in `static-files` are served instead.
|
||||
EmbedStaticFiles bool `json:"embed-static-files"`
|
||||
StaticFiles string `json:"static-files"`
|
||||
|
||||
// Path to SQLite database file
|
||||
DB string `json:"db"`
|
||||
|
||||
EnableJobTaggers bool `json:"enable-job-taggers"`
|
||||
|
||||
// Validate json input against schema
|
||||
Validate bool `json:"validate"`
|
||||
|
||||
// If 0 or empty, the session does not expire!
|
||||
SessionMaxAge string `json:"session-max-age"`
|
||||
|
||||
// If both those options are not empty, use HTTPS using those certificates.
|
||||
HTTPSCertFile string `json:"https-cert-file"`
|
||||
HTTPSKeyFile string `json:"https-key-file"`
|
||||
|
||||
// If not the empty string and `addr` does not end in ":80",
|
||||
// redirect every request incoming at port 80 to that url.
|
||||
RedirectHTTPTo string `json:"redirect-http-to"`
|
||||
|
||||
// Where to store MachineState files
|
||||
MachineStateDir string `json:"machine-state-dir"`
|
||||
|
||||
// If not zero, automatically mark jobs as stopped running X seconds longer than their walltime.
|
||||
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`
|
||||
|
||||
// Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views.
|
||||
ShortRunningJobsDuration int `json:"short-running-jobs-duration"`
|
||||
|
||||
// Energy Mix CO2 Emission Constant [g/kWh]
|
||||
// If entered, displays estimated CO2 emission for job based on jobs totalEnergy
|
||||
EmissionConstant int `json:"emission-constant"`
|
||||
|
||||
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
|
||||
EnableResampling *ResampleConfig `json:"resampling"`
|
||||
|
||||
// Systemd unit name for log viewer (default: "clustercockpit")
|
||||
SystemdUnit string `json:"systemd-unit"`
|
||||
|
||||
// Node state retention configuration
|
||||
NodeStateRetention *NodeStateRetention `json:"nodestate-retention"`
|
||||
}
|
||||
|
||||
type NodeStateRetention struct {
|
||||
Policy string `json:"policy"` // "delete" or "move"
|
||||
Age int `json:"age"` // hours, default 24
|
||||
TargetKind string `json:"target-kind"` // "file" or "s3"
|
||||
TargetPath string `json:"target-path"`
|
||||
TargetEndpoint string `json:"target-endpoint"`
|
||||
TargetBucket string `json:"target-bucket"`
|
||||
TargetAccessKey string `json:"target-access-key"`
|
||||
TargetSecretKey string `json:"target-secret-key"`
|
||||
TargetRegion string `json:"target-region"`
|
||||
TargetUsePathStyle bool `json:"target-use-path-style"`
|
||||
MaxFileSizeMB int `json:"max-file-size-mb"`
|
||||
}
|
||||
|
||||
type ResampleConfig struct {
|
||||
// Minimum number of points to trigger resampling of data
|
||||
MinimumPoints int `json:"minimum-points"`
|
||||
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
|
||||
Resolutions []int `json:"resolutions"`
|
||||
// Trigger next zoom level at less than this many visible datapoints
|
||||
Trigger int `json:"trigger"`
|
||||
}
|
||||
|
||||
type NATSConfig struct {
|
||||
SubjectJobEvent string `json:"subject-job-event"`
|
||||
SubjectNodeState string `json:"subject-node-state"`
|
||||
}
|
||||
|
||||
type IntRange struct {
|
||||
From int `json:"from"`
|
||||
To int `json:"to"`
|
||||
}
|
||||
|
||||
type TimeRange struct {
|
||||
From *time.Time `json:"from"`
|
||||
To *time.Time `json:"to"`
|
||||
Range string `json:"range,omitempty"`
|
||||
}
|
||||
|
||||
type FilterRanges struct {
|
||||
Duration *IntRange `json:"duration"`
|
||||
NumNodes *IntRange `json:"num-nodes"`
|
||||
StartTime *TimeRange `json:"start-time"`
|
||||
}
|
||||
|
||||
var Keys ProgramConfig = ProgramConfig{
|
||||
Addr: "localhost:8080",
|
||||
DisableAuthentication: false,
|
||||
EmbedStaticFiles: true,
|
||||
DBDriver: "sqlite3",
|
||||
DB: "./var/job.db",
|
||||
Archive: json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`),
|
||||
DisableArchive: false,
|
||||
Validate: false,
|
||||
LdapConfig: nil,
|
||||
SessionMaxAge: "168h",
|
||||
StopJobsExceedingWalltime: 0,
|
||||
ShortRunningJobsDuration: 5 * 60,
|
||||
UiDefaults: map[string]interface{}{
|
||||
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
||||
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
||||
"plot_general_colorBackground": true,
|
||||
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
||||
"plot_general_lineWidth": 3,
|
||||
"plot_list_jobsPerPage": 50,
|
||||
"plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw"},
|
||||
"plot_view_plotsPerRow": 3,
|
||||
"plot_view_showPolarplot": true,
|
||||
"plot_view_showRoofline": true,
|
||||
"plot_view_showStatTable": true,
|
||||
"system_view_selectedMetric": "cpu_load",
|
||||
},
|
||||
}
|
||||
|
||||
func Init(flagConfigFile string) {
|
||||
raw, err := os.ReadFile(flagConfigFile)
|
||||
if err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
log.Fatalf("CONFIG ERROR: %v", err)
|
||||
}
|
||||
} else {
|
||||
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
|
||||
log.Fatalf("Validate config: %v\n", err)
|
||||
}
|
||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||
func Init(mainConfig json.RawMessage) {
|
||||
Validate(configSchema, mainConfig)
|
||||
dec := json.NewDecoder(bytes.NewReader(mainConfig))
|
||||
dec.DisallowUnknownFields()
|
||||
if err := dec.Decode(&Keys); err != nil {
|
||||
log.Fatalf("could not decode: %v", err)
|
||||
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
|
||||
}
|
||||
|
||||
if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
|
||||
log.Fatal("At least one cluster required in config!")
|
||||
}
|
||||
if Keys.EnableResampling != nil && Keys.EnableResampling.MinimumPoints > 0 {
|
||||
resampler.SetMinimumRequiredPoints(Keys.EnableResampling.MinimumPoints)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,26 @@
|
||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved.
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
ccconf "github.com/ClusterCockpit/cc-lib/v2/ccConfig"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
)
|
||||
|
||||
func TestInit(t *testing.T) {
|
||||
fp := "../../configs/config.json"
|
||||
Init(fp)
|
||||
ccconf.Init(fp)
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
Init(cfg)
|
||||
} else {
|
||||
cclog.Abort("Main configuration must be present")
|
||||
}
|
||||
|
||||
if Keys.Addr != "0.0.0.0:443" {
|
||||
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:443", Keys.Addr)
|
||||
}
|
||||
@@ -18,7 +28,13 @@ func TestInit(t *testing.T) {
|
||||
|
||||
func TestInitMinimal(t *testing.T) {
|
||||
fp := "../../configs/config-demo.json"
|
||||
Init(fp)
|
||||
ccconf.Init(fp)
|
||||
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||
Init(cfg)
|
||||
} else {
|
||||
cclog.Abort("Main configuration must be present")
|
||||
}
|
||||
|
||||
if Keys.Addr != "127.0.0.1:8080" {
|
||||
t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
|
||||
}
|
||||
|
||||
51
internal/config/default_metrics.go
Normal file
51
internal/config/default_metrics.go
Normal file
@@ -0,0 +1,51 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// DEPRECATED: SUPERSEDED BY NEW USER CONFIG - userConfig.go / web.go
|
||||
|
||||
type DefaultMetricsCluster struct {
|
||||
Name string `json:"name"`
|
||||
DefaultMetrics string `json:"default-metrics"`
|
||||
}
|
||||
|
||||
type DefaultMetricsConfig struct {
|
||||
Clusters []DefaultMetricsCluster `json:"clusters"`
|
||||
}
|
||||
|
||||
func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
|
||||
filePath := "default_metrics.json"
|
||||
if _, err := os.Stat(filePath); os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
data, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var cfg DefaultMetricsConfig
|
||||
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &cfg, nil
|
||||
}
|
||||
|
||||
func ParseMetricsString(s string) []string {
|
||||
parts := strings.Split(s, ",")
|
||||
var metrics []string
|
||||
for _, p := range parts {
|
||||
trimmed := strings.TrimSpace(p)
|
||||
if trimmed != "" {
|
||||
metrics = append(metrics, trimmed)
|
||||
}
|
||||
}
|
||||
return metrics
|
||||
}
|
||||
182
internal/config/schema.go
Normal file
182
internal/config/schema.go
Normal file
@@ -0,0 +1,182 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package config
|
||||
|
||||
var configSchema = `
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"addr": {
|
||||
"description": "Address where the http (or https) server will listen on (for example: 'localhost:80').",
|
||||
"type": "string"
|
||||
},
|
||||
"api-allowed-ips": {
|
||||
"description": "Addresses from which secured API endpoints can be reached",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"user": {
|
||||
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
|
||||
"type": "string"
|
||||
},
|
||||
"group": {
|
||||
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
|
||||
"type": "string"
|
||||
},
|
||||
"disable-authentication": {
|
||||
"description": "Disable authentication (for everything: API, Web-UI, ...).",
|
||||
"type": "boolean"
|
||||
},
|
||||
"embed-static-files": {
|
||||
"description": "If all files in web/frontend/public should be served from within the binary itself (they are embedded) or not.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"static-files": {
|
||||
"description": "Folder where static assets can be found, if embed-static-files is false.",
|
||||
"type": "string"
|
||||
},
|
||||
"db": {
|
||||
"description": "Path to SQLite database file (e.g., './var/job.db')",
|
||||
"type": "string"
|
||||
},
|
||||
"enable-job-taggers": {
|
||||
"description": "Turn on automatic application and jobclass taggers",
|
||||
"type": "boolean"
|
||||
},
|
||||
"validate": {
|
||||
"description": "Validate all input json documents against json schema.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"session-max-age": {
|
||||
"description": "Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!",
|
||||
"type": "string"
|
||||
},
|
||||
"https-cert-file": {
|
||||
"description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.",
|
||||
"type": "string"
|
||||
},
|
||||
"https-key-file": {
|
||||
"description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.",
|
||||
"type": "string"
|
||||
},
|
||||
"redirect-http-to": {
|
||||
"description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.",
|
||||
"type": "string"
|
||||
},
|
||||
"stop-jobs-exceeding-walltime": {
|
||||
"description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.",
|
||||
"type": "integer"
|
||||
},
|
||||
"short-running-jobs-duration": {
|
||||
"description": "Do not show running jobs shorter than X seconds.",
|
||||
"type": "integer"
|
||||
},
|
||||
"emission-constant": {
|
||||
"description": "Energy mix CO2 emission constant [g/kWh]. If set, displays estimated CO2 emission for jobs.",
|
||||
"type": "integer"
|
||||
},
|
||||
"machine-state-dir": {
|
||||
"description": "Where to store MachineState files.",
|
||||
"type": "string"
|
||||
},
|
||||
"systemd-unit": {
|
||||
"description": "Systemd unit name for log viewer (default: 'clustercockpit').",
|
||||
"type": "string"
|
||||
},
|
||||
"resampling": {
|
||||
"description": "Enable dynamic zoom in frontend metric plots.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"minimum-points": {
|
||||
"description": "Minimum points to trigger resampling of time-series data.",
|
||||
"type": "integer"
|
||||
},
|
||||
"trigger": {
|
||||
"description": "Trigger next zoom level at less than this many visible datapoints.",
|
||||
"type": "integer"
|
||||
},
|
||||
"resolutions": {
|
||||
"description": "Array of resampling target resolutions, in seconds.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["trigger", "resolutions"]
|
||||
},
|
||||
"api-subjects": {
|
||||
"description": "NATS subjects configuration for subscribing to job and node events.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"subject-job-event": {
|
||||
"description": "NATS subject for job events (start_job, stop_job)",
|
||||
"type": "string"
|
||||
},
|
||||
"subject-node-state": {
|
||||
"description": "NATS subject for node state updates",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["subject-job-event", "subject-node-state"]
|
||||
},
|
||||
"nodestate-retention": {
|
||||
"description": "Node state retention configuration for cleaning up old node_state rows.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"policy": {
|
||||
"description": "Retention policy: 'delete' to remove old rows, 'move' to archive to Parquet then delete.",
|
||||
"type": "string",
|
||||
"enum": ["delete", "move"]
|
||||
},
|
||||
"age": {
|
||||
"description": "Retention age in hours (default: 24).",
|
||||
"type": "integer"
|
||||
},
|
||||
"target-kind": {
|
||||
"description": "Target kind for parquet archiving: 'file' or 's3'.",
|
||||
"type": "string",
|
||||
"enum": ["file", "s3"]
|
||||
},
|
||||
"target-path": {
|
||||
"description": "Filesystem path for parquet file target.",
|
||||
"type": "string"
|
||||
},
|
||||
"target-endpoint": {
|
||||
"description": "S3 endpoint URL.",
|
||||
"type": "string"
|
||||
},
|
||||
"target-bucket": {
|
||||
"description": "S3 bucket name.",
|
||||
"type": "string"
|
||||
},
|
||||
"target-access-key": {
|
||||
"description": "S3 access key.",
|
||||
"type": "string"
|
||||
},
|
||||
"target-secret-key": {
|
||||
"description": "S3 secret key.",
|
||||
"type": "string"
|
||||
},
|
||||
"target-region": {
|
||||
"description": "S3 region.",
|
||||
"type": "string"
|
||||
},
|
||||
"target-use-path-style": {
|
||||
"description": "Use path-style S3 addressing.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"max-file-size-mb": {
|
||||
"description": "Maximum parquet file size in MB (default: 128).",
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": ["policy"]
|
||||
}
|
||||
}
|
||||
}`
|
||||
29
internal/config/validate.go
Normal file
29
internal/config/validate.go
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/santhosh-tekuri/jsonschema/v5"
|
||||
)
|
||||
|
||||
func Validate(schema string, instance json.RawMessage) {
|
||||
sch, err := jsonschema.CompileString("schema.json", schema)
|
||||
if err != nil {
|
||||
cclog.Fatalf("%#v", err)
|
||||
}
|
||||
|
||||
var v any
|
||||
if err := json.Unmarshal([]byte(instance), &v); err != nil {
|
||||
cclog.Fatal(err)
|
||||
}
|
||||
|
||||
if err = sch.Validate(v); err != nil {
|
||||
cclog.Fatalf("%#v", err)
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user