mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-01-15 17:21:46 +01:00
Compare commits
1126 Commits
v1.2.0
...
upstream-t
| Author | SHA1 | Date | |
|---|---|---|---|
| d12da655e9 | |||
| 50df63a2d2 | |||
| d23f20f42a | |||
| 965561956e | |||
| 5a65044caf | |||
| 1cd4a57bd3 | |||
| b35172e2f7 | |||
| 3cfcd30128 | |||
| e56532e5c8 | |||
| fdee4f8938 | |||
|
|
7acc89e42d | ||
|
|
af7d208c21 | ||
|
|
91b90d033e | ||
|
|
7a0975b94d | ||
|
|
c58b01a602 | ||
|
|
8244449646 | ||
|
|
436afa4a61 | ||
| 06ed056d43 | |||
| d446c13546 | |||
| 6e74fa294a | |||
|
|
43bdb56072 | ||
| e707fd0893 | |||
|
|
19c8e9beb1 | ||
|
|
32e5353847 | ||
|
|
d2f2d78954 | ||
| b8fdfc30c0 | |||
| 79a2ca8ae8 | |||
| d1a78c13a4 | |||
| f4b00e9de1 | |||
| 0a5e155096 | |||
| 4ecc050c4c | |||
| 88dc5036b3 | |||
| d30c6ef3bf | |||
|
0419fec810
|
|||
|
43e5fd1131
|
|||
|
|
11e94124cc | ||
|
|
102109388b | ||
|
|
60a69aa0a2 | ||
| 5e2cbd75fa | |||
| 14f1192ccb | |||
| 72b2560ecf | |||
| 7fce6fa401 | |||
| e6286768a7 | |||
| 0306723307 | |||
| 6f49998ad3 | |||
| 457c944ec6 | |||
| 33c38f9464 | |||
| 46351389b6 | |||
|
|
d56b0e93db | ||
| d567a5312e | |||
| 97a322354f | |||
| 554527445b | |||
|
|
c5aff1a2ca | ||
| 987cc40318 | |||
| 104fd1576a | |||
| 72ce3954b4 | |||
| cfa7461855 | |||
| 44cda8a232 | |||
| cf119e6843 | |||
|
|
451744f321 | ||
|
|
ca6682b94b | ||
|
|
cbad2341c3 | ||
|
|
a956c7b135 | ||
|
|
ea6caeb2f0 | ||
|
|
c17e8b1156 | ||
|
|
b993b1e096 | ||
| d7d81e352d | |||
| 078c608bda | |||
| f2e57f9edd | |||
| 5698d5216f | |||
| 10aa2bfbd3 | |||
| 6cfed989ff | |||
| ab70acd582 | |||
|
|
79e1c236fe | ||
|
|
fed62b6c45 | ||
|
|
0d62181272 | ||
|
|
290a71bd48 | ||
|
|
6e385db378 | ||
|
|
ffe8329b84 | ||
| f13be109c2 | |||
| d24d85b970 | |||
| 8d44ac90ad | |||
|
|
4083de2a51 | ||
|
|
131df075db | ||
|
|
afd6f50ba2 | ||
|
|
ad01366705 | ||
| 6325793902 | |||
|
|
8ea176f9da | ||
| 03b5272e44 | |||
| 7da01975f7 | |||
| 7cff8bbfd2 | |||
|
|
c98cbb33f8 | ||
| f3ea95535b | |||
| b9b84b7971 | |||
| be7340ca30 | |||
| 881c4566dd | |||
| 7efbb0217f | |||
| 9e2ce39cde | |||
|
|
0ff6cae1c3 | ||
|
|
d02ba3d717 | ||
| 6aa830adb6 | |||
| be6603cbb9 | |||
|
|
8d208929d5 | ||
|
|
cb0f96b737 | ||
|
|
83723ab050 | ||
|
|
3abaefa550 | ||
|
|
389010dbbd | ||
| 81fe2c043e | |||
| c76e9bb3fe | |||
| 48b68d3410 | |||
| 2b64b31393 | |||
| 2333068de7 | |||
| 78530029ef | |||
| 329b6e5640 | |||
|
|
967f0a3294 | ||
|
|
6eb779d359 | ||
|
|
414147177a | ||
|
|
3b37f3630c | ||
|
|
7c1a818582 | ||
|
|
c4cf7e9707 | ||
|
|
1ceb681521 | ||
|
|
443176a0d1 | ||
|
|
261905a364 | ||
| e00288b160 | |||
| f141ca926f | |||
| f7a0954213 | |||
|
|
da8d562eba | ||
|
|
399af8592c | ||
| 6239e7f19b | |||
| d0e1b7186c | |||
| fea3292f50 | |||
| 9973aa9ffa | |||
| 0b38a980d2 | |||
| 20838b6882 | |||
| 8f4ef1e274 | |||
| e1c7583670 | |||
| 39a2157d46 | |||
| dd63e7157a | |||
| 340efd7926 | |||
| ecc6194b57 | |||
|
|
90c3381954 | ||
|
|
21334c8026 | ||
|
|
cbdef6ce9e | ||
|
|
591cd9fd66 | ||
|
|
e8d2a45afb | ||
|
|
3b533938a6 | ||
|
|
9fe342a7e9 | ||
|
|
2152ced97a | ||
|
|
404be5f317 | ||
|
|
f56783a439 | ||
|
|
fb278182d3 | ||
|
|
c2c63d2f67 | ||
|
|
7f740455fe | ||
|
|
946b992746 | ||
|
|
a6c43e6f2f | ||
|
|
ecad52c18d | ||
|
|
e49e5a0474 | ||
|
|
9231b3cfca | ||
|
|
68e0159292 | ||
|
|
1a674590bf | ||
|
|
1ef47e7b3f | ||
|
|
214a2762df | ||
| cb5d06decd | |||
| 8555a88202 | |||
|
|
2287f4493a | ||
|
|
bb357f7cab | ||
|
|
d9b240cd2d | ||
|
|
bea5ee96d9 | ||
|
|
7d205fd526 | ||
|
|
c15b2a0cbb | ||
|
|
7ccba30a3d | ||
|
|
8091485588 | ||
|
|
1413f968d6 | ||
|
|
d1d1bb09e9 | ||
|
|
3c1a7e0171 | ||
|
|
0cb50f2f01 | ||
|
|
2287586700 | ||
|
|
ea7660ddb3 | ||
|
|
44e98e8f2f | ||
|
|
856ccbb969 | ||
|
|
0920286b4c | ||
|
|
f34e10cfd9 | ||
| ae5d202661 | |||
| bc43c844fc | |||
| 67be9aa27b | |||
| 047b997a22 | |||
| bac51891b7 | |||
|
|
714d6af7cd | ||
| 6efd6334bb | |||
| 91f4475d76 | |||
|
|
de309784b4 | ||
|
|
a623cf53f3 | ||
| 440cd59e50 | |||
| eefb6f6265 | |||
| f5e1226837 | |||
| 151f7e701f | |||
| 40398497c2 | |||
|
|
cda10788fb | ||
|
|
845905d9c8 | ||
| 89055506d6 | |||
|
|
5908ae7905 | ||
|
|
4131665284 | ||
|
|
6a43dfb0d7 | ||
| 3d38d78845 | |||
| 600f19ac80 | |||
|
|
0a3a664653 | ||
|
|
471ec1cd2e | ||
|
|
e296cd7ca0 | ||
|
|
31cfa8cd7c | ||
|
|
70fe8aa367 | ||
|
|
cc9dafac6f | ||
|
|
32429f1481 | ||
| 9485a463b8 | |||
| 35c6ab4a08 | |||
| e58b0fa015 | |||
| beb92967e5 | |||
| 015583f1cd | |||
| d40c54b802 | |||
| 647665b6b9 | |||
| 4fc78bc382 | |||
| 50d000e7e2 | |||
|
|
ad500c4bef | ||
|
|
916077c6f8 | ||
|
|
935fb238a4 | ||
|
|
d03e5b4562 | ||
|
|
05c45c6468 | ||
| 9020613a8b | |||
| be92d5943d | |||
|
|
b2368a0751 | ||
| 7948d5f773 | |||
|
|
1a16851ad0 | ||
|
|
810c14a839 | ||
|
|
df0e8eb228 | ||
| 79605c8a9e | |||
|
|
9b644119ae | ||
|
|
ffa9919019 | ||
|
55ca892f90
|
|||
|
eaca187032
|
|||
|
|
3b9d05cc6d | ||
| d00881de2e | |||
| d8e85cf75d | |||
| 39f21763e4 | |||
|
|
af43901ca3 | ||
|
|
62565b9ae2 | ||
|
|
bca176170c | ||
|
|
2a91ca0cff | ||
|
|
19a75554b0 | ||
|
|
58ae476a3e | ||
|
|
44d8254a0b | ||
|
|
bd2cdfcef2 | ||
| a50b832c2a | |||
|
|
10194105e3 | ||
|
|
b474288df7 | ||
|
|
f338209f32 | ||
|
|
bef832e45b | ||
|
|
71cfb4db77 | ||
| 86453e7e11 | |||
|
|
98b9f8e62d | ||
| 44cd8d258d | |||
| 764b65d094 | |||
|
|
4d2c64b012 | ||
|
|
35c0b0be58 | ||
|
|
7a54e2cfb3 | ||
|
|
54283f6d3c | ||
|
|
697acd1d88 | ||
|
|
5cdb80b4d6 | ||
|
|
e48ff8be73 | ||
|
|
096217eea6 | ||
|
|
ed5290be86 | ||
|
|
b036c3903c | ||
|
|
57b43b7b60 | ||
| ab1ddb7bd1 | |||
| 881f2f32f4 | |||
| 0754ba5292 | |||
|
|
743a89c3a2 | ||
|
|
6692c3ab7c | ||
|
|
c16a5fdac4 | ||
|
|
60ec7e54f5 | ||
| dd48f5ab87 | |||
|
|
db674ec31d | ||
|
|
48150ffc8b | ||
|
|
1ad80efab6 | ||
|
|
aa8789f8f8 | ||
|
|
56e3f2da5c | ||
|
|
a4104822e2 | ||
|
|
c13f386e3b | ||
| 4bd73450b5 | |||
| 64da28e814 | |||
| 639e1b9c6d | |||
|
|
63e828d2df | ||
|
|
b8c30b5703 | ||
|
|
805ea91fc2 | ||
|
|
c4c422da57 | ||
| 544fb35121 | |||
| 43edccb284 | |||
| 7531ba4b5c | |||
| 983aa592d8 | |||
| 8378784231 | |||
| dca25cc601 | |||
|
|
c8fe81cd80 | ||
| c0a4724f57 | |||
| 484c52d813 | |||
|
|
47843b2087 | ||
|
|
c3a6126799 | ||
|
|
e94b250541 | ||
|
|
db5f6c7540 | ||
|
|
79a6c9e90d | ||
| e2e67e3977 | |||
| 6c06450701 | |||
|
|
d7379a1af2 | ||
|
|
d731611e0c | ||
|
|
dceb92ba8e | ||
|
|
1e039cb1bf | ||
|
6f3e1ffbe3
|
|||
|
|
6a6dca3fce | ||
|
|
d6d92071bf | ||
|
|
d40657dc64 | ||
|
|
6dde2a1e59 | ||
|
|
b7823cec16 | ||
|
|
eabd7b8d51 | ||
|
|
27ec445e54 | ||
|
|
ad108b285f | ||
|
|
f471214ef7 | ||
|
|
a0190f8f40 | ||
| 82af984023 | |||
| 0373010497 | |||
|
|
c22d869aa7 | ||
| 87c93e90cd | |||
| 3d6dca9386 | |||
|
|
f946e7e6ab | ||
|
|
d50dfa5867 | ||
| 249128e011 | |||
| ca16a80b1f | |||
|
|
e789e7ba9b | ||
|
|
5048f7be14 | ||
|
|
0e3603f596 | ||
| 9cd4b3c1cc | |||
| 1d9aa75960 | |||
|
|
0a24ef70e0 | ||
| 3b5d3d671e | |||
| 7db83d216e | |||
| d1a7002422 | |||
| 1d8e7e072f | |||
|
7466fe7a34
|
|||
|
|
24cf5047da | ||
|
|
1f103e5ef5 | ||
|
|
9e87974eb1 | ||
|
|
d806cf76c4 | ||
|
|
6e2703998d | ||
| 6f9737c2c2 | |||
|
|
5e696c10d5 | ||
|
|
927e25c72c | ||
| 8b1b99ba35 | |||
| 2c102cd1ff | |||
|
|
42c4926c47 | ||
|
|
703556d893 | ||
|
|
0b529a5c3c | ||
|
|
5186b3f61e | ||
| 4dc0da5099 | |||
| 1bad6ba065 | |||
| 3efee22536 | |||
| eef48ac3a3 | |||
| e35cfbc3dd | |||
| 4a5fd96b32 | |||
|
|
bdffe73f59 | ||
| cdfe722457 | |||
| 0aecea6de2 | |||
| 5a88c77171 | |||
| 8003217092 | |||
| 9b325041c1 | |||
| 1e7fbe5d56 | |||
| 0261c263f9 | |||
| 8d6ae85b0d | |||
| f14bdb3068 | |||
| 3c66840f95 | |||
| 733e3ea9d5 | |||
|
ca634bb707
|
|||
| 9abc206d1a | |||
| 85f17c0fd8 | |||
| 14bad81b9f | |||
|
|
ffd596e2c7 | ||
| 99f8187092 | |||
| f30b784f45 | |||
| f06b5f8fc0 | |||
| 2e781b900d | |||
| d76b1ae75d | |||
| 40110580e0 | |||
| eab7961a83 | |||
| 432e06e801 | |||
| fe1ff5c7a3 | |||
| 6e66b8e08b | |||
| 7abdd0545e | |||
|
|
3f1768e467 | ||
|
|
f464921ae3 | ||
|
|
7603ad3fb0 | ||
|
|
be7ccc78b8 | ||
|
|
b3135c982f | ||
|
13386175f5
|
|||
|
23e8f3dc2d
|
|||
|
|
b323ce2eef | ||
|
|
08e323ba51 | ||
|
|
9f50f36b1d | ||
|
|
4399c1d590 | ||
|
|
f7376f6dca | ||
|
|
518cb34340 | ||
|
|
f210a5f508 | ||
|
|
9ebc49dd1c | ||
|
|
c119eeb468 | ||
|
|
ab616f8f79 | ||
|
|
69286881e4 | ||
|
|
4419df8d1b | ||
|
|
aed2bd48fc | ||
|
|
d3d752f90c | ||
|
|
33ecfe88ef | ||
|
|
fd52fdd35b | ||
|
|
1d13d3dccf | ||
|
|
1c84bcae35 | ||
|
|
df497d5952 | ||
|
|
f65e122f8d | ||
| 161f0744aa | |||
| 95de9ad3b3 | |||
|
|
d5c170055f | ||
|
|
61f0521072 | ||
|
|
6ca14c55f2 | ||
|
|
1309d09aee | ||
| aba75b3a19 | |||
|
|
e87481d8db | ||
| acaad69917 | |||
|
|
ff588ad57a | ||
| 65df27154c | |||
| 8dfa1957f4 | |||
| 570eba3794 | |||
| 94a39fc61f | |||
| 2d359e5f99 | |||
|
|
04692e0c44 | ||
|
|
809fd23b88 | ||
|
|
e3653daea3 | ||
|
|
48fa75386c | ||
|
|
1b3a12a4dc | ||
|
|
543ddf540e | ||
|
|
a3fb471546 | ||
|
|
277f964b30 | ||
|
|
9bcf7adb67 | ||
|
|
f343fa0071 | ||
|
|
e5862e9218 | ||
|
|
29ae2423f8 | ||
|
|
1755a4a7df | ||
|
|
25d3325049 | ||
|
|
fb6a4c3b87 | ||
| 317f80a984 | |||
| 28cdc1d9e5 | |||
| c2087b15d5 | |||
| a8d785beb3 | |||
|
|
a6784b5549 | ||
|
|
d770292be8 | ||
|
|
b3a1037ade | ||
|
|
02946cf0b4 | ||
|
|
cf051d5108 | ||
|
|
96977c6183 | ||
|
|
73d83164fc | ||
|
|
1064f5e4a8 | ||
|
|
5be98c7087 | ||
|
|
0d689c7dff | ||
|
|
1f24ed46a0 | ||
|
|
92b4159f9e | ||
|
|
5817b41e29 | ||
| d6b132e3a6 | |||
|
|
318f70f34c | ||
|
|
e41525d40a | ||
|
|
a102220e52 | ||
|
|
e9a214c5b2 | ||
|
|
c53f5eb144 | ||
|
|
9ed64e0388 | ||
|
|
93040d4629 | ||
|
|
0144ad43f5 | ||
|
|
8da2fc30c3 | ||
| 0e27ae7795 | |||
| 33c6cdb9fe | |||
|
|
73b7014469 | ||
| 25aaf55b93 | |||
| 6a7546c43b | |||
| 0adda4bf7b | |||
|
|
f5f36427a4 | ||
|
|
590bfd3a10 | ||
|
|
16db9bd1a2 | ||
|
|
d0af933b35 | ||
|
|
2b56b40e6d | ||
|
|
4b2d7068b3 | ||
|
|
bd93b8be8e | ||
|
|
aa3fe2b872 | ||
|
|
a61ff915ac | ||
|
|
0a3e678329 | ||
|
|
d4336b0dcb | ||
|
|
65d2698af4 | ||
|
|
6454576417 | ||
|
|
a485bd5977 | ||
|
|
e733688fd0 | ||
|
|
e86f6a8cbd | ||
|
|
fcc9e17664 | ||
|
|
5c9d4ffa9a | ||
|
|
419bc2747b | ||
|
|
1ee99d6866 | ||
|
|
3ab8973895 | ||
|
|
acfa3baeb5 | ||
|
|
c21d7cf101 | ||
|
|
ec895e1d9e | ||
|
|
c964f09a4f | ||
|
|
0bc32f27df | ||
|
|
6640e93ce9 | ||
|
|
d7aefe0cf0 | ||
|
|
187fe5b361 | ||
|
|
b31aea7bc5 | ||
|
c661baf058
|
|||
|
|
0fe0461340 | ||
|
|
d5394c9e92 | ||
|
|
42135fd26c | ||
|
|
38569f55c7 | ||
|
|
5ce03c2db3 | ||
|
|
1031b3eb79 | ||
|
|
fcdf4cd476 | ||
| 6268dffff8 | |||
| c10737bfd7 | |||
|
|
bd0cc69668 | ||
|
|
84fffac264 | ||
|
|
5bf968010e | ||
|
|
61bc095d01 | ||
|
|
e376f97547 | ||
|
|
f2428d3cb3 | ||
|
|
2fdac85d31 | ||
|
|
b731395689 | ||
|
|
07405e3466 | ||
|
|
fc0c76bd77 | ||
|
|
d209547968 | ||
| 632b9fc5ea | |||
| 702591b4ec | |||
|
|
c562746e5f | ||
|
|
c0443cbec2 | ||
|
|
0191bc3821 | ||
|
|
633bd42036 | ||
|
|
998ef8d834 | ||
|
|
c25b076ca9 | ||
|
|
f43379f365 | ||
|
|
d902c0acf4 | ||
|
|
58e678d72c | ||
|
|
cbc49669d0 | ||
|
|
78bb638fd6 | ||
|
|
7a61bae471 | ||
|
|
e1b992526e | ||
|
|
1b043838ea | ||
|
|
07e72294dc | ||
|
|
b6b37ee68b | ||
|
|
43cb1f1bff | ||
|
|
f7a67c72bf | ||
|
|
c5476d08fa | ||
|
|
8af92b1557 | ||
|
|
eaa826bb8a | ||
|
|
140b3c371d | ||
|
|
f158eaa29c | ||
|
|
c4b98ade53 | ||
|
|
f2e85306ca | ||
|
|
42b9de8360 | ||
|
|
6c244f3121 | ||
|
|
9f56213d2f | ||
|
|
fb2f7cf680 | ||
|
|
8fcdd24f84 | ||
|
|
aaafde4a7c | ||
|
|
2b23003556 | ||
|
|
5681062f01 | ||
|
|
d61bf212f5 | ||
|
|
2bd7c8d51e | ||
|
|
1e63cdbcda | ||
|
|
86d85f12be | ||
|
|
dd470d49ec | ||
|
|
95d8062b00 | ||
|
|
8f82399214 | ||
|
|
6247150e9c | ||
|
5266644725
|
|||
|
81d9e96552
|
|||
|
|
4ec9f06114 | ||
|
0033e9f6c0
|
|||
|
571652c314
|
|||
|
|
7ec233e18a | ||
|
|
13c9a12336 | ||
|
|
83d472ecd6 | ||
|
|
c21da6512a | ||
|
|
4b4374e0df | ||
|
|
407276a04d | ||
|
|
64f60905b4 | ||
|
|
9e6072fed2 | ||
|
|
a3e5c424fd | ||
|
|
6683a350aa | ||
|
|
05bfa9b546 | ||
|
|
735988decb | ||
|
|
d0580592be | ||
|
|
817076bdbf | ||
|
|
736236e9ca | ||
|
|
3f4114c51b | ||
|
|
5c2c493c56 | ||
|
|
2c383ebea1 | ||
|
|
91e73450cf | ||
|
|
e55798944e | ||
|
|
5ea11a5ad2 | ||
|
|
2a3383e9e6 | ||
|
|
e871703724 | ||
|
|
1ee367d7be | ||
|
|
bce536b9b4 | ||
|
|
7c9182e0b0 | ||
|
|
aa915d639d | ||
|
|
9489ebc7d6 | ||
|
2a5c525193
|
|||
|
9e2d981c60
|
|||
|
|
53dfe9e4f5 | ||
|
48e95fbdb0
|
|||
|
fd94d85edf
|
|||
|
f2d1a85afb
|
|||
|
0bdbcb8bab
|
|||
|
|
7b91a819be | ||
| bc89025924 | |||
|
|
16bcaef4c3 | ||
|
|
fcbfa451f2 | ||
|
|
559ce53ca4 | ||
|
|
ee2c5b58d7 | ||
|
|
d98d998106 | ||
|
212c45e070
|
|||
|
143fa9b6ed
|
|||
|
4849928288
|
|||
|
|
9248ee8868 | ||
|
|
1616d96732 | ||
| 0bbedd1600 | |||
|
|
c7e49644d8 | ||
|
010c903c74
|
|||
|
e4d12e3537
|
|||
|
051cc8384e
|
|||
|
49a94170d2
|
|||
|
|
42e8e37bd4 | ||
|
|
5d2c350ce2 | ||
|
|
85dc0362c1 | ||
|
|
01c06728eb | ||
|
|
257250714d | ||
|
|
3b769c3059 | ||
|
|
a7395ed45b | ||
|
|
ab07c7928f | ||
|
|
b0c0d15505 | ||
|
|
fcf50790da | ||
|
|
1e43654607 | ||
|
|
4fecbe820d | ||
|
|
763c9dfa6b | ||
|
9de5879786
|
|||
|
|
9396e7492c | ||
|
3ac3415178
|
|||
|
1aae1c59d0
|
|||
|
907e80a01c
|
|||
|
|
8a10b69716 | ||
|
|
1a3cf7edd6 | ||
|
|
76d0fc979b | ||
|
|
a42d8ece35 | ||
|
|
93377f53fc | ||
|
|
c853d74ba0 | ||
|
|
0b9f74f4f4 | ||
|
|
5da6baf828 | ||
|
5766945006
|
|||
|
a53d473b58
|
|||
|
|
d1207ad80e | ||
|
|
e2efe71b33 | ||
|
|
2aef6ed9c0 | ||
|
|
fcb6db0603 | ||
| 01b1136316 | |||
|
|
2512fe9e75 | ||
|
|
f89b5cd2ec | ||
|
|
ab284ed208 | ||
|
|
00a578657c | ||
|
|
38ce40ae7d | ||
| e1be6c7138 | |||
| 28539e60b0 | |||
|
adb11b3ed0
|
|||
|
|
f1e6dedd44 | ||
|
|
8ea1454c06 | ||
| 81b8d578f2 | |||
|
|
16b11db39c | ||
| 0d923cc920 | |||
| c523e93564 | |||
| d588798ea1 | |||
| a11f165f2a | |||
|
|
d4f487d554 | ||
|
|
93d5a0e532 | ||
|
|
00ddc462d2 | ||
|
|
5f4a74f8ba | ||
|
|
a8eff6fbd1 | ||
|
|
baa7367ebe | ||
|
|
69f8a34aac | ||
|
|
21b3a67988 | ||
|
|
d89574ce73 | ||
| ddeac6b9d9 | |||
| 17906ec0eb | |||
|
|
311c088d3d | ||
| a2584d6083 | |||
| 35bd7739c6 | |||
| 7f43c88a39 | |||
|
|
fc1c54a141 | ||
|
|
2af111c584 | ||
| c093cca8b1 | |||
|
|
2bb1b78ba4 | ||
| 3ab26172c4 | |||
| cdd45ce88b | |||
|
210a7d3136
|
|||
|
92ec64d80f
|
|||
|
ff37f71fdb
|
|||
|
6056341525
|
|||
|
|
075612f5bd | ||
| 1a87ed8210 | |||
|
|
c05ffeb16d | ||
| ee3710c5ed | |||
| 4327c4b1f7 | |||
| 492e56a098 | |||
| f0257a2784 | |||
| ec1ead89ab | |||
|
|
ae53e87aba | ||
|
|
939dd2320a | ||
|
|
2c8b73e2e2 | ||
|
|
eabc6212ea | ||
|
|
c120d6517f | ||
|
|
597ee1dad7 | ||
|
|
c4a901504d | ||
|
|
f5cc5d07fd | ||
|
|
8a0e6c921c | ||
|
|
bf1bff9ace | ||
|
|
06f24e988f | ||
|
|
ae327f545e | ||
|
|
35012b18c5 | ||
|
|
9688bad622 | ||
|
|
447b8d3372 | ||
|
|
01102cb9b0 | ||
|
|
934d1a6114 | ||
|
|
6f74c8cb77 | ||
|
|
63b9e619a4 | ||
|
|
82e28f26d7 | ||
|
|
ca9fd96baa | ||
|
|
39b22267d6 | ||
|
|
60d7984d66 | ||
|
|
33d219d2ac | ||
|
|
85a77e05af | ||
|
|
3dfeabcec6 | ||
|
|
673fdc443c | ||
|
|
2f6e5a7648 | ||
|
|
2cbe8e9517 | ||
|
|
2f0460d6ec | ||
|
|
37f4ed7770 | ||
|
|
e3104c61cb | ||
|
|
bc434ee8cb | ||
|
|
f4102b948e | ||
|
|
ed991de11a | ||
|
|
322e161064 | ||
|
|
1adc741cc2 | ||
|
|
4eff87bbf7 | ||
|
|
fc6970d08a | ||
|
|
f616c7e1c6 | ||
|
|
89ec749172 | ||
|
|
182f0f2c64 | ||
|
|
e3681495ce | ||
|
|
37415fa261 | ||
|
|
7243dbe763 | ||
|
|
0ff5c4bedd | ||
|
|
f047f89ad5 | ||
|
|
0eb0aa1d3b | ||
|
|
6019891591 | ||
|
|
615281601c | ||
|
|
82baf5d384 | ||
|
|
6fe93ecb7e | ||
|
|
b3222f3523 | ||
|
|
3b94863521 | ||
|
|
582dc8bf46 | ||
|
|
a9868fd275 | ||
|
|
218e56576a | ||
|
|
c50e79375a | ||
|
|
dcb8308f35 | ||
|
|
183b310696 | ||
|
|
c7d0c86d52 | ||
|
|
48225662b1 | ||
|
|
f53fc088ec | ||
|
|
05517fcbcd | ||
|
|
18af51b0a4 | ||
|
|
ede3da7a87 | ||
|
|
8e3327ef6a | ||
|
|
827f6daabc | ||
|
|
2567442321 | ||
|
|
9cf5478519 | ||
|
|
e5275311c2 | ||
|
|
21e4870e4c | ||
|
|
beba7c8d2e | ||
|
|
fe35313305 | ||
|
|
d7a8bbf40b | ||
|
|
f1893c596e | ||
|
|
6367c1ab4d | ||
|
|
9579887fc4 | ||
|
|
e29be2f140 | ||
|
|
2736b5d1ef | ||
|
|
ff52fb16b6 | ||
|
|
ccbf3867e1 | ||
|
|
f0de422c6e | ||
|
|
64cc19b252 | ||
|
|
26226009f0 | ||
|
|
d10e09da02 | ||
|
|
00a2e58fee | ||
|
|
b1cb45dfe6 | ||
|
|
a2951d1f05 | ||
|
|
c0b1e97602 | ||
|
|
71621a9dc4 | ||
|
|
b3ed2afebe | ||
|
|
704620baff | ||
|
|
8feb805167 | ||
|
|
065b32755a | ||
|
|
1b5f4bff2c | ||
|
|
8e1c5a485f | ||
| 5fa6c9db35 | |||
| 5482b9be2c | |||
|
|
7400273b0a | ||
|
|
0b7cdde4a0 | ||
|
|
d5382aec4f | ||
|
|
df484dc816 | ||
|
|
7ea4086807 | ||
|
|
b04bf6a951 | ||
| 7c33dcf630 | |||
| 5e65e21f0b | |||
| 53ca38ce53 | |||
|
|
398e3c1b91 | ||
| 508978d586 | |||
| e267481f71 | |||
|
|
193bee5ac8 | ||
| f58efa2871 | |||
| 6568b6d723 | |||
|
|
4b1b34d8a7 | ||
| 39c09f8565 | |||
|
|
275a77807e | ||
|
|
6443541a79 | ||
|
|
5eb6f7d307 | ||
|
|
bce2a66177 | ||
|
|
7602641909 | ||
|
|
54f3a261c5 | ||
|
|
906bac965f | ||
|
|
4ec1de6900 | ||
|
|
8ded131666 | ||
| 47b14f932e | |||
|
|
838ebb3f69 | ||
| c459724114 | |||
| b0c9d1164d | |||
| 7c51d88501 | |||
| 5b03cf826b | |||
| f305863616 | |||
| db5809d522 | |||
|
|
83df6f015c | ||
| e7231b0e13 | |||
|
|
cff60eb51c | ||
|
f914a312f5
|
|||
| 56ebb301ca | |||
|
|
a59df12595 | ||
|
|
5cc7fc6ccb | ||
|
|
55027cb630 | ||
|
|
036eba68e1 | ||
|
|
d34e0d9348 | ||
|
|
31765ce0ef | ||
|
|
9fe7cdca92 | ||
|
|
adc3502b6b | ||
|
|
95fe369648 | ||
|
|
01845a0cb7 | ||
|
|
708eaf4178 | ||
|
|
d629a58712 | ||
|
|
90886b63d6 | ||
|
|
084f89fa32 | ||
|
|
ceb3a095d8 | ||
|
|
1758275f11 | ||
|
|
e74e506ffe | ||
|
|
599a36466a | ||
|
|
613e128cab | ||
|
|
e4f8022b7a | ||
|
|
5603c41900 | ||
| a8a27c9b51 | |||
|
|
b70de5a4be | ||
|
|
b1fd07cd30 | ||
|
|
6ab2e02fe6 | ||
|
|
5535c5780c | ||
|
|
49e0a2c055 | ||
|
|
efbe53b6b4 | ||
|
5e074dad10
|
|||
|
d6a88896d0
|
|||
|
5c99f5f8bb
|
|||
|
e1faba0ff2
|
|||
|
ba2f406bc0
|
|||
|
9b6db4684a
|
|||
|
|
561fd41d5d | ||
|
|
ce9995dac7 | ||
|
|
0afaea9513 | ||
|
|
9b5c6e3164 | ||
|
|
e6ebec8c1e | ||
|
|
2551921ed6 | ||
|
|
e02575aad7 | ||
|
|
ff3502c87a | ||
|
|
017f9b2140 | ||
|
|
c80d3a6958 | ||
|
|
3ca1127685 | ||
|
|
18369da5bc | ||
|
|
e65100cdc8 | ||
|
|
6a1cb51c2f | ||
|
c4d93e492b
|
|||
|
c2f72f72ac
|
|||
|
721b6b2afa
|
|||
|
b6f011c669
|
|||
|
801607fc16
|
|||
|
01a4d33514
|
|||
|
e348ec74fd
|
|||
|
0458675608
|
|||
|
c61ffce0e9
|
|||
|
68a97dc980
|
|||
|
a07d167390
|
|||
|
|
a8721dcc69 | ||
|
|
68cf952ac6 | ||
|
|
e14d6a81fe | ||
|
|
a4912893a8 | ||
|
0adfb631ef
|
|||
|
b64ce1f67f
|
|||
|
e8e3b1595d
|
|||
|
f1427d5272
|
|||
|
|
bf6b87d65c | ||
|
|
0240997257 | ||
|
|
f1e341f0b9 | ||
|
a54acb8c42
|
|||
|
c6ede67589
|
|||
|
|
11176da5d8 | ||
|
|
0a604336c4 | ||
|
|
be9df7649f | ||
|
|
63fb923995 | ||
|
|
3afe40083d | ||
|
|
9d4767539c | ||
|
ac9bba8b5b
|
|||
|
80c46bea7f
|
|||
|
|
614f694777 | ||
|
|
1072d7b449 | ||
|
1b70596735
|
|||
|
|
61eebc9fbd | ||
|
b05909969f
|
|||
|
bd89ce7cc9
|
|||
|
130613b717
|
|||
|
b3c1f39a0e
|
|||
|
97c807cd33
|
|||
|
aede5f71ec
|
|||
|
786770f56a
|
|||
|
|
74d4f00784 | ||
|
d61c4235dc
|
|||
|
e8794b8c79
|
|||
|
552da005dc
|
|||
|
|
51452d2e68 | ||
|
5c5484b4d2
|
|||
|
|
9974a851e8 | ||
| 6c0bfc6c35 | |||
|
|
41bbd203cc | ||
|
|
4344c26bef | ||
|
|
e1c1c06fb2 | ||
|
|
70e63764ff | ||
|
|
d10f3e3af6 | ||
|
|
a4397d5447 | ||
|
|
320c87a1db | ||
|
|
8d1228c9e8 | ||
|
|
420bec7c46 | ||
|
|
ba1658beac | ||
|
|
575753038b | ||
|
|
061c9f0979 | ||
|
|
b48d1b8ad6 | ||
| dff7aeefb8 | |||
| 54f7980162 | |||
|
|
684cb5a376 | ||
|
|
597bccc080 | ||
|
|
72557fd0bf | ||
|
|
0b2f2214f9 | ||
|
|
ef51e69ffb | ||
|
|
c9eb40f455 | ||
|
|
b66750339d | ||
|
|
136460567c | ||
|
|
f80123c85d | ||
|
|
a22340196f | ||
|
|
cbaeffde2c | ||
| 649d50812b | |||
| b67f5436f8 | |||
| b637ddeb28 | |||
|
|
a20b7eacd6 | ||
| 6df639a0c3 | |||
|
|
d4a9887532 | ||
|
|
79b08a181d | ||
|
|
758cef1bd3 | ||
| fb8bbea99d | |||
| 9b261a4778 | |||
|
|
aafa29db8b | ||
|
|
896c39f9bc | ||
|
|
3a97ff7f57 | ||
|
|
7518c41fab | ||
|
|
8cb00a5340 | ||
|
|
baa51db26c | ||
|
|
fc260b2291 | ||
|
|
43ebb01b63 | ||
|
|
067dc0df5d | ||
| 6828c97415 | |||
| 50401e0030 | |||
| c3d2508693 | |||
| 642fd5cc91 | |||
| e8fb5a0030 | |||
|
|
0dee5073c6 | ||
|
|
b9b452f043 | ||
|
|
ddd3fad1c6 | ||
|
|
1f5723a97e | ||
|
|
5a177c952d | ||
|
|
86e456d152 | ||
| 03895f9e45 | |||
| 5c79f44055 | |||
| 83c38e74db | |||
| 1e5f2944cf | |||
|
|
e45ecbdef7 | ||
| c65694b36c | |||
| 0005469101 | |||
|
|
60b56bd41a | ||
|
|
81fe492655 | ||
|
|
849b7e038d | ||
|
|
82f5257cf1 | ||
|
|
e347659db4 | ||
|
|
7940317857 | ||
|
|
58415ab5c3 | ||
|
|
1176974a78 | ||
|
|
ce792426e6 | ||
| e92e727279 | |||
| f761900a3e | |||
| 32a57661fd | |||
|
|
5004e44934 | ||
| 99d55f05f8 | |||
| 9fd839fad8 | |||
|
|
1c7cc9e16f | ||
| 06d01962a6 | |||
| 2c2c1accb5 | |||
| 105b7eabf0 | |||
|
|
de1d83e1a9 | ||
|
|
ce97780741 | ||
| e81e56ea1d | |||
| aa6336ea1e | |||
| dd887cbb1f | |||
|
|
860053be67 | ||
| 5a4671b7b1 | |||
|
|
ec581e3509 | ||
| e7ae9dd06d | |||
| 0c7f55ff8d | |||
| bcd7f47409 | |||
| 476caebe7f | |||
|
|
dadc81c026 | ||
|
|
cc719d0ae5 | ||
|
|
53af79cf0d | ||
|
|
f534ad66e1 | ||
|
|
2d8cf02296 | ||
|
|
71386f8466 | ||
|
|
c897c8e56b | ||
|
|
2036069051 | ||
|
|
be6c63e526 | ||
|
|
a2af9c152a | ||
|
|
63f3dc926c | ||
|
|
21dde870c6 | ||
|
|
04f37a85ce | ||
| 10a332083b | |||
|
|
6818d1de62 | ||
|
|
1b10b75e25 | ||
|
|
b829a5aafe | ||
|
|
07073e290a | ||
|
|
ee6d286cd7 | ||
|
|
119637cb9b | ||
|
|
ee4097a2dd | ||
|
|
1185737eaa | ||
|
|
7d14086e54 | ||
|
|
78494cd30e | ||
|
|
ead5c54bcb | ||
|
|
b5b355c16c | ||
|
|
3067d7b250 | ||
|
|
9bc36152d9 | ||
|
|
c1b944b838 | ||
|
|
175a88f1c4 | ||
|
|
aac3e7d2f4 | ||
|
|
c0488b8cbe | ||
|
|
d66703c4d0 | ||
|
|
173975aadd | ||
|
|
d97fa37d2c | ||
|
|
782262b52e | ||
|
|
b8213ef6be | ||
|
|
e34623b1ce | ||
|
|
4e375ff32b | ||
|
|
f7529be3ea | ||
|
|
1aa9720405 | ||
|
|
709880ff5a | ||
|
|
6b78b4e12b | ||
|
|
f342a65aba | ||
|
|
dc860f8fd9 | ||
|
|
f8f900151a | ||
|
|
8d409eed0f | ||
|
|
dc86523cce | ||
|
|
506d112cce | ||
|
|
eb7f92282d | ||
|
|
3468e987b6 | ||
|
|
5acd9ece7f | ||
|
|
8bc43baf2c | ||
|
|
a2c99fb56d | ||
|
|
9689f95ea1 | ||
|
|
84d6b48353 | ||
|
|
bf64fc5213 | ||
|
|
d9f9c8aaf5 | ||
| 2502989ca2 | |||
| ba7cc9168e | |||
| dc0d9fe038 | |||
| 0e6c6937cd | |||
|
|
280b16c11c | ||
| 4b922c575e | |||
|
|
09528ed6b9 | ||
|
|
e61ff01518 | ||
|
|
a4c68bf7fe | ||
| bb1c8cc25d | |||
| 4b06fa788d | |||
|
|
ab08600486 | ||
| 7a5ccff6da | |||
| a407a5cf01 | |||
| 2b3e2f25ec | |||
| ed5ecbd914 | |||
| 2d4759114e | |||
| c68b9fec42 | |||
|
|
0f34c8cac6 | ||
|
|
d388a45630 | ||
| d839c53642 |
15
.github/dependabot.yml
vendored
Normal file
15
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# To get started with Dependabot version updates, you'll need to specify which
|
||||||
|
# package ecosystems to update and where the package manifests are located.
|
||||||
|
# Please see the documentation for all configuration options:
|
||||||
|
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||||
|
|
||||||
|
version: 2
|
||||||
|
updates:
|
||||||
|
- package-ecosystem: "gomod"
|
||||||
|
directory: "/"
|
||||||
|
schedule:
|
||||||
|
interval: "weekly"
|
||||||
|
- package-ecosystem: "npm"
|
||||||
|
directory: "/web/frontend"
|
||||||
|
schedule:
|
||||||
|
interval: "weekly"
|
||||||
331
.github/workflows/Release.yml
vendored
331
.github/workflows/Release.yml
vendored
@@ -1,331 +0,0 @@
|
|||||||
# See: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions
|
|
||||||
|
|
||||||
# Workflow name
|
|
||||||
name: Release
|
|
||||||
|
|
||||||
# Run on tag push
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- '**'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
|
|
||||||
#
|
|
||||||
# Build on AlmaLinux 8.5 using golang-1.18.2
|
|
||||||
#
|
|
||||||
AlmaLinux-RPM-build:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# See: https://hub.docker.com/_/almalinux
|
|
||||||
container: almalinux:8.5
|
|
||||||
# The job outputs link to the outputs of the 'rpmrename' step
|
|
||||||
# Only job outputs can be used in child jobs
|
|
||||||
outputs:
|
|
||||||
rpm : ${{steps.rpmrename.outputs.RPM}}
|
|
||||||
srpm : ${{steps.rpmrename.outputs.SRPM}}
|
|
||||||
steps:
|
|
||||||
|
|
||||||
# Use dnf to install development packages
|
|
||||||
- name: Install development packages
|
|
||||||
run: |
|
|
||||||
dnf --assumeyes group install "Development Tools" "RPM Development Tools"
|
|
||||||
dnf --assumeyes install wget openssl-devel diffutils delve which npm
|
|
||||||
dnf --assumeyes install 'dnf-command(builddep)'
|
|
||||||
|
|
||||||
# Checkout git repository and submodules
|
|
||||||
# fetch-depth must be 0 to use git describe
|
|
||||||
# See: https://github.com/marketplace/actions/checkout
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
# Use dnf to install build dependencies
|
|
||||||
- name: Install build dependencies
|
|
||||||
run: |
|
|
||||||
wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
|
||||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
|
||||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
|
|
||||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
|
|
||||||
rpm -i go*.rpm
|
|
||||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
|
||||||
#dnf --assumeyes builddep build/package/cc-backend.spec
|
|
||||||
|
|
||||||
- name: RPM build ClusterCockpit
|
|
||||||
id: rpmbuild
|
|
||||||
run: make RPM
|
|
||||||
|
|
||||||
# AlmaLinux 8.5 is a derivate of RedHat Enterprise Linux 8 (UBI8),
|
|
||||||
# so the created RPM both contain the substring 'el8' in the RPM file names
|
|
||||||
# This step replaces the substring 'el8' to 'alma85'. It uses the move operation
|
|
||||||
# because it is unclear whether the default AlmaLinux 8.5 container contains the
|
|
||||||
# 'rename' command. This way we also get the new names for output.
|
|
||||||
- name: Rename RPMs (s/el8/alma85/)
|
|
||||||
id: rpmrename
|
|
||||||
run: |
|
|
||||||
OLD_RPM="${{steps.rpmbuild.outputs.RPM}}"
|
|
||||||
OLD_SRPM="${{steps.rpmbuild.outputs.SRPM}}"
|
|
||||||
NEW_RPM="${OLD_RPM/el8/alma85}"
|
|
||||||
NEW_SRPM=${OLD_SRPM/el8/alma85}
|
|
||||||
mv "${OLD_RPM}" "${NEW_RPM}"
|
|
||||||
mv "${OLD_SRPM}" "${NEW_SRPM}"
|
|
||||||
echo "::set-output name=SRPM::${NEW_SRPM}"
|
|
||||||
echo "::set-output name=RPM::${NEW_RPM}"
|
|
||||||
|
|
||||||
# See: https://github.com/actions/upload-artifact
|
|
||||||
- name: Save RPM as artifact
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend RPM for AlmaLinux 8.5
|
|
||||||
path: ${{ steps.rpmrename.outputs.RPM }}
|
|
||||||
- name: Save SRPM as artifact
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend SRPM for AlmaLinux 8.5
|
|
||||||
path: ${{ steps.rpmrename.outputs.SRPM }}
|
|
||||||
|
|
||||||
#
|
|
||||||
# Build on UBI 8 using golang-1.18.2
|
|
||||||
#
|
|
||||||
UBI-8-RPM-build:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti
|
|
||||||
container: registry.access.redhat.com/ubi8/ubi:8.5-226.1645809065
|
|
||||||
# The job outputs link to the outputs of the 'rpmbuild' step
|
|
||||||
outputs:
|
|
||||||
rpm : ${{steps.rpmbuild.outputs.RPM}}
|
|
||||||
srpm : ${{steps.rpmbuild.outputs.SRPM}}
|
|
||||||
steps:
|
|
||||||
|
|
||||||
# Use dnf to install development packages
|
|
||||||
- name: Install development packages
|
|
||||||
run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros rpm-build-libs rpm-libs gcc make python38 git wget openssl-devel diffutils delve which
|
|
||||||
|
|
||||||
# Checkout git repository and submodules
|
|
||||||
# fetch-depth must be 0 to use git describe
|
|
||||||
# See: https://github.com/marketplace/actions/checkout
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
# Use dnf to install build dependencies
|
|
||||||
- name: Install build dependencies
|
|
||||||
run: |
|
|
||||||
wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
|
||||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \
|
|
||||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \
|
|
||||||
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm
|
|
||||||
rpm -i go*.rpm
|
|
||||||
dnf --assumeyes --disableplugin=subscription-manager install npm
|
|
||||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
|
||||||
#dnf --assumeyes builddep build/package/cc-backend.spec
|
|
||||||
|
|
||||||
- name: RPM build ClusterCockpit
|
|
||||||
id: rpmbuild
|
|
||||||
run: make RPM
|
|
||||||
|
|
||||||
# See: https://github.com/actions/upload-artifact
|
|
||||||
- name: Save RPM as artifact
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend RPM for UBI 8
|
|
||||||
path: ${{ steps.rpmbuild.outputs.RPM }}
|
|
||||||
- name: Save SRPM as artifact
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend SRPM for UBI 8
|
|
||||||
path: ${{ steps.rpmbuild.outputs.SRPM }}
|
|
||||||
|
|
||||||
#
|
|
||||||
# Build on Ubuntu 20.04 using official go 1.19.1 package
|
|
||||||
#
|
|
||||||
Ubuntu-focal-build:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
container: ubuntu:20.04
|
|
||||||
# The job outputs link to the outputs of the 'debrename' step
|
|
||||||
# Only job outputs can be used in child jobs
|
|
||||||
outputs:
|
|
||||||
deb : ${{steps.debrename.outputs.DEB}}
|
|
||||||
steps:
|
|
||||||
# Use apt to install development packages
|
|
||||||
- name: Install development packages
|
|
||||||
run: |
|
|
||||||
apt update && apt --assume-yes upgrade
|
|
||||||
apt --assume-yes install build-essential sed git wget bash
|
|
||||||
apt --assume-yes install npm
|
|
||||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
|
||||||
# Checkout git repository and submodules
|
|
||||||
# fetch-depth must be 0 to use git describe
|
|
||||||
# See: https://github.com/marketplace/actions/checkout
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
fetch-depth: 0
|
|
||||||
# Use official golang package
|
|
||||||
- name: Install Golang
|
|
||||||
run: |
|
|
||||||
wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
|
|
||||||
tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
|
|
||||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
|
||||||
go version
|
|
||||||
- name: DEB build ClusterCockpit
|
|
||||||
id: dpkg-build
|
|
||||||
run: |
|
|
||||||
ls -la
|
|
||||||
pwd
|
|
||||||
env
|
|
||||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
|
||||||
git config --global --add safe.directory $(pwd)
|
|
||||||
make DEB
|
|
||||||
- name: Rename DEB (add '_ubuntu20.04')
|
|
||||||
id: debrename
|
|
||||||
run: |
|
|
||||||
OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
|
|
||||||
NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb"
|
|
||||||
mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
|
|
||||||
echo "::set-output name=DEB::${NEW_DEB_FILE}"
|
|
||||||
# See: https://github.com/actions/upload-artifact
|
|
||||||
- name: Save DEB as artifact
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend DEB for Ubuntu 20.04
|
|
||||||
path: ${{ steps.debrename.outputs.DEB }}
|
|
||||||
|
|
||||||
#
|
|
||||||
# Build on Ubuntu 20.04 using official go 1.19.1 package
|
|
||||||
#
|
|
||||||
Ubuntu-jammy-build:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
container: ubuntu:22.04
|
|
||||||
# The job outputs link to the outputs of the 'debrename' step
|
|
||||||
# Only job outputs can be used in child jobs
|
|
||||||
outputs:
|
|
||||||
deb : ${{steps.debrename.outputs.DEB}}
|
|
||||||
steps:
|
|
||||||
# Use apt to install development packages
|
|
||||||
- name: Install development packages
|
|
||||||
run: |
|
|
||||||
apt update && apt --assume-yes upgrade
|
|
||||||
apt --assume-yes install build-essential sed git wget bash npm
|
|
||||||
npm install --global yarn rollup svelte rollup-plugin-svelte
|
|
||||||
# Checkout git repository and submodules
|
|
||||||
# fetch-depth must be 0 to use git describe
|
|
||||||
# See: https://github.com/marketplace/actions/checkout
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
with:
|
|
||||||
submodules: recursive
|
|
||||||
fetch-depth: 0
|
|
||||||
# Use official golang package
|
|
||||||
- name: Install Golang
|
|
||||||
run: |
|
|
||||||
wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz
|
|
||||||
tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz
|
|
||||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
|
||||||
go version
|
|
||||||
- name: DEB build ClusterCockpit
|
|
||||||
id: dpkg-build
|
|
||||||
run: |
|
|
||||||
ls -la
|
|
||||||
pwd
|
|
||||||
env
|
|
||||||
export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH
|
|
||||||
git config --global --add safe.directory $(pwd)
|
|
||||||
make DEB
|
|
||||||
- name: Rename DEB (add '_ubuntu22.04')
|
|
||||||
id: debrename
|
|
||||||
run: |
|
|
||||||
OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev)
|
|
||||||
NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu22.04.deb"
|
|
||||||
mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}"
|
|
||||||
echo "::set-output name=DEB::${NEW_DEB_FILE}"
|
|
||||||
# See: https://github.com/actions/upload-artifact
|
|
||||||
- name: Save DEB as artifact
|
|
||||||
uses: actions/upload-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend DEB for Ubuntu 22.04
|
|
||||||
path: ${{ steps.debrename.outputs.DEB }}
|
|
||||||
|
|
||||||
#
|
|
||||||
# Create release with fresh RPMs
|
|
||||||
#
|
|
||||||
Release:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# We need the RPMs, so add dependency
|
|
||||||
needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build, Ubuntu-jammy-build]
|
|
||||||
|
|
||||||
steps:
|
|
||||||
# See: https://github.com/actions/download-artifact
|
|
||||||
- name: Download AlmaLinux 8.5 RPM
|
|
||||||
uses: actions/download-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend RPM for AlmaLinux 8.5
|
|
||||||
- name: Download AlmaLinux 8.5 SRPM
|
|
||||||
uses: actions/download-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend SRPM for AlmaLinux 8.5
|
|
||||||
|
|
||||||
- name: Download UBI 8 RPM
|
|
||||||
uses: actions/download-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend RPM for UBI 8
|
|
||||||
- name: Download UBI 8 SRPM
|
|
||||||
uses: actions/download-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend SRPM for UBI 8
|
|
||||||
|
|
||||||
- name: Download Ubuntu 20.04 DEB
|
|
||||||
uses: actions/download-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend DEB for Ubuntu 20.04
|
|
||||||
|
|
||||||
- name: Download Ubuntu 22.04 DEB
|
|
||||||
uses: actions/download-artifact@v2
|
|
||||||
with:
|
|
||||||
name: cc-backend DEB for Ubuntu 22.04
|
|
||||||
|
|
||||||
# The download actions do not publish the name of the downloaded file,
|
|
||||||
# so we re-use the job outputs of the parent jobs. The files are all
|
|
||||||
# downloaded to the current folder.
|
|
||||||
# The gh-release action afterwards does not accept file lists but all
|
|
||||||
# files have to be listed at 'files'. The step creates one output per
|
|
||||||
# RPM package (2 per distro)
|
|
||||||
- name: Set RPM variables
|
|
||||||
id: files
|
|
||||||
run: |
|
|
||||||
ALMA_85_RPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.rpm}}")
|
|
||||||
ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}")
|
|
||||||
UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}")
|
|
||||||
UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}")
|
|
||||||
U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}")
|
|
||||||
U_2204_DEB=$(basename "${{ needs.Ubuntu-jammy-build.outputs.deb}}")
|
|
||||||
echo "ALMA_85_RPM::${ALMA_85_RPM}"
|
|
||||||
echo "ALMA_85_SRPM::${ALMA_85_SRPM}"
|
|
||||||
echo "UBI_8_RPM::${UBI_8_RPM}"
|
|
||||||
echo "UBI_8_SRPM::${UBI_8_SRPM}"
|
|
||||||
echo "U_2004_DEB::${U_2004_DEB}"
|
|
||||||
echo "U_2204_DEB::${U_2204_DEB}"
|
|
||||||
echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}"
|
|
||||||
echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}"
|
|
||||||
echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}"
|
|
||||||
echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}"
|
|
||||||
echo "::set-output name=U_2004_DEB::${U_2004_DEB}"
|
|
||||||
echo "::set-output name=U_2204_DEB::${U_2204_DEB}"
|
|
||||||
|
|
||||||
# See: https://github.com/softprops/action-gh-release
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v1
|
|
||||||
if: startsWith(github.ref, 'refs/tags/')
|
|
||||||
with:
|
|
||||||
name: cc-backend-${{github.ref_name}}
|
|
||||||
files: |
|
|
||||||
${{ steps.files.outputs.ALMA_85_RPM }}
|
|
||||||
${{ steps.files.outputs.ALMA_85_SRPM }}
|
|
||||||
${{ steps.files.outputs.UBI_8_RPM }}
|
|
||||||
${{ steps.files.outputs.UBI_8_SRPM }}
|
|
||||||
${{ steps.files.outputs.U_2004_DEB }}
|
|
||||||
${{ steps.files.outputs.U_2204_DEB }}
|
|
||||||
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
|||||||
- name: Install Go
|
- name: Install Go
|
||||||
uses: actions/setup-go@v4
|
uses: actions/setup-go@v4
|
||||||
with:
|
with:
|
||||||
go-version: 1.19.x
|
go-version: 1.25.x
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v3
|
||||||
- name: Build, Vet & Test
|
- name: Build, Vet & Test
|
||||||
|
|||||||
29
.gitignore
vendored
29
.gitignore
vendored
@@ -1,19 +1,32 @@
|
|||||||
/cc-backend
|
/cc-backend
|
||||||
|
|
||||||
/var/job-archive
|
|
||||||
/var/*.db
|
|
||||||
/var/machine-state
|
|
||||||
|
|
||||||
/.env
|
/.env
|
||||||
/config.json
|
/config.json
|
||||||
|
/uiConfig.json
|
||||||
|
|
||||||
|
/var/job-archive
|
||||||
|
/var/machine-state
|
||||||
|
/var/*.db-shm
|
||||||
|
/var/*.db-wal
|
||||||
|
/var/*.db
|
||||||
|
/var/*.txt
|
||||||
|
|
||||||
|
/var/checkpoints*
|
||||||
|
|
||||||
|
migrateTimestamps.pl
|
||||||
|
test_ccms_write_api*
|
||||||
|
|
||||||
/web/frontend/public/build
|
/web/frontend/public/build
|
||||||
/web/frontend/node_modules
|
/web/frontend/node_modules
|
||||||
/.vscode/*
|
|
||||||
/archive-migration
|
/archive-migration
|
||||||
/archive-manager
|
/archive-manager
|
||||||
var/job.db-shm
|
|
||||||
var/job.db-wal
|
|
||||||
|
|
||||||
|
/internal/repository/testdata/job.db-shm
|
||||||
|
/internal/repository/testdata/job.db-wal
|
||||||
|
|
||||||
|
/.vscode/*
|
||||||
dist/
|
dist/
|
||||||
*.db
|
*.db
|
||||||
|
.idea
|
||||||
|
tools/archive-migration/archive-migration
|
||||||
|
tools/archive-manager/archive-manager
|
||||||
|
|||||||
@@ -34,19 +34,6 @@ builds:
|
|||||||
main: ./tools/archive-manager
|
main: ./tools/archive-manager
|
||||||
tags:
|
tags:
|
||||||
- static_build
|
- static_build
|
||||||
- env:
|
|
||||||
- CGO_ENABLED=0
|
|
||||||
goos:
|
|
||||||
- linux
|
|
||||||
goarch:
|
|
||||||
- amd64
|
|
||||||
goamd64:
|
|
||||||
- v3
|
|
||||||
id: "archive-migration"
|
|
||||||
binary: archive-migration
|
|
||||||
main: ./tools/archive-migration
|
|
||||||
tags:
|
|
||||||
- static_build
|
|
||||||
- env:
|
- env:
|
||||||
- CGO_ENABLED=0
|
- CGO_ENABLED=0
|
||||||
goos:
|
goos:
|
||||||
@@ -70,7 +57,7 @@ archives:
|
|||||||
{{- else }}{{ .Arch }}{{ end }}
|
{{- else }}{{ .Arch }}{{ end }}
|
||||||
{{- if .Arm }}v{{ .Arm }}{{ end }}
|
{{- if .Arm }}v{{ .Arm }}{{ end }}
|
||||||
checksum:
|
checksum:
|
||||||
name_template: 'checksums.txt'
|
name_template: "checksums.txt"
|
||||||
snapshot:
|
snapshot:
|
||||||
name_template: "{{ incpatch .Version }}-next"
|
name_template: "{{ incpatch .Version }}-next"
|
||||||
changelog:
|
changelog:
|
||||||
@@ -100,6 +87,7 @@ changelog:
|
|||||||
release:
|
release:
|
||||||
draft: false
|
draft: false
|
||||||
footer: |
|
footer: |
|
||||||
Supports job archive version 1 and database version 6.
|
Supports job archive version 2 and database version 8.
|
||||||
|
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
|
||||||
|
|
||||||
# vim: set ts=2 sw=2 tw=0 fo=cnqoj
|
# vim: set ts=2 sw=2 tw=0 fo=cnqoj
|
||||||
|
|||||||
26
AGENTS.md
Normal file
26
AGENTS.md
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# ClusterCockpit Backend - Agent Guidelines
|
||||||
|
|
||||||
|
## Build/Test Commands
|
||||||
|
|
||||||
|
- Build: `make` or `go build ./cmd/cc-backend`
|
||||||
|
- Run all tests: `make test` (runs: `go clean -testcache && go build ./... && go vet ./... && go test ./...`)
|
||||||
|
- Run single test: `go test -run TestName ./path/to/package`
|
||||||
|
- Run single test file: `go test ./path/to/package -run TestName`
|
||||||
|
- Frontend build: `cd web/frontend && npm install && npm run build`
|
||||||
|
- Generate GraphQL: `make graphql` (uses gqlgen)
|
||||||
|
- Generate Swagger: `make swagger` (uses swaggo/swag)
|
||||||
|
|
||||||
|
## Code Style
|
||||||
|
|
||||||
|
- **Formatting**: Use `gofumpt` for all Go files (strict requirement)
|
||||||
|
- **Copyright header**: All files must include copyright header (see existing files)
|
||||||
|
- **Package docs**: Document packages with comprehensive package-level comments explaining purpose, usage, configuration
|
||||||
|
- **Imports**: Standard library first, then external packages, then internal packages (grouped with blank lines)
|
||||||
|
- **Naming**: Use camelCase for private, PascalCase for exported; descriptive names (e.g., `JobRepository`, `handleError`)
|
||||||
|
- **Error handling**: Return errors, don't panic; use custom error types where appropriate; log with cclog package
|
||||||
|
- **Logging**: Use `cclog` package (e.g., `cclog.Errorf()`, `cclog.Warnf()`, `cclog.Debugf()`)
|
||||||
|
- **Testing**: Use standard `testing` package; use `testify/assert` for assertions; name tests `TestFunctionName`
|
||||||
|
- **Comments**: Document all exported functions/types with godoc-style comments
|
||||||
|
- **Structs**: Document fields with inline comments, especially for complex configurations
|
||||||
|
- **HTTP handlers**: Return proper status codes; use `handleError()` helper for consistent error responses
|
||||||
|
- **JSON**: Use struct tags for JSON marshaling; `DisallowUnknownFields()` for strict decoding
|
||||||
215
CLAUDE.md
Normal file
215
CLAUDE.md
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
This file provides guidance to Claude Code (claude.ai/code) when working with
|
||||||
|
code in this repository.
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
ClusterCockpit is a job-specific performance monitoring framework for HPC
|
||||||
|
clusters. This is a Golang backend that provides REST and GraphQL APIs, serves a
|
||||||
|
Svelte-based frontend, and manages job archives and metric data from various
|
||||||
|
time-series databases.
|
||||||
|
|
||||||
|
## Build and Development Commands
|
||||||
|
|
||||||
|
### Building
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Build everything (frontend + backend)
|
||||||
|
make
|
||||||
|
|
||||||
|
# Build only the frontend
|
||||||
|
make frontend
|
||||||
|
|
||||||
|
# Build only the backend (requires frontend to be built first)
|
||||||
|
go build -ldflags='-s -X main.date=$(date +"%Y-%m-%d:T%H:%M:%S") -X main.version=1.4.4 -X main.commit=$(git rev-parse --short HEAD)' ./cmd/cc-backend
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run all tests
|
||||||
|
make test
|
||||||
|
|
||||||
|
# Run tests with verbose output
|
||||||
|
go test -v ./...
|
||||||
|
|
||||||
|
# Run tests for a specific package
|
||||||
|
go test ./internal/repository
|
||||||
|
```
|
||||||
|
|
||||||
|
### Code Generation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Regenerate GraphQL schema and resolvers (after modifying api/*.graphqls)
|
||||||
|
make graphql
|
||||||
|
|
||||||
|
# Regenerate Swagger/OpenAPI docs (after modifying API comments)
|
||||||
|
make swagger
|
||||||
|
```
|
||||||
|
|
||||||
|
### Frontend Development
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd web/frontend
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
npm install
|
||||||
|
|
||||||
|
# Build for production
|
||||||
|
npm run build
|
||||||
|
|
||||||
|
# Development mode with watch
|
||||||
|
npm run dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Initialize database and create admin user
|
||||||
|
./cc-backend -init-db -add-user demo:admin:demo
|
||||||
|
|
||||||
|
# Start server in development mode (enables GraphQL Playground and Swagger UI)
|
||||||
|
./cc-backend -server -dev -loglevel info
|
||||||
|
|
||||||
|
# Start demo with sample data
|
||||||
|
./startDemo.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Backend Structure
|
||||||
|
|
||||||
|
The backend follows a layered architecture with clear separation of concerns:
|
||||||
|
|
||||||
|
- **cmd/cc-backend**: Entry point, orchestrates initialization of all subsystems
|
||||||
|
- **internal/repository**: Data access layer using repository pattern
|
||||||
|
- Abstracts database operations (SQLite3 only)
|
||||||
|
- Implements LRU caching for performance
|
||||||
|
- Provides repositories for Job, User, Node, and Tag entities
|
||||||
|
- Transaction support for batch operations
|
||||||
|
- **internal/api**: REST API endpoints (Swagger/OpenAPI documented)
|
||||||
|
- **internal/graph**: GraphQL API (uses gqlgen)
|
||||||
|
- Schema in `api/*.graphqls`
|
||||||
|
- Generated code in `internal/graph/generated/`
|
||||||
|
- Resolvers in `internal/graph/schema.resolvers.go`
|
||||||
|
- **internal/auth**: Authentication layer
|
||||||
|
- Supports local accounts, LDAP, OIDC, and JWT tokens
|
||||||
|
- Implements rate limiting for login attempts
|
||||||
|
- **internal/metricdata**: Metric data repository abstraction
|
||||||
|
- Pluggable backends: cc-metric-store, Prometheus, InfluxDB
|
||||||
|
- Each cluster can have a different metric data backend
|
||||||
|
- **internal/archiver**: Job archiving to file-based archive
|
||||||
|
- **pkg/archive**: Job archive backend implementations
|
||||||
|
- File system backend (default)
|
||||||
|
- S3 backend
|
||||||
|
- SQLite backend (experimental)
|
||||||
|
- **pkg/nats**: NATS integration for metric ingestion
|
||||||
|
|
||||||
|
### Frontend Structure
|
||||||
|
|
||||||
|
- **web/frontend**: Svelte 5 application
|
||||||
|
- Uses Rollup for building
|
||||||
|
- Components organized by feature (analysis, job, user, etc.)
|
||||||
|
- GraphQL client using @urql/svelte
|
||||||
|
- Bootstrap 5 + SvelteStrap for UI
|
||||||
|
- uPlot for time-series visualization
|
||||||
|
- **web/templates**: Server-side Go templates
|
||||||
|
|
||||||
|
### Key Concepts
|
||||||
|
|
||||||
|
**Job Archive**: Completed jobs are stored in a file-based archive following the
|
||||||
|
[ClusterCockpit job-archive
|
||||||
|
specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||||
|
Each job has a `meta.json` file with metadata and metric data files.
|
||||||
|
|
||||||
|
**Metric Data Repositories**: Time-series metric data is stored separately from
|
||||||
|
job metadata. The system supports multiple backends (cc-metric-store is
|
||||||
|
recommended). Configuration is per-cluster in `config.json`.
|
||||||
|
|
||||||
|
**Authentication Flow**:
|
||||||
|
|
||||||
|
1. Multiple authenticators can be configured (local, LDAP, OIDC, JWT)
|
||||||
|
2. Each authenticator's `CanLogin` method is called to determine if it should handle the request
|
||||||
|
3. The first authenticator that returns true performs the actual `Login`
|
||||||
|
4. JWT tokens are used for API authentication
|
||||||
|
|
||||||
|
**Database Migrations**: SQL migrations in `internal/repository/migrations/` are
|
||||||
|
applied automatically on startup. Version tracking in `version` table.
|
||||||
|
|
||||||
|
**Scopes**: Metrics can be collected at different scopes:
|
||||||
|
|
||||||
|
- Node scope (always available)
|
||||||
|
- Core scope (for jobs with ≤8 nodes)
|
||||||
|
- Accelerator scope (for GPU/accelerator metrics)
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- **config.json**: Main configuration (clusters, metric repositories, archive settings)
|
||||||
|
- **.env**: Environment variables (secrets like JWT keys)
|
||||||
|
- Copy from `configs/env-template.txt`
|
||||||
|
- NEVER commit this file
|
||||||
|
- **cluster.json**: Cluster topology and metric definitions (loaded from archive or config)
|
||||||
|
|
||||||
|
## Database
|
||||||
|
|
||||||
|
- Default: SQLite 3 (`./var/job.db`)
|
||||||
|
- Connection managed by `internal/repository`
|
||||||
|
- Schema version in `internal/repository/migration.go`
|
||||||
|
|
||||||
|
## Code Generation
|
||||||
|
|
||||||
|
**GraphQL** (gqlgen):
|
||||||
|
|
||||||
|
- Schema: `api/*.graphqls`
|
||||||
|
- Config: `gqlgen.yml`
|
||||||
|
- Generated code: `internal/graph/generated/`
|
||||||
|
- Custom resolvers: `internal/graph/schema.resolvers.go`
|
||||||
|
- Run `make graphql` after schema changes
|
||||||
|
|
||||||
|
**Swagger/OpenAPI**:
|
||||||
|
|
||||||
|
- Annotations in `internal/api/*.go`
|
||||||
|
- Generated docs: `api/docs.go`, `api/swagger.yaml`
|
||||||
|
- Run `make swagger` after API changes
|
||||||
|
|
||||||
|
## Testing Conventions
|
||||||
|
|
||||||
|
- Test files use `_test.go` suffix
|
||||||
|
- Test data in `testdata/` subdirectories
|
||||||
|
- Repository tests use in-memory SQLite
|
||||||
|
- API tests use httptest
|
||||||
|
|
||||||
|
## Common Workflows
|
||||||
|
|
||||||
|
### Adding a new GraphQL field
|
||||||
|
|
||||||
|
1. Edit schema in `api/*.graphqls`
|
||||||
|
2. Run `make graphql`
|
||||||
|
3. Implement resolver in `internal/graph/schema.resolvers.go`
|
||||||
|
|
||||||
|
### Adding a new REST endpoint
|
||||||
|
|
||||||
|
1. Add handler in `internal/api/*.go`
|
||||||
|
2. Add route in `internal/api/rest.go`
|
||||||
|
3. Add Swagger annotations
|
||||||
|
4. Run `make swagger`
|
||||||
|
|
||||||
|
### Adding a new metric data backend
|
||||||
|
|
||||||
|
1. Implement `MetricDataRepository` interface in `internal/metricdata/`
|
||||||
|
2. Register in `metricdata.Init()` switch statement
|
||||||
|
3. Update config.json schema documentation
|
||||||
|
|
||||||
|
### Modifying database schema
|
||||||
|
|
||||||
|
1. Create new migration in `internal/repository/migrations/`
|
||||||
|
2. Increment `repository.Version`
|
||||||
|
3. Test with fresh database and existing database
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- Go 1.24.0+ (check go.mod for exact version)
|
||||||
|
- Node.js (for frontend builds)
|
||||||
|
- SQLite 3 (only supported database)
|
||||||
|
- Optional: NATS server for metric ingestion
|
||||||
2
LICENSE
2
LICENSE
@@ -1,6 +1,6 @@
|
|||||||
MIT License
|
MIT License
|
||||||
|
|
||||||
Copyright (c) 2022 NHR@FAU, University Erlangen-Nuremberg
|
Copyright (c) NHR@FAU, University Erlangen-Nuremberg
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
|||||||
136
Makefile
136
Makefile
@@ -1,8 +1,6 @@
|
|||||||
TARGET = ./cc-backend
|
TARGET = ./cc-backend
|
||||||
VAR = ./var
|
|
||||||
CFG = config.json .env
|
|
||||||
FRONTEND = ./web/frontend
|
FRONTEND = ./web/frontend
|
||||||
VERSION = 1.2.0
|
VERSION = 1.4.4
|
||||||
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
|
||||||
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
|
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
|
||||||
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
|
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
|
||||||
@@ -22,17 +20,27 @@ SVELTE_COMPONENTS = status \
|
|||||||
header
|
header
|
||||||
|
|
||||||
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
|
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
|
||||||
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
|
||||||
$(wildcard $(FRONTEND)/src/*.js) \
|
$(wildcard $(FRONTEND)/src/*.js) \
|
||||||
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
|
$(wildcard $(FRONTEND)/src/analysis/*.svelte) \
|
||||||
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
|
$(wildcard $(FRONTEND)/src/config/*.svelte) \
|
||||||
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
|
$(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/*.js) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/header/*.svelte) \
|
||||||
|
$(wildcard $(FRONTEND)/src/job/*.svelte)
|
||||||
|
|
||||||
.PHONY: clean distclean test tags frontend $(TARGET)
|
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
|
||||||
|
|
||||||
.NOTPARALLEL:
|
.NOTPARALLEL:
|
||||||
|
|
||||||
$(TARGET): $(VAR) $(CFG) $(SVELTE_TARGETS)
|
$(TARGET): $(SVELTE_TARGETS)
|
||||||
$(info ===> BUILD cc-backend)
|
$(info ===> BUILD cc-backend)
|
||||||
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
||||||
|
|
||||||
@@ -40,6 +48,15 @@ frontend:
|
|||||||
$(info ===> BUILD frontend)
|
$(info ===> BUILD frontend)
|
||||||
cd web/frontend && npm install && npm run build
|
cd web/frontend && npm install && npm run build
|
||||||
|
|
||||||
|
swagger:
|
||||||
|
$(info ===> GENERATE swagger)
|
||||||
|
@go tool github.com/swaggo/swag/cmd/swag init --parseDependency -d ./internal/api -g rest.go -o ./api
|
||||||
|
@mv ./api/docs.go ./internal/api/docs.go
|
||||||
|
|
||||||
|
graphql:
|
||||||
|
$(info ===> GENERATE graphql)
|
||||||
|
@go tool github.com/99designs/gqlgen
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
$(info ===> CLEAN)
|
$(info ===> CLEAN)
|
||||||
@go clean
|
@go clean
|
||||||
@@ -49,7 +66,7 @@ distclean:
|
|||||||
@$(MAKE) clean
|
@$(MAKE) clean
|
||||||
$(info ===> DISTCLEAN)
|
$(info ===> DISTCLEAN)
|
||||||
@rm -rf $(FRONTEND)/node_modules
|
@rm -rf $(FRONTEND)/node_modules
|
||||||
@rm -rf $(VAR)
|
@rm -rf ./var
|
||||||
|
|
||||||
test:
|
test:
|
||||||
$(info ===> TESTING)
|
$(info ===> TESTING)
|
||||||
@@ -63,103 +80,8 @@ tags:
|
|||||||
@ctags -R
|
@ctags -R
|
||||||
|
|
||||||
$(VAR):
|
$(VAR):
|
||||||
@mkdir $(VAR)
|
@mkdir -p $(VAR)
|
||||||
|
|
||||||
config.json:
|
|
||||||
$(info ===> Initialize config.json file)
|
|
||||||
@cp configs/config.json config.json
|
|
||||||
|
|
||||||
.env:
|
|
||||||
$(info ===> Initialize .env file)
|
|
||||||
@cp configs/env-template.txt .env
|
|
||||||
|
|
||||||
$(SVELTE_TARGETS): $(SVELTE_SRC)
|
$(SVELTE_TARGETS): $(SVELTE_SRC)
|
||||||
$(info ===> BUILD frontend)
|
$(info ===> BUILD frontend)
|
||||||
cd web/frontend && npm install && npm run build
|
cd web/frontend && npm install && npm run build
|
||||||
|
|
||||||
install: $(TARGET)
|
|
||||||
@WORKSPACE=$(PREFIX)
|
|
||||||
@if [ -z "$${WORKSPACE}" ]; then exit 1; fi
|
|
||||||
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
|
|
||||||
@install -Dpm 755 $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
|
||||||
@install -Dpm 600 configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
|
||||||
|
|
||||||
.ONESHELL:
|
|
||||||
.PHONY: RPM
|
|
||||||
RPM: build/package/cc-backend.spec
|
|
||||||
@WORKSPACE="$${PWD}"
|
|
||||||
@SPECFILE="$${WORKSPACE}/build/package/cc-backend.spec"
|
|
||||||
# Setup RPM build tree
|
|
||||||
@eval $$(rpm --eval "ARCH='%{_arch}' RPMDIR='%{_rpmdir}' SOURCEDIR='%{_sourcedir}' SPECDIR='%{_specdir}' SRPMDIR='%{_srcrpmdir}' BUILDDIR='%{_builddir}'")
|
|
||||||
@mkdir --parents --verbose "$${RPMDIR}" "$${SOURCEDIR}" "$${SPECDIR}" "$${SRPMDIR}" "$${BUILDDIR}"
|
|
||||||
# Create source tarball
|
|
||||||
@COMMITISH="HEAD"
|
|
||||||
@VERS=$$(git describe --tags $${COMMITISH})
|
|
||||||
@VERS=$${VERS#v}
|
|
||||||
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
|
||||||
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
|
|
||||||
@eval $$(rpmspec --query --queryformat "NAME='%{name}' VERSION='%{version}' RELEASE='%{release}' NVR='%{NVR}' NVRA='%{NVRA}'" --define="VERS $${VERS}" "$${SPECFILE}")
|
|
||||||
@PREFIX="$${NAME}-$${VERSION}"
|
|
||||||
@FORMAT="tar.gz"
|
|
||||||
@SRCFILE="$${SOURCEDIR}/$${PREFIX}.$${FORMAT}"
|
|
||||||
@git archive --verbose --format "$${FORMAT}" --prefix="$${PREFIX}/" --output="$${SRCFILE}" $${COMMITISH}
|
|
||||||
# Build RPM and SRPM
|
|
||||||
@rpmbuild -ba --define="VERS $${VERS}" --rmsource --clean "$${SPECFILE}"
|
|
||||||
# Report RPMs and SRPMs when in GitHub Workflow
|
|
||||||
@if [ "$${GITHUB_ACTIONS}" = true ]; then
|
|
||||||
@ RPMFILE="$${RPMDIR}/$${ARCH}/$${NVRA}.rpm"
|
|
||||||
@ SRPMFILE="$${SRPMDIR}/$${NVR}.src.rpm"
|
|
||||||
@ echo "RPM: $${RPMFILE}"
|
|
||||||
@ echo "SRPM: $${SRPMFILE}"
|
|
||||||
@ echo "::set-output name=SRPM::$${SRPMFILE}"
|
|
||||||
@ echo "::set-output name=RPM::$${RPMFILE}"
|
|
||||||
@fi
|
|
||||||
|
|
||||||
.ONESHELL:
|
|
||||||
.PHONY: DEB
|
|
||||||
DEB: build/package/cc-backend.deb.control
|
|
||||||
@BASEDIR=$${PWD}
|
|
||||||
@WORKSPACE=$${PWD}/.dpkgbuild
|
|
||||||
@DEBIANDIR=$${WORKSPACE}/debian
|
|
||||||
@DEBIANBINDIR=$${WORKSPACE}/DEBIAN
|
|
||||||
@mkdir --parents --verbose $$WORKSPACE $$DEBIANBINDIR
|
|
||||||
#@mkdir --parents --verbose $$DEBIANDIR
|
|
||||||
@CONTROLFILE="$${BASEDIR}/build/package/cc-backend.deb.control"
|
|
||||||
@COMMITISH="HEAD"
|
|
||||||
@VERS=$$(git describe --tags --abbrev=0 $${COMMITISH})
|
|
||||||
@VERS=$${VERS#v}
|
|
||||||
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
|
|
||||||
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
|
|
||||||
@ARCH=$$(uname -m)
|
|
||||||
@ARCH=$$(echo $$ARCH | sed -e s+'_'+'-'+g)
|
|
||||||
@if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi
|
|
||||||
@PREFIX="$${NAME}-$${VERSION}_$${ARCH}"
|
|
||||||
@SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$${WORKSPACE}"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//')
|
|
||||||
@SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')"
|
|
||||||
#@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control
|
|
||||||
@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control
|
|
||||||
@mkdir --parents --verbose "$${WORKSPACE}"/$(VAR)
|
|
||||||
@touch "$${WORKSPACE}"/$(VAR)/job.db
|
|
||||||
@cd web/frontend && yarn install && yarn build && cd -
|
|
||||||
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
|
|
||||||
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
|
|
||||||
@cp $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
|
||||||
@chmod 0755 $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
|
|
||||||
@mkdir --parents --verbose $${WORKSPACE}/etc/$(TARGET)
|
|
||||||
@cp configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
|
||||||
@chmod 0600 $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
|
|
||||||
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/systemd/system
|
|
||||||
@cp build/package/$(TARGET).service $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
|
|
||||||
@chmod 0644 $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
|
|
||||||
@mkdir --parents --verbose $${WORKSPACE}/etc/default
|
|
||||||
@cp build/package/$(TARGET).config $${WORKSPACE}/etc/default/$(TARGET)
|
|
||||||
@chmod 0600 $${WORKSPACE}/etc/default/$(TARGET)
|
|
||||||
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/sysusers.d
|
|
||||||
@cp build/package/$(TARGET).sysusers $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
|
|
||||||
@chmod 0644 $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
|
|
||||||
@DEB_FILE="cc-metric-store_$${VERS}_$${ARCH}.deb"
|
|
||||||
@dpkg-deb -b $${WORKSPACE} "$$DEB_FILE"
|
|
||||||
@rm -r "$${WORKSPACE}"
|
|
||||||
@if [ "$${GITHUB_ACTIONS}" = "true" ]; then
|
|
||||||
@ echo "::set-output name=DEB::$${DEB_FILE}"
|
|
||||||
@fi
|
|
||||||
|
|||||||
203
README.md
203
README.md
@@ -1,9 +1,19 @@
|
|||||||
|
# NOTE
|
||||||
|
|
||||||
|
While we do our best to keep the master branch in a usable state, there is no guarantee the master branch works.
|
||||||
|
Please do not use it for production!
|
||||||
|
|
||||||
|
Please have a look at the [Release
|
||||||
|
Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
|
||||||
|
for breaking changes!
|
||||||
|
|
||||||
# ClusterCockpit REST and GraphQL API backend
|
# ClusterCockpit REST and GraphQL API backend
|
||||||
|
|
||||||
[](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)
|
[](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)
|
||||||
|
|
||||||
This is a Golang backend implementation for a REST and GraphQL API according to
|
This is a Golang backend implementation for a REST and GraphQL API according to
|
||||||
the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications). It also
|
the [ClusterCockpit
|
||||||
|
specifications](https://github.com/ClusterCockpit/cc-specifications). It also
|
||||||
includes a web interface for ClusterCockpit. This implementation replaces the
|
includes a web interface for ClusterCockpit. This implementation replaces the
|
||||||
previous PHP Symfony based ClusterCockpit web interface. The reasons for
|
previous PHP Symfony based ClusterCockpit web interface. The reasons for
|
||||||
switching from PHP Symfony to a Golang based solution are explained
|
switching from PHP Symfony to a Golang based solution are explained
|
||||||
@@ -11,31 +21,30 @@ switching from PHP Symfony to a Golang based solution are explained
|
|||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
|
This is a Golang web backend for the ClusterCockpit job-specific performance
|
||||||
|
monitoring framework. It provides a REST API for integrating ClusterCockpit with
|
||||||
|
an HPC cluster batch system and external analysis scripts. Data exchange between
|
||||||
|
the web front-end and the back-end is based on a GraphQL API. The web frontend
|
||||||
|
is also served by the backend using [Svelte](https://svelte.dev/) components.
|
||||||
|
Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using
|
||||||
|
[Bootstrap Icons](https://icons.getbootstrap.com/).
|
||||||
|
|
||||||
This is a Golang web backend for the ClusterCockpit job-specific performance monitoring framework.
|
The backend uses [SQLite 3](https://sqlite.org/) as the relational SQL database.
|
||||||
It provides a REST API for integrating ClusterCockpit with an HPC cluster batch system and external analysis scripts.
|
While there are metric data backends for the InfluxDB and Prometheus time series
|
||||||
Data exchange between the web front-end and the back-end is based on a GraphQL API.
|
databases, the only tested and supported setup is to use cc-metric-store as the
|
||||||
The web frontend is also served by the backend using [Svelte](https://svelte.dev/) components.
|
metric data backend. Documentation on how to integrate ClusterCockpit with other
|
||||||
Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/).
|
time series databases will be added in the future.
|
||||||
|
|
||||||
The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by default.
|
|
||||||
Optionally it can use a MySQL/MariaDB database server.
|
|
||||||
While there are metric data backends for the InfluxDB and Prometheus time series databases, the only tested and supported setup is to use cc-metric-store as the metric data backend.
|
|
||||||
Documentation on how to integrate ClusterCockpit with other time series databases will be added in the future.
|
|
||||||
|
|
||||||
Completed batch jobs are stored in a file-based job archive according to
|
Completed batch jobs are stored in a file-based job archive according to
|
||||||
[this specification] (https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
[this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
||||||
The backend supports authentication via local accounts, an external LDAP
|
The backend supports authentication via local accounts, an external LDAP
|
||||||
directory, and JWT tokens. Authorization for APIs is implemented with
|
directory, and JWT tokens. Authorization for APIs is implemented with
|
||||||
[JWT](https://jwt.io/) tokens created with public/private key encryption.
|
[JWT](https://jwt.io/) tokens created with public/private key encryption.
|
||||||
|
|
||||||
You find more detailed information here:
|
You find a detailed documentation on the [ClusterCockpit
|
||||||
* `./configs/README.md`: Infos about configuration and setup of cc-backend.
|
Webpage](https://clustercockpit.org).
|
||||||
* `./init/README.md`: Infos on how to setup cc-backend as systemd service on Linux.
|
|
||||||
* `./tools/README.md`: Infos on the JWT authorizatin token workflows in ClusterCockpit.
|
|
||||||
* `./docs`: You can find further documentation here. There is also a Hands-on tutorial that is recommended to get familiar with the ClusterCockpit setup.
|
|
||||||
|
|
||||||
**NOTE**
|
## Build requirements
|
||||||
|
|
||||||
ClusterCockpit requires a current version of the golang toolchain and node.js.
|
ClusterCockpit requires a current version of the golang toolchain and node.js.
|
||||||
You can check `go.mod` to see what is the current minimal golang version needed.
|
You can check `go.mod` to see what is the current minimal golang version needed.
|
||||||
@@ -46,7 +55,7 @@ on the Go standard library, it is crucial for security and performance to use a
|
|||||||
current version of golang. In addition, an old golang toolchain may limit the supported
|
current version of golang. In addition, an old golang toolchain may limit the supported
|
||||||
versions of third-party packages.
|
versions of third-party packages.
|
||||||
|
|
||||||
## How to try ClusterCockpit with a demo setup.
|
## How to try ClusterCockpit with a demo setup
|
||||||
|
|
||||||
We provide a shell script that downloads demo data and automatically starts the
|
We provide a shell script that downloads demo data and automatically starts the
|
||||||
cc-backend. You will need `wget`, `go`, `node`, `npm` in your path to
|
cc-backend. You will need `wget`, `go`, `node`, `npm` in your path to
|
||||||
@@ -58,31 +67,37 @@ cd ./cc-backend
|
|||||||
./startDemo.sh
|
./startDemo.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also try the demo using the lates release binary.
|
You can also try the demo using the latest release binary.
|
||||||
Create a folder and put the release binary `cc-backend` into this folder.
|
Create a folder and put the release binary `cc-backend` into this folder.
|
||||||
Execute the following steps:
|
Execute the following steps:
|
||||||
```
|
|
||||||
$ ./cc-backend -init
|
```shell
|
||||||
$ vim config.json (Add a second cluster entry and name the clusters alex and fritz)
|
./cc-backend -init
|
||||||
$ wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
|
vim config.json (Add a second cluster entry and name the clusters alex and fritz)
|
||||||
$ tar xf job-archive-demo.tar
|
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
|
||||||
$ ./cc-backend -init-db -add-user demo:admin:demo -loglevel info
|
tar xf job-archive-demo.tar
|
||||||
$ ./cc-backend -server -dev -loglevel info
|
./cc-backend -init-db -add-user demo:admin:demo -loglevel info
|
||||||
|
./cc-backend -server -dev -loglevel info
|
||||||
```
|
```
|
||||||
|
|
||||||
You can access the web interface at http://localhost:8080.
|
You can access the web interface at [http://localhost:8080](http://localhost:8080).
|
||||||
Credentials for login are `demo:demo`.
|
Credentials for login are `demo:demo`.
|
||||||
Please note that some views do not work without a metric backend (e.g., the
|
Please note that some views do not work without a metric backend (e.g., the
|
||||||
Analysis, Systems and Status views).
|
Analysis, Systems and Status views).
|
||||||
|
|
||||||
## Howto build and run
|
## How to build and run
|
||||||
|
|
||||||
There is a Makefile to automate the build of cc-backend. The Makefile supports the following targets:
|
There is a Makefile to automate the build of cc-backend. The Makefile supports
|
||||||
* `$ make`: Initialize `var` directory and build svelte frontend and backend binary. Note that there is no proper prerequesite handling. Any change of frontend source files will result in a complete rebuild.
|
the following targets:
|
||||||
* `$ make clean`: Clean go build cache and remove binary.
|
|
||||||
* `$ make test`: Run the tests that are also run in the GitHub workflow setup.
|
- `make`: Initialize `var` directory and build svelte frontend and backend
|
||||||
|
binary. Note that there is no proper prerequisite handling. Any change of
|
||||||
|
frontend source files will result in a complete rebuild.
|
||||||
|
- `make clean`: Clean go build cache and remove binary.
|
||||||
|
- `make test`: Run the tests that are also run in the GitHub workflow setup.
|
||||||
|
|
||||||
A common workflow for setting up cc-backend from scratch is:
|
A common workflow for setting up cc-backend from scratch is:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
git clone https://github.com/ClusterCockpit/cc-backend.git
|
git clone https://github.com/ClusterCockpit/cc-backend.git
|
||||||
|
|
||||||
@@ -113,89 +128,43 @@ ln -s <your-existing-job-archive> ./var/job-archive
|
|||||||
./cc-backend -help
|
./cc-backend -help
|
||||||
```
|
```
|
||||||
|
|
||||||
### Run as systemd daemon
|
|
||||||
|
|
||||||
To run this program as a daemon, cc-backend comes with a [example systemd setup](./init/README.md).
|
|
||||||
|
|
||||||
## Configuration and setup
|
|
||||||
|
|
||||||
cc-backend can be used as a local web interface for an existing job archive or
|
|
||||||
as a server for the ClusterCockpit monitoring framework.
|
|
||||||
|
|
||||||
Create your job archive according to [this specification] (https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
|
|
||||||
At least one cluster directory with a valid `cluster.json` file is required. If
|
|
||||||
you configure the job archive from scratch, you must also create the job
|
|
||||||
archive version file that contains the job archive version as an integer.
|
|
||||||
You can retrieve the currently supported version by running the following
|
|
||||||
command:
|
|
||||||
```
|
|
||||||
$ ./cc-backend -version
|
|
||||||
```
|
|
||||||
It is ok to have no jobs in the job archive.
|
|
||||||
|
|
||||||
### Configuration
|
|
||||||
|
|
||||||
A configuration file in JSON format must be specified with `-config` to override the default settings.
|
|
||||||
By default, a `config.json` file located in the current directory of the `cc-backend` process will be loaded even without the `-config` flag.
|
|
||||||
Documentation of all supported configuration and command line options can be found [here](./configs/README.md).
|
|
||||||
|
|
||||||
## Database initialization and migration
|
|
||||||
|
|
||||||
Each `cc-backend` version supports a specific database version.
|
|
||||||
At startup, the version of the sqlite database is checked and `cc-backend` terminates if the version does not match.
|
|
||||||
`cc-backend` supports the migration of the database schema to the required version with the command line option `-migrate-db`.
|
|
||||||
If the database file does not exist yet, it will be created and initialized with the command line option `-migrate-db`.
|
|
||||||
If you want to use a newer database version with an older version of cc-backend, you can downgrade a database with the external tool [migrate](https://github.com/golang-migrate/migrate).
|
|
||||||
In this case, you must specify the path to the migration files in a current source tree: `./internal/repository/migrations/`.
|
|
||||||
|
|
||||||
## Development and testing
|
|
||||||
When making changes to the REST or GraphQL API, the appropriate code generators must be used.
|
|
||||||
You must always rebuild `cc-backend` after updating the API files.
|
|
||||||
|
|
||||||
### Update GraphQL schema
|
|
||||||
|
|
||||||
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API.
|
|
||||||
The schema can be found in `./api/schema.graphqls`.
|
|
||||||
After changing it, you need to run `go run github.com/99designs/gqlgen`, which will update `./internal/graph/model`.
|
|
||||||
If new resolvers are needed, they will be added to `./internal/graph/schema.resolvers.go`, where you will then need to implement them.
|
|
||||||
If you start `cc-backend` with the `-dev` flag, the GraphQL Playground UI is available at http://localhost:8080/playground.
|
|
||||||
|
|
||||||
### Update Swagger UI
|
|
||||||
|
|
||||||
This project integrates [swagger ui] (https://swagger.io/tools/swagger-ui/) to document and test its REST API.
|
|
||||||
The swagger documentation files can be found in `./api/`.
|
|
||||||
You can generate the swagger-ui configuration by running `go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api `.
|
|
||||||
You need to move the created `./api/docs.go` to `./internal/api/docs.go`.
|
|
||||||
If you start cc-backend with the `-dev` flag, the Swagger interface is available
|
|
||||||
at http://localhost:8080/swagger/.
|
|
||||||
You must enter a JWT key for a user with the API role.
|
|
||||||
|
|
||||||
**NOTE**
|
|
||||||
|
|
||||||
The user who owns the JWT key must not be logged into the same browser (have a
|
|
||||||
running session), or the Swagger requests will not work. It is recommended to
|
|
||||||
create a separate user that has only the API role.
|
|
||||||
|
|
||||||
## Development and testing
|
|
||||||
In case the REST or GraphQL API is changed the according code generators have to be used.
|
|
||||||
|
|
||||||
## Project file structure
|
## Project file structure
|
||||||
|
|
||||||
- [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api) contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in [./api/openapi.yaml](./api/openapi.yaml).
|
- [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
|
||||||
- [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend) contains `main.go` for the main application.
|
contains the API schema files for the REST and GraphQL APIs. The REST API is
|
||||||
- [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs) contains documentation about configuration and command line options and required environment variables. A sample configuration file is provided.
|
documented in the OpenAPI 3.0 format in
|
||||||
- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs) contains more in-depth documentation.
|
[./api/openapi.yaml](./api/openapi.yaml).
|
||||||
- [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init) contains an example of setting up systemd for production use.
|
- [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
|
||||||
- [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal) contains library source code that is not intended for use by others.
|
contains `main.go` for the main application.
|
||||||
- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg) contains Go packages that can be used by other projects.
|
- [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
|
||||||
- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools) Additional command line helper tools.
|
contains documentation about configuration and command line options and required
|
||||||
- [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager) Commands for getting infos about and existing job archive.
|
environment variables. A sample configuration file is provided.
|
||||||
- [`archive-migration`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-migration) Tool to migrate from previous to current job archive version.
|
- [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs)
|
||||||
- [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey) Tool to convert external pubkey for use in `cc-backend`.
|
contains more in-depth documentation.
|
||||||
- [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair) contains a small application to generate a compatible JWT keypair. You find documentation on how to use it [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
|
- [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
|
||||||
- [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web) Server-side templates and frontend-related files:
|
contains an example of setting up systemd for production use.
|
||||||
- [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend) Svelte components and static assets for the frontend UI
|
- [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
|
||||||
- [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates) Server-side Go templates
|
contains library source code that is not intended for use by others.
|
||||||
- [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml) Configures the behaviour and generation of [gqlgen](https://github.com/99designs/gqlgen).
|
- [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
|
||||||
- [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh) is a shell script that sets up demo data, and builds and starts `cc-backend`.
|
contains Go packages that can be used by other projects.
|
||||||
|
- [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
|
||||||
|
Additional command line helper tools.
|
||||||
|
- [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
|
||||||
|
Commands for getting infos about and existing job archive.
|
||||||
|
- [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
|
||||||
|
Tool to convert external pubkey for use in `cc-backend`.
|
||||||
|
- [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
|
||||||
|
contains a small application to generate a compatible JWT keypair. You find
|
||||||
|
documentation on how to use it
|
||||||
|
[here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
|
||||||
|
- [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web)
|
||||||
|
Server-side templates and frontend-related files:
|
||||||
|
- [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
|
||||||
|
Svelte components and static assets for the frontend UI
|
||||||
|
- [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
|
||||||
|
Server-side Go templates
|
||||||
|
- [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
|
||||||
|
Configures the behaviour and generation of
|
||||||
|
[gqlgen](https://github.com/99designs/gqlgen).
|
||||||
|
- [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh)
|
||||||
|
is a shell script that sets up demo data, and builds and starts `cc-backend`.
|
||||||
|
|||||||
@@ -1,36 +1,47 @@
|
|||||||
# `cc-backend` version 1.2.0
|
# `cc-backend` version 1.4.4
|
||||||
|
|
||||||
Supports job archive version 1 and database version 6.
|
Supports job archive version 2 and database version 8.
|
||||||
|
|
||||||
This is a minor release of `cc-backend`, the API backend and frontend
|
This is a bug fix release of `cc-backend`, the API backend and frontend
|
||||||
implementation of ClusterCockpit.
|
implementation of ClusterCockpit.
|
||||||
|
For release specific notes visit the [ClusterCockpit Documentation](https://clusterockpit.org/docs/release/).
|
||||||
|
|
||||||
** Breaking changes **
|
## Breaking changes
|
||||||
|
|
||||||
* The LDAP configuration option user_filter was changed and now should not include
|
The option `apiAllowedIPs` is now a required configuration attribute in
|
||||||
the uid wildcard. Example:
|
`config.json`. This option restricts access to the admin API.
|
||||||
- Old: `"user_filter": "(&(objectclass=posixAccount)(uid=*))"`
|
|
||||||
- New: `"user_filter": "(&(objectclass=posixAccount))"`
|
|
||||||
|
|
||||||
* The aggregate job statistic core hours is now computed using the job table
|
To retain the previous behavior that the API is per default accessible from
|
||||||
column `num_hwthreads`. In a future release this column will be renamed to
|
everywhere set:
|
||||||
`num_cores`. For correct display of core hours `num_hwthreads` must be correctly
|
|
||||||
filled on job start. If your existing jobs do not provide the correct value in
|
|
||||||
this column then you can set this with one SQL INSERT statement. This only applies
|
|
||||||
if you have exclusive jobs, only. Please be aware that we treat this column as
|
|
||||||
it is the number of cores. In case you have SMT enabled and `num_hwthreads`
|
|
||||||
is not the number of cores the core hours will be too high by a factor!
|
|
||||||
|
|
||||||
* The jwts key is now mandatory in config.json. It has to set max-age for
|
```json
|
||||||
validity. Some key names have changed, please refer to
|
"apiAllowedIPs": [
|
||||||
[config documentation](./configs/README.md) for details.
|
"*"
|
||||||
|
]
|
||||||
** NOTE **
|
|
||||||
If you are using the sqlite3 backend the `PRAGMA` option `foreign_keys` must be
|
|
||||||
explicitly set to ON. If using the sqlite3 console it is per default set to
|
|
||||||
OFF! On every console session you must set:
|
|
||||||
```
|
```
|
||||||
sqlite> PRAGMA foreign_keys = ON;
|
|
||||||
|
|
||||||
```
|
## Breaking changes for minor release 1.4.x
|
||||||
Otherwise if you delete jobs the jobtag relation table will not be updated accordingly!
|
|
||||||
|
- You need to perform a database migration. Depending on your database size the
|
||||||
|
migration might require several hours!
|
||||||
|
- You need to adapt the `cluster.json` configuration files in the job-archive,
|
||||||
|
add new required attributes to the metric list and after that edit
|
||||||
|
`./job-archive/version.txt` to version 2. Only metrics that have the footprint
|
||||||
|
attribute set can be filtered and show up in the footprint UI and polar plot.
|
||||||
|
- Continuous scrolling is default now in all job lists. You can change this back
|
||||||
|
to paging globally, also every user can configure to use paging or continuous
|
||||||
|
scrolling individually.
|
||||||
|
- Tags have a scope now. Existing tags will get global scope in the database
|
||||||
|
migration.
|
||||||
|
|
||||||
|
## New features
|
||||||
|
|
||||||
|
- Enable to delete tags from the web interface
|
||||||
|
|
||||||
|
## Known issues
|
||||||
|
|
||||||
|
- Currently energy footprint metrics of type energy are ignored for calculating
|
||||||
|
total energy.
|
||||||
|
- Resampling for running jobs only works with cc-metric-store
|
||||||
|
- With energy footprint metrics of type power the unit is ignored and it is
|
||||||
|
assumed the metric has the unit Watt.
|
||||||
|
|||||||
@@ -4,133 +4,222 @@ scalar Any
|
|||||||
scalar NullableFloat
|
scalar NullableFloat
|
||||||
scalar MetricScope
|
scalar MetricScope
|
||||||
scalar JobState
|
scalar JobState
|
||||||
|
scalar SchedulerState
|
||||||
|
scalar MonitoringState
|
||||||
|
|
||||||
|
type Node {
|
||||||
|
id: ID!
|
||||||
|
hostname: String!
|
||||||
|
cluster: String!
|
||||||
|
subCluster: String!
|
||||||
|
jobsRunning: Int!
|
||||||
|
cpusAllocated: Int
|
||||||
|
memoryAllocated: Int
|
||||||
|
gpusAllocated: Int
|
||||||
|
schedulerState: SchedulerState!
|
||||||
|
healthState: MonitoringState!
|
||||||
|
metaData: Any
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStates {
|
||||||
|
state: String!
|
||||||
|
count: Int!
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStatesTimed {
|
||||||
|
state: String!
|
||||||
|
counts: [Int!]!
|
||||||
|
times: [Int!]!
|
||||||
|
}
|
||||||
|
|
||||||
type Job {
|
type Job {
|
||||||
id: ID!
|
id: ID!
|
||||||
jobId: Int!
|
jobId: Int!
|
||||||
user: String!
|
user: String!
|
||||||
project: String!
|
project: String!
|
||||||
cluster: String!
|
cluster: String!
|
||||||
subCluster: String!
|
subCluster: String!
|
||||||
startTime: Time!
|
startTime: Time!
|
||||||
duration: Int!
|
duration: Int!
|
||||||
walltime: Int!
|
walltime: Int!
|
||||||
numNodes: Int!
|
numNodes: Int!
|
||||||
numHWThreads: Int!
|
numHWThreads: Int!
|
||||||
numAcc: Int!
|
numAcc: Int!
|
||||||
SMT: Int!
|
energy: Float!
|
||||||
exclusive: Int!
|
SMT: Int!
|
||||||
partition: String!
|
shared: String!
|
||||||
arrayJobId: Int!
|
partition: String!
|
||||||
|
arrayJobId: Int!
|
||||||
monitoringStatus: Int!
|
monitoringStatus: Int!
|
||||||
state: JobState!
|
state: JobState!
|
||||||
tags: [Tag!]!
|
tags: [Tag!]!
|
||||||
resources: [Resource!]!
|
resources: [Resource!]!
|
||||||
concurrentJobs: JobLinkResultList
|
concurrentJobs: JobLinkResultList
|
||||||
|
footprint: [FootprintValue]
|
||||||
metaData: Any
|
energyFootprint: [EnergyFootprintValue]
|
||||||
userData: User
|
metaData: Any
|
||||||
|
userData: User
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobLink {
|
type JobLink {
|
||||||
id: ID!
|
id: ID!
|
||||||
jobId: Int!
|
jobId: Int!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Cluster {
|
type Cluster {
|
||||||
name: String!
|
name: String!
|
||||||
partitions: [String!]! # Slurm partitions
|
partitions: [String!]! # Slurm partitions
|
||||||
metricConfig: [MetricConfig!]!
|
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type SubCluster {
|
type SubCluster {
|
||||||
name: String!
|
name: String!
|
||||||
nodes: String!
|
nodes: String!
|
||||||
numberOfNodes: Int!
|
numberOfNodes: Int!
|
||||||
processorType: String!
|
processorType: String!
|
||||||
socketsPerNode: Int!
|
socketsPerNode: Int!
|
||||||
coresPerSocket: Int!
|
coresPerSocket: Int!
|
||||||
threadsPerCore: Int!
|
threadsPerCore: Int!
|
||||||
flopRateScalar: MetricValue!
|
flopRateScalar: MetricValue!
|
||||||
flopRateSimd: MetricValue!
|
flopRateSimd: MetricValue!
|
||||||
memoryBandwidth: MetricValue!
|
memoryBandwidth: MetricValue!
|
||||||
topology: Topology!
|
topology: Topology!
|
||||||
|
metricConfig: [MetricConfig!]!
|
||||||
|
footprint: [String!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type FootprintValue {
|
||||||
|
name: String!
|
||||||
|
stat: String!
|
||||||
|
value: Float!
|
||||||
|
}
|
||||||
|
|
||||||
|
type EnergyFootprintValue {
|
||||||
|
hardware: String!
|
||||||
|
metric: String!
|
||||||
|
value: Float!
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricValue {
|
type MetricValue {
|
||||||
|
name: String
|
||||||
unit: Unit!
|
unit: Unit!
|
||||||
value: Float!
|
value: Float!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Topology {
|
type Topology {
|
||||||
node: [Int!]
|
node: [Int!]
|
||||||
socket: [[Int!]!]
|
socket: [[Int!]!]
|
||||||
memoryDomain: [[Int!]!]
|
memoryDomain: [[Int!]!]
|
||||||
die: [[Int!]!]
|
die: [[Int!]!]
|
||||||
core: [[Int!]!]
|
core: [[Int!]!]
|
||||||
accelerators: [Accelerator!]
|
accelerators: [Accelerator!]
|
||||||
}
|
}
|
||||||
|
|
||||||
type Accelerator {
|
type Accelerator {
|
||||||
id: String!
|
id: String!
|
||||||
type: String!
|
type: String!
|
||||||
model: String!
|
model: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
type SubClusterConfig {
|
type SubClusterConfig {
|
||||||
name: String!
|
name: String!
|
||||||
peak: Float
|
peak: Float
|
||||||
normal: Float
|
normal: Float
|
||||||
caution: Float
|
caution: Float
|
||||||
alert: Float
|
alert: Float
|
||||||
remove: Boolean
|
remove: Boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricConfig {
|
type MetricConfig {
|
||||||
name: String!
|
name: String!
|
||||||
unit: Unit!
|
unit: Unit!
|
||||||
scope: MetricScope!
|
scope: MetricScope!
|
||||||
aggregation: String!
|
aggregation: String!
|
||||||
timestep: Int!
|
timestep: Int!
|
||||||
peak: Float!
|
peak: Float!
|
||||||
normal: Float
|
normal: Float
|
||||||
caution: Float!
|
caution: Float!
|
||||||
alert: Float!
|
alert: Float!
|
||||||
|
lowerIsBetter: Boolean
|
||||||
subClusters: [SubClusterConfig!]!
|
subClusters: [SubClusterConfig!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Tag {
|
type Tag {
|
||||||
id: ID!
|
id: ID!
|
||||||
type: String!
|
type: String!
|
||||||
name: String!
|
name: String!
|
||||||
|
scope: String!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Resource {
|
type Resource {
|
||||||
hostname: String!
|
hostname: String!
|
||||||
hwthreads: [Int!]
|
hwthreads: [Int!]
|
||||||
accelerators: [String!]
|
accelerators: [String!]
|
||||||
configuration: String
|
configuration: String
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobMetricWithName {
|
type JobMetricWithName {
|
||||||
name: String!
|
name: String!
|
||||||
scope: MetricScope!
|
scope: MetricScope!
|
||||||
metric: JobMetric!
|
metric: JobMetric!
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ClusterMetricWithName {
|
||||||
|
name: String!
|
||||||
|
unit: Unit
|
||||||
|
timestep: Int!
|
||||||
|
data: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
type JobMetric {
|
type JobMetric {
|
||||||
unit: Unit
|
unit: Unit
|
||||||
timestep: Int!
|
timestep: Int!
|
||||||
series: [Series!]
|
series: [Series!]
|
||||||
statisticsSeries: StatsSeries
|
statisticsSeries: StatsSeries
|
||||||
}
|
}
|
||||||
|
|
||||||
type Series {
|
type Series {
|
||||||
hostname: String!
|
hostname: String!
|
||||||
id: String
|
id: String
|
||||||
statistics: MetricStatistics
|
statistics: MetricStatistics
|
||||||
data: [NullableFloat!]!
|
data: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type StatsSeries {
|
||||||
|
mean: [NullableFloat!]!
|
||||||
|
median: [NullableFloat!]!
|
||||||
|
min: [NullableFloat!]!
|
||||||
|
max: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type NamedStatsWithScope {
|
||||||
|
name: String!
|
||||||
|
scope: MetricScope!
|
||||||
|
stats: [ScopedStats!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type ScopedStats {
|
||||||
|
hostname: String!
|
||||||
|
id: String
|
||||||
|
data: MetricStatistics!
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobStats {
|
||||||
|
id: Int!
|
||||||
|
jobId: String!
|
||||||
|
startTime: Int!
|
||||||
|
duration: Int!
|
||||||
|
cluster: String!
|
||||||
|
subCluster: String!
|
||||||
|
numNodes: Int!
|
||||||
|
numHWThreads: Int
|
||||||
|
numAccelerators: Int
|
||||||
|
stats: [NamedStats!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type NamedStats {
|
||||||
|
name: String!
|
||||||
|
data: MetricStatistics!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Unit {
|
type Unit {
|
||||||
@@ -144,20 +233,14 @@ type MetricStatistics {
|
|||||||
max: Float!
|
max: Float!
|
||||||
}
|
}
|
||||||
|
|
||||||
type StatsSeries {
|
|
||||||
mean: [NullableFloat!]!
|
|
||||||
min: [NullableFloat!]!
|
|
||||||
max: [NullableFloat!]!
|
|
||||||
}
|
|
||||||
|
|
||||||
type MetricFootprints {
|
type MetricFootprints {
|
||||||
metric: String!
|
metric: String!
|
||||||
data: [NullableFloat!]!
|
data: [NullableFloat!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Footprints {
|
type Footprints {
|
||||||
timeWeights: TimeWeights!
|
timeWeights: TimeWeights!
|
||||||
metrics: [MetricFootprints!]!
|
metrics: [MetricFootprints!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type TimeWeights {
|
type TimeWeights {
|
||||||
@@ -166,87 +249,221 @@ type TimeWeights {
|
|||||||
coreHours: [NullableFloat!]!
|
coreHours: [NullableFloat!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Aggregate { USER, PROJECT, CLUSTER }
|
enum Aggregate {
|
||||||
enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS }
|
USER
|
||||||
|
PROJECT
|
||||||
|
CLUSTER
|
||||||
|
SUBCLUSTER
|
||||||
|
}
|
||||||
|
enum SortByAggregate {
|
||||||
|
TOTALWALLTIME
|
||||||
|
TOTALJOBS
|
||||||
|
TOTALUSERS
|
||||||
|
TOTALNODES
|
||||||
|
TOTALNODEHOURS
|
||||||
|
TOTALCORES
|
||||||
|
TOTALCOREHOURS
|
||||||
|
TOTALACCS
|
||||||
|
TOTALACCHOURS
|
||||||
|
}
|
||||||
|
|
||||||
type NodeMetrics {
|
type NodeMetrics {
|
||||||
host: String!
|
host: String!
|
||||||
|
state: String!
|
||||||
subCluster: String!
|
subCluster: String!
|
||||||
metrics: [JobMetricWithName!]!
|
metrics: [JobMetricWithName!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type ClusterMetrics {
|
||||||
|
nodeCount: Int!
|
||||||
|
metrics: [ClusterMetricWithName!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodesResultList {
|
||||||
|
items: [NodeMetrics!]!
|
||||||
|
offset: Int
|
||||||
|
limit: Int
|
||||||
|
count: Int
|
||||||
|
totalNodes: Int
|
||||||
|
hasNextPage: Boolean
|
||||||
|
}
|
||||||
|
|
||||||
|
type ClusterSupport {
|
||||||
|
cluster: String!
|
||||||
|
subClusters: [String!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type GlobalMetricListItem {
|
||||||
|
name: String!
|
||||||
|
unit: Unit!
|
||||||
|
scope: MetricScope!
|
||||||
|
footprint: String
|
||||||
|
availability: [ClusterSupport!]!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Count {
|
type Count {
|
||||||
name: String!
|
name: String!
|
||||||
count: Int!
|
count: Int!
|
||||||
}
|
}
|
||||||
|
|
||||||
type User {
|
type User {
|
||||||
username: String!
|
username: String!
|
||||||
name: String!
|
name: String!
|
||||||
email: String!
|
email: String!
|
||||||
|
}
|
||||||
|
|
||||||
|
input MetricStatItem {
|
||||||
|
metricName: String!
|
||||||
|
range: FloatRange!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Query {
|
type Query {
|
||||||
clusters: [Cluster!]! # List of all clusters
|
clusters: [Cluster!]! # List of all clusters
|
||||||
tags: [Tag!]! # List of all tags
|
tags: [Tag!]! # List of all tags
|
||||||
|
globalMetrics: [GlobalMetricListItem!]!
|
||||||
|
|
||||||
user(username: String!): User
|
user(username: String!): User
|
||||||
allocatedNodes(cluster: String!): [Count!]!
|
allocatedNodes(cluster: String!): [Count!]!
|
||||||
|
|
||||||
|
## Node Queries New
|
||||||
|
node(id: ID!): Node
|
||||||
|
nodes(filter: [NodeFilter!], order: OrderByInput): NodeStateResultList!
|
||||||
|
nodeStates(filter: [NodeFilter!]): [NodeStates!]!
|
||||||
|
nodeStatesTimed(filter: [NodeFilter!], type: String!): [NodeStatesTimed!]!
|
||||||
|
|
||||||
job(id: ID!): Job
|
job(id: ID!): Job
|
||||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
jobMetrics(
|
||||||
|
id: ID!
|
||||||
|
metrics: [String!]
|
||||||
|
scopes: [MetricScope!]
|
||||||
|
resolution: Int
|
||||||
|
): [JobMetricWithName!]!
|
||||||
|
|
||||||
|
jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
|
||||||
|
|
||||||
|
scopedJobStats(
|
||||||
|
id: ID!
|
||||||
|
metrics: [String!]
|
||||||
|
scopes: [MetricScope!]
|
||||||
|
): [NamedStatsWithScope!]!
|
||||||
|
|
||||||
|
jobs(
|
||||||
|
filter: [JobFilter!]
|
||||||
|
page: PageRequest
|
||||||
|
order: OrderByInput
|
||||||
|
): JobResultList!
|
||||||
|
|
||||||
|
jobsStatistics(
|
||||||
|
filter: [JobFilter!]
|
||||||
|
metrics: [String!]
|
||||||
|
page: PageRequest
|
||||||
|
sortBy: SortByAggregate
|
||||||
|
groupBy: Aggregate
|
||||||
|
numDurationBins: String
|
||||||
|
numMetricBins: Int
|
||||||
|
): [JobsStatistics!]!
|
||||||
|
|
||||||
|
jobsMetricStats(filter: [JobFilter!], metrics: [String!]): [JobStats!]!
|
||||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||||
|
|
||||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
rooflineHeatmap(
|
||||||
jobsStatistics(filter: [JobFilter!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
|
filter: [JobFilter!]!
|
||||||
|
rows: Int!
|
||||||
|
cols: Int!
|
||||||
|
minX: Float!
|
||||||
|
minY: Float!
|
||||||
|
maxX: Float!
|
||||||
|
maxY: Float!
|
||||||
|
): [[Float!]!]!
|
||||||
|
|
||||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
nodeMetrics(
|
||||||
|
cluster: String!
|
||||||
|
nodes: [String!]
|
||||||
|
scopes: [MetricScope!]
|
||||||
|
metrics: [String!]
|
||||||
|
from: Time!
|
||||||
|
to: Time!
|
||||||
|
): [NodeMetrics!]!
|
||||||
|
|
||||||
nodeMetrics(cluster: String!, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
nodeMetricsList(
|
||||||
|
cluster: String!
|
||||||
|
subCluster: String!
|
||||||
|
stateFilter: String!
|
||||||
|
nodeFilter: String!
|
||||||
|
scopes: [MetricScope!]
|
||||||
|
metrics: [String!]
|
||||||
|
from: Time!
|
||||||
|
to: Time!
|
||||||
|
page: PageRequest
|
||||||
|
resolution: Int
|
||||||
|
): NodesResultList!
|
||||||
|
|
||||||
|
clusterMetrics(
|
||||||
|
cluster: String!
|
||||||
|
metrics: [String!]
|
||||||
|
from: Time!
|
||||||
|
to: Time!
|
||||||
|
): ClusterMetrics!
|
||||||
}
|
}
|
||||||
|
|
||||||
type Mutation {
|
type Mutation {
|
||||||
createTag(type: String!, name: String!): Tag!
|
createTag(type: String!, name: String!, scope: String!): Tag!
|
||||||
deleteTag(id: ID!): ID!
|
deleteTag(id: ID!): ID!
|
||||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
|
removeTagFromList(tagIds: [ID!]!): [Int!]!
|
||||||
|
|
||||||
updateConfiguration(name: String!, value: String!): String
|
updateConfiguration(name: String!, value: String!): String
|
||||||
}
|
}
|
||||||
|
|
||||||
type IntRangeOutput { from: Int!, to: Int! }
|
type IntRangeOutput {
|
||||||
type TimeRangeOutput { from: Time!, to: Time! }
|
from: Int!
|
||||||
|
to: Int!
|
||||||
|
}
|
||||||
|
type TimeRangeOutput {
|
||||||
|
range: String
|
||||||
|
from: Time!
|
||||||
|
to: Time!
|
||||||
|
}
|
||||||
|
|
||||||
|
input NodeFilter {
|
||||||
|
hostname: StringInput
|
||||||
|
cluster: StringInput
|
||||||
|
subcluster: StringInput
|
||||||
|
schedulerState: SchedulerState
|
||||||
|
healthState: MonitoringState
|
||||||
|
timeStart: Int
|
||||||
|
}
|
||||||
|
|
||||||
input JobFilter {
|
input JobFilter {
|
||||||
tags: [ID!]
|
tags: [ID!]
|
||||||
jobId: StringInput
|
dbId: [ID!]
|
||||||
arrayJobId: Int
|
jobId: StringInput
|
||||||
user: StringInput
|
arrayJobId: Int
|
||||||
project: StringInput
|
user: StringInput
|
||||||
jobName: StringInput
|
project: StringInput
|
||||||
cluster: StringInput
|
jobName: StringInput
|
||||||
partition: StringInput
|
cluster: StringInput
|
||||||
duration: IntRange
|
partition: StringInput
|
||||||
|
duration: IntRange
|
||||||
|
energy: FloatRange
|
||||||
|
|
||||||
minRunningFor: Int
|
minRunningFor: Int
|
||||||
|
|
||||||
numNodes: IntRange
|
numNodes: IntRange
|
||||||
numAccelerators: IntRange
|
numAccelerators: IntRange
|
||||||
numHWThreads: IntRange
|
numHWThreads: IntRange
|
||||||
|
|
||||||
startTime: TimeRange
|
startTime: TimeRange
|
||||||
state: [JobState!]
|
state: [JobState!]
|
||||||
flopsAnyAvg: FloatRange
|
metricStats: [MetricStatItem!]
|
||||||
memBwAvg: FloatRange
|
shared: String
|
||||||
loadAvg: FloatRange
|
node: StringInput
|
||||||
memUsedMax: FloatRange
|
|
||||||
|
|
||||||
exclusive: Int
|
|
||||||
node: StringInput
|
|
||||||
}
|
}
|
||||||
|
|
||||||
input OrderByInput {
|
input OrderByInput {
|
||||||
field: String!
|
field: String!
|
||||||
|
type: String!
|
||||||
order: SortDirectionEnum! = ASC
|
order: SortDirectionEnum! = ASC
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -256,29 +473,46 @@ enum SortDirectionEnum {
|
|||||||
}
|
}
|
||||||
|
|
||||||
input StringInput {
|
input StringInput {
|
||||||
eq: String
|
eq: String
|
||||||
neq: String
|
neq: String
|
||||||
contains: String
|
contains: String
|
||||||
startsWith: String
|
startsWith: String
|
||||||
endsWith: String
|
endsWith: String
|
||||||
in: [String!]
|
in: [String!]
|
||||||
}
|
}
|
||||||
|
|
||||||
input IntRange { from: Int!, to: Int! }
|
input IntRange {
|
||||||
input FloatRange { from: Float!, to: Float! }
|
from: Int!
|
||||||
input TimeRange { from: Time, to: Time }
|
to: Int!
|
||||||
|
}
|
||||||
|
input TimeRange {
|
||||||
|
range: String
|
||||||
|
from: Time
|
||||||
|
to: Time
|
||||||
|
}
|
||||||
|
|
||||||
|
input FloatRange {
|
||||||
|
from: Float!
|
||||||
|
to: Float!
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStateResultList {
|
||||||
|
items: [Node!]!
|
||||||
|
count: Int
|
||||||
|
}
|
||||||
|
|
||||||
type JobResultList {
|
type JobResultList {
|
||||||
items: [Job!]!
|
items: [Job!]!
|
||||||
offset: Int
|
offset: Int
|
||||||
limit: Int
|
limit: Int
|
||||||
count: Int
|
count: Int
|
||||||
|
hasNextPage: Boolean
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobLinkResultList {
|
type JobLinkResultList {
|
||||||
listQuery: String
|
listQuery: String
|
||||||
items: [JobLink!]!
|
items: [JobLink!]!
|
||||||
count: Int
|
count: Int
|
||||||
}
|
}
|
||||||
|
|
||||||
type HistoPoint {
|
type HistoPoint {
|
||||||
@@ -286,26 +520,42 @@ type HistoPoint {
|
|||||||
value: Int!
|
value: Int!
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobsStatistics {
|
type MetricHistoPoints {
|
||||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
metric: String!
|
||||||
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
unit: String!
|
||||||
totalJobs: Int! # Number of jobs
|
stat: String
|
||||||
runningJobs: Int! # Number of running jobs
|
data: [MetricHistoPoint!]
|
||||||
shortJobs: Int! # Number of jobs with a duration of less than duration
|
}
|
||||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
|
||||||
totalNodes: Int! # Sum of the nodes of all matched jobs
|
type MetricHistoPoint {
|
||||||
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
bin: Int
|
||||||
totalCores: Int! # Sum of the cores of all matched jobs
|
count: Int!
|
||||||
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
min: Int
|
||||||
totalAccs: Int! # Sum of the accs of all matched jobs
|
max: Int
|
||||||
totalAccHours: Int! # Sum of the gpu hours of all matched jobs
|
}
|
||||||
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
|
||||||
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
type JobsStatistics {
|
||||||
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
id: ID! # If `groupBy` was used, ID of the user/project/cluster/subcluster
|
||||||
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
name: String! # if User-Statistics: Given Name of Account (ID) Owner
|
||||||
|
totalUsers: Int! # if *not* User-Statistics: Number of active users (based on running jobs)
|
||||||
|
totalJobs: Int! # Number of jobs
|
||||||
|
runningJobs: Int! # Number of running jobs
|
||||||
|
shortJobs: Int! # Number of jobs with a duration of less than config'd ShortRunningJobsDuration
|
||||||
|
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||||
|
totalNodes: Int! # Sum of the nodes of all matched jobs
|
||||||
|
totalNodeHours: Int! # Sum of the node hours of all matched jobs
|
||||||
|
totalCores: Int! # Sum of the cores of all matched jobs
|
||||||
|
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
||||||
|
totalAccs: Int! # Sum of the accs of all matched jobs
|
||||||
|
totalAccHours: Int! # Sum of the gpu hours of all matched jobs
|
||||||
|
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
||||||
|
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||||
|
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
|
||||||
|
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
|
||||||
|
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
|
||||||
}
|
}
|
||||||
|
|
||||||
input PageRequest {
|
input PageRequest {
|
||||||
itemsPerPage: Int!
|
itemsPerPage: Int!
|
||||||
page: Int!
|
page: Int!
|
||||||
}
|
}
|
||||||
|
|||||||
1259
api/swagger.json
1259
api/swagger.json
File diff suppressed because it is too large
Load Diff
938
api/swagger.yaml
938
api/swagger.yaml
File diff suppressed because it is too large
Load Diff
@@ -1,17 +0,0 @@
|
|||||||
CC_USER=clustercockpit
|
|
||||||
|
|
||||||
CC_GROUP=clustercockpit
|
|
||||||
|
|
||||||
CC_HOME=/tmp
|
|
||||||
|
|
||||||
LOG_DIR=/var/log
|
|
||||||
|
|
||||||
DATA_DIR=/var/run/cc-backend
|
|
||||||
|
|
||||||
MAX_OPEN_FILES=10000
|
|
||||||
|
|
||||||
CONF_DIR=/etc/cc-backend
|
|
||||||
|
|
||||||
CONF_FILE=/etc/cc-backend/cc-backend.json
|
|
||||||
|
|
||||||
RESTART_ON_UPGRADE=true
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
Package: cc-backend
|
|
||||||
Version: {VERSION}
|
|
||||||
Installed-Size: {INSTALLED_SIZE}
|
|
||||||
Architecture: {ARCH}
|
|
||||||
Maintainer: thomas.gruber@fau.de
|
|
||||||
Depends: libc6 (>= 2.2.1)
|
|
||||||
Build-Depends: debhelper-compat (= 13), git, golang-go, npm, yarn
|
|
||||||
Description: ClusterCockpit backend and web frontend
|
|
||||||
Homepage: https://github.com/ClusterCockpit/cc-backend
|
|
||||||
Source: cc-backend
|
|
||||||
Rules-Requires-Root: no
|
|
||||||
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
[Unit]
|
|
||||||
Description=ClusterCockpit backend and web frontend (cc-backend)
|
|
||||||
Documentation=https://github.com/ClusterCockpit/cc-backend
|
|
||||||
Wants=network-online.target
|
|
||||||
After=network-online.target
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
EnvironmentFile=/etc/default/cc-backend
|
|
||||||
Type=simple
|
|
||||||
User=clustercockpit
|
|
||||||
Group=clustercockpit
|
|
||||||
Restart=on-failure
|
|
||||||
TimeoutStopSec=100
|
|
||||||
LimitNOFILE=infinity
|
|
||||||
ExecStart=/usr/bin/cc-backend --config ${CONF_FILE}
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
@@ -1,70 +0,0 @@
|
|||||||
Name: cc-backend
|
|
||||||
Version: %{VERS}
|
|
||||||
Release: 1%{?dist}
|
|
||||||
Summary: ClusterCockpit backend and web frontend
|
|
||||||
|
|
||||||
License: MIT
|
|
||||||
Source0: %{name}-%{version}.tar.gz
|
|
||||||
|
|
||||||
#BuildRequires: go-toolset
|
|
||||||
#BuildRequires: systemd-rpm-macros
|
|
||||||
#BuildRequires: npm
|
|
||||||
|
|
||||||
Provides: %{name} = %{version}
|
|
||||||
|
|
||||||
%description
|
|
||||||
ClusterCockpit backend and web frontend
|
|
||||||
|
|
||||||
%global debug_package %{nil}
|
|
||||||
|
|
||||||
%prep
|
|
||||||
%autosetup
|
|
||||||
|
|
||||||
|
|
||||||
%build
|
|
||||||
#CURRENT_TIME=$(date +%Y-%m-%d:T%H:%M:\%S)
|
|
||||||
#LD_FLAGS="-s -X main.buildTime=${CURRENT_TIME} -X main.version=%{VERS}"
|
|
||||||
mkdir ./var
|
|
||||||
touch ./var/job.db
|
|
||||||
cd web/frontend && yarn install && yarn build && cd -
|
|
||||||
go build -ldflags="-s -X main.version=%{VERS}" ./cmd/cc-backend
|
|
||||||
|
|
||||||
|
|
||||||
%install
|
|
||||||
# Install cc-backend
|
|
||||||
#make PREFIX=%{buildroot} install
|
|
||||||
install -Dpm 755 cc-backend %{buildroot}/%{_bindir}/%{name}
|
|
||||||
install -Dpm 0600 configs/config.json %{buildroot}%{_sysconfdir}/%{name}/%{name}.json
|
|
||||||
# Integrate into system
|
|
||||||
install -Dpm 0644 build/package/%{name}.service %{buildroot}%{_unitdir}/%{name}.service
|
|
||||||
install -Dpm 0600 build/package/%{name}.config %{buildroot}%{_sysconfdir}/default/%{name}
|
|
||||||
install -Dpm 0644 build/package/%{name}.sysusers %{buildroot}%{_sysusersdir}/%{name}.conf
|
|
||||||
|
|
||||||
|
|
||||||
%check
|
|
||||||
# go test should be here... :)
|
|
||||||
|
|
||||||
%pre
|
|
||||||
%sysusers_create_package scripts/%{name}.sysusers
|
|
||||||
|
|
||||||
%post
|
|
||||||
%systemd_post %{name}.service
|
|
||||||
|
|
||||||
%preun
|
|
||||||
%systemd_preun %{name}.service
|
|
||||||
|
|
||||||
%files
|
|
||||||
# Binary
|
|
||||||
%attr(-,clustercockpit,clustercockpit) %{_bindir}/%{name}
|
|
||||||
# Config
|
|
||||||
%dir %{_sysconfdir}/%{name}
|
|
||||||
%attr(0600,clustercockpit,clustercockpit) %config(noreplace) %{_sysconfdir}/%{name}/%{name}.json
|
|
||||||
# Systemd
|
|
||||||
%{_unitdir}/%{name}.service
|
|
||||||
%{_sysconfdir}/default/%{name}
|
|
||||||
%{_sysusersdir}/%{name}.conf
|
|
||||||
|
|
||||||
%changelog
|
|
||||||
* Mon Mar 07 2022 Thomas Gruber - 0.1
|
|
||||||
- Initial metric store implementation
|
|
||||||
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
#Type Name ID GECOS Home directory Shell
|
|
||||||
u clustercockpit - "User for ClusterCockpit" /run/cc-backend /sbin/nologin
|
|
||||||
38
cmd/cc-backend/cli.go
Normal file
38
cmd/cc-backend/cli.go
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||||
|
// This file defines all command-line flags and their default values.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "flag"
|
||||||
|
|
||||||
|
var (
|
||||||
|
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB,
|
||||||
|
flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags bool
|
||||||
|
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||||
|
)
|
||||||
|
|
||||||
|
func cliInit() {
|
||||||
|
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize sqlite database file, config.json and .env")
|
||||||
|
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
|
||||||
|
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
|
||||||
|
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
|
||||||
|
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
|
||||||
|
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
|
||||||
|
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
|
||||||
|
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
|
||||||
|
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
|
||||||
|
flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
|
||||||
|
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||||
|
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||||
|
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||||
|
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: <username>:[admin,support,manager,api,user]:<password>")
|
||||||
|
flag.StringVar(&flagDelUser, "del-user", "", "Remove a existing user. Argument format: <username>")
|
||||||
|
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
|
||||||
|
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
|
||||||
|
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug, info , warn (default), err, crit]`")
|
||||||
|
flag.Parse()
|
||||||
|
}
|
||||||
119
cmd/cc-backend/init.go
Normal file
119
cmd/cc-backend/init.go
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||||
|
// This file contains bootstrap logic for initializing the environment,
|
||||||
|
// creating default configuration files, and setting up the database.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
const envString = `
|
||||||
|
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
|
||||||
|
# You can generate your own keypair using the gen-keypair tool
|
||||||
|
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
|
||||||
|
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
|
||||||
|
|
||||||
|
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
|
||||||
|
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
|
||||||
|
`
|
||||||
|
|
||||||
|
const configString = `
|
||||||
|
{
|
||||||
|
"main": {
|
||||||
|
"addr": "127.0.0.1:8080",
|
||||||
|
"short-running-jobs-duration": 300,
|
||||||
|
"resampling": {
|
||||||
|
"minimumPoints": 600,
|
||||||
|
"trigger": 180,
|
||||||
|
"resolutions": [
|
||||||
|
240,
|
||||||
|
60
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"apiAllowedIPs": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"emission-constant": 317
|
||||||
|
},
|
||||||
|
"cron": {
|
||||||
|
"commit-job-worker": "2m",
|
||||||
|
"duration-worker": "5m",
|
||||||
|
"footprint-worker": "10m"
|
||||||
|
},
|
||||||
|
"archive": {
|
||||||
|
"kind": "file",
|
||||||
|
"path": "./var/job-archive"
|
||||||
|
},
|
||||||
|
"auth": {
|
||||||
|
"jwts": {
|
||||||
|
"max-age": "2000h"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"clusters": [
|
||||||
|
{
|
||||||
|
"name": "name",
|
||||||
|
"metricDataRepository": {
|
||||||
|
"kind": "cc-metric-store",
|
||||||
|
"url": "http://localhost:8082",
|
||||||
|
"token": ""
|
||||||
|
},
|
||||||
|
"filterRanges": {
|
||||||
|
"numNodes": {
|
||||||
|
"from": 1,
|
||||||
|
"to": 64
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
"from": 0,
|
||||||
|
"to": 86400
|
||||||
|
},
|
||||||
|
"startTime": {
|
||||||
|
"from": "2023-01-01T00:00:00Z",
|
||||||
|
"to": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
`
|
||||||
|
|
||||||
|
func initEnv() {
|
||||||
|
if util.CheckFileExists("var") {
|
||||||
|
cclog.Exit("Directory ./var already exists. Cautiously exiting application initialization.")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
|
||||||
|
cclog.Abortf("Could not write default ./config.json with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
|
||||||
|
cclog.Abortf("Could not write default ./.env file with permissions '0o666'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.Mkdir("var", 0o777); err != nil {
|
||||||
|
cclog.Abortf("Could not create default ./var folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
err := repository.MigrateDB("./var/job.db")
|
||||||
|
if err != nil {
|
||||||
|
cclog.Abortf("Could not initialize default SQLite database as './var/job.db'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
if err := os.Mkdir("var/job-archive", 0o777); err != nil {
|
||||||
|
cclog.Abortf("Could not create default ./var/job-archive folder with permissions '0o777'. Application initialization failed, exited.\nError: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
archiveCfg := "{\"kind\": \"file\",\"path\": \"./var/job-archive\"}"
|
||||||
|
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||||
|
cclog.Abortf("Could not initialize job-archive, exited.\nError: %s\n", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
390
cmd/cc-backend/server.go
Normal file
390
cmd/cc-backend/server.go
Normal file
@@ -0,0 +1,390 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
// Package main provides the entry point for the ClusterCockpit backend server.
|
||||||
|
// This file contains HTTP server setup, routing configuration, and
|
||||||
|
// authentication middleware integration.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/99designs/gqlgen/graphql/handler"
|
||||||
|
"github.com/99designs/gqlgen/graphql/handler/transport"
|
||||||
|
"github.com/99designs/gqlgen/graphql/playground"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/nats"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/web"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/runtimeEnv"
|
||||||
|
"github.com/gorilla/handlers"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
httpSwagger "github.com/swaggo/http-swagger"
|
||||||
|
)
|
||||||
|
|
||||||
|
var buildInfo web.Build
|
||||||
|
|
||||||
|
// Environment variable names
|
||||||
|
const (
|
||||||
|
envDebug = "DEBUG"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Server encapsulates the HTTP server state and dependencies
|
||||||
|
type Server struct {
|
||||||
|
router *mux.Router
|
||||||
|
server *http.Server
|
||||||
|
restAPIHandle *api.RestAPI
|
||||||
|
natsAPIHandle *api.NatsAPI
|
||||||
|
}
|
||||||
|
|
||||||
|
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
json.NewEncoder(rw).Encode(map[string]string{
|
||||||
|
"status": http.StatusText(http.StatusUnauthorized),
|
||||||
|
"error": err.Error(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewServer creates and initializes a new Server instance
|
||||||
|
func NewServer(version, commit, buildDate string) (*Server, error) {
|
||||||
|
buildInfo = web.Build{Version: version, Hash: commit, Buildtime: buildDate}
|
||||||
|
|
||||||
|
s := &Server{
|
||||||
|
router: mux.NewRouter(),
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := s.init(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) init() error {
|
||||||
|
// Setup the http.Handler/Router used by the server
|
||||||
|
graph.Init()
|
||||||
|
resolver := graph.GetResolverInstance()
|
||||||
|
graphQLServer := handler.New(
|
||||||
|
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
|
||||||
|
|
||||||
|
graphQLServer.AddTransport(transport.POST{})
|
||||||
|
|
||||||
|
if os.Getenv(envDebug) != "1" {
|
||||||
|
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
|
||||||
|
// The problem with this is that then, no more stacktrace is printed to stderr.
|
||||||
|
graphQLServer.SetRecoverFunc(func(ctx context.Context, err any) error {
|
||||||
|
switch e := err.(type) {
|
||||||
|
case string:
|
||||||
|
return fmt.Errorf("MAIN > Panic: %s", e)
|
||||||
|
case error:
|
||||||
|
return fmt.Errorf("MAIN > Panic caused by: %s", e.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("MAIN > Internal server error (panic)")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
authHandle := auth.GetAuthInstance()
|
||||||
|
|
||||||
|
s.restAPIHandle = api.New()
|
||||||
|
|
||||||
|
info := map[string]any{}
|
||||||
|
info["hasOpenIDConnect"] = false
|
||||||
|
|
||||||
|
if auth.Keys.OpenIDConfig != nil {
|
||||||
|
openIDConnect := auth.NewOIDC(authHandle)
|
||||||
|
openIDConnect.RegisterEndpoints(s.router)
|
||||||
|
info["hasOpenIDConnect"] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
s.router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
cclog.Debugf("##%v##", info)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
|
||||||
|
}).Methods(http.MethodGet)
|
||||||
|
s.router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
|
||||||
|
})
|
||||||
|
s.router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
|
||||||
|
})
|
||||||
|
|
||||||
|
secured := s.router.PathPrefix("/").Subrouter()
|
||||||
|
securedapi := s.router.PathPrefix("/api").Subrouter()
|
||||||
|
userapi := s.router.PathPrefix("/userapi").Subrouter()
|
||||||
|
configapi := s.router.PathPrefix("/config").Subrouter()
|
||||||
|
frontendapi := s.router.PathPrefix("/frontend").Subrouter()
|
||||||
|
metricstoreapi := s.router.PathPrefix("/metricstore").Subrouter()
|
||||||
|
|
||||||
|
if !config.Keys.DisableAuthentication {
|
||||||
|
// Create login failure handler (used by both /login and /jwt-login)
|
||||||
|
loginFailureHandler := func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||||
|
Title: "Login failed - ClusterCockpit",
|
||||||
|
MsgType: "alert-warning",
|
||||||
|
Message: err.Error(),
|
||||||
|
Build: buildInfo,
|
||||||
|
Infos: info,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
s.router.Handle("/login", authHandle.Login(loginFailureHandler)).Methods(http.MethodPost)
|
||||||
|
s.router.Handle("/jwt-login", authHandle.Login(loginFailureHandler))
|
||||||
|
|
||||||
|
s.router.Handle("/logout", authHandle.Logout(
|
||||||
|
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||||
|
Title: "Bye - ClusterCockpit",
|
||||||
|
MsgType: "alert-info",
|
||||||
|
Message: "Logout successful",
|
||||||
|
Build: buildInfo,
|
||||||
|
Infos: info,
|
||||||
|
})
|
||||||
|
}))).Methods(http.MethodPost)
|
||||||
|
|
||||||
|
secured.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.Auth(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
|
||||||
|
// On failure:
|
||||||
|
func(rw http.ResponseWriter, r *http.Request, err error) {
|
||||||
|
rw.WriteHeader(http.StatusUnauthorized)
|
||||||
|
web.RenderTemplate(rw, "login.tmpl", &web.Page{
|
||||||
|
Title: "Authentication failed - ClusterCockpit",
|
||||||
|
MsgType: "alert-danger",
|
||||||
|
Message: err.Error(),
|
||||||
|
Build: buildInfo,
|
||||||
|
Infos: info,
|
||||||
|
Redirect: r.RequestURI,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
|
securedapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthAPI(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
|
||||||
|
userapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthUserAPI(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
|
||||||
|
metricstoreapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthMetricStoreAPI(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
|
||||||
|
configapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthConfigAPI(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
|
||||||
|
frontendapi.Use(func(next http.Handler) http.Handler {
|
||||||
|
return authHandle.AuthFrontendAPI(
|
||||||
|
// On success;
|
||||||
|
next,
|
||||||
|
// On failure: JSON Response
|
||||||
|
onFailureResponse)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if flagDev {
|
||||||
|
s.router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
|
||||||
|
s.router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
|
||||||
|
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
|
||||||
|
}
|
||||||
|
secured.Handle("/query", graphQLServer)
|
||||||
|
|
||||||
|
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
|
||||||
|
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
routerConfig.HandleSearchBar(rw, r, buildInfo)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Mount all /monitoring/... and /api/... routes.
|
||||||
|
routerConfig.SetupRoutes(secured, buildInfo)
|
||||||
|
s.restAPIHandle.MountAPIRoutes(securedapi)
|
||||||
|
s.restAPIHandle.MountUserAPIRoutes(userapi)
|
||||||
|
s.restAPIHandle.MountConfigAPIRoutes(configapi)
|
||||||
|
s.restAPIHandle.MountFrontendAPIRoutes(frontendapi)
|
||||||
|
|
||||||
|
if config.Keys.APISubjects != nil {
|
||||||
|
s.natsAPIHandle = api.NewNatsAPI()
|
||||||
|
if err := s.natsAPIHandle.StartSubscriptions(); err != nil {
|
||||||
|
return fmt.Errorf("starting NATS subscriptions: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s.restAPIHandle.MountMetricStoreAPIRoutes(metricstoreapi)
|
||||||
|
|
||||||
|
if config.Keys.EmbedStaticFiles {
|
||||||
|
if i, err := os.Stat("./var/img"); err == nil {
|
||||||
|
if i.IsDir() {
|
||||||
|
cclog.Info("Use local directory for static images")
|
||||||
|
s.router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.router.PathPrefix("/").Handler(http.StripPrefix("/", web.ServeFiles()))
|
||||||
|
} else {
|
||||||
|
s.router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
|
||||||
|
}
|
||||||
|
|
||||||
|
s.router.Use(handlers.CompressHandler)
|
||||||
|
s.router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
|
||||||
|
s.router.Use(handlers.CORS(
|
||||||
|
handlers.AllowCredentials(),
|
||||||
|
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
|
||||||
|
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
|
||||||
|
handlers.AllowedOrigins([]string{"*"})))
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback HTTP server read/write timeouts, expressed in seconds and
// converted to time.Duration where they are applied.
const (
	defaultReadTimeout  = 20
	defaultWriteTimeout = 20
)
|
||||||
|
|
||||||
|
func (s *Server) Start(ctx context.Context) error {
|
||||||
|
handler := handlers.CustomLoggingHandler(io.Discard, s.router, func(_ io.Writer, params handlers.LogFormatterParams) {
|
||||||
|
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
|
||||||
|
cclog.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||||
|
params.Request.Method, params.URL.RequestURI(),
|
||||||
|
params.StatusCode, float32(params.Size)/1024,
|
||||||
|
time.Since(params.TimeStamp).Milliseconds())
|
||||||
|
} else {
|
||||||
|
cclog.Debugf("%s %s (%d, %.02fkb, %dms)",
|
||||||
|
params.Request.Method, params.URL.RequestURI(),
|
||||||
|
params.StatusCode, float32(params.Size)/1024,
|
||||||
|
time.Since(params.TimeStamp).Milliseconds())
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Use configurable timeouts with defaults
|
||||||
|
readTimeout := time.Duration(defaultReadTimeout) * time.Second
|
||||||
|
writeTimeout := time.Duration(defaultWriteTimeout) * time.Second
|
||||||
|
|
||||||
|
s.server = &http.Server{
|
||||||
|
ReadTimeout: readTimeout,
|
||||||
|
WriteTimeout: writeTimeout,
|
||||||
|
Handler: handler,
|
||||||
|
Addr: config.Keys.Addr,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start http or https server
|
||||||
|
listener, err := net.Listen("tcp", config.Keys.Addr)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("starting listener on '%s': %w", config.Keys.Addr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHTTPTo != "" {
|
||||||
|
go func() {
|
||||||
|
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHTTPTo, http.StatusMovedPermanently))
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Keys.HTTPSCertFile != "" && config.Keys.HTTPSKeyFile != "" {
|
||||||
|
cert, err := tls.LoadX509KeyPair(
|
||||||
|
config.Keys.HTTPSCertFile, config.Keys.HTTPSKeyFile)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("loading X509 keypair (check 'https-cert-file' and 'https-key-file' in config.json): %w", err)
|
||||||
|
}
|
||||||
|
listener = tls.NewListener(listener, &tls.Config{
|
||||||
|
Certificates: []tls.Certificate{cert},
|
||||||
|
CipherSuites: []uint16{
|
||||||
|
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||||
|
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||||
|
},
|
||||||
|
MinVersion: tls.VersionTLS12,
|
||||||
|
PreferServerCipherSuites: true,
|
||||||
|
})
|
||||||
|
cclog.Infof("HTTPS server listening at %s...", config.Keys.Addr)
|
||||||
|
} else {
|
||||||
|
cclog.Infof("HTTP server listening at %s...", config.Keys.Addr)
|
||||||
|
}
|
||||||
|
//
|
||||||
|
// Because this program will want to bind to a privileged port (like 80), the listener must
|
||||||
|
// be established first, then the user can be changed, and after that,
|
||||||
|
// the actual http server can be started.
|
||||||
|
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
|
||||||
|
return fmt.Errorf("dropping privileges: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle context cancellation for graceful shutdown
|
||||||
|
go func() {
|
||||||
|
<-ctx.Done()
|
||||||
|
shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||||
|
cclog.Errorf("Server shutdown error: %v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if err = s.server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||||
|
return fmt.Errorf("server failed: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *Server) Shutdown(ctx context.Context) {
|
||||||
|
// Create a shutdown context with timeout
|
||||||
|
shutdownCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
nc := nats.GetClient()
|
||||||
|
if nc != nil {
|
||||||
|
nc.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||||
|
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||||
|
cclog.Errorf("Server shutdown error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Archive all the metric store data
|
||||||
|
memorystore.Shutdown()
|
||||||
|
|
||||||
|
// Shutdown archiver with 10 second timeout for fast shutdown
|
||||||
|
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||||
|
cclog.Warnf("Archiver shutdown: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
## Intro
|
|
||||||
|
|
||||||
cc-backend requires a configuration file that specifies the cluster systems to be used.
|
|
||||||
To override the default, specify the location of a json configuration file with the `-config <file path>` command line option.
|
|
||||||
All security-related configurations, e.g. keys and passwords, are set using
|
|
||||||
environment variables.
|
|
||||||
It is supported to set these by means of a `.env` file in the project root.
|
|
||||||
|
|
||||||
## Configuration Options
|
|
||||||
|
|
||||||
* `addr`: Type string. Address where the http (or https) server will listen on (for example: 'localhost:80'). Default `:8080`.
|
|
||||||
* `user`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
|
|
||||||
* `group`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
|
|
||||||
* `disable-authentication`: Type bool. Disable authentication (for everything: API, Web-UI, ...). Default `false`.
|
|
||||||
* `embed-static-files`: Type bool. If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not. Default `true`.
|
|
||||||
* `static-files`: Type string. Folder where static assets can be found, if `embed-static-files` is `false`. No default.
|
|
||||||
* `db-driver`: Type string. 'sqlite3' or 'mysql' (mysql will work for mariadb as well). Default `sqlite3`.
|
|
||||||
* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
|
|
||||||
* `job-archive`: Type object.
|
|
||||||
- `kind`: Type string. At the moment only `file` is supported as a value.
|
|
||||||
- `path`: Type string. Path to the job-archive. Default: `./var/job-archive`.
|
|
||||||
- `compression`: Type integer. Setup automatic compression for jobs older than number of days.
|
|
||||||
- `retention`: Type object.
|
|
||||||
- `policy`: Type string (required). Retention policy. Possible values none, delete,
|
|
||||||
move.
|
|
||||||
- `includeDB`: Type boolean. Also remove jobs from database.
|
|
||||||
- `age`: Type integer. Act on jobs with startTime older than age (in days).
|
|
||||||
- `location`: Type string. The target directory for retention. Only applicable for retention policy move.
|
|
||||||
* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
|
|
||||||
* `validate`: Type bool. Validate all input json documents against json schema.
|
|
||||||
* `session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
|
|
||||||
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
|
|
||||||
* `redirect-http-to`: Type string. If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url.
|
|
||||||
* `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail!
|
|
||||||
* `stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`.
|
|
||||||
* `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
|
|
||||||
* `jwts`: Type object (required). For JWT Authentication.
|
|
||||||
- `max-age`: Type string (required). Configure how long a token is valid. As string parsable by time.ParseDuration().
|
|
||||||
- `cookieName`: Type string. Cookie that should be checked for a JWT token.
|
|
||||||
- `validateUser`: Type boolean. Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.
|
|
||||||
- `trustedIssuer`: Type string. Issuer that should be accepted when validating external JWTs.
|
|
||||||
- `syncUserOnLogin`: Type boolean. Add non-existent user to DB at login attempt with values provided in JWT.
|
|
||||||
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
|
|
||||||
- `url`: Type string (required). URL of LDAP directory server.
|
|
||||||
- `user_base`: Type string (required). Base DN of user tree root.
|
|
||||||
- `search_dn`: Type string (required). DN for authenticating LDAP admin account with general read rights.
|
|
||||||
- `user_bind`: Type string (required). Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
|
|
||||||
- `user_filter`: Type string (required). Filter to extract users for syncing.
|
|
||||||
- `username_attr`: Type string. Attribute with full user name. Defaults to `gecos` if not provided.
|
|
||||||
- `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.
|
|
||||||
- `sync_del_old_users`: Type boolean. Delete obsolete users in database.
|
|
||||||
- `syncUserOnLogin`: Type boolean. Add non-existent user to DB at login attempt if user exists in Ldap directory.
|
|
||||||
* `clusters`: Type array of objects (required)
|
|
||||||
- `name`: Type string. The name of the cluster.
|
|
||||||
- `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string)
|
|
||||||
- `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example:
|
|
||||||
```
|
|
||||||
"filterRanges": {
|
|
||||||
"numNodes": { "from": 1, "to": 64 },
|
|
||||||
"duration": { "from": 0, "to": 86400 },
|
|
||||||
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
|
|
||||||
}
|
|
||||||
```
|
|
||||||
* `ui-defaults`: Type object. Default configuration for ui views. If overwritten, all options must be provided! Most options can be overwritten by the user via the web interface.
|
|
||||||
- `analysis_view_histogramMetrics`: Type string array. Metrics to show as job count histograms in analysis view. Default `["flops_any", "mem_bw", "mem_used"]`.
|
|
||||||
- `analysis_view_scatterPlotMetrics`: Type array of string array. Initial
|
|
||||||
scatter plot configuration in analysis view. Default `[["flops_any", "mem_bw"], ["flops_any", "cpu_load"], ["cpu_load", "mem_bw"]]`.
|
|
||||||
- `job_view_nodestats_selectedMetrics`: Type string array. Initial metrics shown in node statistics table of single job view. Default `["flops_any", "mem_bw", "mem_used"]`.
|
|
||||||
- `job_view_polarPlotMetrics`: Type string array. Metrics shown in polar plot of single job view. Default `["flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"]`.
|
|
||||||
- `job_view_selectedMetrics`: Type string array. Default `["flops_any", "mem_bw", "mem_used"]`.
|
|
||||||
- `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`.
|
|
||||||
- `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`.
|
|
||||||
- `plot_general_lineWidth`: Type int. Initial linewidth. Default `3`.
|
|
||||||
- `plot_list_jobsPerPage`: Type int. Jobs shown per page in job lists. Default `50`.
|
|
||||||
- `plot_list_selectedMetrics`: Type string array. Initial metric plots shown in jobs lists. Default `"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"`.
|
|
||||||
- `plot_view_plotsPerRow`: Type int. Number of plots per row in single job view. Default `3`.
|
|
||||||
- `plot_view_showPolarplot`: Type bool. Option to toggle polar plot in single job view. Default `true`.
|
|
||||||
- `plot_view_showRoofline`: Type bool. Option to toggle roofline plot in single job view. Default `true`.
|
|
||||||
- `plot_view_showStatTable`: Type bool. Option to toggle the node statistic table in single job view. Default `true`.
|
|
||||||
- `system_view_selectedMetric`: Type string. Initial metric shown in system view. Default `cpu_load`.
|
|
||||||
|
|
||||||
Some of the `ui-defaults` values can be appended by `:<clustername>` in order to have different settings depending on the current cluster. Those are notably `job_view_nodestats_selectedMetrics`, `job_view_polarPlotMetrics`, `job_view_selectedMetrics` and `plot_list_selectedMetrics`.
|
|
||||||
|
|
||||||
## Environment Variables
|
|
||||||
|
|
||||||
An example env file is found in this directory. Copy it to `.env` in the project root and adapt it for your needs.
|
|
||||||
|
|
||||||
* `JWT_PUBLIC_KEY` and `JWT_PRIVATE_KEY`: Base64 encoded Ed25519 keys used for JSON Web Token (JWT) authentication. You can generate your own keypair using `go run ./cmd/gen-keypair/gen-keypair.go`. More information in [README_TOKENS.md](./README_TOKENS.md).
|
|
||||||
* `SESSION_KEY`: Some random bytes used as secret for cookie-based sessions.
|
|
||||||
* `LDAP_ADMIN_PASSWORD`: The LDAP admin user password (optional).
|
|
||||||
* `CROSS_LOGIN_JWT_HS512_KEY`: Used for token based logins via another authentication service.
|
|
||||||
* `LOGLEVEL`: Can be `err`, `warn`, `info` or `debug` (optional, `warn` by default). Can be used to reduce logging.
|
|
||||||
@@ -1,51 +0,0 @@
|
|||||||
## Introduction
|
|
||||||
|
|
||||||
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs.
|
|
||||||
JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.
|
|
||||||
This information can be verified and trusted because it is digitally signed.
|
|
||||||
In ClusterCockpit JWTs are signed using a public/private key pair using ECDSA.
|
|
||||||
Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it.
|
|
||||||
Expiration of the generated tokens as well as the max. length of a browser session can be configured in the `config.json` file described [here](./README.md).
|
|
||||||
|
|
||||||
The [Ed25519](https://ed25519.cr.yp.to/) algorithm for signatures was used because it is compatible with other tools that require authentication, such as NATS.io, and because these elliptic-curve methods provide similar security with smaller keys compared to something like RSA. They are slightly more expensive to validate, but that effect is negligible.
|
|
||||||
|
|
||||||
## JWT Payload
|
|
||||||
|
|
||||||
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
|
|
||||||
Currently ClusterCockpit sets the following claims:
|
|
||||||
* `iat`: Issued at claim. The “iat” claim is used to identify the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
|
|
||||||
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
|
|
||||||
* `roles`: An array of strings specifying the roles set for the subject.
|
|
||||||
* `exp`: Expiration date of the token (only if explicitly configured)
|
|
||||||
|
|
||||||
It is important to know that JWTs are not encrypted, only signed. This means that outsiders cannot create new JWTs or modify existing ones, but they are able to read out the username.
|
|
||||||
|
|
||||||
## Workflow
|
|
||||||
|
|
||||||
1. Create a new ECDSA Public/private keypair:
|
|
||||||
```
|
|
||||||
$ go build ./cmd/gen-keypair/
|
|
||||||
$ ./gen-keypair
|
|
||||||
```
|
|
||||||
2. Add keypair in your `.env` file. A template can be found in `./configs`.
|
|
||||||
|
|
||||||
When a user logs in via the `/login` page using a browser, a session cookie (secured using the random bytes in the `SESSION_KEY` env. variable you should change as well) is used for all requests after the successful login. The JWTs make it easier to use the APIs of ClusterCockpit using scripts or other external programs. The token is specified in the `Authorization` HTTP header using the [Bearer schema](https://datatracker.ietf.org/doc/html/rfc6750) (there is an example below). Tokens can be issued to users from the configuration view in the Web-UI or the command line. In order to use the token for API endpoints such as `/api/jobs/start_job/`, the user that executes it needs to have the `api` role. Regular users can only perform read-only queries and only look at data connected to jobs they started themselves.
|
|
||||||
|
|
||||||
## cc-metric-store
|
|
||||||
|
|
||||||
The [cc-metric-store](https://github.com/ClusterCockpit/cc-metric-store) also uses JWTs for authentication. As it does not issue new tokens, it does not need to know the private key. The public key of the keypair that is used to generate the JWTs that grant access to the `cc-metric-store` can be specified in its `config.json`. When configuring the `metricDataRepository` object in the `cluster.json` file, you can put a token issued by ClusterCockpit itself.
|
|
||||||
|
|
||||||
## Setup user and JWT token for REST API authorization
|
|
||||||
|
|
||||||
1. Create user:
|
|
||||||
```
|
|
||||||
$ ./cc-backend --add-user <username>:api:<password> --no-server
|
|
||||||
```
|
|
||||||
2. Issue token for user:
|
|
||||||
```
|
|
||||||
$ ./cc-backend --jwt <username> --no-server
|
|
||||||
```
|
|
||||||
3. Use the issued token on the client side:
|
|
||||||
```
|
|
||||||
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
|
|
||||||
```
|
|
||||||
@@ -1,56 +1,96 @@
|
|||||||
{
|
{
|
||||||
|
"main": {
|
||||||
"addr": "127.0.0.1:8080",
|
"addr": "127.0.0.1:8080",
|
||||||
"archive": {
|
"short-running-jobs-duration": 300,
|
||||||
"kind": "file",
|
"resampling": {
|
||||||
"path": "./var/job-archive"
|
"minimumPoints": 600,
|
||||||
|
"trigger": 180,
|
||||||
|
"resolutions": [
|
||||||
|
240,
|
||||||
|
60
|
||||||
|
]
|
||||||
},
|
},
|
||||||
|
"apiAllowedIPs": [
|
||||||
|
"*"
|
||||||
|
],
|
||||||
|
"emission-constant": 317
|
||||||
|
},
|
||||||
|
"cron": {
|
||||||
|
"commit-job-worker": "2m",
|
||||||
|
"duration-worker": "5m",
|
||||||
|
"footprint-worker": "10m"
|
||||||
|
},
|
||||||
|
"archive": {
|
||||||
|
"kind": "file",
|
||||||
|
"path": "./var/job-archive"
|
||||||
|
},
|
||||||
|
"auth": {
|
||||||
"jwts": {
|
"jwts": {
|
||||||
"max-age": "2m"
|
"max-age": "2000h"
|
||||||
},
|
}
|
||||||
"clusters": [
|
},
|
||||||
{
|
"nats": {
|
||||||
"name": "fritz",
|
"address": "nats://0.0.0.0:4222",
|
||||||
"metricDataRepository": {
|
"username": "root",
|
||||||
"kind": "cc-metric-store",
|
"password": "root"
|
||||||
"url": "http://localhost:8082",
|
},
|
||||||
"token": ""
|
"clusters": [
|
||||||
},
|
{
|
||||||
"filterRanges": {
|
"name": "fritz",
|
||||||
"numNodes": {
|
"filterRanges": {
|
||||||
"from": 1,
|
"numNodes": {
|
||||||
"to": 64
|
"from": 1,
|
||||||
},
|
"to": 64
|
||||||
"duration": {
|
|
||||||
"from": 0,
|
|
||||||
"to": 86400
|
|
||||||
},
|
|
||||||
"startTime": {
|
|
||||||
"from": "2022-01-01T00:00:00Z",
|
|
||||||
"to": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
"duration": {
|
||||||
"name": "alex",
|
"from": 0,
|
||||||
"metricDataRepository": {
|
"to": 86400
|
||||||
"kind": "cc-metric-store",
|
},
|
||||||
"url": "http://localhost:8082",
|
"startTime": {
|
||||||
"token": ""
|
"from": "2022-01-01T00:00:00Z",
|
||||||
},
|
"to": null
|
||||||
"filterRanges": {
|
|
||||||
"numNodes": {
|
|
||||||
"from": 1,
|
|
||||||
"to": 64
|
|
||||||
},
|
|
||||||
"duration": {
|
|
||||||
"from": 0,
|
|
||||||
"to": 86400
|
|
||||||
},
|
|
||||||
"startTime": {
|
|
||||||
"from": "2022-01-01T00:00:00Z",
|
|
||||||
"to": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "alex",
|
||||||
|
"filterRanges": {
|
||||||
|
"numNodes": {
|
||||||
|
"from": 1,
|
||||||
|
"to": 64
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
"from": 0,
|
||||||
|
"to": 86400
|
||||||
|
},
|
||||||
|
"startTime": {
|
||||||
|
"from": "2022-01-01T00:00:00Z",
|
||||||
|
"to": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metric-store": {
|
||||||
|
"checkpoints": {
|
||||||
|
"file-format": "avro",
|
||||||
|
"interval": "1h",
|
||||||
|
"directory": "./var/checkpoints",
|
||||||
|
"restore": "48h"
|
||||||
|
},
|
||||||
|
"archive": {
|
||||||
|
"interval": "1h",
|
||||||
|
"directory": "./var/archive"
|
||||||
|
},
|
||||||
|
"retention-in-memory": "48h",
|
||||||
|
"subscriptions": [
|
||||||
|
{
|
||||||
|
"subscribe-to": "hpc-nats",
|
||||||
|
"cluster-tag": "fritz"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"subscribe-to": "hpc-nats",
|
||||||
|
"cluster-tag": "alex"
|
||||||
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
}
|
||||||
@@ -1,50 +1,49 @@
|
|||||||
{
|
{
|
||||||
|
"main": {
|
||||||
"addr": "0.0.0.0:443",
|
"addr": "0.0.0.0:443",
|
||||||
"ldap": {
|
|
||||||
"url": "ldaps://test",
|
|
||||||
"user_base": "ou=people,ou=hpc,dc=test,dc=de",
|
|
||||||
"search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
|
|
||||||
"user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
|
|
||||||
"user_filter": "(&(objectclass=posixAccount))"
|
|
||||||
},
|
|
||||||
"https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
|
"https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
|
||||||
"https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
|
"https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
|
||||||
"user": "clustercockpit",
|
"user": "clustercockpit",
|
||||||
"group": "clustercockpit",
|
"group": "clustercockpit",
|
||||||
"archive": {
|
"validate": false,
|
||||||
"kind": "file",
|
"apiAllowedIPs": ["*"],
|
||||||
"path": "./var/job-archive"
|
"short-running-jobs-duration": 300,
|
||||||
},
|
"resampling": {
|
||||||
"validate": true,
|
"minimumPoints": 600,
|
||||||
"clusters": [
|
"trigger": 180,
|
||||||
{
|
"resolutions": [
|
||||||
"name": "test",
|
240,
|
||||||
"metricDataRepository": {
|
60
|
||||||
"kind": "cc-metric-store",
|
]
|
||||||
"url": "http://localhost:8082",
|
}
|
||||||
"token": "eyJhbGciOiJF-E-pQBQ"
|
},
|
||||||
},
|
"cron": {
|
||||||
"filterRanges": {
|
"commit-job-worker": "2m",
|
||||||
"numNodes": {
|
"duration-worker": "5m",
|
||||||
"from": 1,
|
"footprint-worker": "10m"
|
||||||
"to": 64
|
},
|
||||||
},
|
"archive": {
|
||||||
"duration": {
|
"kind": "file",
|
||||||
"from": 0,
|
"path": "./var/job-archive"
|
||||||
"to": 86400
|
},
|
||||||
},
|
"clusters": [
|
||||||
"startTime": {
|
{
|
||||||
"from": "2022-01-01T00:00:00Z",
|
"name": "test",
|
||||||
"to": null
|
"filterRanges": {
|
||||||
}
|
"numNodes": {
|
||||||
}
|
"from": 1,
|
||||||
|
"to": 64
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
"from": 0,
|
||||||
|
"to": 86400
|
||||||
|
},
|
||||||
|
"startTime": {
|
||||||
|
"from": "2022-01-01T00:00:00Z",
|
||||||
|
"to": null
|
||||||
}
|
}
|
||||||
],
|
}
|
||||||
"jwts": {
|
}
|
||||||
"cookieName": "",
|
]
|
||||||
"validateUser": false,
|
|
||||||
"max-age": "2m",
|
|
||||||
"trustedIssuer": ""
|
|
||||||
},
|
|
||||||
"short-running-jobs-duration": 300
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -117,10 +117,12 @@ foreach my $ln (split("\n", $topo)) {
|
|||||||
|
|
||||||
my $node;
|
my $node;
|
||||||
my @sockets;
|
my @sockets;
|
||||||
|
my @nodeCores;
|
||||||
foreach my $socket ( @{$DOMAINS{socket}} ) {
|
foreach my $socket ( @{$DOMAINS{socket}} ) {
|
||||||
push @sockets, "[".join(",", @{$socket})."]";
|
push @sockets, "[".join(",", @{$socket})."]";
|
||||||
$node .= join(",", @{$socket})
|
push @nodeCores, join(",", @{$socket});
|
||||||
}
|
}
|
||||||
|
$node = join(",", @nodeCores);
|
||||||
$INFO{sockets} = join(",\n", @sockets);
|
$INFO{sockets} = join(",\n", @sockets);
|
||||||
|
|
||||||
my @memDomains;
|
my @memDomains;
|
||||||
@@ -212,9 +214,27 @@ print <<"END";
|
|||||||
"socketsPerNode": $INFO{socketsPerNode},
|
"socketsPerNode": $INFO{socketsPerNode},
|
||||||
"coresPerSocket": $INFO{coresPerSocket},
|
"coresPerSocket": $INFO{coresPerSocket},
|
||||||
"threadsPerCore": $INFO{threadsPerCore},
|
"threadsPerCore": $INFO{threadsPerCore},
|
||||||
"flopRateScalar": $flopsScalar,
|
"flopRateScalar": {
|
||||||
"flopRateSimd": $flopsSimd,
|
"unit": {
|
||||||
"memoryBandwidth": $memBw,
|
"base": "F/s",
|
||||||
|
"prefix": "G"
|
||||||
|
},
|
||||||
|
"value": $flopsScalar
|
||||||
|
},
|
||||||
|
"flopRateSimd": {
|
||||||
|
"unit": {
|
||||||
|
"base": "F/s",
|
||||||
|
"prefix": "G"
|
||||||
|
},
|
||||||
|
"value": $flopsSimd
|
||||||
|
},
|
||||||
|
"memoryBandwidth": {
|
||||||
|
"unit": {
|
||||||
|
"base": "B/s",
|
||||||
|
"prefix": "G"
|
||||||
|
},
|
||||||
|
"value": $memBw
|
||||||
|
},
|
||||||
"nodes": "<FILL IN NODE RANGES>",
|
"nodes": "<FILL IN NODE RANGES>",
|
||||||
"topology": {
|
"topology": {
|
||||||
"node": [$node],
|
"node": [$node],
|
||||||
|
|||||||
22
configs/startJobPayload.json
Normal file
22
configs/startJobPayload.json
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
{
|
||||||
|
"cluster": "fritz",
|
||||||
|
"jobId": 123000,
|
||||||
|
"jobState": "running",
|
||||||
|
"numAcc": 0,
|
||||||
|
"numHwthreads": 72,
|
||||||
|
"numNodes": 1,
|
||||||
|
"partition": "main",
|
||||||
|
"requestedMemory": 128000,
|
||||||
|
"resources": [{ "hostname": "f0726" }],
|
||||||
|
"startTime": 1649723812,
|
||||||
|
"subCluster": "main",
|
||||||
|
"submitTime": 1649723812,
|
||||||
|
"user": "k106eb10",
|
||||||
|
"project": "k106eb",
|
||||||
|
"walltime": 86400,
|
||||||
|
"metaData": {
|
||||||
|
"slurmInfo": "JobId=398759\nJobName=myJob\nUserId=dummyUser\nGroupId=dummyGroup\nAccount=dummyAccount\nQOS=normal Requeue=False Restarts=0 BatchFlag=True\nTimeLimit=1439'\nSubmitTime=2023-02-09T14:10:18\nPartition=singlenode\nNodeList=xx\nNumNodes=xx NumCPUs=72 NumTasks=72 CPUs/Task=1\nNTasksPerNode:Socket:Core=0:None:None\nTRES_req=cpu=72,mem=250000M,node=1,billing=72\nTRES_alloc=cpu=72,node=1,billing=72\nCommand=myCmd\nWorkDir=myDir\nStdErr=\nStdOut=\n",
|
||||||
|
"jobScript": "#!/bin/bash -l\n#SBATCH --job-name=dummy_job\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/dummy/\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\n\n#This is a dummy job script\n./mybinary\n",
|
||||||
|
"jobName": "ams_pipeline"
|
||||||
|
}
|
||||||
|
}
|
||||||
7
configs/stopJobPayload.json
Normal file
7
configs/stopJobPayload.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"cluster": "fritz",
|
||||||
|
"jobId": 123000,
|
||||||
|
"jobState": "completed",
|
||||||
|
"startTime": 1649723812,
|
||||||
|
"stopTime": 1649763839
|
||||||
|
}
|
||||||
45
configs/uiConfig.json
Normal file
45
configs/uiConfig.json
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
{
|
||||||
|
"jobList": {
|
||||||
|
"usePaging": false,
|
||||||
|
"showFootprint":false
|
||||||
|
},
|
||||||
|
"jobView": {
|
||||||
|
"showPolarPlot": true,
|
||||||
|
"showFootprint": true,
|
||||||
|
"showRoofline": true,
|
||||||
|
"showStatTable": true
|
||||||
|
},
|
||||||
|
"metricConfig": {
|
||||||
|
"jobListMetrics": ["mem_bw", "flops_dp"],
|
||||||
|
"jobViewPlotMetrics": ["mem_bw", "flops_dp"],
|
||||||
|
"jobViewTableMetrics": ["mem_bw", "flops_dp"],
|
||||||
|
"clusters": [
|
||||||
|
{
|
||||||
|
"name": "test",
|
||||||
|
"subClusters": [
|
||||||
|
{
|
||||||
|
"name": "one",
|
||||||
|
"jobListMetrics": ["mem_used", "flops_sp"]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"nodeList": {
|
||||||
|
"usePaging": true
|
||||||
|
},
|
||||||
|
"plotConfiguration": {
|
||||||
|
"plotsPerRow": 3,
|
||||||
|
"colorBackground": true,
|
||||||
|
"lineWidth": 3,
|
||||||
|
"colorScheme": [
|
||||||
|
"#00bfff",
|
||||||
|
"#0000ff",
|
||||||
|
"#ff00ff",
|
||||||
|
"#ff0000",
|
||||||
|
"#ff8000",
|
||||||
|
"#ffff00",
|
||||||
|
"#80ff00"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
# Release versions
|
|
||||||
|
|
||||||
Versions are marked according to [semantic versioning] (https://semver.org).
|
|
||||||
Each version embeds the following static assets in the binary:
|
|
||||||
* Web frontend with javascript files and all static assets.
|
|
||||||
* Golang template files for server-side rendering.
|
|
||||||
* JSON schema files for validation.
|
|
||||||
* Database migration files.
|
|
||||||
|
|
||||||
The remaining external assets are:
|
|
||||||
* The SQL database used.
|
|
||||||
* The job archive
|
|
||||||
* The configuration files `config.json` and `.env`.
|
|
||||||
|
|
||||||
The external assets are versioned with integer IDs.
|
|
||||||
This means that each release binary is bound to specific versions of the SQL
|
|
||||||
database and the job archive.
|
|
||||||
The configuration file is checked against the current schema at startup.
|
|
||||||
The `-migrate-db` command line switch can be used to upgrade the SQL database
|
|
||||||
to migrate from a previous version to the latest one.
|
|
||||||
We offer a separate tool `archive-migration` to migrate an existing job archive
|
|
||||||
archive from the previous to the latest version.
|
|
||||||
|
|
||||||
# Versioning of APIs
|
|
||||||
|
|
||||||
cc-backend provides two API backends:
|
|
||||||
* A REST API for querying jobs.
|
|
||||||
* A GraphQL API for data exchange between web frontend and cc-backend.
|
|
||||||
|
|
||||||
The REST API will also be versioned. We still have to decide whether we will also
|
|
||||||
support older REST API versions by versioning the endpoint URLs.
|
|
||||||
The GraphQL API is for internal use and will not be versioned.
|
|
||||||
|
|
||||||
# How to build
|
|
||||||
|
|
||||||
In general it is recommended to use the provided release binary.
|
|
||||||
In case you want to build `cc-backend` please always use the provided makefile. This will ensure
|
|
||||||
that the frontend is also built correctly and that the version in the binary is encoded in the binary.
|
|
||||||
234
docs/Hands-on.md
234
docs/Hands-on.md
@@ -1,234 +0,0 @@
|
|||||||
# Hands-on setup ClusterCockpit from scratch (w/o docker)
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
* perl
|
|
||||||
* go
|
|
||||||
* npm
|
|
||||||
* Optional: curl
|
|
||||||
* Script migrateTimestamp.pl
|
|
||||||
|
|
||||||
## Documentation
|
|
||||||
You find READMEs or api docs in
|
|
||||||
* ./cc-backend/configs
|
|
||||||
* ./cc-backend/init
|
|
||||||
* ./cc-backend/api
|
|
||||||
|
|
||||||
## ClusterCockpit configuration files
|
|
||||||
### cc-backend
|
|
||||||
* `./.env` Passwords and Tokens set in the environment
|
|
||||||
* `./config.json` Configuration options for cc-backend
|
|
||||||
|
|
||||||
### cc-metric-store
|
|
||||||
* `./config.json` Optional to overwrite configuration options
|
|
||||||
|
|
||||||
### cc-metric-collector
|
|
||||||
Not yet included in the hands-on setup.
|
|
||||||
|
|
||||||
## Setup Components
|
|
||||||
Start by creating a base folder for all of the following steps.
|
|
||||||
* `mkdir clustercockpit`
|
|
||||||
* `cd clustercockpit`
|
|
||||||
|
|
||||||
### Setup cc-backend
|
|
||||||
* Clone Repository
|
|
||||||
- `git clone https://github.com/ClusterCockpit/cc-backend.git`
|
|
||||||
- `cd cc-backend`
|
|
||||||
* Build
|
|
||||||
- `make`
|
|
||||||
* Activate & configure environment for cc-backend
|
|
||||||
- `cp configs/env-template.txt .env`
|
|
||||||
- Optional: Have a look via `vim .env`
|
|
||||||
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
|
|
||||||
* Back to toplevel `clustercockpit`
|
|
||||||
- `cd ..`
|
|
||||||
* Prepare Datafolder and Database file
|
|
||||||
- `mkdir var`
|
|
||||||
- `./cc-backend -migrate-db`
|
|
||||||
|
|
||||||
### Setup cc-metric-store
|
|
||||||
* Clone Repository
|
|
||||||
- `git clone https://github.com/ClusterCockpit/cc-metric-store.git`
|
|
||||||
- `cd cc-metric-store`
|
|
||||||
* Build Go Executable
|
|
||||||
- `go get`
|
|
||||||
- `go build`
|
|
||||||
* Prepare Datafolders
|
|
||||||
- `mkdir -p var/checkpoints`
|
|
||||||
- `mkdir -p var/archive`
|
|
||||||
* Update Config
|
|
||||||
- `vim config.json`
|
|
||||||
- Exchange existing setting in `metrics` with the following:
|
|
||||||
```
|
|
||||||
"clock": { "frequency": 60, "aggregation": null },
|
|
||||||
"cpi": { "frequency": 60, "aggregation": null },
|
|
||||||
"cpu_load": { "frequency": 60, "aggregation": null },
|
|
||||||
"flops_any": { "frequency": 60, "aggregation": null },
|
|
||||||
"flops_dp": { "frequency": 60, "aggregation": null },
|
|
||||||
"flops_sp": { "frequency": 60, "aggregation": null },
|
|
||||||
"ib_bw": { "frequency": 60, "aggregation": null },
|
|
||||||
"lustre_bw": { "frequency": 60, "aggregation": null },
|
|
||||||
"mem_bw": { "frequency": 60, "aggregation": null },
|
|
||||||
"mem_used": { "frequency": 60, "aggregation": null },
|
|
||||||
"rapl_power": { "frequency": 60, "aggregation": null }
|
|
||||||
```
|
|
||||||
* Back to toplevel `clustercockpit`
|
|
||||||
- `cd ..`
|
|
||||||
|
|
||||||
### Setup Demo Data
|
|
||||||
* `mkdir source-data`
|
|
||||||
* `cd source-data`
|
|
||||||
* Download JobArchive-Source:
|
|
||||||
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz`
|
|
||||||
- `tar xJf job-archive-dev.tar.xz`
|
|
||||||
- `mv ./job-archive ./job-archive-source`
|
|
||||||
- `rm ./job-archive-dev.tar.xz`
|
|
||||||
* Download CC-Metric-Store Checkpoints:
|
|
||||||
- `mkdir -p cc-metric-store-source/checkpoints`
|
|
||||||
- `cd cc-metric-store-source/checkpoints`
|
|
||||||
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz`
|
|
||||||
- `tar xf cc-metric-store-checkpoints.tar.xz`
|
|
||||||
- `rm cc-metric-store-checkpoints.tar.xz`
|
|
||||||
* Back to `source-data`
|
|
||||||
- `cd ../..`
|
|
||||||
* Run timestamp migration script. This may take tens of minutes!
|
|
||||||
- `cp ../migrateTimestamps.pl .`
|
|
||||||
- `./migrateTimestamps.pl`
|
|
||||||
- Expected output:
|
|
||||||
```
|
|
||||||
Starting to update start- and stoptimes in job-archive for emmy
|
|
||||||
Starting to update start- and stoptimes in job-archive for woody
|
|
||||||
Done for job-archive
|
|
||||||
Starting to update checkpoint filenames and data starttimes for emmy
|
|
||||||
Starting to update checkpoint filenames and data starttimes for woody
|
|
||||||
Done for checkpoints
|
|
||||||
```
|
|
||||||
* Copy `cluster.json` files from source to migrated folders
|
|
||||||
- `cp source-data/job-archive-source/emmy/cluster.json cc-backend/var/job-archive/emmy/`
|
|
||||||
- `cp source-data/job-archive-source/woody/cluster.json cc-backend/var/job-archive/woody/`
|
|
||||||
* Initialize Job-Archive in SQLite3 job.db and add demo user
|
|
||||||
- `cd cc-backend`
|
|
||||||
- `./cc-backend -init-db -add-user demo:admin:demo`
|
|
||||||
- Expected output:
|
|
||||||
```
|
|
||||||
<6>[INFO] new user "demo" created (roles: ["admin"], auth-source: 0)
|
|
||||||
<6>[INFO] Building job table...
|
|
||||||
<6>[INFO] A total of 3936 jobs have been registered in 1.791 seconds.
|
|
||||||
```
|
|
||||||
* Back to toplevel `clustercockpit`
|
|
||||||
- `cd ..`
|
|
||||||
|
|
||||||
### Startup both Apps
|
|
||||||
* In cc-backend root: `$./cc-backend -server -dev`
|
|
||||||
- Starts Clustercockpit at `http://localhost:8080`
|
|
||||||
- Log: `<6>[INFO] HTTP server listening at :8080...`
|
|
||||||
- Use local internet browser to access interface
|
|
||||||
- You should see and be able to browse finished Jobs
|
|
||||||
- Metadata is read from SQLite3 database
|
|
||||||
- Metricdata is read from job-archive/JSON-Files
|
|
||||||
- Create User in settings (top-right corner)
|
|
||||||
- Name `apiuser`
|
|
||||||
- Username `apiuser`
|
|
||||||
- Role `API`
|
|
||||||
- Submit & Refresh Page
|
|
||||||
- Create JWT for `apiuser`
|
|
||||||
- In Userlist, press `Gen. JWT` for `apiuser`
|
|
||||||
- Save JWT for later use
|
|
||||||
* In cc-metric-store root: `$./cc-metric-store`
|
|
||||||
- Start the cc-metric-store on `http://localhost:8081`, Log:
|
|
||||||
```
|
|
||||||
2022/07/15 17:17:42 Loading checkpoints newer than 2022-07-13T17:17:42+02:00
|
|
||||||
2022/07/15 17:17:45 Checkpoints loaded (5621 files, 319 MB, that took 3.034652s)
|
|
||||||
2022/07/15 17:17:45 API http endpoint listening on '0.0.0.0:8081'
|
|
||||||
```
|
|
||||||
- Does *not* have a graphical interface
|
|
||||||
- Optional: Test function by executing:
|
|
||||||
```
|
|
||||||
$ curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -D - "http://localhost:8081/api/query" -d "{ \"cluster\": \"emmy\", \"from\": $(expr $(date +%s) - 60), \"to\": $(date +%s), \"queries\": [{
|
|
||||||
\"metric\": \"flops_any\",
|
|
||||||
\"host\": \"e1111\"
|
|
||||||
}] }"
|
|
||||||
|
|
||||||
HTTP/1.1 200 OK
|
|
||||||
Content-Type: application/json
|
|
||||||
Date: Fri, 15 Jul 2022 13:57:22 GMT
|
|
||||||
Content-Length: 119
|
|
||||||
{"results":[[JSON-DATA-ARRAY]]}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Development API web interfaces
|
|
||||||
The `-dev` flag enables web interfaces to document and test the apis:
|
|
||||||
* http://localhost:8080/playground - A GraphQL playground. To use it you must have an authenticated session in the same browser.
|
|
||||||
* http://localhost:8080/swagger - A Swagger UI. To use it you have to be logged out, so no user session in the same browser. Use the JWT token with role API generated previously to authenticate via http header.
|
|
||||||
|
|
||||||
### Use cc-backend API to start job
|
|
||||||
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
|
|
||||||
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
|
|
||||||
* Click the `/job/start_job` endpoint and click the Try it out button.
|
|
||||||
* Enter the following json into the request body text area and fill in a recent start timestamp by executing `date +%s`.:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"jobId": 100000,
|
|
||||||
"arrayJobId": 0,
|
|
||||||
"user": "ccdemouser",
|
|
||||||
"subCluster": "main",
|
|
||||||
"cluster": "emmy",
|
|
||||||
"startTime": <date +%s>,
|
|
||||||
"project": "ccdemoproject",
|
|
||||||
"resources": [
|
|
||||||
{"hostname": "e0601"},
|
|
||||||
{"hostname": "e0823"},
|
|
||||||
{"hostname": "e0337"},
|
|
||||||
{"hostname": "e1111"}],
|
|
||||||
"numNodes": 4,
|
|
||||||
"numHwthreads": 80,
|
|
||||||
"walltime": 86400
|
|
||||||
}
|
|
||||||
```
|
|
||||||
* The response body should be the database id of the started job, for example:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"id": 3937
|
|
||||||
}
|
|
||||||
```
|
|
||||||
* Check in ClusterCockpit
|
|
||||||
- User `ccdemouser` should appear in Users-Tab with one running job
|
|
||||||
- It could take up to 5 Minutes until the Job is displayed with some current data (5 Min Short-Job Filter)
|
|
||||||
- Job then is marked with a green `running` tag
|
|
||||||
- Metricdata displayed is read from cc-metric-store!
|
|
||||||
|
|
||||||
|
|
||||||
### Use cc-backend API to stop job
|
|
||||||
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
|
|
||||||
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
|
|
||||||
* Click the `/job/stop_job/{id}` endpoint and click the Try it out button.
|
|
||||||
* Enter the database id at id that was returned by `start_job` and copy the following into the request body. Replace the timestamp with a recent one:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"cluster": "emmy",
|
|
||||||
"jobState": "completed",
|
|
||||||
"stopTime": <RECENT TS>
|
|
||||||
}
|
|
||||||
```
|
|
||||||
* On success a json document with the job meta data is returned.
|
|
||||||
|
|
||||||
* Check in ClusterCockpit
|
|
||||||
- User `ccdemouser` should appear in Users-Tab with one completed job
|
|
||||||
- Job is no longer marked with a green `running` tag -> Completed!
|
|
||||||
- Metricdata displayed is now read from job-archive!
|
|
||||||
* Check in job-archive
|
|
||||||
- `cd ./cc-backend/var/job-archive/emmy/100/000`
|
|
||||||
- `cd $STARTTIME`
|
|
||||||
- Inspect `meta.json` and `data.json`
|
|
||||||
|
|
||||||
## Helper scripts
|
|
||||||
* In this tarball you can find the perl script `generate_subcluster.pl` that helps to generate the subcluster section for your system.
|
|
||||||
Usage:
|
|
||||||
* Log into an exclusive cluster node.
|
|
||||||
* The LIKWID tools likwid-topology and likwid-bench must be in the PATH!
|
|
||||||
* `$./generate_subcluster.pl` outputs the subcluster section on `stdout`
|
|
||||||
|
|
||||||
Please be aware that
|
|
||||||
* You have to enter the name and node list for the subCluster manually.
|
|
||||||
* GPU detection only works if LIKWID was built with Cuda available and you run likwid-topology also with Cuda loaded.
|
|
||||||
* Do not blindly trust the measured peakflops values.
|
|
||||||
* Because the script blindly relies on the CSV format output by likwid-topology this is a fragile undertaking!
|
|
||||||
@@ -1,99 +0,0 @@
|
|||||||
## Introduction
|
|
||||||
|
|
||||||
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs. JSON
|
|
||||||
Web Token (JWT) is an open standard (RFC 7519) that defines a compact and
|
|
||||||
self-contained way for securely transmitting information between parties as a
|
|
||||||
JSON object. This information can be verified and trusted because it is
|
|
||||||
digitally signed. In ClusterCockpit JWTs are signed using a public/private key
|
|
||||||
pair using ECDSA. Because tokens are signed using public/private key pairs, the
|
|
||||||
signature also certifies that only the party holding the private key is the one
|
|
||||||
that signed it. Token expiration is set to the configuration option MaxAge.
|
|
||||||
|
|
||||||
## JWT Payload
|
|
||||||
|
|
||||||
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
|
|
||||||
Currently ClusterCockpit sets the following claims:
|
|
||||||
* `iat`: Issued at claim. The “iat” claim is used to identify the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
|
|
||||||
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
|
|
||||||
* `roles`: An array of strings specifying the roles set for the subject.
|
|
||||||
|
|
||||||
## Workflow
|
|
||||||
|
|
||||||
1. Create a new ECDSA Public/private keypair:
|
|
||||||
```
|
|
||||||
$ go build ./tools/gen-keypair.go
|
|
||||||
$ ./gen-keypair
|
|
||||||
```
|
|
||||||
2. Add keypair in your `.env` file. A template can be found in `./configs`.
|
|
||||||
|
|
||||||
There are two usage scenarios:
|
|
||||||
* The APIs are used during a browser session. API accesses are authorized with
|
|
||||||
the active session.
|
|
||||||
* The REST API is used outside a browser session, e.g. by scripts. In this case
|
|
||||||
you have to issue a token manually. This is possible from within the
|
|
||||||
configuration view or on the command line. It is recommended to issue a JWT
|
|
||||||
token in this case for a special user that only has the `api` role. By using
|
|
||||||
different users for different purposes a fine grained access control and
|
|
||||||
access revocation management is possible.
|
|
||||||
|
|
||||||
The token is commonly specified in the Authorization HTTP header using the Bearer schema.
|
|
||||||
|
|
||||||
## Setup user and JWT token for REST API authorization
|
|
||||||
|
|
||||||
1. Create user:
|
|
||||||
```
|
|
||||||
$ ./cc-backend --add-user <username>:api:<Password> --no-server
|
|
||||||
```
|
|
||||||
2. Issue token for user:
|
|
||||||
```
|
|
||||||
$ ./cc-backend -jwt <username> -no-server
|
|
||||||
```
|
|
||||||
3. Use the issued token on client side:
|
|
||||||
```
|
|
||||||
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Accept externally generated JWTs provided via cookie
|
|
||||||
If there is an external service like an AuthAPI that can generate JWTs and hand
|
|
||||||
them over to ClusterCockpit via cookies, CC can be configured to accept them:
|
|
||||||
|
|
||||||
1. `.env`: CC needs a public ed25519 key to verify foreign JWT signatures.
|
|
||||||
Public keys in PEM format can be converted with the instructions in
|
|
||||||
[/tools/convert-pem-pubkey-for-cc](../tools/convert-pem-pubkey-for-cc/Readme.md)
|
|
||||||
.
|
|
||||||
|
|
||||||
```
|
|
||||||
CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc="
|
|
||||||
```
|
|
||||||
|
|
||||||
2. `config.json`: Insert a name for the cookie (set by the external service)
|
|
||||||
containing the JWT so that CC knows where to look at. Define a trusted issuer
|
|
||||||
(JWT claim 'iss'), otherwise it will be rejected. If you want usernames and
|
|
||||||
user roles from JWTs ('sub' and 'roles' claim) to be validated against CC's
|
|
||||||
internal database, you need to enable it here. Unknown users will then be
|
|
||||||
rejected and roles set via JWT will be ignored.
|
|
||||||
|
|
||||||
```json
|
|
||||||
"jwts": {
|
|
||||||
"cookieName": "access_cc",
|
|
||||||
"forceJWTValidationViaDatabase": true,
|
|
||||||
"trustedExternalIssuer": "auth.example.com"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Make sure your external service includes the same issuer (`iss`) in its JWTs.
|
|
||||||
Example JWT payload:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"iat": 1668161471,
|
|
||||||
"nbf": 1668161471,
|
|
||||||
"exp": 1668161531,
|
|
||||||
"sub": "alice",
|
|
||||||
"roles": [
|
|
||||||
"user"
|
|
||||||
],
|
|
||||||
"jti": "a1b2c3d4-1234-5678-abcd-a1b2c3d4e5f6",
|
|
||||||
"iss": "auth.example.com"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
The job archive specifies an exchange format for job meta and performance metric
|
|
||||||
data. It consists of two parts:
|
|
||||||
* a [SQLite database schema](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#sqlite-database-schema) for job meta data and performance statistics
|
|
||||||
* a [Json file format](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#json-file-format) together with a [Directory hierarchy specification](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#directory-hierarchy-specification)
|
|
||||||
|
|
||||||
By using an open, portable and simple specification based on files it is
|
|
||||||
possible to exchange job performance data for research and analysis purposes as
|
|
||||||
well as use it as a robust way for archiving job performance data to disk.
|
|
||||||
|
|
||||||
# SQLite database schema
|
|
||||||
## Introduction
|
|
||||||
|
|
||||||
A SQLite 3 database schema is provided to standardize the job meta data
|
|
||||||
information in a portable way. The schema also includes optional columns for job
|
|
||||||
performance statistics (called a job performance footprint). The database acts
|
|
||||||
as a front end to filter and select subsets of job IDs, that are the keys to get
|
|
||||||
the full job performance data in the job performance tree hierarchy.
|
|
||||||
|
|
||||||
## Database schema
|
|
||||||
|
|
||||||
The schema includes 3 tables: the job table, a tag table and a jobtag table
|
|
||||||
representing the MANY-TO-MANY relation between jobs and tags. The SQL schema is
|
|
||||||
specified
|
|
||||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/schemas/jobs-sqlite.sql).
|
|
||||||
Explanation of the various columns including the JSON datatypes is documented
|
|
||||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/job-meta.schema.json).
|
|
||||||
|
|
||||||
# Directory hierarchy specification
|
|
||||||
|
|
||||||
## Specification
|
|
||||||
|
|
||||||
To manage the number of directories within a single directory a tree approach is
|
|
||||||
used splitting the integer job ID. The job id is split in chunks of 1000 each.
|
|
||||||
Usually 2 layers of directories is sufficient but the concept can be used for an
|
|
||||||
arbitrary number of layers.
|
|
||||||
|
|
||||||
For a 2 layer schema this can be achieved with (code example in Perl):
|
|
||||||
``` perl
|
|
||||||
$level1 = $jobID/1000;
|
|
||||||
$level2 = $jobID%1000;
|
|
||||||
$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2);
|
|
||||||
```
|
|
||||||
|
|
||||||
## Example
|
|
||||||
|
|
||||||
For the job ID 1034871 the directory path is `./1034/871/`.
|
|
||||||
|
|
||||||
# Json file format
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
Every cluster must be configured in a `cluster.json` file.
|
|
||||||
|
|
||||||
The job data consists of two files:
|
|
||||||
* `meta.json`: Contains job meta information and job statistics.
|
|
||||||
* `data.json`: Contains complete job data with time series
|
|
||||||
|
|
||||||
The description of the json format specification is available as [[json
|
|
||||||
schema|https://json-schema.org/]] format file. The latest version of the json
|
|
||||||
schema is part of the `cc-backend` source tree. For external reference it is
|
|
||||||
also available in a separate repository.
|
|
||||||
|
|
||||||
## Specification `cluster.json`
|
|
||||||
|
|
||||||
The json schema specification is available
|
|
||||||
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/cluster.schema.json).
|
|
||||||
|
|
||||||
## Specification `meta.json`
|
|
||||||
|
|
||||||
The json schema specification is available
|
|
||||||
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-meta.schema.json).
|
|
||||||
|
|
||||||
## Specification `data.json`
|
|
||||||
|
|
||||||
The json schema specification is available
|
|
||||||
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-data.schema.json).
|
|
||||||
Metric time series data is stored for a fixed time step. The time step is set
|
|
||||||
per metric. If no value is available for a metric time series data timestamp
|
|
||||||
`null` is entered.
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
# Overview
|
|
||||||
|
|
||||||
Customizing `cc-backend` means changing the logo, legal texts, and the login
|
|
||||||
template instead of the placeholders. You can also place a text file in `./var`
|
|
||||||
to add dynamic status or notification messages to the clusterCockpit homepage.
|
|
||||||
|
|
||||||
# Replace legal texts
|
|
||||||
|
|
||||||
To replace the `imprint.tmpl` and `privacy.tmpl` legal texts, you can place your
|
|
||||||
version in `./var/`. At startup `cc-backend` will check if `./var/imprint.tmpl` and/or
|
|
||||||
`./var/privacy.tmpl` exist and use them instead of the built-in placeholders.
|
|
||||||
You can use the placeholders in `web/templates` as a blueprint.
|
|
||||||
|
|
||||||
# Replace login template
|
|
||||||
To replace the default login layout and styling, you can place your version in
|
|
||||||
`./var/`. At startup `cc-backend` will check if `./var/login.tmpl` exist and use
|
|
||||||
it instead of the built-in placeholder. You can use the default template
|
|
||||||
`web/templates/login.tmpl` as a blueprint.
|
|
||||||
|
|
||||||
# Replace logo
|
|
||||||
To change the logo displayed in the navigation bar, you can provide the file
|
|
||||||
`logo.png` in the folder `./var/img/`. On startup `cc-backend` will check if the
|
|
||||||
folder exists and use the images provided there instead of the built-in images.
|
|
||||||
You may also place additional images there you use in a custom login template.
|
|
||||||
|
|
||||||
# Add notification banner on homepage
|
|
||||||
To add a notification banner you can add a file `notice.txt` to `./var`. As long
|
|
||||||
as this file is present all text in this file is shown in an info banner on the
|
|
||||||
homepage.
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
In general, an upgrade is nothing more than a replacement of the binary file.
|
|
||||||
All the necessary files, except the database file, the configuration file and
|
|
||||||
the job archive, are embedded in the binary file. It is recommended to use a
|
|
||||||
directory where the file names of the binary files are named with a version
|
|
||||||
indicator. This can be, for example, the date or the Unix epoch time. A symbolic
|
|
||||||
link points to the version to be used. This makes it easier to switch to earlier
|
|
||||||
versions.
|
|
||||||
|
|
||||||
The database and the job archive are versioned. Each release binary supports
|
|
||||||
specific versions of the database and job archive. If a version mismatch is
|
|
||||||
detected, the application is terminated and migration is required.
|
|
||||||
|
|
||||||
**IMPORTANT NOTE**
|
|
||||||
|
|
||||||
It is recommended to make a backup copy of the database before each update. This
|
|
||||||
is mandatory in case the database needs to be migrated. In the case of sqlite,
|
|
||||||
this means stopping `cc-backend` and copying the sqlite database file
|
|
||||||
somewhere.
|
|
||||||
|
|
||||||
# Migrating the database
|
|
||||||
|
|
||||||
After you have backed up the database, run the following command to migrate the
|
|
||||||
database to the latest version:
|
|
||||||
```
|
|
||||||
$ ./cc-backend -migrate-db
|
|
||||||
```
|
|
||||||
|
|
||||||
The migration files are embedded in the binary and can also be viewed in the cc
|
|
||||||
backend [source tree](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository/migrations).
|
|
||||||
There are separate migration files for both supported
|
|
||||||
database backends.
|
|
||||||
We use the [migrate library](https://github.com/golang-migrate/migrate).
|
|
||||||
|
|
||||||
If something goes wrong, you can check the status and get the current schema
|
|
||||||
(here for sqlite):
|
|
||||||
```
|
|
||||||
$ sqlite3 var/job.db
|
|
||||||
```
|
|
||||||
In the sqlite console execute:
|
|
||||||
```
|
|
||||||
.schema
|
|
||||||
```
|
|
||||||
to get the current database schema.
|
|
||||||
You can query the current version and whether the migration failed with:
|
|
||||||
```
|
|
||||||
SELECT * FROM schema_migrations;
|
|
||||||
```
|
|
||||||
The first column indicates the current database version and the second column is
|
|
||||||
a dirty flag indicating whether the migration was successful.
|
|
||||||
|
|
||||||
# Migrating the job archive
|
|
||||||
|
|
||||||
Job archive migration requires a separate tool (`archive-migration`), which is
|
|
||||||
part of the cc-backend source tree (build with `go build ./tools/archive-migration`)
|
|
||||||
and is also provided as part of the releases.
|
|
||||||
|
|
||||||
Migration is supported only between two successive releases. The migration tool
|
|
||||||
migrates the existing job archive to a new job archive. This means that there
|
|
||||||
must be enough disk space for two complete job archives. If the tool is called
|
|
||||||
without options:
|
|
||||||
```
|
|
||||||
$ ./archive-migration
|
|
||||||
```
|
|
||||||
|
|
||||||
it is assumed that a job archive exists in `./var/job-archive`. The new job
|
|
||||||
archive is written to `./var/job-archive-new`. Since execution is threaded in case
|
|
||||||
of a fatal error, it is impossible to determine in which job the error occurred.
|
|
||||||
In this case, you can run the tool in debug mode (with the `-debug` flag). In
|
|
||||||
debug mode, threading is disabled and the job ID of each migrated job is output.
|
|
||||||
Jobs with empty files will be skipped. Between multiple runs of the tools, the
|
|
||||||
`job-archive-new` directory must be moved or deleted.
|
|
||||||
|
|
||||||
The `cluster.json` files in `job-archive-new` must be checked for errors, especially
|
|
||||||
whether the aggregation attribute is set correctly for all metrics.
|
|
||||||
|
|
||||||
Migration takes several hours for relatively large job archives (several hundred
|
|
||||||
GB). A versioned job archive contains a version.txt file in the root directory
|
|
||||||
of the job archive. This file contains the version as an unsigned integer.
|
|
||||||
@@ -1,180 +0,0 @@
|
|||||||
# Overview
|
|
||||||
|
|
||||||
The authentication is implemented in `internal/auth/`. In `auth.go`
|
|
||||||
an interface is defined that any authentication provider must fulfill. It also
|
|
||||||
acts as a dispatcher to delegate the calls to the available authentication
|
|
||||||
providers.
|
|
||||||
|
|
||||||
Two authentication types are available:
|
|
||||||
* JWT authentication for the REST API that does not create a session cookie
|
|
||||||
* Session based authentication using a session cookie
|
|
||||||
|
|
||||||
The most important routines in auth are:
|
|
||||||
* `Login()` Handle POST request to login user and start a new session
|
|
||||||
* `Auth()` Authenticate user and put User Object in context of the request
|
|
||||||
|
|
||||||
The http router calls auth in the following cases:
|
|
||||||
* `r.Handle("/login", authentication.Login( ... )).Methods(http.MethodPost)`:
|
|
||||||
The POST request on the `/login` route will call the Login callback.
|
|
||||||
* `r.Handle("/jwt-login", authentication.Login( ... ))`:
|
|
||||||
Any request on the `/jwt-login` route will call the Login callback. Intended
|
|
||||||
for use for the JWT token based authenticators.
|
|
||||||
* Any route in the secured subrouter will always call Auth(), on success it will
|
|
||||||
call the next handler in the chain, on failure it will render the login
|
|
||||||
template.
|
|
||||||
```
|
|
||||||
secured.Use(func(next http.Handler) http.Handler {
|
|
||||||
return authentication.Auth(
|
|
||||||
// On success;
|
|
||||||
next,
|
|
||||||
|
|
||||||
// On failure:
|
|
||||||
func(rw http.ResponseWriter, r *http.Request, err error) {
|
|
||||||
// Render login form
|
|
||||||
})
|
|
||||||
})
|
|
||||||
```
|
|
||||||
|
|
||||||
A JWT token can be used to initiate an authenticated user
|
|
||||||
session. This can either happen by calling the login route with a token
|
|
||||||
provided in a header or via a special cookie containing the JWT token.
|
|
||||||
For API routes the access is authenticated on every request using the JWT token
|
|
||||||
and no session is initiated.
|
|
||||||
|
|
||||||
# Login
|
|
||||||
|
|
||||||
The Login function (located in `auth.go`):
|
|
||||||
* Extracts the user name and gets the user from the user database table. In case the
|
|
||||||
user is not found the user object is set to nil.
|
|
||||||
* Iterates over all authenticators and:
|
|
||||||
- Calls its `CanLogin` function which checks if the authentication method is
|
|
||||||
supported for this user.
|
|
||||||
- Calls its `Login` function to authenticate the user. On success a valid user
|
|
||||||
object is returned.
|
|
||||||
- Creates a new session object, stores the user attributes in the session and
|
|
||||||
saves the session.
|
|
||||||
- Starts the `onSuccess` http handler
|
|
||||||
|
|
||||||
## Local authenticator
|
|
||||||
|
|
||||||
This authenticator is applied if
|
|
||||||
```
|
|
||||||
return user != nil && user.AuthSource == AuthViaLocalPassword
|
|
||||||
```
|
|
||||||
|
|
||||||
Compares the password provided by the login form to the password hash stored in
|
|
||||||
the user database table:
|
|
||||||
```
|
|
||||||
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
|
|
||||||
log.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
|
|
||||||
return nil, fmt.Errorf("Authentication failed")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## LDAP authenticator
|
|
||||||
|
|
||||||
This authenticator is applied if the user was found in the database and its
|
|
||||||
AuthSource is LDAP:
|
|
||||||
```
|
|
||||||
if user != nil {
|
|
||||||
if user.AuthSource == schema.AuthViaLDAP {
|
|
||||||
return user, true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
If the option `SyncUserOnLogin` is set it tried to sync the user from the LDAP
|
|
||||||
directory. In case this succeeds the user is persisted to the database and can
|
|
||||||
login.
|
|
||||||
|
|
||||||
Gets the LDAP connection and tries a bind with the provided credentials:
|
|
||||||
```
|
|
||||||
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
|
|
||||||
log.Errorf("AUTH/LDAP > Authentication for user %s failed: %v", user.Username, err)
|
|
||||||
return nil, fmt.Errorf("Authentication failed")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## JWT Session authenticator
|
|
||||||
|
|
||||||
Login via JWT token will create a session without password.
|
|
||||||
For login the `X-Auth-Token` header is not supported. This authenticator is
|
|
||||||
applied if the Authorization header or query parameter login-token is present:
|
|
||||||
```
|
|
||||||
return user, r.Header.Get("Authorization") != "" ||
|
|
||||||
r.URL.Query().Get("login-token") != ""
|
|
||||||
```
|
|
||||||
|
|
||||||
The Login function:
|
|
||||||
* Parses the token and checks if it is expired
|
|
||||||
* Check if the signing method is EdDSA or HS256 or HS512
|
|
||||||
* Check if claims are valid and extracts the claims
|
|
||||||
* The following claims have to be present:
|
|
||||||
- `sub`: The subject, in this case this is the username
|
|
||||||
- `exp`: Expiration in Unix epoch time
|
|
||||||
- `roles`: String array with roles of user
|
|
||||||
* In case user does not exist in the database and the option `SyncUserOnLogin`
|
|
||||||
is set add user to user database table with `AuthViaToken` AuthSource.
|
|
||||||
* Return valid user object
|
|
||||||
|
|
||||||
## JWT Cookie Session authenticator
|
|
||||||
|
|
||||||
Login via JWT cookie token will create a session without password.
|
|
||||||
It is first checked if the required configuration options are set:
|
|
||||||
* `trustedIssuer`
|
|
||||||
* `CookieName`
|
|
||||||
|
|
||||||
and optionally the environment variable `CROSS_LOGIN_JWT_PUBLIC_KEY` is set.
|
|
||||||
|
|
||||||
This authenticator is applied if the configured cookie is present:
|
|
||||||
```
|
|
||||||
jwtCookie, err := r.Cookie(cookieName)
|
|
||||||
|
|
||||||
if err == nil && jwtCookie.Value != "" {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The Login function:
|
|
||||||
* Extracts and parses the token
|
|
||||||
* Checks if signing method is Ed25519/EdDSA
|
|
||||||
* In case publicKeyCrossLogin is configured:
|
|
||||||
- Check if `iss` issuer claim matched trusted issuer from configuration
|
|
||||||
- Return public cross login key
|
|
||||||
- Otherwise return standard public key
|
|
||||||
* Check if claims are valid
|
|
||||||
* Depending on the option `validateUser` the roles are
|
|
||||||
extracted from JWT token or taken from user object fetched from database
|
|
||||||
* Ask browser to delete the JWT cookie
|
|
||||||
* In case user does not exist in the database and the option `SyncUserOnLogin`
|
|
||||||
is set add user to user database table with `AuthViaToken` AuthSource.
|
|
||||||
* Return valid user object
|
|
||||||
|
|
||||||
# Auth
|
|
||||||
|
|
||||||
The Auth function (located in `auth.go`):
|
|
||||||
* Returns a new http handler function that is defined right away
|
|
||||||
* This handler tries two methods to authenticate a user:
|
|
||||||
- Via a JWT API token in `AuthViaJWT()`
|
|
||||||
- Via a valid session in `AuthViaSession()`
|
|
||||||
* If err is not nil and the user object is valid it puts the user object in the
|
|
||||||
request context and starts the onSuccess http handler
|
|
||||||
* Otherwise it calls the onFailure handler
|
|
||||||
|
|
||||||
## AuthViaJWT
|
|
||||||
|
|
||||||
Implemented in JWTAuthenticator:
|
|
||||||
* Extract token either from header `X-Auth-Token` or `Authorization` with Bearer
|
|
||||||
prefix
|
|
||||||
* Parse token and check if it is valid. The Parse routine will also check if the
|
|
||||||
token is expired.
|
|
||||||
* If the option `validateUser` is set it will ensure the
|
|
||||||
user object exists in the database and takes the roles from the database user
|
|
||||||
* Otherwise the roles are extracted from the roles claim
|
|
||||||
* Returns a valid user object with AuthType set to AuthToken
|
|
||||||
|
|
||||||
## AuthViaSession
|
|
||||||
|
|
||||||
* Extracts session
|
|
||||||
* Get values username, projects, and roles from session
|
|
||||||
* Returns a valid user object with AuthType set to AuthSession
|
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
## Tips for frontend development
|
|
||||||
|
|
||||||
The frontend assets including the Svelte js files are per default embedded in
|
|
||||||
the bgo binary. To enable a quick turnaround cycle for web development of the
|
|
||||||
frontend disable embedding of static assets in `config.json`:
|
|
||||||
```
|
|
||||||
"embed-static-files": false,
|
|
||||||
"static-files": "./web/frontend/public/",
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
Start the node build process (in directory `./web/frontend`) in development mode:
|
|
||||||
```
|
|
||||||
$ npm run dev
|
|
||||||
```
|
|
||||||
|
|
||||||
This will start the build process in listen mode. Whenever you change a source
|
|
||||||
files the depending javascript targets will be automatically rebuild.
|
|
||||||
In case the javascript files are minified you may need to set the production
|
|
||||||
flag by hand to false in `./web/frontend/rollup.config.mjs`:
|
|
||||||
```
|
|
||||||
const production = false
|
|
||||||
```
|
|
||||||
|
|
||||||
Usually this should work automatically.
|
|
||||||
|
|
||||||
Because the files are still served by ./cc-backend you have to reload the view
|
|
||||||
explicitly in your browser.
|
|
||||||
|
|
||||||
A common setup is to have three terminals open:
|
|
||||||
* One running cc-backend (working directory repository root): `./cc-backend -server -dev`
|
|
||||||
* Another running npm in developer mode (working directory `./web/frontend`): `npm run dev`
|
|
||||||
* And the last one editing the frontend source files
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
# Steps to prepare a release
|
|
||||||
|
|
||||||
1. On `hotfix` branch:
|
|
||||||
* Update ReleaseNotes.md
|
|
||||||
* Update version in Makefile
|
|
||||||
* Commit, push, and pull request
|
|
||||||
* Merge in master
|
|
||||||
|
|
||||||
2. On Linux host:
|
|
||||||
* Pull master
|
|
||||||
* Ensure that GitHub Token environment variable `GITHUB_TOKEN` is set
|
|
||||||
* Create release tag: `git tag v1.1.0 -m release`
|
|
||||||
* Execute `goreleaser release`
|
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
## Overview
|
|
||||||
|
|
||||||
We use the standard golang testing environment.
|
|
||||||
|
|
||||||
The following conventions are used:
|
|
||||||
|
|
||||||
* *White box unit tests*: Tests for internal functionality are placed in files
|
|
||||||
* *Black box unit tests*: Tests for public interfaces are placed in files
|
|
||||||
with `<package name>_test.go` and belong to the package `<package_name>_test`.
|
|
||||||
There only exists one package test file per package.
|
|
||||||
* *Integration tests*: Tests that use multiple componenents are placed in a
|
|
||||||
package test file. These are named `<package name>_test.go` and belong to the
|
|
||||||
package `<package_name>_test`.
|
|
||||||
* *Test assets*: Any required files are placed in a directory `./testdata`
|
|
||||||
within each package directory.
|
|
||||||
|
|
||||||
## Executing tests
|
|
||||||
|
|
||||||
Visual Studio Code has a very good golang test integration.
|
|
||||||
For debugging a test this is the recommended solution.
|
|
||||||
|
|
||||||
The Makefile provided by us has a `test` target that executes:
|
|
||||||
```
|
|
||||||
$ go clean -testcache
|
|
||||||
$ go build ./...
|
|
||||||
$ go vet ./...
|
|
||||||
$ go test ./...
|
|
||||||
```
|
|
||||||
|
|
||||||
Of course the commands can also be used on the command line.
|
|
||||||
For details about golang testing refer to the standard documentation:
|
|
||||||
|
|
||||||
* [Testing package](https://pkg.go.dev/testing)
|
|
||||||
* [go test command](https://pkg.go.dev/cmd/go#hdr-Test_packages)
|
|
||||||
@@ -1,229 +0,0 @@
|
|||||||
#!/usr/bin/env perl
|
|
||||||
use strict;
|
|
||||||
use warnings;
|
|
||||||
use utf8;
|
|
||||||
|
|
||||||
use JSON::PP; # from Perl default install
|
|
||||||
use Time::Local qw( timelocal ); # from Perl default install
|
|
||||||
use Time::Piece; # from Perl default install
|
|
||||||
|
|
||||||
### JSON
|
|
||||||
my $json = JSON::PP->new->allow_nonref;
|
|
||||||
|
|
||||||
### TIME AND DATE
|
|
||||||
# now
|
|
||||||
my $localtime = localtime;
|
|
||||||
my $epochtime = $localtime->epoch;
|
|
||||||
# 5 days ago: Via epoch due to possible reverse month borders
|
|
||||||
my $epochlessfive = $epochtime - (86400 * 5);
|
|
||||||
my $locallessfive = localtime($epochlessfive);
|
|
||||||
# Calc like `date --date 'TZ="Europe/Berlin" 0:00 5 days ago' +%s`)
|
|
||||||
my ($day, $month, $year) = ($locallessfive->mday, $locallessfive->_mon, $locallessfive->year);
|
|
||||||
my $checkpointStart = timelocal(0, 0, 0, $day, $month, $year);
|
|
||||||
# for checkpoints
|
|
||||||
my $halfday = 43200;
|
|
||||||
|
|
||||||
### JOB-ARCHIVE
|
|
||||||
my $archiveTarget = './cc-backend/var/job-archive';
|
|
||||||
my $archiveSrc = './source-data/job-archive-source';
|
|
||||||
my @ArchiveClusters;
|
|
||||||
|
|
||||||
# Gen folder
|
|
||||||
if ( not -d $archiveTarget ){
|
|
||||||
mkdir( $archiveTarget ) or die "Couldn't create $archiveTarget directory, $!";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get clusters by job-archive/$subfolder
|
|
||||||
opendir my $dh, $archiveSrc or die "can't open directory: $!";
|
|
||||||
while ( readdir $dh ) {
|
|
||||||
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
|
|
||||||
my $cluster = $_;
|
|
||||||
push @ArchiveClusters, $cluster;
|
|
||||||
}
|
|
||||||
|
|
||||||
# start for jobarchive
|
|
||||||
foreach my $cluster ( @ArchiveClusters ) {
|
|
||||||
print "Starting to update start- and stoptimes in job-archive for $cluster\n";
|
|
||||||
|
|
||||||
my $clusterTarget = "$archiveTarget/$cluster";
|
|
||||||
|
|
||||||
if ( not -d $clusterTarget ){
|
|
||||||
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
|
|
||||||
}
|
|
||||||
|
|
||||||
opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
|
|
||||||
while ( readdir $dhLevel1 ) {
|
|
||||||
chomp; next if $_ eq '.' or $_ eq '..';
|
|
||||||
my $level1 = $_;
|
|
||||||
|
|
||||||
if ( -d "$archiveSrc/$cluster/$level1" ) {
|
|
||||||
opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
|
|
||||||
while ( readdir $dhLevel2 ) {
|
|
||||||
chomp; next if $_ eq '.' or $_ eq '..';
|
|
||||||
my $level2 = $_;
|
|
||||||
my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
|
|
||||||
my $jobOrigin = "$jobSource";
|
|
||||||
my $jobTargetL1 = "$clusterTarget/$level1";
|
|
||||||
my $jobTargetL2 = "$jobTargetL1/$level2";
|
|
||||||
|
|
||||||
# check if files are directly accessible (old format) else get subfolders as file and update path
|
|
||||||
if ( ! -e "$jobSource/meta.json") {
|
|
||||||
opendir(D, "$jobSource") || die "Can't open directory $jobSource: $!\n";
|
|
||||||
my @folders = readdir(D);
|
|
||||||
closedir(D);
|
|
||||||
if (!@folders) {
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach my $folder ( @folders ) {
|
|
||||||
next if $folder eq '.' or $folder eq '..';
|
|
||||||
$jobSource = "$jobSource/".$folder;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# check if subfolder contains file, else skip
|
|
||||||
if ( ! -e "$jobSource/meta.json") {
|
|
||||||
print "$jobSource skipped\n";
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
open my $metafh, '<', "$jobSource/meta.json" or die "Can't open file $!";
|
|
||||||
my $rawstr = do { local $/; <$metafh> };
|
|
||||||
close($metafh);
|
|
||||||
my $metadata = $json->decode($rawstr);
|
|
||||||
|
|
||||||
# NOTE Start meta.json iteration here
|
|
||||||
# my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT;
|
|
||||||
# Set new startTime: Between 5 days and 1 day before now
|
|
||||||
|
|
||||||
# Remove id from attributes
|
|
||||||
$metadata->{startTime} = $epochtime - (int(rand(432000)) + 86400);
|
|
||||||
$metadata->{stopTime} = $metadata->{startTime} + $metadata->{duration};
|
|
||||||
|
|
||||||
# Add starttime subfolder to target path
|
|
||||||
my $jobTargetL3 = "$jobTargetL2/".$metadata->{startTime};
|
|
||||||
|
|
||||||
if ( not -d $jobTargetL1 ){
|
|
||||||
mkdir( $jobTargetL1 ) or die "Couldn't create $jobTargetL1 directory, $!";
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( not -d $jobTargetL2 ){
|
|
||||||
mkdir( $jobTargetL2 ) or die "Couldn't create $jobTargetL2 directory, $!";
|
|
||||||
}
|
|
||||||
|
|
||||||
# target is not directory
|
|
||||||
if ( not -d $jobTargetL3 ){
|
|
||||||
mkdir( $jobTargetL3 ) or die "Couldn't create $jobTargetL3 directory, $!";
|
|
||||||
|
|
||||||
my $outstr = $json->encode($metadata);
|
|
||||||
open my $metaout, '>', "$jobTargetL3/meta.json" or die "Can't write to file $!";
|
|
||||||
print $metaout $outstr;
|
|
||||||
close($metaout);
|
|
||||||
|
|
||||||
open my $datafh, '<', "$jobSource/data.json" or die "Can't open file $!";
|
|
||||||
my $datastr = do { local $/; <$datafh> };
|
|
||||||
close($datafh);
|
|
||||||
|
|
||||||
open my $dataout, '>', "$jobTargetL3/data.json" or die "Can't write to file $!";
|
|
||||||
print $dataout $datastr;
|
|
||||||
close($dataout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
print "Done for job-archive\n";
|
|
||||||
sleep(1);
|
|
||||||
exit;
|
|
||||||
|
|
||||||
## CHECKPOINTS
|
|
||||||
my $checkpTarget = './cc-metric-store/var/checkpoints';
|
|
||||||
my $checkpSource = './source-data/cc-metric-store-source/checkpoints';
|
|
||||||
my @CheckpClusters;
|
|
||||||
|
|
||||||
# Gen folder
|
|
||||||
if ( not -d $checkpTarget ){
|
|
||||||
mkdir( $checkpTarget ) or die "Couldn't create $checkpTarget directory, $!";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Get clusters by cc-metric-store/$subfolder
|
|
||||||
opendir my $dhc, $checkpSource or die "can't open directory: $!";
|
|
||||||
while ( readdir $dhc ) {
|
|
||||||
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
|
|
||||||
my $cluster = $_;
|
|
||||||
push @CheckpClusters, $cluster;
|
|
||||||
}
|
|
||||||
closedir($dhc);
|
|
||||||
|
|
||||||
# start for checkpoints
|
|
||||||
foreach my $cluster ( @CheckpClusters ) {
|
|
||||||
print "Starting to update checkpoint filenames and data starttimes for $cluster\n";
|
|
||||||
|
|
||||||
my $clusterTarget = "$checkpTarget/$cluster";
|
|
||||||
|
|
||||||
if ( not -d $clusterTarget ){
|
|
||||||
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
|
|
||||||
}
|
|
||||||
|
|
||||||
opendir my $dhLevel1, "$checkpSource/$cluster" or die "can't open directory: $!";
|
|
||||||
while ( readdir $dhLevel1 ) {
|
|
||||||
chomp; next if $_ eq '.' or $_ eq '..';
|
|
||||||
# Nodename as level1-folder
|
|
||||||
my $level1 = $_;
|
|
||||||
|
|
||||||
if ( -d "$checkpSource/$cluster/$level1" ) {
|
|
||||||
|
|
||||||
my $nodeSource = "$checkpSource/$cluster/$level1/";
|
|
||||||
my $nodeOrigin = "$nodeSource";
|
|
||||||
my $nodeTarget = "$clusterTarget/$level1";
|
|
||||||
my @files;
|
|
||||||
|
|
||||||
if ( -e "$nodeSource/1609459200.json") { # 1609459200 == First Checkpoint time in latest dump
|
|
||||||
opendir(D, "$nodeSource") || die "Can't open directory $nodeSource: $!\n";
|
|
||||||
while ( readdir D ) {
|
|
||||||
chomp; next if $_ eq '.' or $_ eq '..';
|
|
||||||
my $nodeFile = $_;
|
|
||||||
push @files, $nodeFile;
|
|
||||||
}
|
|
||||||
closedir(D);
|
|
||||||
my $length = @files;
|
|
||||||
if (!@files || $length != 14) { # needs 14 files == 7 days worth of data
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
next;
|
|
||||||
}
|
|
||||||
|
|
||||||
# sort for integer timestamp-filename-part (moduleless): Guarantees start with index == 0 == 1609459200.json
|
|
||||||
my @sortedFiles = sort { ($a =~ /^([0-9]{10}).json$/)[0] <=> ($b =~ /^([0-9]{10}).json$/)[0] } @files;
|
|
||||||
|
|
||||||
if ( not -d $nodeTarget ){
|
|
||||||
mkdir( $nodeTarget ) or die "Couldn't create $nodeTarget directory, $!";
|
|
||||||
|
|
||||||
while (my ($index, $file) = each(@sortedFiles)) {
|
|
||||||
open my $checkfh, '<', "$nodeSource/$file" or die "Can't open file $!";
|
|
||||||
my $rawstr = do { local $/; <$checkfh> };
|
|
||||||
close($checkfh);
|
|
||||||
my $checkpdata = $json->decode($rawstr);
|
|
||||||
|
|
||||||
my $newTimestamp = $checkpointStart + ($index * $halfday);
|
|
||||||
# Get Diff from old Timestamp
|
|
||||||
my $timeDiff = $newTimestamp - $checkpdata->{from};
|
|
||||||
# Set new timestamp
|
|
||||||
$checkpdata->{from} = $newTimestamp;
|
|
||||||
|
|
||||||
foreach my $metric (keys %{$checkpdata->{metrics}}) {
|
|
||||||
$checkpdata->{metrics}->{$metric}->{start} += $timeDiff;
|
|
||||||
}
|
|
||||||
|
|
||||||
my $outstr = $json->encode($checkpdata);
|
|
||||||
|
|
||||||
open my $checkout, '>', "$nodeTarget/$newTimestamp.json" or die "Can't write to file $!";
|
|
||||||
print $checkout $outstr;
|
|
||||||
close($checkout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
closedir($dhLevel1);
|
|
||||||
}
|
|
||||||
print "Done for checkpoints\n";
|
|
||||||
@@ -1,36 +0,0 @@
|
|||||||
# Docs for ClusterCockpit Searchbar
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
* Searchtags are implemented as `type:<query>` search-string
|
|
||||||
* Types `jobId, jobName, projectId, username, name, arrayJobId` for roles `admin` and `support`
|
|
||||||
* `jobName` is jobName as persisted in `job.meta_data` table-column
|
|
||||||
* `username` is actual account identifier as persisted in `job.user` table-column
|
|
||||||
* `name` is account owners name as persisted in `user.name` table-column
|
|
||||||
* Types `jobId, jobName, projectId, arrayJobId` for role `user`
|
|
||||||
* Examples:
|
|
||||||
* `jobName:myJob12`
|
|
||||||
* `jobId:123456`
|
|
||||||
* `username:abcd100`
|
|
||||||
* `name:Paul`
|
|
||||||
* If no searchTag used: Best guess search with the following hierarchy
|
|
||||||
* `jobId -> username -> name -> projectId -> jobName`
|
|
||||||
* Destinations:
|
|
||||||
* JobId: Job-Table (Allows multiple identical matches, e.g. JobIds from different clusters)
|
|
||||||
* JobName: Job-Table (Allows multiple identical matches, e.g. JobNames from different clusters)
|
|
||||||
* ProjectId: Job-Table
|
|
||||||
* Username: Users-Table
|
|
||||||
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` is active by default and might filter out expected users.
|
|
||||||
* Name: Users-Table
|
|
||||||
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table. Also, a `Last 30 Days` is active by default and might filter out expected users.
|
|
||||||
* ArrayJobId: Job-Table (Lists all Jobs of Queried ArrayJobId)
|
|
||||||
* Best guess search always redirects to Job-Table or `/monitoring/user/$USER` (first username match)
|
|
||||||
* Unprocessable queries will display messages detailing the cause (Info, Warning, Error)
|
|
||||||
* Spaces trimmed (both for searchTag and queryString)
|
|
||||||
* ` job12` == `job12`
|
|
||||||
* `projectID : abcd ` == `projectId:abcd`
|
|
||||||
* `jobName`- and `name-`queries work with a part of the target-string
|
|
||||||
* `jobName:myjob` for jobName "myjob_cluster1"
|
|
||||||
* `name:Paul` for name "Paul Atreides"
|
|
||||||
|
|
||||||
* JobName GQL Query is resolved as matching the query as a part of the whole metaData-JSON in the SQL DB.
|
|
||||||
170
go.mod
170
go.mod
@@ -1,88 +1,126 @@
|
|||||||
module github.com/ClusterCockpit/cc-backend
|
module github.com/ClusterCockpit/cc-backend
|
||||||
|
|
||||||
go 1.18
|
go 1.24.0
|
||||||
|
|
||||||
|
toolchain go1.24.1
|
||||||
|
|
||||||
|
tool (
|
||||||
|
github.com/99designs/gqlgen
|
||||||
|
github.com/swaggo/swag/cmd/swag
|
||||||
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/99designs/gqlgen v0.17.36
|
github.com/99designs/gqlgen v0.17.84
|
||||||
github.com/ClusterCockpit/cc-units v0.4.0
|
github.com/ClusterCockpit/cc-lib v1.0.2
|
||||||
github.com/Masterminds/squirrel v1.5.3
|
github.com/Masterminds/squirrel v1.5.4
|
||||||
github.com/go-co-op/gocron v1.25.0
|
github.com/aws/aws-sdk-go-v2 v1.41.0
|
||||||
github.com/go-ldap/ldap/v3 v3.4.4
|
github.com/aws/aws-sdk-go-v2/config v1.31.20
|
||||||
github.com/go-sql-driver/mysql v1.7.0
|
github.com/aws/aws-sdk-go-v2/credentials v1.18.24
|
||||||
github.com/golang-jwt/jwt/v4 v4.5.0
|
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.2
|
||||||
github.com/golang-migrate/migrate/v4 v4.15.2
|
github.com/coreos/go-oidc/v3 v3.16.0
|
||||||
github.com/google/gops v0.3.27
|
github.com/expr-lang/expr v1.17.6
|
||||||
github.com/gorilla/handlers v1.5.1
|
github.com/go-co-op/gocron/v2 v2.18.2
|
||||||
github.com/gorilla/mux v1.8.0
|
github.com/go-ldap/ldap/v3 v3.4.12
|
||||||
github.com/gorilla/sessions v1.2.1
|
github.com/golang-jwt/jwt/v5 v5.3.0
|
||||||
github.com/influxdata/influxdb-client-go/v2 v2.12.2
|
github.com/golang-migrate/migrate/v4 v4.19.1
|
||||||
github.com/jmoiron/sqlx v1.3.5
|
github.com/google/gops v0.3.28
|
||||||
github.com/mattn/go-sqlite3 v1.14.16
|
github.com/gorilla/handlers v1.5.2
|
||||||
github.com/prometheus/client_golang v1.14.0
|
github.com/gorilla/mux v1.8.1
|
||||||
github.com/prometheus/common v0.40.0
|
github.com/gorilla/sessions v1.4.0
|
||||||
|
github.com/influxdata/line-protocol/v2 v2.2.1
|
||||||
|
github.com/jmoiron/sqlx v1.4.0
|
||||||
|
github.com/joho/godotenv v1.5.1
|
||||||
|
github.com/linkedin/goavro/v2 v2.14.1
|
||||||
|
github.com/mattn/go-sqlite3 v1.14.32
|
||||||
|
github.com/nats-io/nats.go v1.47.0
|
||||||
|
github.com/prometheus/client_golang v1.23.2
|
||||||
|
github.com/prometheus/common v0.67.4
|
||||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||||
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0
|
github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
|
||||||
github.com/swaggo/http-swagger v1.3.3
|
github.com/stretchr/testify v1.11.1
|
||||||
github.com/swaggo/swag v1.16.1
|
github.com/swaggo/http-swagger v1.3.4
|
||||||
github.com/vektah/gqlparser/v2 v2.5.8
|
github.com/swaggo/swag v1.16.6
|
||||||
golang.org/x/crypto v0.12.0
|
github.com/vektah/gqlparser/v2 v2.5.31
|
||||||
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
|
golang.org/x/crypto v0.45.0
|
||||||
|
golang.org/x/oauth2 v0.32.0
|
||||||
|
golang.org/x/time v0.14.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
|
||||||
github.com/KyleBanks/depth v1.2.1 // indirect
|
github.com/KyleBanks/depth v1.2.1 // indirect
|
||||||
github.com/agnivade/levenshtein v1.1.1 // indirect
|
github.com/agnivade/levenshtein v1.2.1 // indirect
|
||||||
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
|
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/sso v1.30.3 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.7 // indirect
|
||||||
|
github.com/aws/aws-sdk-go-v2/service/sts v1.40.2 // indirect
|
||||||
|
github.com/aws/smithy-go v1.24.0 // indirect
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
github.com/containerd/containerd v1.6.18 // indirect
|
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
|
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||||
github.com/deepmap/oapi-codegen v1.12.4 // indirect
|
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||||
github.com/felixge/httpsnoop v1.0.3 // indirect
|
github.com/fsnotify/fsnotify v1.9.0 // indirect
|
||||||
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
|
github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect
|
||||||
github.com/go-openapi/jsonpointer v0.20.0 // indirect
|
github.com/go-jose/go-jose/v4 v4.1.3 // indirect
|
||||||
github.com/go-openapi/jsonreference v0.20.2 // indirect
|
github.com/go-openapi/jsonpointer v0.22.3 // indirect
|
||||||
github.com/go-openapi/spec v0.20.9 // indirect
|
github.com/go-openapi/jsonreference v0.21.3 // indirect
|
||||||
github.com/go-openapi/swag v0.22.4 // indirect
|
github.com/go-openapi/spec v0.22.1 // indirect
|
||||||
github.com/golang/protobuf v1.5.2 // indirect
|
github.com/go-openapi/swag/conv v0.25.4 // indirect
|
||||||
github.com/google/uuid v1.3.0 // indirect
|
github.com/go-openapi/swag/jsonname v0.25.4 // indirect
|
||||||
github.com/gorilla/securecookie v1.1.1 // indirect
|
github.com/go-openapi/swag/jsonutils v0.25.4 // indirect
|
||||||
github.com/gorilla/websocket v1.5.0 // indirect
|
github.com/go-openapi/swag/loading v0.25.4 // indirect
|
||||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
github.com/go-openapi/swag/stringutils v0.25.4 // indirect
|
||||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
github.com/go-openapi/swag/typeutils v0.25.4 // indirect
|
||||||
github.com/hashicorp/golang-lru/v2 v2.0.3 // indirect
|
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
|
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
|
||||||
github.com/josharian/intern v1.0.0 // indirect
|
github.com/goccy/go-yaml v1.19.0 // indirect
|
||||||
|
github.com/golang/snappy v0.0.4 // indirect
|
||||||
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
|
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||||
|
github.com/gorilla/websocket v1.5.3 // indirect
|
||||||
|
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
|
||||||
|
github.com/jonboulle/clockwork v0.5.0 // indirect
|
||||||
github.com/jpillora/backoff v1.0.0 // indirect
|
github.com/jpillora/backoff v1.0.0 // indirect
|
||||||
github.com/json-iterator/go v1.1.12 // indirect
|
github.com/json-iterator/go v1.1.12 // indirect
|
||||||
|
github.com/klauspost/compress v1.18.1 // indirect
|
||||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||||
github.com/mailru/easyjson v0.7.7 // indirect
|
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
|
||||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
|
||||||
github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect
|
github.com/nats-io/nkeys v0.4.11 // indirect
|
||||||
github.com/pkg/errors v0.9.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
github.com/prometheus/client_model v0.3.0 // indirect
|
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||||
github.com/prometheus/procfs v0.9.0 // indirect
|
github.com/prometheus/client_model v0.6.2 // indirect
|
||||||
|
github.com/prometheus/procfs v0.16.1 // indirect
|
||||||
github.com/robfig/cron/v3 v3.0.1 // indirect
|
github.com/robfig/cron/v3 v3.0.1 // indirect
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||||
github.com/swaggo/files v1.0.0 // indirect
|
github.com/sosodev/duration v1.3.1 // indirect
|
||||||
github.com/urfave/cli/v2 v2.25.7 // indirect
|
github.com/stretchr/objx v0.5.2 // indirect
|
||||||
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
|
github.com/swaggo/files v1.0.1 // indirect
|
||||||
go.uber.org/atomic v1.10.0 // indirect
|
github.com/urfave/cli/v2 v2.27.7 // indirect
|
||||||
golang.org/x/mod v0.12.0 // indirect
|
github.com/urfave/cli/v3 v3.6.1 // indirect
|
||||||
golang.org/x/net v0.14.0 // indirect
|
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
||||||
golang.org/x/oauth2 v0.5.0 // indirect
|
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||||
golang.org/x/sys v0.11.0 // indirect
|
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||||
golang.org/x/text v0.12.0 // indirect
|
golang.org/x/mod v0.30.0 // indirect
|
||||||
golang.org/x/tools v0.12.0 // indirect
|
golang.org/x/net v0.47.0 // indirect
|
||||||
google.golang.org/appengine v1.6.7 // indirect
|
golang.org/x/sync v0.18.0 // indirect
|
||||||
google.golang.org/protobuf v1.30.0 // indirect
|
golang.org/x/sys v0.38.0 // indirect
|
||||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
golang.org/x/text v0.31.0 // indirect
|
||||||
|
golang.org/x/tools v0.39.0 // indirect
|
||||||
|
google.golang.org/protobuf v1.36.10 // indirect
|
||||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
sigs.k8s.io/yaml v1.3.0 // indirect
|
sigs.k8s.io/yaml v1.6.0 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
63
gqlgen.yml
63
gqlgen.yml
@@ -30,7 +30,9 @@ resolver:
|
|||||||
# gqlgen will search for any type names in the schema in these go packages
|
# gqlgen will search for any type names in the schema in these go packages
|
||||||
# if they match it will use them, otherwise it will generate them.
|
# if they match it will use them, otherwise it will generate them.
|
||||||
autobind:
|
autobind:
|
||||||
|
- "github.com/99designs/gqlgen/graphql/introspection"
|
||||||
- "github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
- "github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
|
- "github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
|
||||||
# This section declares type mapping between the GraphQL and go type systems
|
# This section declares type mapping between the GraphQL and go type systems
|
||||||
#
|
#
|
||||||
@@ -50,34 +52,51 @@ models:
|
|||||||
- github.com/99designs/gqlgen/graphql.Int64
|
- github.com/99designs/gqlgen/graphql.Int64
|
||||||
- github.com/99designs/gqlgen/graphql.Int32
|
- github.com/99designs/gqlgen/graphql.Int32
|
||||||
Job:
|
Job:
|
||||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Job"
|
model: "github.com/ClusterCockpit/cc-lib/schema.Job"
|
||||||
fields:
|
fields:
|
||||||
tags:
|
tags:
|
||||||
resolver: true
|
resolver: true
|
||||||
metaData:
|
metaData:
|
||||||
resolver: true
|
resolver: true
|
||||||
Cluster:
|
Cluster:
|
||||||
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Cluster"
|
model: "github.com/ClusterCockpit/cc-lib/schema.Cluster"
|
||||||
fields:
|
fields:
|
||||||
partitions:
|
partitions:
|
||||||
resolver: true
|
resolver: true
|
||||||
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
|
# Node:
|
||||||
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
|
# model: "github.com/ClusterCockpit/cc-lib/schema.Node"
|
||||||
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
|
# fields:
|
||||||
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
|
# metaData:
|
||||||
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
|
# resolver: true
|
||||||
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
|
NullableFloat: { model: "github.com/ClusterCockpit/cc-lib/schema.Float" }
|
||||||
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
|
MetricScope: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricScope" }
|
||||||
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
|
MetricValue: { model: "github.com/ClusterCockpit/cc-lib/schema.MetricValue" }
|
||||||
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
|
JobStatistics:
|
||||||
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.JobStatistics" }
|
||||||
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
|
GlobalMetricListItem:
|
||||||
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.GlobalMetricListItem" }
|
||||||
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
|
ClusterSupport:
|
||||||
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.ClusterSupport" }
|
||||||
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
|
Tag: { model: "github.com/ClusterCockpit/cc-lib/schema.Tag" }
|
||||||
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
|
Resource: { model: "github.com/ClusterCockpit/cc-lib/schema.Resource" }
|
||||||
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
|
JobState: { model: "github.com/ClusterCockpit/cc-lib/schema.JobState" }
|
||||||
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
|
Node: { model: "github.com/ClusterCockpit/cc-lib/schema.Node" }
|
||||||
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
|
SchedulerState:
|
||||||
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.SchedulerState" }
|
||||||
|
HealthState:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.MonitoringState" }
|
||||||
|
JobMetric: { model: "github.com/ClusterCockpit/cc-lib/schema.JobMetric" }
|
||||||
|
Series: { model: "github.com/ClusterCockpit/cc-lib/schema.Series" }
|
||||||
|
MetricStatistics:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.MetricStatistics" }
|
||||||
|
MetricConfig:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.MetricConfig" }
|
||||||
|
SubClusterConfig:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.SubClusterConfig" }
|
||||||
|
Accelerator: { model: "github.com/ClusterCockpit/cc-lib/schema.Accelerator" }
|
||||||
|
Topology: { model: "github.com/ClusterCockpit/cc-lib/schema.Topology" }
|
||||||
|
FilterRanges:
|
||||||
|
{ model: "github.com/ClusterCockpit/cc-lib/schema.FilterRanges" }
|
||||||
|
SubCluster: { model: "github.com/ClusterCockpit/cc-lib/schema.SubCluster" }
|
||||||
|
StatsSeries: { model: "github.com/ClusterCockpit/cc-lib/schema.StatsSeries" }
|
||||||
|
Unit: { model: "github.com/ClusterCockpit/cc-lib/schema.Unit" }
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=ClusterCockpit Web Server (Go edition)
|
Description=ClusterCockpit Web Server
|
||||||
Documentation=https://github.com/ClusterCockpit/cc-backend
|
Documentation=https://github.com/ClusterCockpit/cc-backend
|
||||||
Wants=network-online.target
|
Wants=network-online.target
|
||||||
After=network-online.target
|
After=network-online.target
|
||||||
After=mariadb.service mysql.service
|
# Database is file-based SQLite - no service dependency required
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
WorkingDirectory=/opt/monitoring/cc-backend
|
WorkingDirectory=/opt/monitoring/cc-backend
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
package api_test
|
package api_test
|
||||||
@@ -14,38 +14,49 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/api"
|
"github.com/ClusterCockpit/cc-backend/internal/api"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/auth"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
"github.com/ClusterCockpit/cc-backend/internal/graph"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
"github.com/gorilla/mux"
|
"github.com/gorilla/mux"
|
||||||
|
|
||||||
_ "github.com/mattn/go-sqlite3"
|
_ "github.com/mattn/go-sqlite3"
|
||||||
)
|
)
|
||||||
|
|
||||||
func setup(t *testing.T) *api.RestApi {
|
func setup(t *testing.T) *api.RestAPI {
|
||||||
const testconfig = `{
|
const testconfig = `{
|
||||||
|
"main": {
|
||||||
"addr": "0.0.0.0:8080",
|
"addr": "0.0.0.0:8080",
|
||||||
"validate": false,
|
"validate": false,
|
||||||
|
"apiAllowedIPs": [
|
||||||
|
"*"
|
||||||
|
]
|
||||||
|
},
|
||||||
"archive": {
|
"archive": {
|
||||||
"kind": "file",
|
"kind": "file",
|
||||||
"path": "./var/job-archive"
|
"path": "./var/job-archive"
|
||||||
},
|
},
|
||||||
"jwts": {
|
"auth": {
|
||||||
"max-age": "2m"
|
"jwts": {
|
||||||
},
|
"max-age": "2m"
|
||||||
|
}
|
||||||
|
},
|
||||||
"clusters": [
|
"clusters": [
|
||||||
{
|
{
|
||||||
"name": "testcluster",
|
"name": "testcluster",
|
||||||
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
|
|
||||||
"filterRanges": {
|
"filterRanges": {
|
||||||
"numNodes": { "from": 1, "to": 64 },
|
"numNodes": { "from": 1, "to": 64 },
|
||||||
"duration": { "from": 0, "to": 86400 },
|
"duration": { "from": 0, "to": 86400 },
|
||||||
@@ -54,7 +65,7 @@ func setup(t *testing.T) *api.RestApi {
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
}`
|
}`
|
||||||
const testclusterJson = `{
|
const testclusterJSON = `{
|
||||||
"name": "testcluster",
|
"name": "testcluster",
|
||||||
"subClusters": [
|
"subClusters": [
|
||||||
{
|
{
|
||||||
@@ -110,97 +121,108 @@ func setup(t *testing.T) *api.RestApi {
|
|||||||
]
|
]
|
||||||
}`
|
}`
|
||||||
|
|
||||||
log.Init("info", true)
|
cclog.Init("info", true)
|
||||||
tmpdir := t.TempDir()
|
tmpdir := t.TempDir()
|
||||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
if err := os.Mkdir(jobarchive, 0o777); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0777); err != nil {
|
if err := os.Mkdir(filepath.Join(jobarchive, "testcluster"), 0o777); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJson), 0666); err != nil {
|
if err := os.WriteFile(filepath.Join(jobarchive, "testcluster", "cluster.json"), []byte(testclusterJSON), 0o666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
dbfilepath := filepath.Join(tmpdir, "test.db")
|
dbfilepath := filepath.Join(tmpdir, "test.db")
|
||||||
err := repository.MigrateDB("sqlite3", dbfilepath)
|
err := repository.MigrateDB(dbfilepath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
|
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config.Init(cfgFilePath)
|
ccconf.Init(cfgFilePath)
|
||||||
|
|
||||||
|
// Load and check main configuration
|
||||||
|
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||||
|
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||||
|
config.Init(cfg, clustercfg)
|
||||||
|
} else {
|
||||||
|
cclog.Abort("Cluster configuration must be present")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cclog.Abort("Main configuration must be present")
|
||||||
|
}
|
||||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||||
|
|
||||||
repository.Connect("sqlite3", dbfilepath)
|
repository.Connect("sqlite3", dbfilepath)
|
||||||
db := repository.GetConnection()
|
|
||||||
|
|
||||||
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
|
// Initialize memorystore (optional - will return nil if not configured)
|
||||||
t.Fatal(err)
|
// For this test, we don't initialize it to test the nil handling
|
||||||
|
mscfg := ccconf.GetPackageConfig("metric-store")
|
||||||
|
if mscfg != nil {
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
memorystore.Init(mscfg, &wg)
|
||||||
}
|
}
|
||||||
|
|
||||||
jobRepo := repository.GetJobRepository()
|
archiver.Start(repository.GetJobRepository(), context.Background())
|
||||||
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
|
|
||||||
|
|
||||||
return &api.RestApi{
|
if cfg := ccconf.GetPackageConfig("auth"); cfg != nil {
|
||||||
JobRepository: resolver.Repo,
|
auth.Init(&cfg)
|
||||||
Resolver: resolver,
|
} else {
|
||||||
|
cclog.Warn("Authentication disabled due to missing configuration")
|
||||||
|
auth.Init(nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
graph.Init()
|
||||||
|
|
||||||
|
return api.New()
|
||||||
}
|
}
|
||||||
|
|
||||||
func cleanup() {
|
func cleanup() {
|
||||||
// TODO: Clear all caches, reset all modules, etc...
|
// Gracefully shutdown archiver with timeout
|
||||||
|
if err := archiver.Shutdown(5 * time.Second); err != nil {
|
||||||
|
cclog.Warnf("Archiver shutdown timeout in tests: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown memorystore if it was initialized
|
||||||
|
memorystore.Shutdown()
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function starts a job, stops it, and then reads its data from the job-archive.
|
* This function starts a job, stops it, and tests the REST API.
|
||||||
* Do not run sub-tests in parallel! Tests should not be run in parallel at all, because
|
* Do not run sub-tests in parallel! Tests should not be run in parallel at all, because
|
||||||
* at least `setup` modifies global state.
|
* at least `setup` modifies global state.
|
||||||
*/
|
*/
|
||||||
func TestRestApi(t *testing.T) {
|
func TestRestApi(t *testing.T) {
|
||||||
restapi := setup(t)
|
restapi := setup(t)
|
||||||
t.Cleanup(cleanup)
|
t.Cleanup(cleanup)
|
||||||
|
|
||||||
testData := schema.JobData{
|
|
||||||
"load_one": map[schema.MetricScope]*schema.JobMetric{
|
|
||||||
schema.MetricScopeNode: {
|
|
||||||
Unit: schema.Unit{Base: "load"},
|
|
||||||
Timestep: 60,
|
|
||||||
Series: []schema.Series{
|
|
||||||
{
|
|
||||||
Hostname: "host123",
|
|
||||||
Statistics: schema.MetricStatistics{Min: 0.1, Avg: 0.2, Max: 0.3},
|
|
||||||
Data: []schema.Float{0.1, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 0.3},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
|
|
||||||
return testData, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
r := mux.NewRouter()
|
r := mux.NewRouter()
|
||||||
restapi.MountRoutes(r)
|
r.PathPrefix("/api").Subrouter()
|
||||||
|
r.StrictSlash(true)
|
||||||
|
restapi.MountAPIRoutes(r)
|
||||||
|
|
||||||
|
var TestJobId int64 = 123
|
||||||
|
TestClusterName := "testcluster"
|
||||||
|
var TestStartTime int64 = 123456789
|
||||||
|
|
||||||
const startJobBody string = `{
|
const startJobBody string = `{
|
||||||
"jobId": 123,
|
"jobId": 123,
|
||||||
"user": "testuser",
|
"user": "testuser",
|
||||||
"project": "testproj",
|
"project": "testproj",
|
||||||
"cluster": "testcluster",
|
"cluster": "testcluster",
|
||||||
@@ -210,10 +232,9 @@ func TestRestApi(t *testing.T) {
|
|||||||
"numNodes": 1,
|
"numNodes": 1,
|
||||||
"numHwthreads": 8,
|
"numHwthreads": 8,
|
||||||
"numAcc": 0,
|
"numAcc": 0,
|
||||||
"exclusive": 1,
|
"shared": "none",
|
||||||
"monitoringStatus": 1,
|
"monitoringStatus": 1,
|
||||||
"smt": 1,
|
"smt": 1,
|
||||||
"tags": [{ "type": "testTagType", "name": "testTagName" }],
|
|
||||||
"resources": [
|
"resources": [
|
||||||
{
|
{
|
||||||
"hostname": "host123",
|
"hostname": "host123",
|
||||||
@@ -224,28 +245,28 @@ func TestRestApi(t *testing.T) {
|
|||||||
"startTime": 123456789
|
"startTime": 123456789
|
||||||
}`
|
}`
|
||||||
|
|
||||||
var dbid int64
|
const contextUserKey repository.ContextKey = "user"
|
||||||
|
contextUserValue := &schema.User{
|
||||||
|
Username: "testuser",
|
||||||
|
Projects: make([]string, 0),
|
||||||
|
Roles: []string{"user"},
|
||||||
|
AuthType: 0,
|
||||||
|
AuthSource: 2,
|
||||||
|
}
|
||||||
|
|
||||||
if ok := t.Run("StartJob", func(t *testing.T) {
|
if ok := t.Run("StartJob", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusCreated {
|
if response.StatusCode != http.StatusCreated {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
}
|
}
|
||||||
|
restapi.JobRepository.SyncJobs()
|
||||||
var res api.StartJobApiResponse
|
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||||
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID)))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -257,23 +278,16 @@ func TestRestApi(t *testing.T) {
|
|||||||
job.SubCluster != "sc1" ||
|
job.SubCluster != "sc1" ||
|
||||||
job.Partition != "default" ||
|
job.Partition != "default" ||
|
||||||
job.Walltime != 3600 ||
|
job.Walltime != 3600 ||
|
||||||
job.ArrayJobId != 0 ||
|
job.ArrayJobID != 0 ||
|
||||||
job.NumNodes != 1 ||
|
job.NumNodes != 1 ||
|
||||||
job.NumHWThreads != 8 ||
|
job.NumHWThreads != 8 ||
|
||||||
job.NumAcc != 0 ||
|
job.NumAcc != 0 ||
|
||||||
job.Exclusive != 1 ||
|
|
||||||
job.MonitoringStatus != 1 ||
|
job.MonitoringStatus != 1 ||
|
||||||
job.SMT != 1 ||
|
job.SMT != 1 ||
|
||||||
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
|
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
|
||||||
job.StartTime.Unix() != 123456789 {
|
job.StartTime != 123456789 {
|
||||||
t.Fatalf("unexpected job properties: %#v", job)
|
t.Fatalf("unexpected job properties: %#v", job)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" {
|
|
||||||
t.Fatalf("unexpected tags: %#v", job.Tags)
|
|
||||||
}
|
|
||||||
|
|
||||||
dbid = res.DBID
|
|
||||||
}); !ok {
|
}); !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -287,19 +301,20 @@ func TestRestApi(t *testing.T) {
|
|||||||
"stopTime": 123457789
|
"stopTime": 123457789
|
||||||
}`
|
}`
|
||||||
|
|
||||||
var stoppedJob *schema.Job
|
|
||||||
if ok := t.Run("StopJob", func(t *testing.T) {
|
if ok := t.Run("StopJob", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusOK {
|
if response.StatusCode != http.StatusOK {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
restapi.JobRepository.WaitForArchiving()
|
// Archiving happens asynchronously, will be completed in cleanup
|
||||||
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid)))
|
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -321,30 +336,23 @@ func TestRestApi(t *testing.T) {
|
|||||||
t.Fatalf("unexpected job.metaData: %#v", job.MetaData)
|
t.Fatalf("unexpected job.metaData: %#v", job.MetaData)
|
||||||
}
|
}
|
||||||
|
|
||||||
stoppedJob = job
|
|
||||||
}); !ok {
|
}); !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Run("CheckArchive", func(t *testing.T) {
|
// Note: We skip the CheckArchive test because without memorystore initialized,
|
||||||
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
|
// archiving will fail gracefully. This test now focuses on the REST API itself.
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !reflect.DeepEqual(data, testData) {
|
|
||||||
t.Fatal("unexpected data fetched from archive")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
t.Run("CheckDoubleStart", func(t *testing.T) {
|
t.Run("CheckDoubleStart", func(t *testing.T) {
|
||||||
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
|
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
|
||||||
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
|
body := strings.ReplaceAll(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`)
|
||||||
|
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusUnprocessableEntity {
|
if response.StatusCode != http.StatusUnprocessableEntity {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
@@ -359,7 +367,7 @@ func TestRestApi(t *testing.T) {
|
|||||||
"partition": "default",
|
"partition": "default",
|
||||||
"walltime": 3600,
|
"walltime": 3600,
|
||||||
"numNodes": 1,
|
"numNodes": 1,
|
||||||
"exclusive": 1,
|
"shared": "none",
|
||||||
"monitoringStatus": 1,
|
"monitoringStatus": 1,
|
||||||
"smt": 1,
|
"smt": 1,
|
||||||
"resources": [
|
"resources": [
|
||||||
@@ -371,10 +379,12 @@ func TestRestApi(t *testing.T) {
|
|||||||
}`
|
}`
|
||||||
|
|
||||||
ok := t.Run("StartJobFailed", func(t *testing.T) {
|
ok := t.Run("StartJobFailed", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusCreated {
|
if response.StatusCode != http.StatusCreated {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
@@ -384,8 +394,11 @@ func TestRestApi(t *testing.T) {
|
|||||||
t.Fatal("subtest failed")
|
t.Fatal("subtest failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
restapi.JobRepository.SyncJobs()
|
||||||
|
|
||||||
const stopJobBodyFailed string = `{
|
const stopJobBodyFailed string = `{
|
||||||
"jobId": 12345,
|
"jobId": 12345,
|
||||||
"cluster": "testcluster",
|
"cluster": "testcluster",
|
||||||
|
|
||||||
"jobState": "failed",
|
"jobState": "failed",
|
||||||
@@ -393,16 +406,18 @@ func TestRestApi(t *testing.T) {
|
|||||||
}`
|
}`
|
||||||
|
|
||||||
ok = t.Run("StopJobFailed", func(t *testing.T) {
|
ok = t.Run("StopJobFailed", func(t *testing.T) {
|
||||||
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
|
||||||
recorder := httptest.NewRecorder()
|
recorder := httptest.NewRecorder()
|
||||||
|
|
||||||
r.ServeHTTP(recorder, req)
|
ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
|
||||||
|
|
||||||
|
r.ServeHTTP(recorder, req.WithContext(ctx))
|
||||||
response := recorder.Result()
|
response := recorder.Result()
|
||||||
if response.StatusCode != http.StatusOK {
|
if response.StatusCode != http.StatusOK {
|
||||||
t.Fatal(response.Status, recorder.Body.String())
|
t.Fatal(response.Status, recorder.Body.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
restapi.JobRepository.WaitForArchiving()
|
// Archiving happens asynchronously, will be completed in cleanup
|
||||||
jobid, cluster := int64(12345), "testcluster"
|
jobid, cluster := int64(12345), "testcluster"
|
||||||
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
71
internal/api/cluster.go
Normal file
71
internal/api/cluster.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetClustersAPIResponse model
|
||||||
|
type GetClustersAPIResponse struct {
|
||||||
|
Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
|
||||||
|
}
|
||||||
|
|
||||||
|
// getClusters godoc
|
||||||
|
// @summary Lists all cluster configs
|
||||||
|
// @tags Cluster query
|
||||||
|
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
|
||||||
|
// @produce json
|
||||||
|
// @param cluster query string false "Job Cluster"
|
||||||
|
// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/clusters/ [get]
|
||||||
|
func (api *RestAPI) getClusters(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||||
|
!user.HasRole(schema.RoleApi) {
|
||||||
|
|
||||||
|
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
bw := bufio.NewWriter(rw)
|
||||||
|
defer bw.Flush()
|
||||||
|
|
||||||
|
var clusters []*schema.Cluster
|
||||||
|
|
||||||
|
if r.URL.Query().Has("cluster") {
|
||||||
|
name := r.URL.Query().Get("cluster")
|
||||||
|
cluster := archive.GetCluster(name)
|
||||||
|
if cluster == nil {
|
||||||
|
handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
clusters = append(clusters, cluster)
|
||||||
|
} else {
|
||||||
|
clusters = archive.Clusters
|
||||||
|
}
|
||||||
|
|
||||||
|
payload := GetClustersAPIResponse{
|
||||||
|
Clusters: clusters,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
1264
internal/api/docs.go
1264
internal/api/docs.go
File diff suppressed because it is too large
Load Diff
1024
internal/api/job.go
Normal file
1024
internal/api/job.go
Normal file
File diff suppressed because it is too large
Load Diff
170
internal/api/memorystore.go
Normal file
170
internal/api/memorystore.go
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
|
||||||
|
"github.com/influxdata/line-protocol/v2/lineprotocol"
|
||||||
|
)
|
||||||
|
|
||||||
|
// handleFree godoc
|
||||||
|
// @summary
|
||||||
|
// @tags free
|
||||||
|
// @description This endpoint allows the users to free the Buffers from the
|
||||||
|
// metric store. This endpoint offers the users to remove then systematically
|
||||||
|
// and also allows then to prune the data under node, if they do not want to
|
||||||
|
// remove the whole node.
|
||||||
|
// @produce json
|
||||||
|
// @param to query string false "up to timestamp"
|
||||||
|
// @success 200 {string} string "ok"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /free/ [post]
|
||||||
|
func freeMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rawTo := r.URL.Query().Get("to")
|
||||||
|
if rawTo == "" {
|
||||||
|
handleError(errors.New("'to' is a required query parameter"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
to, err := strconv.ParseInt(rawTo, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
bodyDec := json.NewDecoder(r.Body)
|
||||||
|
var selectors [][]string
|
||||||
|
err = bodyDec.Decode(&selectors)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ms := memorystore.GetMemoryStore()
|
||||||
|
n := 0
|
||||||
|
for _, sel := range selectors {
|
||||||
|
bn, err := ms.Free(sel, to)
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
n += bn
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
fmt.Fprintf(rw, "buffers freed: %d\n", n)
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleWrite godoc
|
||||||
|
// @summary Receive metrics in InfluxDB line-protocol
|
||||||
|
// @tags write
|
||||||
|
// @description Write data to the in-memory store in the InfluxDB line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)
|
||||||
|
|
||||||
|
// @accept plain
|
||||||
|
// @produce json
|
||||||
|
// @param cluster query string false "If the lines in the body do not have a cluster tag, use this value instead."
|
||||||
|
// @success 200 {string} string "ok"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /write/ [post]
|
||||||
|
func writeMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
bytes, err := io.ReadAll(r.Body)
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ms := memorystore.GetMemoryStore()
|
||||||
|
dec := lineprotocol.NewDecoderWithBytes(bytes)
|
||||||
|
if err := memorystore.DecodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
|
||||||
|
cclog.Errorf("/api/write error: %s", err.Error())
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleDebug godoc
|
||||||
|
// @summary Debug endpoint
|
||||||
|
// @tags debug
|
||||||
|
// @description This endpoint allows the users to print the content of
|
||||||
|
// nodes/clusters/metrics to review the state of the data.
|
||||||
|
// @produce json
|
||||||
|
// @param selector query string false "Selector"
|
||||||
|
// @success 200 {string} string "Debug dump"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /debug/ [post]
|
||||||
|
func debugMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
raw := r.URL.Query().Get("selector")
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
selector := []string{}
|
||||||
|
if len(raw) != 0 {
|
||||||
|
selector = strings.Split(raw, ":")
|
||||||
|
}
|
||||||
|
|
||||||
|
ms := memorystore.GetMemoryStore()
|
||||||
|
if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleHealthCheck godoc
|
||||||
|
// @summary HealthCheck endpoint
|
||||||
|
// @tags healthcheck
|
||||||
|
// @description This endpoint allows the users to check if a node is healthy
|
||||||
|
// @produce json
|
||||||
|
// @param selector query string false "Selector"
|
||||||
|
// @success 200 {string} string "Debug dump"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /healthcheck/ [get]
|
||||||
|
func metricsHealth(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
rawCluster := r.URL.Query().Get("cluster")
|
||||||
|
rawNode := r.URL.Query().Get("node")
|
||||||
|
|
||||||
|
if rawCluster == "" || rawNode == "" {
|
||||||
|
handleError(errors.New("'cluster' and 'node' are required query parameter"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
|
||||||
|
selector := []string{rawCluster, rawNode}
|
||||||
|
|
||||||
|
ms := memorystore.GetMemoryStore()
|
||||||
|
if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil {
|
||||||
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
231
internal/api/nats.go
Normal file
231
internal/api/nats.go
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/archiver"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/nats"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NatsAPI provides NATS subscription-based handlers for Job and Node operations.
// It mirrors the functionality of the REST API but uses NATS messaging.
type NatsAPI struct {
	// JobRepository is the backing store used to create, look up, and stop jobs.
	JobRepository *repository.JobRepository
	// RepositoryMutex protects job creation operations from race conditions
	// when checking for duplicate jobs during startJob calls.
	RepositoryMutex sync.Mutex
}

// NewNatsAPI creates a new NatsAPI instance with default dependencies
// (the process-wide job repository singleton).
func NewNatsAPI() *NatsAPI {
	return &NatsAPI{
		JobRepository: repository.GetJobRepository(),
	}
}
|
||||||
|
|
||||||
|
// StartSubscriptions registers all NATS subscriptions for Job and Node APIs.
|
||||||
|
// Returns an error if the NATS client is not available or subscription fails.
|
||||||
|
func (api *NatsAPI) StartSubscriptions() error {
|
||||||
|
client := nats.GetClient()
|
||||||
|
if client == nil {
|
||||||
|
cclog.Warn("NATS client not available, skipping API subscriptions")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Keys.APISubjects != nil {
|
||||||
|
|
||||||
|
s := config.Keys.APISubjects
|
||||||
|
|
||||||
|
if err := client.Subscribe(s.SubjectJobStart, api.handleStartJob); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := client.Subscribe(s.SubjectJobStop, api.handleStopJob); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := client.Subscribe(s.SubjectNodeState, api.handleNodeState); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
cclog.Info("NATS API subscriptions started")
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleStartJob processes job start messages received via NATS.
// Expected JSON payload follows the schema.Job structure.
//
// The handler validates the payload, rejects duplicates (same jobId/cluster
// started within the last day), inserts the job, and attaches any tags.
// Errors are logged and the message is dropped; NATS handlers have no
// response channel to report failures on.
func (api *NatsAPI) handleStartJob(subject string, data []byte) {
	// Defaults applied before decoding; the payload may override Shared.
	req := schema.Job{
		Shared:           "none",
		MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
	}

	// Reject payloads with unknown fields to catch malformed senders early.
	dec := json.NewDecoder(bytes.NewReader(data))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&req); err != nil {
		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
		return
	}

	cclog.Debugf("NATS %s: %s", subject, req.GoString())
	req.State = schema.JobStateRunning

	if err := importer.SanityChecks(&req); err != nil {
		cclog.Errorf("NATS %s: sanity check failed: %v", subject, err)
		return
	}

	// Hold the mutex across the duplicate check and the insert so two
	// concurrent start messages cannot both pass the check. unlockOnce lets
	// us release the lock early after the insert while the deferred call
	// still guarantees release on every early-return path.
	var unlockOnce sync.Once
	api.RepositoryMutex.Lock()
	defer unlockOnce.Do(api.RepositoryMutex.Unlock)

	jobs, err := api.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
	if err != nil && err != sql.ErrNoRows {
		cclog.Errorf("NATS %s: checking for duplicate failed: %v", subject, err)
		return
	}
	if err == nil {
		// A job with the same jobId/cluster started less than a day ago is
		// treated as a duplicate delivery and ignored.
		for _, job := range jobs {
			if (req.StartTime - job.StartTime) < secondsPerDay {
				cclog.Errorf("NATS %s: job with jobId %d, cluster %s already exists (dbid: %d)",
					subject, req.JobID, req.Cluster, job.ID)
				return
			}
		}
	}

	id, err := api.JobRepository.Start(&req)
	if err != nil {
		cclog.Errorf("NATS %s: insert into database failed: %v", subject, err)
		return
	}
	// Insert done; release the mutex before the (slower) tag writes.
	unlockOnce.Do(api.RepositoryMutex.Unlock)

	for _, tag := range req.Tags {
		if _, err := api.JobRepository.AddTagOrCreate(nil, id, tag.Type, tag.Name, tag.Scope); err != nil {
			cclog.Errorf("NATS %s: adding tag to new job %d failed: %v", subject, id, err)
			return
		}
	}

	cclog.Infof("NATS: new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d",
		id, req.Cluster, req.JobID, req.User, req.StartTime)
}
|
||||||
|
|
||||||
|
// handleStopJob processes job stop messages received via NATS.
// Expected JSON payload follows the StopJobAPIRequest structure.
//
// The referenced job must currently be running; it is marked with the
// requested final state (default: completed) and handed to the archiver
// unless its monitoring is disabled. Errors are logged and the message is
// dropped; NATS handlers have no response channel to report failures on.
func (api *NatsAPI) handleStopJob(subject string, data []byte) {
	var req StopJobAPIRequest

	// Reject payloads with unknown fields to catch malformed senders early.
	dec := json.NewDecoder(bytes.NewReader(data))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&req); err != nil {
		cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
		return
	}

	if req.JobID == nil {
		cclog.Errorf("NATS %s: the field 'jobId' is required", subject)
		return
	}

	// Look the job up in the main table first; fall back to the cache of
	// recently started jobs if it is not committed yet.
	job, err := api.JobRepository.Find(req.JobID, req.Cluster, req.StartTime)
	if err != nil {
		cachedJob, cachedErr := api.JobRepository.FindCached(req.JobID, req.Cluster, req.StartTime)
		if cachedErr != nil {
			cclog.Errorf("NATS %s: finding job failed: %v (cached lookup also failed: %v)",
				subject, err, cachedErr)
			return
		}
		job = cachedJob
	}

	if job.State != schema.JobStateRunning {
		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: job has already been stopped (state is: %s)",
			subject, job.JobID, job.ID, job.Cluster, job.State)
		return
	}

	if job.StartTime > req.StopTime {
		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: stopTime %d must be >= startTime %d",
			subject, job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime)
		return
	}

	// An empty state defaults to 'completed'; anything else must validate.
	if req.State != "" && !req.State.Valid() {
		cclog.Errorf("NATS %s: jobId %d (id %d) on %s: invalid job state: %#v",
			subject, job.JobID, job.ID, job.Cluster, req.State)
		return
	} else if req.State == "" {
		req.State = schema.JobStateCompleted
	}

	job.Duration = int32(req.StopTime - job.StartTime)
	job.State = req.State
	api.JobRepository.Mutex.Lock()
	defer api.JobRepository.Mutex.Unlock()

	// Try to stop the committed job first; if that fails, the job may only
	// exist in the cache, so try the cached variant before giving up.
	if err := api.JobRepository.Stop(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
		if err := api.JobRepository.StopCached(*job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
			cclog.Errorf("NATS %s: jobId %d (id %d) on %s: marking job as '%s' failed: %v",
				subject, job.JobID, job.ID, job.Cluster, job.State, err)
			return
		}
	}

	cclog.Infof("NATS: archiving job (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%d, duration=%d, state=%s",
		job.ID, job.Cluster, job.JobID, job.User, job.StartTime, job.Duration, job.State)

	// Jobs with disabled monitoring have no metric data to archive.
	if job.MonitoringStatus == schema.MonitoringStatusDisabled {
		return
	}

	archiver.TriggerArchiving(job)
}
|
||||||
|
|
||||||
|
// handleNodeState processes node state update messages received via NATS.
|
||||||
|
// Expected JSON payload follows the UpdateNodeStatesRequest structure.
|
||||||
|
func (api *NatsAPI) handleNodeState(subject string, data []byte) {
|
||||||
|
var req UpdateNodeStatesRequest
|
||||||
|
|
||||||
|
dec := json.NewDecoder(bytes.NewReader(data))
|
||||||
|
dec.DisallowUnknownFields()
|
||||||
|
if err := dec.Decode(&req); err != nil {
|
||||||
|
cclog.Errorf("NATS %s: parsing request failed: %v", subject, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
|
||||||
|
for _, node := range req.Nodes {
|
||||||
|
state := determineState(node.States)
|
||||||
|
nodeState := schema.NodeStateDB{
|
||||||
|
TimeStamp: time.Now().Unix(),
|
||||||
|
NodeState: state,
|
||||||
|
CpusAllocated: node.CpusAllocated,
|
||||||
|
MemoryAllocated: node.MemoryAllocated,
|
||||||
|
GpusAllocated: node.GpusAllocated,
|
||||||
|
HealthState: schema.MonitoringStateFull,
|
||||||
|
JobsRunning: node.JobsRunning,
|
||||||
|
}
|
||||||
|
|
||||||
|
repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState)
|
||||||
|
}
|
||||||
|
|
||||||
|
cclog.Debugf("NATS %s: updated %d node states for cluster %s", subject, len(req.Nodes), req.Cluster)
|
||||||
|
}
|
||||||
80
internal/api/node.go
Normal file
80
internal/api/node.go
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// UpdateNodeStatesRequest is the JSON body of the node-state update endpoint:
// a cluster name plus the list of nodes (with their raw scheduler states) to update.
type UpdateNodeStatesRequest struct {
	// Nodes carries one payload per node to update.
	Nodes []schema.NodePayload `json:"nodes"`
	// Cluster is the cluster all listed nodes belong to.
	Cluster string `json:"cluster" example:"fritz"`
}
|
||||||
|
|
||||||
|
// this routine assumes that only one of them exists per node
|
||||||
|
func determineState(states []string) schema.SchedulerState {
|
||||||
|
for _, state := range states {
|
||||||
|
switch strings.ToLower(state) {
|
||||||
|
case "allocated":
|
||||||
|
return schema.NodeStateAllocated
|
||||||
|
case "reserved":
|
||||||
|
return schema.NodeStateReserved
|
||||||
|
case "idle":
|
||||||
|
return schema.NodeStateIdle
|
||||||
|
case "down":
|
||||||
|
return schema.NodeStateDown
|
||||||
|
case "mixed":
|
||||||
|
return schema.NodeStateMixed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return schema.NodeStateUnknown
|
||||||
|
}
|
||||||
|
|
||||||
|
// updateNodeStates godoc
|
||||||
|
// @summary Deliver updated Slurm node states
|
||||||
|
// @tags Nodestates
|
||||||
|
// @description Returns a JSON-encoded list of users.
|
||||||
|
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
|
||||||
|
// @produce json
|
||||||
|
// @param request body UpdateNodeStatesRequest true "Request body containing nodes and their states"
|
||||||
|
// @success 200 {object} api.DefaultApiResponse "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/nodestats/ [post]
|
||||||
|
func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// Parse request body
|
||||||
|
req := UpdateNodeStatesRequest{}
|
||||||
|
if err := decode(r.Body, &req); err != nil {
|
||||||
|
handleError(fmt.Errorf("parsing request body failed: %w", err),
|
||||||
|
http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
|
||||||
|
for _, node := range req.Nodes {
|
||||||
|
state := determineState(node.States)
|
||||||
|
nodeState := schema.NodeStateDB{
|
||||||
|
TimeStamp: time.Now().Unix(), NodeState: state,
|
||||||
|
CpusAllocated: node.CpusAllocated,
|
||||||
|
MemoryAllocated: node.MemoryAllocated,
|
||||||
|
GpusAllocated: node.GpusAllocated,
|
||||||
|
HealthState: schema.MonitoringStateFull,
|
||||||
|
JobsRunning: node.JobsRunning,
|
||||||
|
}
|
||||||
|
|
||||||
|
repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState)
|
||||||
|
}
|
||||||
|
}
|
||||||
1246
internal/api/rest.go
1246
internal/api/rest.go
File diff suppressed because it is too large
Load Diff
221
internal/api/user.go
Normal file
221
internal/api/user.go
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package api
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
)
|
||||||
|
|
||||||
|
// APIReturnedUser is the JSON shape of a single user entry returned by the
// user-listing endpoint.
type APIReturnedUser struct {
	// Username is the unique login name.
	Username string `json:"username"`
	// Name is the full display name.
	Name string `json:"name"`
	// Roles lists the role names assigned to the user.
	Roles []string `json:"roles"`
	// Email is the contact e-mail address.
	Email string `json:"email"`
	// Projects lists the projects associated with the user.
	Projects []string `json:"projects"`
}
|
||||||
|
|
||||||
|
// getUsers godoc
// @summary Returns a list of users
// @tags User
// @description Returns a JSON-encoded list of users.
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
// @produce json
// @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
// @success 200 {array} api.APIReturnedUser "List of users returned successfully"
// @failure 400 {string} string "Bad Request"
// @failure 401 {string} string "Unauthorized"
// @failure 403 {string} string "Forbidden"
// @failure 500 {string} string "Internal Server Error"
// @security ApiKeyAuth
// @router /api/users/ [get]
func (api *RestAPI) getUsers(rw http.ResponseWriter, r *http.Request) {
	// SecuredCheck() only worked with TokenAuth: Removed

	// Only administrators may enumerate users.
	if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
		handleError(fmt.Errorf("only admins are allowed to fetch a list of users"), http.StatusForbidden, rw)
		return
	}

	// Pass whether 'not-just-user' equals "true" through to the repository;
	// any other value (or absence) is treated as false.
	users, err := repository.GetUserRepository().ListUsers(r.URL.Query().Get("not-just-user") == "true")
	if err != nil {
		handleError(fmt.Errorf("listing users failed: %w", err), http.StatusInternalServerError, rw)
		return
	}

	rw.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(rw).Encode(users); err != nil {
		cclog.Errorf("Failed to encode users response: %v", err)
	}
}
|
||||||
|
|
||||||
|
// updateUser godoc
|
||||||
|
// @summary Update user roles and projects
|
||||||
|
// @tags User
|
||||||
|
// @description Allows admins to add/remove roles and projects for a user
|
||||||
|
// @produce plain
|
||||||
|
// @param id path string true "Username"
|
||||||
|
// @param add-role formData string false "Role to add"
|
||||||
|
// @param remove-role formData string false "Role to remove"
|
||||||
|
// @param add-project formData string false "Project to add"
|
||||||
|
// @param remove-project formData string false "Project to remove"
|
||||||
|
// @success 200 {string} string "Success message"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/user/{id} [post]
|
||||||
|
func (api *RestAPI) updateUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
handleError(fmt.Errorf("only admins are allowed to update a user"), http.StatusForbidden, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get Values
|
||||||
|
newrole := r.FormValue("add-role")
|
||||||
|
delrole := r.FormValue("remove-role")
|
||||||
|
newproj := r.FormValue("add-project")
|
||||||
|
delproj := r.FormValue("remove-project")
|
||||||
|
|
||||||
|
rw.Header().Set("Content-Type", "application/json")
|
||||||
|
|
||||||
|
// Handle role updates
|
||||||
|
if newrole != "" {
|
||||||
|
if err := repository.GetUserRepository().AddRole(r.Context(), mux.Vars(r)["id"], newrole); err != nil {
|
||||||
|
handleError(fmt.Errorf("adding role failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Add Role Success"}); err != nil {
|
||||||
|
cclog.Errorf("Failed to encode response: %v", err)
|
||||||
|
}
|
||||||
|
} else if delrole != "" {
|
||||||
|
if err := repository.GetUserRepository().RemoveRole(r.Context(), mux.Vars(r)["id"], delrole); err != nil {
|
||||||
|
handleError(fmt.Errorf("removing role failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Remove Role Success"}); err != nil {
|
||||||
|
cclog.Errorf("Failed to encode response: %v", err)
|
||||||
|
}
|
||||||
|
} else if newproj != "" {
|
||||||
|
if err := repository.GetUserRepository().AddProject(r.Context(), mux.Vars(r)["id"], newproj); err != nil {
|
||||||
|
handleError(fmt.Errorf("adding project failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Add Project Success"}); err != nil {
|
||||||
|
cclog.Errorf("Failed to encode response: %v", err)
|
||||||
|
}
|
||||||
|
} else if delproj != "" {
|
||||||
|
if err := repository.GetUserRepository().RemoveProject(r.Context(), mux.Vars(r)["id"], delproj); err != nil {
|
||||||
|
handleError(fmt.Errorf("removing project failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err := json.NewEncoder(rw).Encode(DefaultAPIResponse{Message: "Remove Project Success"}); err != nil {
|
||||||
|
cclog.Errorf("Failed to encode response: %v", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
handleError(fmt.Errorf("no operation specified: must provide add-role, remove-role, add-project, or remove-project"), http.StatusBadRequest, rw)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// createUser godoc
|
||||||
|
// @summary Create a new user
|
||||||
|
// @tags User
|
||||||
|
// @description Creates a new user with specified credentials and role
|
||||||
|
// @produce plain
|
||||||
|
// @param username formData string true "Username"
|
||||||
|
// @param password formData string false "Password (not required for API users)"
|
||||||
|
// @param role formData string true "User role"
|
||||||
|
// @param name formData string false "Full name"
|
||||||
|
// @param email formData string false "Email address"
|
||||||
|
// @param project formData string false "Project (required for managers)"
|
||||||
|
// @success 200 {string} string "Success message"
|
||||||
|
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/users/ [post]
|
||||||
|
func (api *RestAPI) createUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
rw.Header().Set("Content-Type", "text/plain")
|
||||||
|
me := repository.GetUserFromContext(r.Context())
|
||||||
|
if !me.HasRole(schema.RoleAdmin) {
|
||||||
|
handleError(fmt.Errorf("only admins are allowed to create new users"), http.StatusForbidden, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
username, password, role, name, email, project := r.FormValue("username"),
|
||||||
|
r.FormValue("password"), r.FormValue("role"), r.FormValue("name"),
|
||||||
|
r.FormValue("email"), r.FormValue("project")
|
||||||
|
|
||||||
|
// Validate username length
|
||||||
|
if len(username) == 0 || len(username) > 100 {
|
||||||
|
handleError(fmt.Errorf("username must be between 1 and 100 characters"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(password) == 0 && role != schema.GetRoleString(schema.RoleApi) {
|
||||||
|
handleError(fmt.Errorf("only API users are allowed to have a blank password (login will be impossible)"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(project) != 0 && role != schema.GetRoleString(schema.RoleManager) {
|
||||||
|
handleError(fmt.Errorf("only managers require a project (can be changed later)"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
} else if len(project) == 0 && role == schema.GetRoleString(schema.RoleManager) {
|
||||||
|
handleError(fmt.Errorf("managers require a project to manage (can be changed later)"), http.StatusBadRequest, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := repository.GetUserRepository().AddUser(&schema.User{
|
||||||
|
Username: username,
|
||||||
|
Name: name,
|
||||||
|
Password: password,
|
||||||
|
Email: email,
|
||||||
|
Projects: []string{project},
|
||||||
|
Roles: []string{role},
|
||||||
|
}); err != nil {
|
||||||
|
handleError(fmt.Errorf("adding user failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(rw, "User %v successfully created!\n", username)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteUser godoc
|
||||||
|
// @summary Delete a user
|
||||||
|
// @tags User
|
||||||
|
// @description Deletes a user from the system
|
||||||
|
// @produce plain
|
||||||
|
// @param username formData string true "Username to delete"
|
||||||
|
// @success 200 {string} string "Success"
|
||||||
|
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||||
|
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity"
|
||||||
|
// @security ApiKeyAuth
|
||||||
|
// @router /api/users/ [delete]
|
||||||
|
func (api *RestAPI) deleteUser(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// SecuredCheck() only worked with TokenAuth: Removed
|
||||||
|
|
||||||
|
if user := repository.GetUserFromContext(r.Context()); !user.HasRole(schema.RoleAdmin) {
|
||||||
|
handleError(fmt.Errorf("only admins are allowed to delete a user"), http.StatusForbidden, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
username := r.FormValue("username")
|
||||||
|
if err := repository.GetUserRepository().DelUser(username); err != nil {
|
||||||
|
handleError(fmt.Errorf("deleting user failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
rw.WriteHeader(http.StatusOK)
|
||||||
|
}
|
||||||
190
internal/archiver/README.md
Normal file
190
internal/archiver/README.md
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
# Archiver Package
|
||||||
|
|
||||||
|
The `archiver` package provides asynchronous job archiving functionality for ClusterCockpit. When jobs complete, their metric data is archived from the metric store to a persistent archive backend (filesystem, S3, SQLite, etc.).
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
### Producer-Consumer Pattern
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────┐ TriggerArchiving() ┌───────────────┐
|
||||||
|
│ API Handler │ ───────────────────────▶ │ archiveChannel│
|
||||||
|
│ (Job Stop) │ │ (buffer: 128)│
|
||||||
|
└──────────────┘ └───────┬───────┘
|
||||||
|
│
|
||||||
|
┌─────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌──────────────────────┐
|
||||||
|
│ archivingWorker() │
|
||||||
|
│ (goroutine) │
|
||||||
|
└──────────┬───────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
1. Fetch job metadata
|
||||||
|
2. Load metric data
|
||||||
|
3. Calculate statistics
|
||||||
|
4. Archive to backend
|
||||||
|
5. Update database
|
||||||
|
6. Call hooks
|
||||||
|
```
|
||||||
|
|
||||||
|
### Components
|
||||||
|
|
||||||
|
- **archiveChannel**: Buffered channel (128 jobs) for async communication
|
||||||
|
- **archivePending**: WaitGroup tracking in-flight archiving operations
|
||||||
|
- **archivingWorker**: Background goroutine processing archiving requests
|
||||||
|
- **shutdownCtx**: Context for graceful cancellation during shutdown
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Initialization
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Start archiver with context for shutdown control
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
archiver.Start(jobRepository, ctx)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Archiving a Job
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Called automatically when a job completes
|
||||||
|
archiver.TriggerArchiving(job)
|
||||||
|
```
|
||||||
|
|
||||||
|
The function returns immediately. Actual archiving happens in the background.
|
||||||
|
|
||||||
|
### Graceful Shutdown
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Shutdown with 10 second timeout
|
||||||
|
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||||
|
log.Printf("Archiver shutdown timeout: %v", err)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Shutdown process:**
|
||||||
|
1. Closes channel (rejects new jobs)
|
||||||
|
2. Waits for pending jobs (up to timeout)
|
||||||
|
3. Cancels context if timeout exceeded
|
||||||
|
4. Waits for worker to exit cleanly
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Channel Buffer Size
|
||||||
|
|
||||||
|
The archiving channel has a buffer of 128 jobs. If more than 128 jobs are queued simultaneously, `TriggerArchiving()` will block until space is available.
|
||||||
|
|
||||||
|
To adjust:
|
||||||
|
```go
|
||||||
|
// In archiveWorker.go Start() function
|
||||||
|
archiveChannel = make(chan *schema.Job, 256) // Increase buffer
|
||||||
|
```
|
||||||
|
|
||||||
|
### Scope Selection
|
||||||
|
|
||||||
|
Archive data scopes are automatically selected based on job size:
|
||||||
|
|
||||||
|
- **Node scope**: Always included
|
||||||
|
- **Core scope**: Included for jobs with ≤8 nodes (reduces data volume for large jobs)
|
||||||
|
- **Accelerator scope**: Included if job used accelerators (`NumAcc > 0`)
|
||||||
|
|
||||||
|
To adjust the node threshold:
|
||||||
|
```go
|
||||||
|
// In archiver.go ArchiveJob() function
|
||||||
|
if job.NumNodes <= 16 { // Change from 8 to 16
|
||||||
|
scopes = append(scopes, schema.MetricScopeCore)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resolution
|
||||||
|
|
||||||
|
Data is archived at the highest available resolution (typically 60s intervals). To change:
|
||||||
|
|
||||||
|
```go
|
||||||
|
// In archiver.go ArchiveJob() function
|
||||||
|
jobData, err := metricdispatcher.LoadData(job, allMetrics, scopes, ctx, 300)
|
||||||
|
// 0 = highest resolution
|
||||||
|
// 300 = 5-minute resolution
|
||||||
|
```
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
### Automatic Retry
|
||||||
|
|
||||||
|
The archiver does **not** automatically retry failed archiving operations. If archiving fails:
|
||||||
|
|
||||||
|
1. Error is logged
|
||||||
|
2. Job is marked as `MonitoringStatusArchivingFailed` in database
|
||||||
|
3. Worker continues processing other jobs
|
||||||
|
|
||||||
|
### Manual Retry
|
||||||
|
|
||||||
|
To re-archive failed jobs, query for jobs with `MonitoringStatusArchivingFailed` and call `TriggerArchiving()` again.
|
||||||
|
|
||||||
|
## Performance Considerations
|
||||||
|
|
||||||
|
### Single Worker Thread
|
||||||
|
|
||||||
|
The archiver uses a single worker goroutine. For high-throughput systems:
|
||||||
|
|
||||||
|
- Large channel buffer (128) prevents blocking
|
||||||
|
- Archiving is typically I/O bound (writing to storage)
|
||||||
|
- Single worker prevents overwhelming storage backend
|
||||||
|
|
||||||
|
### Shutdown Timeout
|
||||||
|
|
||||||
|
Recommended timeout values:
|
||||||
|
- **Development**: 5-10 seconds
|
||||||
|
- **Production**: 10-30 seconds
|
||||||
|
- **High-load**: 30-60 seconds
|
||||||
|
|
||||||
|
Choose based on:
|
||||||
|
- Average archiving time per job
|
||||||
|
- Storage backend latency
|
||||||
|
- Acceptable shutdown delay
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
|
||||||
|
### Logging
|
||||||
|
|
||||||
|
The archiver logs:
|
||||||
|
- **Info**: Startup, shutdown, successful completions
|
||||||
|
- **Debug**: Individual job archiving times
|
||||||
|
- **Error**: Archiving failures with job ID and reason
|
||||||
|
- **Warn**: Shutdown timeout exceeded
|
||||||
|
|
||||||
|
### Metrics
|
||||||
|
|
||||||
|
Monitor these signals for archiver health:
|
||||||
|
- Jobs with `MonitoringStatusArchivingFailed`
|
||||||
|
- Time from job stop to successful archive
|
||||||
|
- Shutdown timeout occurrences
|
||||||
|
|
||||||
|
## Thread Safety
|
||||||
|
|
||||||
|
All exported functions are safe for concurrent use:
|
||||||
|
- `Start()` - Safe to call once
|
||||||
|
- `TriggerArchiving()` - Safe from multiple goroutines
|
||||||
|
- `Shutdown()` - Safe to call once
|
||||||
|
- `WaitForArchiving()` - Deprecated, but safe
|
||||||
|
|
||||||
|
Internal state is protected by:
|
||||||
|
- Channel synchronization (`archiveChannel`)
|
||||||
|
- WaitGroup for pending count (`archivePending`)
|
||||||
|
- Context for cancellation (`shutdownCtx`)
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
- **archiveWorker.go**: Worker lifecycle, channel management, shutdown logic
|
||||||
|
- **archiver.go**: Core archiving logic, metric loading, statistics calculation
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- `internal/repository`: Database operations for job metadata
|
||||||
|
- `internal/metricdispatcher`: Loading metric data from various backends
|
||||||
|
- `pkg/archive`: Archive backend abstraction (filesystem, S3, SQLite)
|
||||||
|
- `cc-lib/schema`: Job and metric data structures
|
||||||
250
internal/archiver/archiveWorker.go
Normal file
250
internal/archiver/archiveWorker.go
Normal file
@@ -0,0 +1,250 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package archiver provides asynchronous job archiving functionality for ClusterCockpit.
|
||||||
|
//
|
||||||
|
// The archiver runs a background worker goroutine that processes job archiving requests
|
||||||
|
// from a buffered channel. When jobs complete, their metric data is archived from the
|
||||||
|
// metric store to the configured archive backend (filesystem, S3, etc.).
|
||||||
|
//
|
||||||
|
// # Architecture
|
||||||
|
//
|
||||||
|
// The archiver uses a producer-consumer pattern:
|
||||||
|
// - Producer: TriggerArchiving() sends jobs to archiveChannel
|
||||||
|
// - Consumer: archivingWorker() processes jobs from the channel
|
||||||
|
// - Coordination: sync.WaitGroup tracks pending archive operations
|
||||||
|
//
|
||||||
|
// # Lifecycle
|
||||||
|
//
|
||||||
|
// 1. Start(repo, ctx) - Initialize worker with context for cancellation
|
||||||
|
// 2. TriggerArchiving(job) - Queue job for archiving (called when job stops)
|
||||||
|
// 3. archivingWorker() - Background goroutine processes jobs
|
||||||
|
// 4. Shutdown(timeout) - Graceful shutdown with timeout
|
||||||
|
//
|
||||||
|
// # Graceful Shutdown
|
||||||
|
//
|
||||||
|
// The archiver supports graceful shutdown with configurable timeout:
|
||||||
|
// - Closes channel to reject new jobs
|
||||||
|
// - Waits for pending jobs to complete (up to timeout)
|
||||||
|
// - Cancels context if timeout exceeded
|
||||||
|
// - Ensures worker goroutine exits cleanly
|
||||||
|
//
|
||||||
|
// # Example Usage
|
||||||
|
//
|
||||||
|
// // Initialize archiver
|
||||||
|
// ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
// defer cancel()
|
||||||
|
// archiver.Start(jobRepository, ctx)
|
||||||
|
//
|
||||||
|
// // Trigger archiving when job completes
|
||||||
|
// archiver.TriggerArchiving(job)
|
||||||
|
//
|
||||||
|
// // Graceful shutdown with 10 second timeout
|
||||||
|
// if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||||
|
// log.Printf("Archiver shutdown timeout: %v", err)
|
||||||
|
// }
|
||||||
|
package archiver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
sq "github.com/Masterminds/squirrel"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
archivePending sync.WaitGroup
|
||||||
|
archiveChannel chan *schema.Job
|
||||||
|
jobRepo *repository.JobRepository
|
||||||
|
shutdownCtx context.Context
|
||||||
|
shutdownCancel context.CancelFunc
|
||||||
|
workerDone chan struct{}
|
||||||
|
)
|
||||||
|
|
||||||
|
// Start initializes the archiver and starts the background worker goroutine.
|
||||||
|
//
|
||||||
|
// The archiver processes job archiving requests asynchronously via a buffered channel.
|
||||||
|
// Jobs are sent to the channel using TriggerArchiving() and processed by the worker.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - r: JobRepository instance for database operations
|
||||||
|
// - ctx: Context for cancellation (shutdown signal propagation)
|
||||||
|
//
|
||||||
|
// The worker goroutine will run until:
|
||||||
|
// - ctx is cancelled (via parent shutdown)
|
||||||
|
// - archiveChannel is closed (via Shutdown())
|
||||||
|
//
|
||||||
|
// Must be called before TriggerArchiving(). Safe to call only once.
|
||||||
|
func Start(r *repository.JobRepository, ctx context.Context) {
|
||||||
|
shutdownCtx, shutdownCancel = context.WithCancel(ctx)
|
||||||
|
archiveChannel = make(chan *schema.Job, 128)
|
||||||
|
workerDone = make(chan struct{})
|
||||||
|
jobRepo = r
|
||||||
|
|
||||||
|
go archivingWorker()
|
||||||
|
}
|
||||||
|
|
||||||
|
// archivingWorker is the background goroutine that processes job archiving requests.
|
||||||
|
//
|
||||||
|
// The worker loop:
|
||||||
|
// 1. Blocks waiting for jobs on archiveChannel or shutdown signal
|
||||||
|
// 2. Fetches job metadata from repository
|
||||||
|
// 3. Archives job data to configured backend (calls ArchiveJob)
|
||||||
|
// 4. Updates job footprint and energy metrics in database
|
||||||
|
// 5. Marks job as successfully archived
|
||||||
|
// 6. Calls job stop hooks
|
||||||
|
//
|
||||||
|
// The worker exits when:
|
||||||
|
// - shutdownCtx is cancelled (timeout during shutdown)
|
||||||
|
// - archiveChannel is closed (normal shutdown)
|
||||||
|
//
|
||||||
|
// Errors during archiving are logged and the job is marked as failed,
|
||||||
|
// but the worker continues processing other jobs.
|
||||||
|
func archivingWorker() {
|
||||||
|
defer close(workerDone)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-shutdownCtx.Done():
|
||||||
|
cclog.Info("Archive worker received shutdown signal")
|
||||||
|
return
|
||||||
|
|
||||||
|
case job, ok := <-archiveChannel:
|
||||||
|
if !ok {
|
||||||
|
cclog.Info("Archive channel closed, worker exiting")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
// not using meta data, called to load JobMeta into Cache?
|
||||||
|
// will fail if job meta not in repository
|
||||||
|
if _, err := jobRepo.FetchMetadata(job); err != nil {
|
||||||
|
cclog.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
|
||||||
|
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
|
archivePending.Done()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
|
||||||
|
// Use shutdown context to allow cancellation
|
||||||
|
jobMeta, err := ArchiveJob(job, shutdownCtx)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
|
||||||
|
jobRepo.UpdateMonitoringStatus(*job.ID, schema.MonitoringStatusArchivingFailed)
|
||||||
|
archivePending.Done()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
stmt := sq.Update("job").Where("job.id = ?", job.ID)
|
||||||
|
|
||||||
|
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
|
||||||
|
cclog.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
|
||||||
|
archivePending.Done()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
|
||||||
|
cclog.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
|
||||||
|
archivePending.Done()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Update the jobs database entry one last time:
|
||||||
|
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
|
||||||
|
if err := jobRepo.Execute(stmt); err != nil {
|
||||||
|
cclog.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
|
||||||
|
archivePending.Done()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cclog.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
|
||||||
|
cclog.Infof("archiving job (dbid: %d) successful", job.ID)
|
||||||
|
|
||||||
|
repository.CallJobStopHooks(job)
|
||||||
|
archivePending.Done()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TriggerArchiving queues a job for asynchronous archiving.
//
// This function should be called when a job completes (stops) to archive its
// metric data from the metric store to the configured archive backend.
//
// The function:
//  1. Increments the pending-job counter (WaitGroup) — done before the send
//     so Shutdown's Wait cannot miss an in-flight job
//  2. Sends the job to the archiving channel (buffered, capacity 128)
//  3. Returns immediately (non-blocking unless channel is full)
//
// The actual archiving is performed asynchronously by the worker goroutine.
// Upon completion, the worker will decrement the pending counter.
//
// Calls cclog.Fatal if Start() has not been called first. NOTE(review):
// calling this after Shutdown() panics, because Shutdown closes the channel
// and this function sends on it — callers must stop producing jobs first.
func TriggerArchiving(job *schema.Job) {
	if archiveChannel == nil {
		cclog.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
	}

	archivePending.Add(1)
	archiveChannel <- job
}
|
||||||
|
|
||||||
|
// Shutdown performs a graceful shutdown of the archiver with a configurable timeout.
|
||||||
|
//
|
||||||
|
// The shutdown process:
|
||||||
|
// 1. Closes archiveChannel - no new jobs will be accepted
|
||||||
|
// 2. Waits for pending jobs to complete (up to timeout duration)
|
||||||
|
// 3. If timeout is exceeded:
|
||||||
|
// - Cancels shutdownCtx to interrupt ongoing ArchiveJob operations
|
||||||
|
// - Returns error indicating timeout
|
||||||
|
// 4. Waits for worker goroutine to exit cleanly
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - timeout: Maximum duration to wait for pending jobs to complete
|
||||||
|
// (recommended: 10-30 seconds for production)
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - nil if all jobs completed within timeout
|
||||||
|
// - error if timeout was exceeded (some jobs may not have been archived)
|
||||||
|
//
|
||||||
|
// Jobs that don't complete within the timeout will be marked as failed.
|
||||||
|
// The function always ensures the worker goroutine exits before returning.
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
//
|
||||||
|
// if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||||
|
// log.Printf("Some jobs did not complete: %v", err)
|
||||||
|
// }
|
||||||
|
func Shutdown(timeout time.Duration) error {
|
||||||
|
cclog.Info("Initiating archiver shutdown...")
|
||||||
|
|
||||||
|
// Close channel to signal no more jobs will be accepted
|
||||||
|
close(archiveChannel)
|
||||||
|
|
||||||
|
// Create a channel to signal when all jobs are done
|
||||||
|
done := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
archivePending.Wait()
|
||||||
|
close(done)
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Wait for jobs to complete or timeout
|
||||||
|
select {
|
||||||
|
case <-done:
|
||||||
|
cclog.Info("All archive jobs completed successfully")
|
||||||
|
// Wait for worker to exit
|
||||||
|
<-workerDone
|
||||||
|
return nil
|
||||||
|
case <-time.After(timeout):
|
||||||
|
cclog.Warn("Archiver shutdown timeout exceeded, cancelling remaining operations")
|
||||||
|
// Cancel any ongoing operations
|
||||||
|
shutdownCancel()
|
||||||
|
// Wait for worker to exit
|
||||||
|
<-workerDone
|
||||||
|
return fmt.Errorf("archiver shutdown timeout after %v", timeout)
|
||||||
|
}
|
||||||
|
}
|
||||||
105
internal/archiver/archiver.go
Normal file
105
internal/archiver/archiver.go
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package archiver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/metricdispatcher"
|
||||||
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ArchiveJob archives a completed job's metric data to the configured archive backend.
|
||||||
|
//
|
||||||
|
// This function performs the following operations:
|
||||||
|
// 1. Loads all metric data for the job from the metric data repository
|
||||||
|
// 2. Calculates job-level statistics (avg, min, max) for each metric
|
||||||
|
// 3. Stores the job metadata and metric data to the archive backend
|
||||||
|
//
|
||||||
|
// Metric data is retrieved at the highest available resolution (typically 60s)
|
||||||
|
// for the following scopes:
|
||||||
|
// - Node scope (always)
|
||||||
|
// - Core scope (for jobs with ≤8 nodes, to reduce data volume)
|
||||||
|
// - Accelerator scope (if job used accelerators)
|
||||||
|
//
|
||||||
|
// The function respects context cancellation. If ctx is cancelled (e.g., during
|
||||||
|
// shutdown timeout), the operation will be interrupted and return an error.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - job: The job to archive (must be a completed job)
|
||||||
|
// - ctx: Context for cancellation and timeout control
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - *schema.Job with populated Statistics field
|
||||||
|
// - error if data loading or archiving fails
|
||||||
|
//
|
||||||
|
// If config.Keys.DisableArchive is true, only job statistics are calculated
|
||||||
|
// and returned (no data is written to archive backend).
|
||||||
|
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||||
|
allMetrics := make([]string, 0)
|
||||||
|
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
for _, mc := range metricConfigs {
|
||||||
|
allMetrics = append(allMetrics, mc.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
scopes := []schema.MetricScope{schema.MetricScopeNode}
|
||||||
|
// FIXME: Add a config option for this
|
||||||
|
if job.NumNodes <= 8 {
|
||||||
|
// This will add the native scope if core scope is not available
|
||||||
|
scopes = append(scopes, schema.MetricScopeCore)
|
||||||
|
}
|
||||||
|
|
||||||
|
if job.NumAcc > 0 {
|
||||||
|
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||||
|
}
|
||||||
|
|
||||||
|
jobData, err := metricdispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Error("Error wile loading job data for archiving")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Statistics = make(map[string]schema.JobStatistics)
|
||||||
|
|
||||||
|
for metric, data := range jobData {
|
||||||
|
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
||||||
|
nodeData, ok := data["node"]
|
||||||
|
if !ok {
|
||||||
|
// This should never happen ?
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, series := range nodeData.Series {
|
||||||
|
avg += series.Statistics.Avg
|
||||||
|
min = math.Min(min, series.Statistics.Min)
|
||||||
|
max = math.Max(max, series.Statistics.Max)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Round AVG Result to 2 Digits
|
||||||
|
job.Statistics[metric] = schema.JobStatistics{
|
||||||
|
Unit: schema.Unit{
|
||||||
|
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
||||||
|
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
||||||
|
},
|
||||||
|
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
|
||||||
|
Min: min,
|
||||||
|
Max: max,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the file based archive is disabled,
|
||||||
|
// only return the JobMeta structure as the
|
||||||
|
// statistics in there are needed.
|
||||||
|
if config.Keys.DisableArchive {
|
||||||
|
return job, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return job, archive.GetHandle().ImportJob(job, &jobData)
|
||||||
|
}
|
||||||
@@ -1,47 +1,137 @@
|
|||||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package auth implements various authentication methods
|
||||||
package auth
|
package auth
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"crypto/rand"
|
"crypto/rand"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/util"
|
||||||
"github.com/gorilla/sessions"
|
"github.com/gorilla/sessions"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Authenticator is the interface for all authentication methods.
|
||||||
|
// Each authenticator determines if it can handle a login request (CanLogin)
|
||||||
|
// and performs the actual authentication (Login).
|
||||||
type Authenticator interface {
|
type Authenticator interface {
|
||||||
|
// CanLogin determines if this authenticator can handle the login request.
|
||||||
|
// It returns the user object if available and a boolean indicating if this
|
||||||
|
// authenticator should attempt the login. This method should not perform
|
||||||
|
// expensive operations or actual authentication.
|
||||||
CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)
|
CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)
|
||||||
|
|
||||||
|
// Login performs the actually authentication for the user.
|
||||||
|
// It returns the authenticated user or an error if authentication fails.
|
||||||
|
// The user parameter may be nil if the user doesn't exist in the database yet.
|
||||||
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type Authentication struct {
|
var (
|
||||||
sessionStore *sessions.CookieStore
|
initOnce sync.Once
|
||||||
SessionMaxAge time.Duration
|
authInstance *Authentication
|
||||||
|
)
|
||||||
|
|
||||||
authenticators []Authenticator
|
// rateLimiterEntry tracks a rate limiter and its last use time for cleanup
|
||||||
|
type rateLimiterEntry struct {
|
||||||
|
limiter *rate.Limiter
|
||||||
|
lastUsed time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
var ipUserLimiters sync.Map
|
||||||
|
|
||||||
|
// getIPUserLimiter returns a rate limiter for the given IP and username combination.
|
||||||
|
// Rate limiters are created on demand and track 5 attempts per 15 minutes.
|
||||||
|
func getIPUserLimiter(ip, username string) *rate.Limiter {
|
||||||
|
key := ip + ":" + username
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
if entry, ok := ipUserLimiters.Load(key); ok {
|
||||||
|
rle := entry.(*rateLimiterEntry)
|
||||||
|
rle.lastUsed = now
|
||||||
|
return rle.limiter
|
||||||
|
}
|
||||||
|
|
||||||
|
// More aggressive rate limiting: 5 attempts per 15 minutes
|
||||||
|
newLimiter := rate.NewLimiter(rate.Every(15*time.Minute/5), 5)
|
||||||
|
ipUserLimiters.Store(key, &rateLimiterEntry{
|
||||||
|
limiter: newLimiter,
|
||||||
|
lastUsed: now,
|
||||||
|
})
|
||||||
|
return newLimiter
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanupOldRateLimiters removes rate limiters that haven't been used recently
|
||||||
|
func cleanupOldRateLimiters(olderThan time.Time) {
|
||||||
|
ipUserLimiters.Range(func(key, value any) bool {
|
||||||
|
entry := value.(*rateLimiterEntry)
|
||||||
|
if entry.lastUsed.Before(olderThan) {
|
||||||
|
ipUserLimiters.Delete(key)
|
||||||
|
cclog.Debugf("Cleaned up rate limiter for %v", key)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// startRateLimiterCleanup starts a background goroutine to clean up old rate limiters
|
||||||
|
func startRateLimiterCleanup() {
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(1 * time.Hour)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for range ticker.C {
|
||||||
|
// Clean up limiters not used in the last 24 hours
|
||||||
|
cleanupOldRateLimiters(time.Now().Add(-24 * time.Hour))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// AuthConfig contains configuration for all authentication methods
|
||||||
|
type AuthConfig struct {
|
||||||
|
LdapConfig *LdapConfig `json:"ldap"`
|
||||||
|
JwtConfig *JWTAuthConfig `json:"jwts"`
|
||||||
|
OpenIDConfig *OpenIDConfig `json:"oidc"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Keys holds the global authentication configuration
|
||||||
|
var Keys AuthConfig
|
||||||
|
|
||||||
|
// Authentication manages all authentication methods and session handling
|
||||||
|
type Authentication struct {
|
||||||
|
sessionStore *sessions.CookieStore
|
||||||
LdapAuth *LdapAuthenticator
|
LdapAuth *LdapAuthenticator
|
||||||
JwtAuth *JWTAuthenticator
|
JwtAuth *JWTAuthenticator
|
||||||
LocalAuth *LocalAuthenticator
|
LocalAuth *LocalAuthenticator
|
||||||
|
authenticators []Authenticator
|
||||||
|
SessionMaxAge time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
func (auth *Authentication) AuthViaSession(
|
func (auth *Authentication) AuthViaSession(
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, error) {
|
r *http.Request,
|
||||||
|
) (*schema.User, error) {
|
||||||
session, err := auth.sessionStore.Get(r, "session")
|
session, err := auth.sessionStore.Get(r, "session")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while getting session store")
|
cclog.Error("Error while getting session store")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -49,10 +139,31 @@ func (auth *Authentication) AuthViaSession(
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Check if session keys exist
|
// Validate session data with proper type checking
|
||||||
username, _ := session.Values["username"].(string)
|
username, ok := session.Values["username"].(string)
|
||||||
projects, _ := session.Values["projects"].([]string)
|
if !ok || username == "" {
|
||||||
roles, _ := session.Values["roles"].([]string)
|
cclog.Warn("Invalid session: missing or invalid username")
|
||||||
|
// Invalidate the corrupted session
|
||||||
|
session.Options.MaxAge = -1
|
||||||
|
_ = auth.sessionStore.Save(r, rw, session)
|
||||||
|
return nil, errors.New("invalid session data")
|
||||||
|
}
|
||||||
|
|
||||||
|
projects, ok := session.Values["projects"].([]string)
|
||||||
|
if !ok {
|
||||||
|
cclog.Warn("Invalid session: projects not found or invalid type, using empty list")
|
||||||
|
projects = []string{}
|
||||||
|
}
|
||||||
|
|
||||||
|
roles, ok := session.Values["roles"].([]string)
|
||||||
|
if !ok || len(roles) == 0 {
|
||||||
|
cclog.Warn("Invalid session: missing or invalid roles")
|
||||||
|
// Invalidate the corrupted session
|
||||||
|
session.Options.MaxAge = -1
|
||||||
|
_ = auth.sessionStore.Save(r, rw, session)
|
||||||
|
return nil, errors.New("invalid session data")
|
||||||
|
}
|
||||||
|
|
||||||
return &schema.User{
|
return &schema.User{
|
||||||
Username: username,
|
Username: username,
|
||||||
Projects: projects,
|
Projects: projects,
|
||||||
@@ -62,86 +173,179 @@ func (auth *Authentication) AuthViaSession(
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func Init() (*Authentication, error) {
|
func Init(authCfg *json.RawMessage) {
|
||||||
auth := &Authentication{}
|
initOnce.Do(func() {
|
||||||
|
authInstance = &Authentication{}
|
||||||
|
|
||||||
|
// Start background cleanup of rate limiters
|
||||||
|
startRateLimiterCleanup()
|
||||||
|
|
||||||
sessKey := os.Getenv("SESSION_KEY")
|
sessKey := os.Getenv("SESSION_KEY")
|
||||||
if sessKey == "" {
|
if sessKey == "" {
|
||||||
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
cclog.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
|
||||||
bytes := make([]byte, 32)
|
bytes := make([]byte, 32)
|
||||||
if _, err := rand.Read(bytes); err != nil {
|
if _, err := rand.Read(bytes); err != nil {
|
||||||
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
|
cclog.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
|
||||||
return nil, err
|
}
|
||||||
}
|
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
|
||||||
} else {
|
|
||||||
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
|
||||||
if err != nil {
|
|
||||||
log.Error("Error while initializing authentication -> decoding session key failed")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
auth.sessionStore = sessions.NewCookieStore(bytes)
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.Keys.LdapConfig != nil {
|
|
||||||
ldapAuth := &LdapAuthenticator{}
|
|
||||||
if err := ldapAuth.Init(); err != nil {
|
|
||||||
log.Warn("Error while initializing authentication -> ldapAuth init failed")
|
|
||||||
} else {
|
} else {
|
||||||
auth.LdapAuth = ldapAuth
|
bytes, err := base64.StdEncoding.DecodeString(sessKey)
|
||||||
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
|
if err != nil {
|
||||||
}
|
cclog.Fatal("Error while initializing authentication -> decoding session key failed")
|
||||||
} else {
|
}
|
||||||
log.Info("Missing LDAP configuration: No LDAP support!")
|
authInstance.sessionStore = sessions.NewCookieStore(bytes)
|
||||||
}
|
|
||||||
|
|
||||||
if config.Keys.JwtConfig != nil {
|
|
||||||
auth.JwtAuth = &JWTAuthenticator{}
|
|
||||||
if err := auth.JwtAuth.Init(); err != nil {
|
|
||||||
log.Error("Error while initializing authentication -> jwtAuth init failed")
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
jwtSessionAuth := &JWTSessionAuthenticator{}
|
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err == nil {
|
||||||
if err := jwtSessionAuth.Init(); err != nil {
|
authInstance.SessionMaxAge = d
|
||||||
log.Info("jwtSessionAuth init failed: No JWT login support!")
|
}
|
||||||
|
|
||||||
|
if authCfg == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
config.Validate(configSchema, *authCfg)
|
||||||
|
dec := json.NewDecoder(bytes.NewReader(*authCfg))
|
||||||
|
dec.DisallowUnknownFields()
|
||||||
|
if err := dec.Decode(&Keys); err != nil {
|
||||||
|
cclog.Errorf("error while decoding ldap config: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if Keys.LdapConfig != nil {
|
||||||
|
ldapAuth := &LdapAuthenticator{}
|
||||||
|
if err := ldapAuth.Init(); err != nil {
|
||||||
|
cclog.Warn("Error while initializing authentication -> ldapAuth init failed")
|
||||||
|
} else {
|
||||||
|
authInstance.LdapAuth = ldapAuth
|
||||||
|
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
auth.authenticators = append(auth.authenticators, jwtSessionAuth)
|
cclog.Info("Missing LDAP configuration: No LDAP support!")
|
||||||
}
|
}
|
||||||
|
|
||||||
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
if Keys.JwtConfig != nil {
|
||||||
if err := jwtCookieSessionAuth.Init(); err != nil {
|
authInstance.JwtAuth = &JWTAuthenticator{}
|
||||||
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
if err := authInstance.JwtAuth.Init(); err != nil {
|
||||||
|
cclog.Fatal("Error while initializing authentication -> jwtAuth init failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
jwtSessionAuth := &JWTSessionAuthenticator{}
|
||||||
|
if err := jwtSessionAuth.Init(); err != nil {
|
||||||
|
cclog.Info("jwtSessionAuth init failed: No JWT login support!")
|
||||||
|
} else {
|
||||||
|
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
|
||||||
|
}
|
||||||
|
|
||||||
|
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
|
||||||
|
if err := jwtCookieSessionAuth.Init(); err != nil {
|
||||||
|
cclog.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
|
||||||
|
} else {
|
||||||
|
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
auth.authenticators = append(auth.authenticators, jwtCookieSessionAuth)
|
cclog.Info("Missing JWT configuration: No JWT token support!")
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
log.Info("Missing JWT configuration: No JWT token support!")
|
authInstance.LocalAuth = &LocalAuthenticator{}
|
||||||
|
if err := authInstance.LocalAuth.Init(); err != nil {
|
||||||
|
cclog.Fatal("Error while initializing authentication -> localAuth init failed")
|
||||||
|
}
|
||||||
|
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetAuthInstance() *Authentication {
|
||||||
|
if authInstance == nil {
|
||||||
|
cclog.Fatal("Authentication module not initialized!")
|
||||||
}
|
}
|
||||||
|
|
||||||
auth.LocalAuth = &LocalAuthenticator{}
|
return authInstance
|
||||||
if err := auth.LocalAuth.Init(); err != nil {
|
}
|
||||||
log.Error("Error while initializing authentication -> localAuth init failed")
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
|
|
||||||
|
|
||||||
return auth, nil
|
// handleUserSync syncs or updates a user in the database based on configuration.
|
||||||
|
// This is used for both JWT and OIDC authentication when syncUserOnLogin or updateUserOnLogin is enabled.
|
||||||
|
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
|
||||||
|
r := repository.GetUserRepository()
|
||||||
|
dbUser, err := r.GetUser(user.Username)
|
||||||
|
|
||||||
|
if err != nil && err != sql.ErrNoRows {
|
||||||
|
cclog.Errorf("Error while loading user '%s': %v", user.Username, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err == sql.ErrNoRows && syncUserOnLogin { // Add new user
|
||||||
|
if err := r.AddUser(user); err != nil {
|
||||||
|
cclog.Errorf("Error while adding user '%s' to DB: %v", user.Username, err)
|
||||||
|
}
|
||||||
|
} else if err == nil && updateUserOnLogin { // Update existing user
|
||||||
|
if err := r.UpdateUser(dbUser, user); err != nil {
|
||||||
|
cclog.Errorf("Error while updating user '%s' in DB: %v", dbUser.Username, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleTokenUser syncs JWT token user with database
|
||||||
|
func handleTokenUser(tokenUser *schema.User) {
|
||||||
|
handleUserSync(tokenUser, Keys.JwtConfig.SyncUserOnLogin, Keys.JwtConfig.UpdateUserOnLogin)
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleOIDCUser syncs OIDC user with database
|
||||||
|
func handleOIDCUser(OIDCUser *schema.User) {
|
||||||
|
handleUserSync(OIDCUser, Keys.OpenIDConfig.SyncUserOnLogin, Keys.OpenIDConfig.UpdateUserOnLogin)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
|
||||||
|
session, err := auth.sessionStore.New(r, "session")
|
||||||
|
if err != nil {
|
||||||
|
cclog.Errorf("session creation failed: %s", err.Error())
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if auth.SessionMaxAge != 0 {
|
||||||
|
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
||||||
|
}
|
||||||
|
if config.Keys.HTTPSCertFile == "" && config.Keys.HTTPSKeyFile == "" {
|
||||||
|
cclog.Warn("HTTPS not configured - session cookies will not have Secure flag set (insecure for production)")
|
||||||
|
session.Options.Secure = false
|
||||||
|
}
|
||||||
|
session.Options.SameSite = http.SameSiteStrictMode
|
||||||
|
session.Values["username"] = user.Username
|
||||||
|
session.Values["projects"] = user.Projects
|
||||||
|
session.Values["roles"] = user.Roles
|
||||||
|
if err := auth.sessionStore.Save(r, rw, session); err != nil {
|
||||||
|
cclog.Warnf("session save failed: %s", err.Error())
|
||||||
|
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (auth *Authentication) Login(
|
func (auth *Authentication) Login(
|
||||||
onsuccess http.Handler,
|
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
|
||||||
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error)) http.Handler {
|
) http.Handler {
|
||||||
|
|
||||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
username := r.FormValue("username")
|
ip, _, err := net.SplitHostPort(r.RemoteAddr)
|
||||||
var dbUser *schema.User
|
if err != nil {
|
||||||
|
ip = r.RemoteAddr
|
||||||
|
}
|
||||||
|
|
||||||
|
username := r.FormValue("username")
|
||||||
|
|
||||||
|
limiter := getIPUserLimiter(ip, username)
|
||||||
|
if !limiter.Allow() {
|
||||||
|
cclog.Warnf("AUTH/RATE > Too many login attempts for combination IP: %s, Username: %s", ip, username)
|
||||||
|
onfailure(rw, r, errors.New("too many login attempts, try again in a few minutes"))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var dbUser *schema.User
|
||||||
if username != "" {
|
if username != "" {
|
||||||
var err error
|
var err error
|
||||||
dbUser, err = repository.GetUserRepository().GetUser(username)
|
dbUser, err = repository.GetUserRepository().GetUser(username)
|
||||||
if err != nil && err != sql.ErrNoRows {
|
if err != nil && err != sql.ErrNoRows {
|
||||||
log.Errorf("Error while loading user '%v'", username)
|
cclog.Errorf("Error while loading user '%v'", username)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -151,79 +355,224 @@ func (auth *Authentication) Login(
|
|||||||
if user, ok = authenticator.CanLogin(dbUser, username, rw, r); !ok {
|
if user, ok = authenticator.CanLogin(dbUser, username, rw, r); !ok {
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
log.Debugf("Can login with user %v", user)
|
cclog.Debugf("Can login with user %v", user)
|
||||||
}
|
}
|
||||||
|
|
||||||
user, err := authenticator.Login(user, rw, r)
|
user, err := authenticator.Login(user, rw, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warnf("user login failed: %s", err.Error())
|
cclog.Warnf("user login failed: %s", err.Error())
|
||||||
onfailure(rw, r, err)
|
onfailure(rw, r, err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
session, err := auth.sessionStore.New(r, "session")
|
if err := auth.SaveSession(rw, r, user); err != nil {
|
||||||
if err != nil {
|
|
||||||
log.Errorf("session creation failed: %s", err.Error())
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if auth.SessionMaxAge != 0 {
|
cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||||
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
|
|
||||||
}
|
|
||||||
session.Values["username"] = user.Username
|
|
||||||
session.Values["projects"] = user.Projects
|
|
||||||
session.Values["roles"] = user.Roles
|
|
||||||
if err := auth.sessionStore.Save(r, rw, session); err != nil {
|
|
||||||
log.Warnf("session save failed: %s", err.Error())
|
|
||||||
http.Error(rw, err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
|
||||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
|
||||||
|
if r.FormValue("redirect") != "" {
|
||||||
|
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debugf("login failed: no authenticator applied")
|
cclog.Debugf("login failed: no authenticator applied")
|
||||||
onfailure(rw, r, errors.New("no authenticator applied"))
|
onfailure(rw, r, errors.New("no authenticator applied"))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (auth *Authentication) Auth(
|
func (auth *Authentication) Auth(
|
||||||
onsuccess http.Handler,
|
onsuccess http.Handler,
|
||||||
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error)) http.Handler {
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Infof("authentication failed: %s", err.Error())
|
cclog.Infof("auth -> authentication failed: %s", err.Error())
|
||||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if user == nil {
|
if user == nil {
|
||||||
user, err = auth.AuthViaSession(rw, r)
|
user, err = auth.AuthViaSession(rw, r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Infof("authentication failed: %s", err.Error())
|
cclog.Infof("auth -> authentication failed: %s", err.Error())
|
||||||
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
http.Error(rw, err.Error(), http.StatusUnauthorized)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if user != nil {
|
if user != nil {
|
||||||
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug("authentication failed")
|
cclog.Info("auth -> authentication failed")
|
||||||
onfailure(rw, r, errors.New("unauthorized (please login first)"))
|
onfailure(rw, r, errors.New("unauthorized (please login first)"))
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthAPI(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Infof("auth api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ipErr := securedCheck(user, r)
|
||||||
|
if ipErr != nil {
|
||||||
|
cclog.Infof("auth api -> secured check failed: %s", ipErr.Error())
|
||||||
|
onfailure(rw, r, ipErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if user != nil {
|
||||||
|
switch {
|
||||||
|
case len(user.Roles) == 1:
|
||||||
|
if user.HasRole(schema.RoleApi) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case len(user.Roles) >= 2:
|
||||||
|
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
cclog.Info("auth api -> authentication failed: missing role")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cclog.Info("auth api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthUserAPI(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Infof("auth user api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if user != nil {
|
||||||
|
switch {
|
||||||
|
case len(user.Roles) == 1:
|
||||||
|
if user.HasRole(schema.RoleApi) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case len(user.Roles) >= 2:
|
||||||
|
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleSupport, schema.RoleAdmin}) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
cclog.Info("auth user api -> authentication failed: missing role")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cclog.Info("auth user api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthMetricStoreAPI(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Infof("auth metricstore api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if user != nil {
|
||||||
|
switch {
|
||||||
|
case len(user.Roles) == 1:
|
||||||
|
if user.HasRole(schema.RoleApi) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
case len(user.Roles) >= 2:
|
||||||
|
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
cclog.Info("auth metricstore api -> authentication failed: missing role")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cclog.Info("auth metricstore api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthConfigAPI(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.AuthViaSession(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Infof("auth config api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user != nil && user.HasRole(schema.RoleAdmin) {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cclog.Info("auth config api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func (auth *Authentication) AuthFrontendAPI(
|
||||||
|
onsuccess http.Handler,
|
||||||
|
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
|
||||||
|
) http.Handler {
|
||||||
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
user, err := auth.AuthViaSession(rw, r)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Infof("auth frontend api -> authentication failed: %s", err.Error())
|
||||||
|
onfailure(rw, r, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if user != nil {
|
||||||
|
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cclog.Info("auth frontend api -> authentication failed: no auth")
|
||||||
|
onfailure(rw, r, errors.New("unauthorized"))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
||||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
session, err := auth.sessionStore.Get(r, "session")
|
session, err := auth.sessionStore.Get(r, "session")
|
||||||
@@ -243,3 +592,42 @@ func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
|
|||||||
onsuccess.ServeHTTP(rw, r)
|
onsuccess.ServeHTTP(rw, r)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper Moved To MiddleWare Auth Handlers
|
||||||
|
func securedCheck(user *schema.User, r *http.Request) error {
|
||||||
|
if user == nil {
|
||||||
|
return fmt.Errorf("no user for secured check")
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract IP address for checking
|
||||||
|
IPAddress := r.Header.Get("X-Real-Ip")
|
||||||
|
if IPAddress == "" {
|
||||||
|
IPAddress = r.Header.Get("X-Forwarded-For")
|
||||||
|
}
|
||||||
|
if IPAddress == "" {
|
||||||
|
IPAddress = r.RemoteAddr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle both IPv4 and IPv6 addresses properly
|
||||||
|
// For IPv6, this will strip the port and brackets
|
||||||
|
// For IPv4, this will strip the port
|
||||||
|
if host, _, err := net.SplitHostPort(IPAddress); err == nil {
|
||||||
|
IPAddress = host
|
||||||
|
}
|
||||||
|
// If SplitHostPort fails, IPAddress is already just a host (no port)
|
||||||
|
|
||||||
|
// If nothing declared in config: deny all request to this api endpoint
|
||||||
|
if len(config.Keys.APIAllowedIPs) == 0 {
|
||||||
|
return fmt.Errorf("missing configuration key ApiAllowedIPs")
|
||||||
|
}
|
||||||
|
// If wildcard declared in config: Continue
|
||||||
|
if config.Keys.APIAllowedIPs[0] == "*" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
// check if IP is allowed
|
||||||
|
if !util.Contains(config.Keys.APIAllowedIPs, IPAddress) {
|
||||||
|
return fmt.Errorf("unknown ip: %v", IPAddress)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
176
internal/auth/auth_test.go
Normal file
176
internal/auth/auth_test.go
Normal file
@@ -0,0 +1,176 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestGetIPUserLimiter tests the rate limiter creation and retrieval
|
||||||
|
func TestGetIPUserLimiter(t *testing.T) {
|
||||||
|
ip := "192.168.1.1"
|
||||||
|
username := "testuser"
|
||||||
|
|
||||||
|
// Get limiter for the first time
|
||||||
|
limiter1 := getIPUserLimiter(ip, username)
|
||||||
|
if limiter1 == nil {
|
||||||
|
t.Fatal("Expected limiter to be created")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the same limiter again
|
||||||
|
limiter2 := getIPUserLimiter(ip, username)
|
||||||
|
if limiter1 != limiter2 {
|
||||||
|
t.Error("Expected to get the same limiter instance")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get a different limiter for different user
|
||||||
|
limiter3 := getIPUserLimiter(ip, "otheruser")
|
||||||
|
if limiter1 == limiter3 {
|
||||||
|
t.Error("Expected different limiter for different user")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get a different limiter for different IP
|
||||||
|
limiter4 := getIPUserLimiter("192.168.1.2", username)
|
||||||
|
if limiter1 == limiter4 {
|
||||||
|
t.Error("Expected different limiter for different IP")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRateLimiterBehavior tests that rate limiting works correctly
|
||||||
|
func TestRateLimiterBehavior(t *testing.T) {
|
||||||
|
ip := "10.0.0.1"
|
||||||
|
username := "ratelimituser"
|
||||||
|
|
||||||
|
limiter := getIPUserLimiter(ip, username)
|
||||||
|
|
||||||
|
// Should allow first 5 attempts
|
||||||
|
for i := 0; i < 5; i++ {
|
||||||
|
if !limiter.Allow() {
|
||||||
|
t.Errorf("Request %d should be allowed within rate limit", i+1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6th attempt should be blocked
|
||||||
|
if limiter.Allow() {
|
||||||
|
t.Error("Request 6 should be blocked by rate limiter")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCleanupOldRateLimiters tests the cleanup function
|
||||||
|
func TestCleanupOldRateLimiters(t *testing.T) {
|
||||||
|
// Clear all existing limiters first to avoid interference from other tests
|
||||||
|
cleanupOldRateLimiters(time.Now().Add(24 * time.Hour))
|
||||||
|
|
||||||
|
// Create some new rate limiters
|
||||||
|
limiter1 := getIPUserLimiter("1.1.1.1", "user1")
|
||||||
|
limiter2 := getIPUserLimiter("2.2.2.2", "user2")
|
||||||
|
|
||||||
|
if limiter1 == nil || limiter2 == nil {
|
||||||
|
t.Fatal("Failed to create test limiters")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup limiters older than 1 second from now (should keep both)
|
||||||
|
time.Sleep(10 * time.Millisecond) // Small delay to ensure timestamp difference
|
||||||
|
cleanupOldRateLimiters(time.Now().Add(-1 * time.Second))
|
||||||
|
|
||||||
|
// Verify they still exist (should get same instance)
|
||||||
|
if getIPUserLimiter("1.1.1.1", "user1") != limiter1 {
|
||||||
|
t.Error("Limiter 1 was incorrectly cleaned up")
|
||||||
|
}
|
||||||
|
if getIPUserLimiter("2.2.2.2", "user2") != limiter2 {
|
||||||
|
t.Error("Limiter 2 was incorrectly cleaned up")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup limiters older than 1 hour from now (should remove both)
|
||||||
|
cleanupOldRateLimiters(time.Now().Add(2 * time.Hour))
|
||||||
|
|
||||||
|
// Getting them again should create new instances
|
||||||
|
newLimiter1 := getIPUserLimiter("1.1.1.1", "user1")
|
||||||
|
if newLimiter1 == limiter1 {
|
||||||
|
t.Error("Old limiter should have been cleaned up")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestIPv4Extraction tests extracting IPv4 addresses
|
||||||
|
func TestIPv4Extraction(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"IPv4 with port", "192.168.1.1:8080", "192.168.1.1"},
|
||||||
|
{"IPv4 without port", "192.168.1.1", "192.168.1.1"},
|
||||||
|
{"Localhost with port", "127.0.0.1:3000", "127.0.0.1"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := tt.input
|
||||||
|
if host, _, err := net.SplitHostPort(result); err == nil {
|
||||||
|
result = host
|
||||||
|
}
|
||||||
|
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestIPv6Extraction tests extracting IPv6 addresses
|
||||||
|
func TestIPv6Extraction(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"IPv6 with port", "[2001:db8::1]:8080", "2001:db8::1"},
|
||||||
|
{"IPv6 localhost with port", "[::1]:3000", "::1"},
|
||||||
|
{"IPv6 without port", "2001:db8::1", "2001:db8::1"},
|
||||||
|
{"IPv6 localhost", "::1", "::1"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := tt.input
|
||||||
|
if host, _, err := net.SplitHostPort(result); err == nil {
|
||||||
|
result = host
|
||||||
|
}
|
||||||
|
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestIPExtractionEdgeCases tests edge cases for IP extraction
|
||||||
|
func TestIPExtractionEdgeCases(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"Hostname without port", "example.com", "example.com"},
|
||||||
|
{"Empty string", "", ""},
|
||||||
|
{"Just port", ":8080", ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := tt.input
|
||||||
|
if host, _, err := net.SplitHostPort(result); err == nil {
|
||||||
|
result = host
|
||||||
|
}
|
||||||
|
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,7 +1,8 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package auth
|
package auth
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -13,13 +14,33 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/golang-jwt/jwt/v5"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
"github.com/golang-jwt/jwt/v4"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type JWTAuthConfig struct {
|
||||||
|
// Specifies for how long a JWT token shall be valid
|
||||||
|
// as a string parsable by time.ParseDuration().
|
||||||
|
MaxAge string `json:"max-age"`
|
||||||
|
|
||||||
|
// Specifies which cookie should be checked for a JWT token (if no authorization header is present)
|
||||||
|
CookieName string `json:"cookieName"`
|
||||||
|
|
||||||
|
// Deny login for users not in database (but defined in JWT).
|
||||||
|
// Ignore user roles defined in JWTs ('roles' claim), get them from db.
|
||||||
|
ValidateUser bool `json:"validateUser"`
|
||||||
|
|
||||||
|
// Specifies which issuer should be accepted when validating external JWTs ('iss' claim)
|
||||||
|
TrustedIssuer string `json:"trustedIssuer"`
|
||||||
|
|
||||||
|
// Should an non-existent user be added to the DB based on the information in the token
|
||||||
|
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||||
|
|
||||||
|
// Should an existent user be updated in the DB based on the information in the token
|
||||||
|
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
|
||||||
|
}
|
||||||
|
|
||||||
type JWTAuthenticator struct {
|
type JWTAuthenticator struct {
|
||||||
publicKey ed25519.PublicKey
|
publicKey ed25519.PublicKey
|
||||||
privateKey ed25519.PrivateKey
|
privateKey ed25519.PrivateKey
|
||||||
@@ -28,17 +49,17 @@ type JWTAuthenticator struct {
|
|||||||
func (ja *JWTAuthenticator) Init() error {
|
func (ja *JWTAuthenticator) Init() error {
|
||||||
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
||||||
if pubKey == "" || privKey == "" {
|
if pubKey == "" || privKey == "" {
|
||||||
log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
cclog.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||||
} else {
|
} else {
|
||||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Could not decode JWT public key")
|
cclog.Warn("Could not decode JWT public key")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ja.publicKey = ed25519.PublicKey(bytes)
|
ja.publicKey = ed25519.PublicKey(bytes)
|
||||||
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Could not decode JWT private key")
|
cclog.Warn("Could not decode JWT private key")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ja.privateKey = ed25519.PrivateKey(bytes)
|
ja.privateKey = ed25519.PrivateKey(bytes)
|
||||||
@@ -49,8 +70,8 @@ func (ja *JWTAuthenticator) Init() error {
|
|||||||
|
|
||||||
func (ja *JWTAuthenticator) AuthViaJWT(
|
func (ja *JWTAuthenticator) AuthViaJWT(
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, error) {
|
r *http.Request,
|
||||||
|
) (*schema.User, error) {
|
||||||
rawtoken := r.Header.Get("X-Auth-Token")
|
rawtoken := r.Header.Get("X-Auth-Token")
|
||||||
if rawtoken == "" {
|
if rawtoken == "" {
|
||||||
rawtoken = r.Header.Get("Authorization")
|
rawtoken = r.Header.Get("Authorization")
|
||||||
@@ -62,7 +83,7 @@ func (ja *JWTAuthenticator) AuthViaJWT(
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
|
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
|
||||||
if t.Method != jwt.SigningMethodEdDSA {
|
if t.Method != jwt.SigningMethodEdDSA {
|
||||||
return nil, errors.New("only Ed25519/EdDSA supported")
|
return nil, errors.New("only Ed25519/EdDSA supported")
|
||||||
}
|
}
|
||||||
@@ -70,54 +91,35 @@ func (ja *JWTAuthenticator) AuthViaJWT(
|
|||||||
return ja.publicKey, nil
|
return ja.publicKey, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while parsing JWT token")
|
cclog.Warn("Error while parsing JWT token")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if err := token.Claims.Valid(); err != nil {
|
if !token.Valid {
|
||||||
log.Warn("jwt token claims are not valid")
|
cclog.Warn("jwt token claims are not valid")
|
||||||
return nil, err
|
return nil, errors.New("jwt token claims are not valid")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Token is valid, extract payload
|
// Token is valid, extract payload
|
||||||
claims := token.Claims.(jwt.MapClaims)
|
claims := token.Claims.(jwt.MapClaims)
|
||||||
sub, _ := claims["sub"].(string)
|
|
||||||
|
// Use shared helper to get user from JWT claims
|
||||||
var roles []string
|
var user *schema.User
|
||||||
|
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthToken, -1)
|
||||||
// Validate user + roles from JWT against database?
|
if err != nil {
|
||||||
if config.Keys.JwtConfig.ValidateUser {
|
return nil, err
|
||||||
ur := repository.GetUserRepository()
|
|
||||||
user, err := ur.GetUser(sub)
|
|
||||||
|
|
||||||
// Deny any logins for unknown usernames
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Could not find user from JWT in internal database.")
|
|
||||||
return nil, errors.New("unknown user")
|
|
||||||
}
|
|
||||||
// Take user roles from database instead of trusting the JWT
|
|
||||||
roles = user.Roles
|
|
||||||
} else {
|
|
||||||
// Extract roles from JWT (if present)
|
|
||||||
if rawroles, ok := claims["roles"].([]interface{}); ok {
|
|
||||||
for _, rr := range rawroles {
|
|
||||||
if r, ok := rr.(string); ok {
|
|
||||||
roles = append(roles, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &schema.User{
|
// If not validating user, we only get roles from JWT (no projects for this auth method)
|
||||||
Username: sub,
|
if !Keys.JwtConfig.ValidateUser {
|
||||||
Roles: roles,
|
user.Roles = extractRolesFromClaims(claims, false)
|
||||||
AuthType: schema.AuthToken,
|
user.Projects = nil // Standard JWT auth doesn't include projects
|
||||||
AuthSource: -1,
|
}
|
||||||
}, nil
|
|
||||||
|
return user, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate a new JWT that can be used for authentication
|
// ProvideJWT generates a new JWT that can be used for authentication
|
||||||
func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
|
func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
|
||||||
|
|
||||||
if ja.privateKey == nil {
|
if ja.privateKey == nil {
|
||||||
return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
|
return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
|
||||||
}
|
}
|
||||||
@@ -128,8 +130,8 @@ func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
|
|||||||
"roles": user.Roles,
|
"roles": user.Roles,
|
||||||
"iat": now.Unix(),
|
"iat": now.Unix(),
|
||||||
}
|
}
|
||||||
if config.Keys.JwtConfig.MaxAge != "" {
|
if Keys.JwtConfig.MaxAge != "" {
|
||||||
d, err := time.ParseDuration(config.Keys.JwtConfig.MaxAge)
|
d, err := time.ParseDuration(Keys.JwtConfig.MaxAge)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", errors.New("cannot parse max-age config key")
|
return "", errors.New("cannot parse max-age config key")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,22 +1,20 @@
|
|||||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package auth
|
package auth
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"crypto/ed25519"
|
"crypto/ed25519"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/golang-jwt/jwt/v5"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
"github.com/golang-jwt/jwt/v4"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type JWTCookieSessionAuthenticator struct {
|
type JWTCookieSessionAuthenticator struct {
|
||||||
@@ -30,18 +28,18 @@ var _ Authenticator = (*JWTCookieSessionAuthenticator)(nil)
|
|||||||
func (ja *JWTCookieSessionAuthenticator) Init() error {
|
func (ja *JWTCookieSessionAuthenticator) Init() error {
|
||||||
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
|
||||||
if pubKey == "" || privKey == "" {
|
if pubKey == "" || privKey == "" {
|
||||||
log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
cclog.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||||
return errors.New("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
return errors.New("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
|
||||||
} else {
|
} else {
|
||||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Could not decode JWT public key")
|
cclog.Warn("Could not decode JWT public key")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ja.publicKey = ed25519.PublicKey(bytes)
|
ja.publicKey = ed25519.PublicKey(bytes)
|
||||||
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
bytes, err = base64.StdEncoding.DecodeString(privKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Could not decode JWT private key")
|
cclog.Warn("Could not decode JWT private key")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ja.privateKey = ed25519.PrivateKey(bytes)
|
ja.privateKey = ed25519.PrivateKey(bytes)
|
||||||
@@ -52,36 +50,35 @@ func (ja *JWTCookieSessionAuthenticator) Init() error {
|
|||||||
if keyFound && pubKeyCrossLogin != "" {
|
if keyFound && pubKeyCrossLogin != "" {
|
||||||
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
|
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Could not decode cross login JWT public key")
|
cclog.Warn("Could not decode cross login JWT public key")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
|
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
|
||||||
} else {
|
} else {
|
||||||
ja.publicKeyCrossLogin = nil
|
ja.publicKeyCrossLogin = nil
|
||||||
log.Debug("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
|
cclog.Debug("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
|
||||||
return errors.New("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
|
return errors.New("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
|
||||||
}
|
}
|
||||||
|
|
||||||
jc := config.Keys.JwtConfig
|
|
||||||
// Warn if other necessary settings are not configured
|
// Warn if other necessary settings are not configured
|
||||||
if jc != nil {
|
if Keys.JwtConfig != nil {
|
||||||
if jc.CookieName == "" {
|
if Keys.JwtConfig.CookieName == "" {
|
||||||
log.Info("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
|
cclog.Info("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
|
||||||
return errors.New("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
|
return errors.New("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
|
||||||
}
|
}
|
||||||
if !jc.ValidateUser {
|
if !Keys.JwtConfig.ValidateUser {
|
||||||
log.Info("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
|
cclog.Info("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
|
||||||
}
|
}
|
||||||
if jc.TrustedIssuer == "" {
|
if Keys.JwtConfig.TrustedIssuer == "" {
|
||||||
log.Info("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
cclog.Info("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
||||||
return errors.New("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
return errors.New("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Warn("config for JWTs not configured (cross login via JWT cookie will fail)")
|
cclog.Warn("config for JWTs not configured (cross login via JWT cookie will fail)")
|
||||||
return errors.New("config for JWTs not configured (cross login via JWT cookie will fail)")
|
return errors.New("config for JWTs not configured (cross login via JWT cookie will fail)")
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info("JWT Cookie Session authenticator successfully registered")
|
cclog.Info("JWT Cookie Session authenticator successfully registered")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -89,9 +86,9 @@ func (ja *JWTCookieSessionAuthenticator) CanLogin(
|
|||||||
user *schema.User,
|
user *schema.User,
|
||||||
username string,
|
username string,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, bool) {
|
r *http.Request,
|
||||||
|
) (*schema.User, bool) {
|
||||||
jc := config.Keys.JwtConfig
|
jc := Keys.JwtConfig
|
||||||
cookieName := ""
|
cookieName := ""
|
||||||
if jc.CookieName != "" {
|
if jc.CookieName != "" {
|
||||||
cookieName = jc.CookieName
|
cookieName = jc.CookieName
|
||||||
@@ -112,9 +109,9 @@ func (ja *JWTCookieSessionAuthenticator) CanLogin(
|
|||||||
func (ja *JWTCookieSessionAuthenticator) Login(
|
func (ja *JWTCookieSessionAuthenticator) Login(
|
||||||
user *schema.User,
|
user *schema.User,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, error) {
|
r *http.Request,
|
||||||
|
) (*schema.User, error) {
|
||||||
jc := config.Keys.JwtConfig
|
jc := Keys.JwtConfig
|
||||||
jwtCookie, err := r.Cookie(jc.CookieName)
|
jwtCookie, err := r.Cookie(jc.CookieName)
|
||||||
var rawtoken string
|
var rawtoken string
|
||||||
|
|
||||||
@@ -122,7 +119,7 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
|||||||
rawtoken = jwtCookie.Value
|
rawtoken = jwtCookie.Value
|
||||||
}
|
}
|
||||||
|
|
||||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
|
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
|
||||||
if t.Method != jwt.SigningMethodEdDSA {
|
if t.Method != jwt.SigningMethodEdDSA {
|
||||||
return nil, errors.New("only Ed25519/EdDSA supported")
|
return nil, errors.New("only Ed25519/EdDSA supported")
|
||||||
}
|
}
|
||||||
@@ -139,52 +136,26 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
|||||||
return ja.publicKey, nil
|
return ja.publicKey, nil
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("JWT cookie session: error while parsing token")
|
cclog.Warn("JWT cookie session: error while parsing token")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check token validity and extract paypload
|
if !token.Valid {
|
||||||
if err := token.Claims.Valid(); err != nil {
|
cclog.Warn("jwt token claims are not valid")
|
||||||
log.Warn("jwt token claims are not valid")
|
return nil, errors.New("jwt token claims are not valid")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
claims := token.Claims.(jwt.MapClaims)
|
claims := token.Claims.(jwt.MapClaims)
|
||||||
sub, _ := claims["sub"].(string)
|
|
||||||
|
// Use shared helper to get user from JWT claims
|
||||||
var name string
|
user, err = getUserFromJWT(claims, jc.ValidateUser, schema.AuthSession, schema.AuthViaToken)
|
||||||
if wrap, ok := claims["name"].(map[string]interface{}); ok {
|
if err != nil {
|
||||||
if vals, ok := wrap["values"].([]interface{}); ok {
|
return nil, err
|
||||||
if len(vals) != 0 {
|
|
||||||
name = fmt.Sprintf("%v", vals[0])
|
|
||||||
|
|
||||||
for i := 1; i < len(vals); i++ {
|
|
||||||
name += fmt.Sprintf(" %v", vals[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var roles []string
|
// Sync or update user if configured
|
||||||
|
if !jc.ValidateUser && (jc.SyncUserOnLogin || jc.UpdateUserOnLogin) {
|
||||||
if jc.ValidateUser {
|
handleTokenUser(user)
|
||||||
// Deny any logins for unknown usernames
|
|
||||||
if user == nil {
|
|
||||||
log.Warn("Could not find user from JWT in internal database.")
|
|
||||||
return nil, errors.New("unknown user")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Take user roles from database instead of trusting the JWT
|
|
||||||
roles = user.Roles
|
|
||||||
} else {
|
|
||||||
// Extract roles from JWT (if present)
|
|
||||||
if rawroles, ok := claims["roles"].([]interface{}); ok {
|
|
||||||
for _, rr := range rawroles {
|
|
||||||
if r, ok := rr.(string); ok {
|
|
||||||
roles = append(roles, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// (Ask browser to) Delete JWT cookie
|
// (Ask browser to) Delete JWT cookie
|
||||||
@@ -197,23 +168,5 @@ func (ja *JWTCookieSessionAuthenticator) Login(
|
|||||||
}
|
}
|
||||||
http.SetCookie(rw, deletedCookie)
|
http.SetCookie(rw, deletedCookie)
|
||||||
|
|
||||||
if user == nil {
|
|
||||||
projects := make([]string, 0)
|
|
||||||
user = &schema.User{
|
|
||||||
Username: sub,
|
|
||||||
Name: name,
|
|
||||||
Roles: roles,
|
|
||||||
Projects: projects,
|
|
||||||
AuthType: schema.AuthSession,
|
|
||||||
AuthSource: schema.AuthViaToken,
|
|
||||||
}
|
|
||||||
|
|
||||||
if jc.SyncUserOnLogin {
|
|
||||||
if err := repository.GetUserRepository().AddUser(user); err != nil {
|
|
||||||
log.Errorf("Error while adding user '%s' to DB", user.Username)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return user, nil
|
return user, nil
|
||||||
}
|
}
|
||||||
|
|||||||
136
internal/auth/jwtHelpers.go
Normal file
136
internal/auth/jwtHelpers.go
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/golang-jwt/jwt/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// extractStringFromClaims extracts a string value from JWT claims
|
||||||
|
func extractStringFromClaims(claims jwt.MapClaims, key string) string {
|
||||||
|
if val, ok := claims[key].(string); ok {
|
||||||
|
return val
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractRolesFromClaims extracts roles from JWT claims
|
||||||
|
// If validateRoles is true, only valid roles are returned
|
||||||
|
func extractRolesFromClaims(claims jwt.MapClaims, validateRoles bool) []string {
|
||||||
|
var roles []string
|
||||||
|
|
||||||
|
if rawroles, ok := claims["roles"].([]any); ok {
|
||||||
|
for _, rr := range rawroles {
|
||||||
|
if r, ok := rr.(string); ok {
|
||||||
|
if validateRoles {
|
||||||
|
if schema.IsValidRole(r) {
|
||||||
|
roles = append(roles, r)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
roles = append(roles, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return roles
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractProjectsFromClaims extracts projects from JWT claims
|
||||||
|
func extractProjectsFromClaims(claims jwt.MapClaims) []string {
|
||||||
|
projects := make([]string, 0)
|
||||||
|
|
||||||
|
if rawprojs, ok := claims["projects"].([]any); ok {
|
||||||
|
for _, pp := range rawprojs {
|
||||||
|
if p, ok := pp.(string); ok {
|
||||||
|
projects = append(projects, p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if rawprojs, ok := claims["projects"]; ok {
|
||||||
|
if projSlice, ok := rawprojs.([]string); ok {
|
||||||
|
projects = append(projects, projSlice...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return projects
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractNameFromClaims extracts name from JWT claims
|
||||||
|
// Handles both simple string and complex nested structure
|
||||||
|
func extractNameFromClaims(claims jwt.MapClaims) string {
|
||||||
|
// Try simple string first
|
||||||
|
if name, ok := claims["name"].(string); ok {
|
||||||
|
return name
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try nested structure: {name: {values: [...]}}
|
||||||
|
if wrap, ok := claims["name"].(map[string]any); ok {
|
||||||
|
if vals, ok := wrap["values"].([]any); ok {
|
||||||
|
if len(vals) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
name := fmt.Sprintf("%v", vals[0])
|
||||||
|
for i := 1; i < len(vals); i++ {
|
||||||
|
name += fmt.Sprintf(" %v", vals[i])
|
||||||
|
}
|
||||||
|
return name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// getUserFromJWT creates or retrieves a user based on JWT claims
|
||||||
|
// If validateUser is true, the user must exist in the database
|
||||||
|
// Otherwise, a new user object is created from claims
|
||||||
|
// authSource should be a schema.AuthSource constant (like schema.AuthViaToken)
|
||||||
|
func getUserFromJWT(claims jwt.MapClaims, validateUser bool, authType schema.AuthType, authSource schema.AuthSource) (*schema.User, error) {
|
||||||
|
sub := extractStringFromClaims(claims, "sub")
|
||||||
|
if sub == "" {
|
||||||
|
return nil, errors.New("missing 'sub' claim in JWT")
|
||||||
|
}
|
||||||
|
|
||||||
|
if validateUser {
|
||||||
|
// Validate user against database
|
||||||
|
ur := repository.GetUserRepository()
|
||||||
|
user, err := ur.GetUser(sub)
|
||||||
|
if err != nil && err != sql.ErrNoRows {
|
||||||
|
cclog.Errorf("Error while loading user '%v': %v", sub, err)
|
||||||
|
return nil, fmt.Errorf("database error: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Deny any logins for unknown usernames
|
||||||
|
if user == nil || err == sql.ErrNoRows {
|
||||||
|
cclog.Warn("Could not find user from JWT in internal database.")
|
||||||
|
return nil, errors.New("unknown user")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return database user (with database roles)
|
||||||
|
return user, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create user from JWT claims
|
||||||
|
name := extractNameFromClaims(claims)
|
||||||
|
roles := extractRolesFromClaims(claims, true) // Validate roles
|
||||||
|
projects := extractProjectsFromClaims(claims)
|
||||||
|
|
||||||
|
return &schema.User{
|
||||||
|
Username: sub,
|
||||||
|
Name: name,
|
||||||
|
Roles: roles,
|
||||||
|
Projects: projects,
|
||||||
|
AuthType: authType,
|
||||||
|
AuthSource: authSource,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
281
internal/auth/jwtHelpers_test.go
Normal file
281
internal/auth/jwtHelpers_test.go
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/golang-jwt/jwt/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestExtractStringFromClaims tests extracting string values from JWT claims
|
||||||
|
func TestExtractStringFromClaims(t *testing.T) {
|
||||||
|
claims := jwt.MapClaims{
|
||||||
|
"sub": "testuser",
|
||||||
|
"email": "test@example.com",
|
||||||
|
"age": 25, // not a string
|
||||||
|
}
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
key string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"Existing string", "sub", "testuser"},
|
||||||
|
{"Another string", "email", "test@example.com"},
|
||||||
|
{"Non-existent key", "missing", ""},
|
||||||
|
{"Non-string value", "age", ""},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := extractStringFromClaims(claims, tt.key)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("Expected %s, got %s", tt.expected, result)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExtractRolesFromClaims tests role extraction and validation
|
||||||
|
func TestExtractRolesFromClaims(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
claims jwt.MapClaims
|
||||||
|
validateRoles bool
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Valid roles without validation",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"roles": []any{"admin", "user", "invalid_role"},
|
||||||
|
},
|
||||||
|
validateRoles: false,
|
||||||
|
expected: []string{"admin", "user", "invalid_role"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Valid roles with validation",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"roles": []any{"admin", "user", "api"},
|
||||||
|
},
|
||||||
|
validateRoles: true,
|
||||||
|
expected: []string{"admin", "user", "api"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Invalid roles with validation",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"roles": []any{"invalid_role", "fake_role"},
|
||||||
|
},
|
||||||
|
validateRoles: true,
|
||||||
|
expected: []string{}, // Should filter out invalid roles
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "No roles claim",
|
||||||
|
claims: jwt.MapClaims{},
|
||||||
|
validateRoles: false,
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Non-array roles",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"roles": "admin",
|
||||||
|
},
|
||||||
|
validateRoles: false,
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := extractRolesFromClaims(tt.claims, tt.validateRoles)
|
||||||
|
|
||||||
|
if len(result) != len(tt.expected) {
|
||||||
|
t.Errorf("Expected %d roles, got %d", len(tt.expected), len(result))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, role := range result {
|
||||||
|
if i >= len(tt.expected) || role != tt.expected[i] {
|
||||||
|
t.Errorf("Expected role %s at position %d, got %s", tt.expected[i], i, role)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExtractProjectsFromClaims tests project extraction from claims
|
||||||
|
func TestExtractProjectsFromClaims(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
claims jwt.MapClaims
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Projects as array of interfaces",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"projects": []any{"project1", "project2", "project3"},
|
||||||
|
},
|
||||||
|
expected: []string{"project1", "project2", "project3"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Projects as string array",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"projects": []string{"projectA", "projectB"},
|
||||||
|
},
|
||||||
|
expected: []string{"projectA", "projectB"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "No projects claim",
|
||||||
|
claims: jwt.MapClaims{},
|
||||||
|
expected: []string{},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Mixed types in projects array",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"projects": []any{"project1", 123, "project2"},
|
||||||
|
},
|
||||||
|
expected: []string{"project1", "project2"}, // Should skip non-strings
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := extractProjectsFromClaims(tt.claims)
|
||||||
|
|
||||||
|
if len(result) != len(tt.expected) {
|
||||||
|
t.Errorf("Expected %d projects, got %d", len(tt.expected), len(result))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, project := range result {
|
||||||
|
if i >= len(tt.expected) || project != tt.expected[i] {
|
||||||
|
t.Errorf("Expected project %s at position %d, got %s", tt.expected[i], i, project)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestExtractNameFromClaims tests name extraction from various formats
|
||||||
|
func TestExtractNameFromClaims(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
claims jwt.MapClaims
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "Simple string name",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"name": "John Doe",
|
||||||
|
},
|
||||||
|
expected: "John Doe",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Nested name structure",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"name": map[string]any{
|
||||||
|
"values": []any{"John", "Doe"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: "John Doe",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Nested name with single value",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"name": map[string]any{
|
||||||
|
"values": []any{"Alice"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: "Alice",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "No name claim",
|
||||||
|
claims: jwt.MapClaims{},
|
||||||
|
expected: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Empty nested values",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"name": map[string]any{
|
||||||
|
"values": []any{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Nested with non-string values",
|
||||||
|
claims: jwt.MapClaims{
|
||||||
|
"name": map[string]any{
|
||||||
|
"values": []any{123, "Smith"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: "123 Smith", // Should convert to string
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := extractNameFromClaims(tt.claims)
|
||||||
|
if result != tt.expected {
|
||||||
|
t.Errorf("Expected '%s', got '%s'", tt.expected, result)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGetUserFromJWT_NoValidation tests getUserFromJWT without database validation
|
||||||
|
func TestGetUserFromJWT_NoValidation(t *testing.T) {
|
||||||
|
claims := jwt.MapClaims{
|
||||||
|
"sub": "testuser",
|
||||||
|
"name": "Test User",
|
||||||
|
"roles": []any{"user", "admin"},
|
||||||
|
"projects": []any{"project1", "project2"},
|
||||||
|
}
|
||||||
|
|
||||||
|
user, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if user.Username != "testuser" {
|
||||||
|
t.Errorf("Expected username 'testuser', got '%s'", user.Username)
|
||||||
|
}
|
||||||
|
|
||||||
|
if user.Name != "Test User" {
|
||||||
|
t.Errorf("Expected name 'Test User', got '%s'", user.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(user.Roles) != 2 {
|
||||||
|
t.Errorf("Expected 2 roles, got %d", len(user.Roles))
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(user.Projects) != 2 {
|
||||||
|
t.Errorf("Expected 2 projects, got %d", len(user.Projects))
|
||||||
|
}
|
||||||
|
|
||||||
|
if user.AuthType != schema.AuthToken {
|
||||||
|
t.Errorf("Expected AuthType %v, got %v", schema.AuthToken, user.AuthType)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGetUserFromJWT_MissingSub tests error when sub claim is missing
|
||||||
|
func TestGetUserFromJWT_MissingSub(t *testing.T) {
|
||||||
|
claims := jwt.MapClaims{
|
||||||
|
"name": "Test User",
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := getUserFromJWT(claims, false, schema.AuthToken, -1)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
t.Error("Expected error for missing sub claim")
|
||||||
|
}
|
||||||
|
|
||||||
|
if err.Error() != "missing 'sub' claim in JWT" {
|
||||||
|
t.Errorf("Expected specific error message, got: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,7 +1,8 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package auth
|
package auth
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -12,11 +13,9 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
"github.com/golang-jwt/jwt/v5"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
|
||||||
"github.com/golang-jwt/jwt/v4"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type JWTSessionAuthenticator struct {
|
type JWTSessionAuthenticator struct {
|
||||||
@@ -29,13 +28,13 @@ func (ja *JWTSessionAuthenticator) Init() error {
|
|||||||
if pubKey := os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
|
if pubKey := os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
|
||||||
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
bytes, err := base64.StdEncoding.DecodeString(pubKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Could not decode cross login JWT HS512 key")
|
cclog.Warn("Could not decode cross login JWT HS512 key")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
ja.loginTokenKey = bytes
|
ja.loginTokenKey = bytes
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info("JWT Session authenticator successfully registered")
|
cclog.Info("JWT Session authenticator successfully registered")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -43,8 +42,8 @@ func (ja *JWTSessionAuthenticator) CanLogin(
|
|||||||
user *schema.User,
|
user *schema.User,
|
||||||
username string,
|
username string,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, bool) {
|
r *http.Request,
|
||||||
|
) (*schema.User, bool) {
|
||||||
return user, r.Header.Get("Authorization") != "" ||
|
return user, r.Header.Get("Authorization") != "" ||
|
||||||
r.URL.Query().Get("login-token") != ""
|
r.URL.Query().Get("login-token") != ""
|
||||||
}
|
}
|
||||||
@@ -52,98 +51,40 @@ func (ja *JWTSessionAuthenticator) CanLogin(
|
|||||||
func (ja *JWTSessionAuthenticator) Login(
|
func (ja *JWTSessionAuthenticator) Login(
|
||||||
user *schema.User,
|
user *schema.User,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, error) {
|
r *http.Request,
|
||||||
|
) (*schema.User, error) {
|
||||||
rawtoken := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")
|
rawtoken := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")
|
||||||
if rawtoken == "" {
|
if rawtoken == "" {
|
||||||
rawtoken = r.URL.Query().Get("login-token")
|
rawtoken = r.URL.Query().Get("login-token")
|
||||||
}
|
}
|
||||||
|
|
||||||
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
|
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (any, error) {
|
||||||
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
|
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
|
||||||
return ja.loginTokenKey, nil
|
return ja.loginTokenKey, nil
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
|
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while parsing jwt token")
|
cclog.Warn("Error while parsing jwt token")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = token.Claims.Valid(); err != nil {
|
if !token.Valid {
|
||||||
log.Warn("jwt token claims are not valid")
|
cclog.Warn("jwt token claims are not valid")
|
||||||
return nil, err
|
return nil, errors.New("jwt token claims are not valid")
|
||||||
}
|
}
|
||||||
|
|
||||||
claims := token.Claims.(jwt.MapClaims)
|
claims := token.Claims.(jwt.MapClaims)
|
||||||
sub, _ := claims["sub"].(string)
|
|
||||||
|
// Use shared helper to get user from JWT claims
|
||||||
var name string
|
user, err = getUserFromJWT(claims, Keys.JwtConfig.ValidateUser, schema.AuthSession, schema.AuthViaToken)
|
||||||
if wrap, ok := claims["name"].(map[string]interface{}); ok {
|
if err != nil {
|
||||||
if vals, ok := wrap["values"].([]interface{}); ok {
|
return nil, err
|
||||||
if len(vals) != 0 {
|
|
||||||
name = fmt.Sprintf("%v", vals[0])
|
|
||||||
|
|
||||||
for i := 1; i < len(vals); i++ {
|
|
||||||
name += fmt.Sprintf(" %v", vals[i])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var roles []string
|
// Sync or update user if configured
|
||||||
|
if !Keys.JwtConfig.ValidateUser && (Keys.JwtConfig.SyncUserOnLogin || Keys.JwtConfig.UpdateUserOnLogin) {
|
||||||
if config.Keys.JwtConfig.ValidateUser {
|
handleTokenUser(user)
|
||||||
// Deny any logins for unknown usernames
|
|
||||||
if user == nil {
|
|
||||||
log.Warn("Could not find user from JWT in internal database.")
|
|
||||||
return nil, errors.New("unknown user")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Take user roles from database instead of trusting the JWT
|
|
||||||
roles = user.Roles
|
|
||||||
} else {
|
|
||||||
// Extract roles from JWT (if present)
|
|
||||||
if rawroles, ok := claims["roles"].([]interface{}); ok {
|
|
||||||
for _, rr := range rawroles {
|
|
||||||
if r, ok := rr.(string); ok {
|
|
||||||
if schema.IsValidRole(r) {
|
|
||||||
roles = append(roles, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
projects := make([]string, 0)
|
|
||||||
// Java/Grails Issued Token
|
|
||||||
// if rawprojs, ok := claims["projects"].([]interface{}); ok {
|
|
||||||
// for _, pp := range rawprojs {
|
|
||||||
// if p, ok := pp.(string); ok {
|
|
||||||
// projects = append(projects, p)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// } else if rawprojs, ok := claims["projects"]; ok {
|
|
||||||
// for _, p := range rawprojs.([]string) {
|
|
||||||
// projects = append(projects, p)
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
if user == nil {
|
|
||||||
user = &schema.User{
|
|
||||||
Username: sub,
|
|
||||||
Name: name,
|
|
||||||
Roles: roles,
|
|
||||||
Projects: projects,
|
|
||||||
AuthType: schema.AuthSession,
|
|
||||||
AuthSource: schema.AuthViaToken,
|
|
||||||
}
|
|
||||||
|
|
||||||
if config.Keys.JwtConfig.SyncUserOnLogin {
|
|
||||||
if err := repository.GetUserRepository().AddUser(user); err != nil {
|
|
||||||
log.Errorf("Error while adding user '%s' to DB", user.Username)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return user, nil
|
return user, nil
|
||||||
|
|||||||
@@ -1,7 +1,8 @@
|
|||||||
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package auth
|
package auth
|
||||||
|
|
||||||
import (
|
import (
|
||||||
@@ -10,18 +11,30 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
"github.com/go-ldap/ldap/v3"
|
"github.com/go-ldap/ldap/v3"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type LdapConfig struct {
|
||||||
|
URL string `json:"url"`
|
||||||
|
UserBase string `json:"user_base"`
|
||||||
|
SearchDN string `json:"search_dn"`
|
||||||
|
UserBind string `json:"user_bind"`
|
||||||
|
UserFilter string `json:"user_filter"`
|
||||||
|
UserAttr string `json:"username_attr"`
|
||||||
|
SyncInterval string `json:"sync_interval"` // Parsed using time.ParseDuration.
|
||||||
|
SyncDelOldUsers bool `json:"sync_del_old_users"`
|
||||||
|
|
||||||
|
// Should an non-existent user be added to the DB if user exists in ldap directory
|
||||||
|
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||||
|
}
|
||||||
|
|
||||||
type LdapAuthenticator struct {
|
type LdapAuthenticator struct {
|
||||||
syncPassword string
|
syncPassword string
|
||||||
UserAttr string
|
UserAttr string
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ Authenticator = (*LdapAuthenticator)(nil)
|
var _ Authenticator = (*LdapAuthenticator)(nil)
|
||||||
@@ -29,40 +42,11 @@ var _ Authenticator = (*LdapAuthenticator)(nil)
|
|||||||
func (la *LdapAuthenticator) Init() error {
|
func (la *LdapAuthenticator) Init() error {
|
||||||
la.syncPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
|
la.syncPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
|
||||||
if la.syncPassword == "" {
|
if la.syncPassword == "" {
|
||||||
log.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)")
|
cclog.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)")
|
||||||
}
|
}
|
||||||
|
|
||||||
lc := config.Keys.LdapConfig
|
if Keys.LdapConfig.UserAttr != "" {
|
||||||
|
la.UserAttr = Keys.LdapConfig.UserAttr
|
||||||
if lc.SyncInterval != "" {
|
|
||||||
interval, err := time.ParseDuration(lc.SyncInterval)
|
|
||||||
if err != nil {
|
|
||||||
log.Warnf("Could not parse duration for sync interval: %v",
|
|
||||||
lc.SyncInterval)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if interval == 0 {
|
|
||||||
log.Info("Sync interval is zero")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
go func() {
|
|
||||||
ticker := time.NewTicker(interval)
|
|
||||||
for t := range ticker.C {
|
|
||||||
log.Printf("sync started at %s", t.Format(time.RFC3339))
|
|
||||||
if err := la.Sync(); err != nil {
|
|
||||||
log.Errorf("sync failed: %s", err.Error())
|
|
||||||
}
|
|
||||||
log.Print("sync done")
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
} else {
|
|
||||||
log.Info("LDAP configuration key sync_interval invalid")
|
|
||||||
}
|
|
||||||
|
|
||||||
if lc.UserAttr != "" {
|
|
||||||
la.UserAttr = lc.UserAttr
|
|
||||||
} else {
|
} else {
|
||||||
la.UserAttr = "gecos"
|
la.UserAttr = "gecos"
|
||||||
}
|
}
|
||||||
@@ -74,9 +58,9 @@ func (la *LdapAuthenticator) CanLogin(
|
|||||||
user *schema.User,
|
user *schema.User,
|
||||||
username string,
|
username string,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, bool) {
|
r *http.Request,
|
||||||
|
) (*schema.User, bool) {
|
||||||
lc := config.Keys.LdapConfig
|
lc := Keys.LdapConfig
|
||||||
|
|
||||||
if user != nil {
|
if user != nil {
|
||||||
if user.AuthSource == schema.AuthViaLDAP {
|
if user.AuthSource == schema.AuthViaLDAP {
|
||||||
@@ -86,7 +70,8 @@ func (la *LdapAuthenticator) CanLogin(
|
|||||||
if lc.SyncUserOnLogin {
|
if lc.SyncUserOnLogin {
|
||||||
l, err := la.getLdapConnection(true)
|
l, err := la.getLdapConnection(true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("LDAP connection error")
|
cclog.Error("LDAP connection error")
|
||||||
|
return nil, false
|
||||||
}
|
}
|
||||||
defer l.Close()
|
defer l.Close()
|
||||||
|
|
||||||
@@ -99,12 +84,12 @@ func (la *LdapAuthenticator) CanLogin(
|
|||||||
|
|
||||||
sr, err := l.Search(searchRequest)
|
sr, err := l.Search(searchRequest)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn(err)
|
cclog.Warn(err)
|
||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(sr.Entries) != 1 {
|
if len(sr.Entries) != 1 {
|
||||||
log.Warn("LDAP: User does not exist or too many entries returned")
|
cclog.Warn("LDAP: User does not exist or too many entries returned")
|
||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,7 +109,7 @@ func (la *LdapAuthenticator) CanLogin(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if err := repository.GetUserRepository().AddUser(user); err != nil {
|
if err := repository.GetUserRepository().AddUser(user); err != nil {
|
||||||
log.Errorf("User '%s' LDAP: Insert into DB failed", username)
|
cclog.Errorf("User '%s' LDAP: Insert into DB failed", username)
|
||||||
return nil, false
|
return nil, false
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,18 +123,18 @@ func (la *LdapAuthenticator) CanLogin(
|
|||||||
func (la *LdapAuthenticator) Login(
|
func (la *LdapAuthenticator) Login(
|
||||||
user *schema.User,
|
user *schema.User,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, error) {
|
r *http.Request,
|
||||||
|
) (*schema.User, error) {
|
||||||
l, err := la.getLdapConnection(false)
|
l, err := la.getLdapConnection(false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while getting ldap connection")
|
cclog.Warn("Error while getting ldap connection")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
defer l.Close()
|
defer l.Close()
|
||||||
|
|
||||||
userDn := strings.Replace(config.Keys.LdapConfig.UserBind, "{username}", user.Username, -1)
|
userDn := strings.ReplaceAll(Keys.LdapConfig.UserBind, "{username}", user.Username)
|
||||||
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
|
if err := l.Bind(userDn, r.FormValue("password")); err != nil {
|
||||||
log.Errorf("AUTH/LDAP > Authentication for user %s failed: %v",
|
cclog.Errorf("AUTH/LDAP > Authentication for user %s failed: %v",
|
||||||
user.Username, err)
|
user.Username, err)
|
||||||
return nil, fmt.Errorf("Authentication failed")
|
return nil, fmt.Errorf("Authentication failed")
|
||||||
}
|
}
|
||||||
@@ -158,11 +143,11 @@ func (la *LdapAuthenticator) Login(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (la *LdapAuthenticator) Sync() error {
|
func (la *LdapAuthenticator) Sync() error {
|
||||||
const IN_DB int = 1
|
const InDB int = 1
|
||||||
const IN_LDAP int = 2
|
const InLdap int = 2
|
||||||
const IN_BOTH int = 3
|
const InBoth int = 3
|
||||||
ur := repository.GetUserRepository()
|
ur := repository.GetUserRepository()
|
||||||
lc := config.Keys.LdapConfig
|
lc := Keys.LdapConfig
|
||||||
|
|
||||||
users := map[string]int{}
|
users := map[string]int{}
|
||||||
usernames, err := ur.GetLdapUsernames()
|
usernames, err := ur.GetLdapUsernames()
|
||||||
@@ -171,12 +156,12 @@ func (la *LdapAuthenticator) Sync() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, username := range usernames {
|
for _, username := range usernames {
|
||||||
users[username] = IN_DB
|
users[username] = InDB
|
||||||
}
|
}
|
||||||
|
|
||||||
l, err := la.getLdapConnection(true)
|
l, err := la.getLdapConnection(true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("LDAP connection error")
|
cclog.Error("LDAP connection error")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer l.Close()
|
defer l.Close()
|
||||||
@@ -187,7 +172,7 @@ func (la *LdapAuthenticator) Sync() error {
|
|||||||
lc.UserFilter,
|
lc.UserFilter,
|
||||||
[]string{"dn", "uid", la.UserAttr}, nil))
|
[]string{"dn", "uid", la.UserAttr}, nil))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("LDAP search error")
|
cclog.Warn("LDAP search error")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -200,18 +185,18 @@ func (la *LdapAuthenticator) Sync() error {
|
|||||||
|
|
||||||
_, ok := users[username]
|
_, ok := users[username]
|
||||||
if !ok {
|
if !ok {
|
||||||
users[username] = IN_LDAP
|
users[username] = InLdap
|
||||||
newnames[username] = entry.GetAttributeValue(la.UserAttr)
|
newnames[username] = entry.GetAttributeValue(la.UserAttr)
|
||||||
} else {
|
} else {
|
||||||
users[username] = IN_BOTH
|
users[username] = InBoth
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for username, where := range users {
|
for username, where := range users {
|
||||||
if where == IN_DB && lc.SyncDelOldUsers {
|
if where == InDB && lc.SyncDelOldUsers {
|
||||||
ur.DelUser(username)
|
ur.DelUser(username)
|
||||||
log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
|
cclog.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
|
||||||
} else if where == IN_LDAP {
|
} else if where == InLdap {
|
||||||
name := newnames[username]
|
name := newnames[username]
|
||||||
|
|
||||||
var roles []string
|
var roles []string
|
||||||
@@ -226,9 +211,9 @@ func (la *LdapAuthenticator) Sync() error {
|
|||||||
AuthSource: schema.AuthViaLDAP,
|
AuthSource: schema.AuthViaLDAP,
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
|
cclog.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
|
||||||
if err := ur.AddUser(user); err != nil {
|
if err := ur.AddUser(user); err != nil {
|
||||||
log.Errorf("User '%s' LDAP: Insert into DB failed", username)
|
cclog.Errorf("User '%s' LDAP: Insert into DB failed", username)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -238,18 +223,17 @@ func (la *LdapAuthenticator) Sync() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
|
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
|
||||||
|
lc := Keys.LdapConfig
|
||||||
lc := config.Keys.LdapConfig
|
conn, err := ldap.DialURL(lc.URL)
|
||||||
conn, err := ldap.DialURL(lc.Url)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("LDAP URL dial failed")
|
cclog.Warn("LDAP URL dial failed")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if admin {
|
if admin {
|
||||||
if err := conn.Bind(lc.SearchDN, la.syncPassword); err != nil {
|
if err := conn.Bind(lc.SearchDN, la.syncPassword); err != nil {
|
||||||
conn.Close()
|
conn.Close()
|
||||||
log.Warn("LDAP connection bind failed")
|
cclog.Warn("LDAP connection bind failed")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,15 +1,16 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package auth
|
package auth
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
"golang.org/x/crypto/bcrypt"
|
"golang.org/x/crypto/bcrypt"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -27,19 +28,19 @@ func (la *LocalAuthenticator) CanLogin(
|
|||||||
user *schema.User,
|
user *schema.User,
|
||||||
username string,
|
username string,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, bool) {
|
r *http.Request,
|
||||||
|
) (*schema.User, bool) {
|
||||||
return user, user != nil && user.AuthSource == schema.AuthViaLocalPassword
|
return user, user != nil && user.AuthSource == schema.AuthViaLocalPassword
|
||||||
}
|
}
|
||||||
|
|
||||||
func (la *LocalAuthenticator) Login(
|
func (la *LocalAuthenticator) Login(
|
||||||
user *schema.User,
|
user *schema.User,
|
||||||
rw http.ResponseWriter,
|
rw http.ResponseWriter,
|
||||||
r *http.Request) (*schema.User, error) {
|
r *http.Request,
|
||||||
|
) (*schema.User, error) {
|
||||||
if e := bcrypt.CompareHashAndPassword([]byte(user.Password),
|
if e := bcrypt.CompareHashAndPassword([]byte(user.Password),
|
||||||
[]byte(r.FormValue("password"))); e != nil {
|
[]byte(r.FormValue("password"))); e != nil {
|
||||||
log.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
|
cclog.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
|
||||||
return nil, fmt.Errorf("Authentication failed")
|
return nil, fmt.Errorf("Authentication failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
212
internal/auth/oidc.go
Normal file
212
internal/auth/oidc.go
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package auth
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/rand"
|
||||||
|
"encoding/base64"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"os"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/coreos/go-oidc/v3/oidc"
|
||||||
|
"github.com/gorilla/mux"
|
||||||
|
"golang.org/x/oauth2"
|
||||||
|
)
|
||||||
|
|
||||||
|
type OpenIDConfig struct {
|
||||||
|
Provider string `json:"provider"`
|
||||||
|
SyncUserOnLogin bool `json:"syncUserOnLogin"`
|
||||||
|
UpdateUserOnLogin bool `json:"updateUserOnLogin"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type OIDC struct {
|
||||||
|
client *oauth2.Config
|
||||||
|
provider *oidc.Provider
|
||||||
|
authentication *Authentication
|
||||||
|
clientID string
|
||||||
|
}
|
||||||
|
|
||||||
|
func randString(nByte int) (string, error) {
|
||||||
|
b := make([]byte, nByte)
|
||||||
|
if _, err := io.ReadFull(rand.Reader, b); err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
return base64.RawURLEncoding.EncodeToString(b), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value string) {
|
||||||
|
c := &http.Cookie{
|
||||||
|
Name: name,
|
||||||
|
Value: value,
|
||||||
|
MaxAge: int(time.Hour.Seconds()),
|
||||||
|
Secure: r.TLS != nil,
|
||||||
|
HttpOnly: true,
|
||||||
|
}
|
||||||
|
http.SetCookie(w, c)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewOIDC creates a new OIDC authenticator with the configured provider
|
||||||
|
func NewOIDC(a *Authentication) *OIDC {
|
||||||
|
// Use context with timeout for provider initialization
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
provider, err := oidc.NewProvider(ctx, Keys.OpenIDConfig.Provider)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Fatal(err)
|
||||||
|
}
|
||||||
|
clientID := os.Getenv("OID_CLIENT_ID")
|
||||||
|
if clientID == "" {
|
||||||
|
cclog.Warn("environment variable 'OID_CLIENT_ID' not set (Open ID connect auth will not work)")
|
||||||
|
}
|
||||||
|
clientSecret := os.Getenv("OID_CLIENT_SECRET")
|
||||||
|
if clientSecret == "" {
|
||||||
|
cclog.Warn("environment variable 'OID_CLIENT_SECRET' not set (Open ID connect auth will not work)")
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &oauth2.Config{
|
||||||
|
ClientID: clientID,
|
||||||
|
ClientSecret: clientSecret,
|
||||||
|
Endpoint: provider.Endpoint(),
|
||||||
|
RedirectURL: "oidc-callback",
|
||||||
|
Scopes: []string{oidc.ScopeOpenID, "profile", "email"},
|
||||||
|
}
|
||||||
|
|
||||||
|
oa := &OIDC{provider: provider, client: client, clientID: clientID, authentication: a}
|
||||||
|
|
||||||
|
return oa
|
||||||
|
}
|
||||||
|
|
||||||
|
func (oa *OIDC) RegisterEndpoints(r *mux.Router) {
|
||||||
|
r.HandleFunc("/oidc-login", oa.OAuth2Login)
|
||||||
|
r.HandleFunc("/oidc-callback", oa.OAuth2Callback)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
c, err := r.Cookie("state")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, "state cookie not found", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
state := c.Value
|
||||||
|
|
||||||
|
c, err = r.Cookie("verifier")
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, "verifier cookie not found", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
codeVerifier := c.Value
|
||||||
|
|
||||||
|
_ = r.ParseForm()
|
||||||
|
if r.Form.Get("state") != state {
|
||||||
|
http.Error(rw, "State invalid", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
code := r.Form.Get("code")
|
||||||
|
if code == "" {
|
||||||
|
http.Error(rw, "Code not found", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Exchange authorization code for token with timeout
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
token, err := oa.client.Exchange(ctx, code, oauth2.VerifierOption(codeVerifier))
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get user info from OIDC provider with same timeout
|
||||||
|
userInfo, err := oa.provider.UserInfo(ctx, oauth2.StaticTokenSource(token))
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// // Extract the ID Token from OAuth2 token.
|
||||||
|
// rawIDToken, ok := token.Extra("id_token").(string)
|
||||||
|
// if !ok {
|
||||||
|
// http.Error(rw, "Cannot access idToken", http.StatusInternalServerError)
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID})
|
||||||
|
// // Parse and verify ID Token payload.
|
||||||
|
// idToken, err := verifier.Verify(context.Background(), rawIDToken)
|
||||||
|
// if err != nil {
|
||||||
|
// http.Error(rw, "Failed to extract idToken: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
// }
|
||||||
|
|
||||||
|
projects := make([]string, 0)
|
||||||
|
|
||||||
|
// Extract custom claims
|
||||||
|
var claims struct {
|
||||||
|
Username string `json:"preferred_username"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
Profile struct {
|
||||||
|
Client struct {
|
||||||
|
Roles []string `json:"roles"`
|
||||||
|
} `json:"clustercockpit"`
|
||||||
|
} `json:"resource_access"`
|
||||||
|
}
|
||||||
|
if err := userInfo.Claims(&claims); err != nil {
|
||||||
|
http.Error(rw, "Failed to extract Claims: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
}
|
||||||
|
|
||||||
|
var roles []string
|
||||||
|
for _, r := range claims.Profile.Client.Roles {
|
||||||
|
switch r {
|
||||||
|
case "user":
|
||||||
|
roles = append(roles, schema.GetRoleString(schema.RoleUser))
|
||||||
|
case "admin":
|
||||||
|
roles = append(roles, schema.GetRoleString(schema.RoleAdmin))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(roles) == 0 {
|
||||||
|
roles = append(roles, schema.GetRoleString(schema.RoleUser))
|
||||||
|
}
|
||||||
|
|
||||||
|
user := &schema.User{
|
||||||
|
Username: claims.Username,
|
||||||
|
Name: claims.Name,
|
||||||
|
Roles: roles,
|
||||||
|
Projects: projects,
|
||||||
|
AuthSource: schema.AuthViaOIDC,
|
||||||
|
}
|
||||||
|
|
||||||
|
if Keys.OpenIDConfig.SyncUserOnLogin || Keys.OpenIDConfig.UpdateUserOnLogin {
|
||||||
|
handleOIDCUser(user)
|
||||||
|
}
|
||||||
|
|
||||||
|
oa.authentication.SaveSession(rw, r, user)
|
||||||
|
cclog.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
|
||||||
|
userCtx := context.WithValue(r.Context(), repository.ContextUserKey, user)
|
||||||
|
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(userCtx))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
state, err := randString(16)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(rw, "Internal error", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
setCallbackCookie(rw, r, "state", state)
|
||||||
|
|
||||||
|
// use PKCE to protect against CSRF attacks
|
||||||
|
codeVerifier := oauth2.GenerateVerifier()
|
||||||
|
setCallbackCookie(rw, r, "verifier", codeVerifier)
|
||||||
|
|
||||||
|
// Redirect user to consent page to ask for permission
|
||||||
|
url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.S256ChallengeOption(codeVerifier))
|
||||||
|
http.Redirect(rw, r, url, http.StatusFound)
|
||||||
|
}
|
||||||
96
internal/auth/schema.go
Normal file
96
internal/auth/schema.go
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package auth
|
||||||
|
|
||||||
|
var configSchema = `
|
||||||
|
{
|
||||||
|
"jwts": {
|
||||||
|
"description": "For JWT token authentication.",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"max-age": {
|
||||||
|
"description": "Configure how long a token is valid. As string parsable by time.ParseDuration()",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"cookieName": {
|
||||||
|
"description": "Cookie that should be checked for a JWT token.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"validateUser": {
|
||||||
|
"description": "Deny login for users not in database (but defined in JWT). Overwrite roles in JWT with database roles.",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"trustedIssuer": {
|
||||||
|
"description": "Issuer that should be accepted when validating external JWTs ",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"syncUserOnLogin": {
|
||||||
|
"description": "Add non-existent user to DB at login attempt with values provided in JWT.",
|
||||||
|
"type": "boolean"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["max-age"]
|
||||||
|
},
|
||||||
|
"oidc": {
|
||||||
|
"provider": {
|
||||||
|
"description": "",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"syncUserOnLogin": {
|
||||||
|
"description": "",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"updateUserOnLogin": {
|
||||||
|
"description": "",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"required": ["provider"]
|
||||||
|
},
|
||||||
|
"ldap": {
|
||||||
|
"description": "For LDAP Authentication and user synchronisation.",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"url": {
|
||||||
|
"description": "URL of LDAP directory server.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"user_base": {
|
||||||
|
"description": "Base DN of user tree root.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"search_dn": {
|
||||||
|
"description": "DN for authenticating LDAP admin account with general read rights.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"user_bind": {
|
||||||
|
"description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"user_filter": {
|
||||||
|
"description": "Filter to extract users for syncing.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"username_attr": {
|
||||||
|
"description": "Attribute with full username. Default: gecos",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"sync_interval": {
|
||||||
|
"description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"sync_del_old_users": {
|
||||||
|
"description": "Delete obsolete users in database.",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"syncUserOnLogin": {
|
||||||
|
"description": "Add non-existent user to DB at login attempt if user exists in Ldap directory",
|
||||||
|
"type": "boolean"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["url", "user_base", "search_dn", "user_bind", "user_filter"]
|
||||||
|
},
|
||||||
|
"required": ["jwts"]
|
||||||
|
}`
|
||||||
@@ -1,71 +1,160 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package config implements the program configuration data structures, validation and parsing
|
||||||
package config
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"log"
|
"time"
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/resampler"
|
||||||
)
|
)
|
||||||
|
|
||||||
var Keys schema.ProgramConfig = schema.ProgramConfig{
|
type ProgramConfig struct {
|
||||||
|
// Address where the http (or https) server will listen on (for example: 'localhost:80').
|
||||||
|
Addr string `json:"addr"`
|
||||||
|
|
||||||
|
// Addresses from which secured admin API endpoints can be reached, can be wildcard "*"
|
||||||
|
APIAllowedIPs []string `json:"apiAllowedIPs"`
|
||||||
|
|
||||||
|
APISubjects *NATSConfig `json:"apiSubjects"`
|
||||||
|
|
||||||
|
// Drop root permissions once .env was read and the port was taken.
|
||||||
|
User string `json:"user"`
|
||||||
|
Group string `json:"group"`
|
||||||
|
|
||||||
|
// Disable authentication (for everything: API, Web-UI, ...)
|
||||||
|
DisableAuthentication bool `json:"disable-authentication"`
|
||||||
|
|
||||||
|
// If `embed-static-files` is true (default), the frontend files are directly
|
||||||
|
// embeded into the go binary and expected to be in web/frontend. Only if
|
||||||
|
// it is false the files in `static-files` are served instead.
|
||||||
|
EmbedStaticFiles bool `json:"embed-static-files"`
|
||||||
|
StaticFiles string `json:"static-files"`
|
||||||
|
|
||||||
|
// Database driver - only 'sqlite3' is supported
|
||||||
|
DBDriver string `json:"db-driver"`
|
||||||
|
|
||||||
|
// Path to SQLite database file
|
||||||
|
DB string `json:"db"`
|
||||||
|
|
||||||
|
// Keep all metric data in the metric data repositories,
|
||||||
|
// do not write to the job-archive.
|
||||||
|
DisableArchive bool `json:"disable-archive"`
|
||||||
|
|
||||||
|
EnableJobTaggers bool `json:"enable-job-taggers"`
|
||||||
|
|
||||||
|
// Validate json input against schema
|
||||||
|
Validate bool `json:"validate"`
|
||||||
|
|
||||||
|
// If 0 or empty, the session does not expire!
|
||||||
|
SessionMaxAge string `json:"session-max-age"`
|
||||||
|
|
||||||
|
// If both those options are not empty, use HTTPS using those certificates.
|
||||||
|
HTTPSCertFile string `json:"https-cert-file"`
|
||||||
|
HTTPSKeyFile string `json:"https-key-file"`
|
||||||
|
|
||||||
|
// If not the empty string and `addr` does not end in ":80",
|
||||||
|
// redirect every request incoming at port 80 to that url.
|
||||||
|
RedirectHTTPTo string `json:"redirect-http-to"`
|
||||||
|
|
||||||
|
// Where to store MachineState files
|
||||||
|
MachineStateDir string `json:"machine-state-dir"`
|
||||||
|
|
||||||
|
// If not zero, automatically mark jobs as stopped running X seconds longer than their walltime.
|
||||||
|
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`
|
||||||
|
|
||||||
|
// Defines time X in seconds in which jobs are considered to be "short" and will be filtered in specific views.
|
||||||
|
ShortRunningJobsDuration int `json:"short-running-jobs-duration"`
|
||||||
|
|
||||||
|
// Energy Mix CO2 Emission Constant [g/kWh]
|
||||||
|
// If entered, displays estimated CO2 emission for job based on jobs totalEnergy
|
||||||
|
EmissionConstant int `json:"emission-constant"`
|
||||||
|
|
||||||
|
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
|
||||||
|
EnableResampling *ResampleConfig `json:"resampling"`
|
||||||
|
|
||||||
|
// Global upstream metric repository configuration for metric pull workers
|
||||||
|
UpstreamMetricRepository *json.RawMessage `json:"upstreamMetricRepository,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ResampleConfig struct {
|
||||||
|
// Minimum number of points to trigger resampling of data
|
||||||
|
MinimumPoints int `json:"minimumPoints"`
|
||||||
|
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
|
||||||
|
Resolutions []int `json:"resolutions"`
|
||||||
|
// Trigger next zoom level at less than this many visible datapoints
|
||||||
|
Trigger int `json:"trigger"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NATSConfig struct {
|
||||||
|
SubjectJobStart string `json:"subjectJobStart"`
|
||||||
|
SubjectJobStop string `json:"subjectJobStop"`
|
||||||
|
SubjectNodeState string `json:"subjectNodeState"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type IntRange struct {
|
||||||
|
From int `json:"from"`
|
||||||
|
To int `json:"to"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type TimeRange struct {
|
||||||
|
From *time.Time `json:"from"`
|
||||||
|
To *time.Time `json:"to"`
|
||||||
|
Range string `json:"range,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type FilterRanges struct {
|
||||||
|
Duration *IntRange `json:"duration"`
|
||||||
|
NumNodes *IntRange `json:"numNodes"`
|
||||||
|
StartTime *TimeRange `json:"startTime"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ClusterConfig struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
FilterRanges *FilterRanges `json:"filterRanges"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var Clusters []*ClusterConfig
|
||||||
|
|
||||||
|
var Keys ProgramConfig = ProgramConfig{
|
||||||
Addr: "localhost:8080",
|
Addr: "localhost:8080",
|
||||||
DisableAuthentication: false,
|
DisableAuthentication: false,
|
||||||
EmbedStaticFiles: true,
|
EmbedStaticFiles: true,
|
||||||
DBDriver: "sqlite3",
|
DBDriver: "sqlite3",
|
||||||
DB: "./var/job.db",
|
DB: "./var/job.db",
|
||||||
Archive: json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`),
|
|
||||||
DisableArchive: false,
|
DisableArchive: false,
|
||||||
Validate: false,
|
Validate: false,
|
||||||
SessionMaxAge: "168h",
|
SessionMaxAge: "168h",
|
||||||
StopJobsExceedingWalltime: 0,
|
StopJobsExceedingWalltime: 0,
|
||||||
ShortRunningJobsDuration: 5 * 60,
|
ShortRunningJobsDuration: 5 * 60,
|
||||||
UiDefaults: map[string]interface{}{
|
|
||||||
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
|
||||||
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
|
|
||||||
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
|
||||||
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
|
||||||
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
|
|
||||||
"plot_general_colorBackground": true,
|
|
||||||
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
|
|
||||||
"plot_general_lineWidth": 3,
|
|
||||||
"plot_list_jobsPerPage": 50,
|
|
||||||
"plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw"},
|
|
||||||
"plot_view_plotsPerRow": 3,
|
|
||||||
"plot_view_showPolarplot": true,
|
|
||||||
"plot_view_showRoofline": true,
|
|
||||||
"plot_view_showStatTable": true,
|
|
||||||
"system_view_selectedMetric": "cpu_load",
|
|
||||||
"analysis_view_selectedTopEntity": "user",
|
|
||||||
"analysis_view_selectedTopCategory": "totalWalltime",
|
|
||||||
"status_view_selectedTopUserCategory": "totalJobs",
|
|
||||||
"status_view_selectedTopProjectCategory": "totalJobs",
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func Init(flagConfigFile string) {
|
func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) {
|
||||||
raw, err := os.ReadFile(flagConfigFile)
|
Validate(configSchema, mainConfig)
|
||||||
if err != nil {
|
dec := json.NewDecoder(bytes.NewReader(mainConfig))
|
||||||
if !os.IsNotExist(err) {
|
dec.DisallowUnknownFields()
|
||||||
log.Fatalf("CONFIG ERROR: %v", err)
|
if err := dec.Decode(&Keys); err != nil {
|
||||||
}
|
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
|
||||||
} else {
|
}
|
||||||
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
|
|
||||||
log.Fatalf("Validate config: %v\n", err)
|
|
||||||
}
|
|
||||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
|
||||||
dec.DisallowUnknownFields()
|
|
||||||
if err := dec.Decode(&Keys); err != nil {
|
|
||||||
log.Fatalf("could not decode: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
|
Validate(clustersSchema, clusterConfig)
|
||||||
log.Fatal("At least one cluster required in config!")
|
dec = json.NewDecoder(bytes.NewReader(clusterConfig))
|
||||||
}
|
dec.DisallowUnknownFields()
|
||||||
|
if err := dec.Decode(&Clusters); err != nil {
|
||||||
|
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(Clusters) < 1 {
|
||||||
|
cclog.Abort("Config Init: At least one cluster required in config. Exited with error.")
|
||||||
|
}
|
||||||
|
|
||||||
|
if Keys.EnableResampling != nil && Keys.EnableResampling.MinimumPoints > 0 {
|
||||||
|
resampler.SetMinimumRequiredPoints(Keys.EnableResampling.MinimumPoints)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,16 +1,30 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package config
|
package config
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestInit(t *testing.T) {
|
func TestInit(t *testing.T) {
|
||||||
fp := "../../configs/config.json"
|
fp := "../../configs/config.json"
|
||||||
Init(fp)
|
ccconf.Init(fp)
|
||||||
|
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||||
|
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||||
|
Init(cfg, clustercfg)
|
||||||
|
} else {
|
||||||
|
cclog.Abort("Cluster configuration must be present")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cclog.Abort("Main configuration must be present")
|
||||||
|
}
|
||||||
|
|
||||||
if Keys.Addr != "0.0.0.0:443" {
|
if Keys.Addr != "0.0.0.0:443" {
|
||||||
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:443", Keys.Addr)
|
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:443", Keys.Addr)
|
||||||
}
|
}
|
||||||
@@ -18,7 +32,17 @@ func TestInit(t *testing.T) {
|
|||||||
|
|
||||||
func TestInitMinimal(t *testing.T) {
|
func TestInitMinimal(t *testing.T) {
|
||||||
fp := "../../configs/config-demo.json"
|
fp := "../../configs/config-demo.json"
|
||||||
Init(fp)
|
ccconf.Init(fp)
|
||||||
|
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||||
|
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||||
|
Init(cfg, clustercfg)
|
||||||
|
} else {
|
||||||
|
cclog.Abort("Cluster configuration must be present")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cclog.Abort("Main configuration must be present")
|
||||||
|
}
|
||||||
|
|
||||||
if Keys.Addr != "127.0.0.1:8080" {
|
if Keys.Addr != "127.0.0.1:8080" {
|
||||||
t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
|
t.Errorf("wrong addr\ngot: %s \nwant: 127.0.0.1:8080", Keys.Addr)
|
||||||
}
|
}
|
||||||
|
|||||||
51
internal/config/default_metrics.go
Normal file
51
internal/config/default_metrics.go
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DEPRECATED: SUPERSEDED BY NEW USER CONFIG - userConfig.go / web.go
|
||||||
|
|
||||||
|
type DefaultMetricsCluster struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
DefaultMetrics string `json:"default_metrics"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type DefaultMetricsConfig struct {
|
||||||
|
Clusters []DefaultMetricsCluster `json:"clusters"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
|
||||||
|
filePath := "default_metrics.json"
|
||||||
|
if _, err := os.Stat(filePath); os.IsNotExist(err) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
data, err := os.ReadFile(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var cfg DefaultMetricsConfig
|
||||||
|
if err := json.Unmarshal(data, &cfg); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &cfg, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func ParseMetricsString(s string) []string {
|
||||||
|
parts := strings.Split(s, ",")
|
||||||
|
var metrics []string
|
||||||
|
for _, p := range parts {
|
||||||
|
trimmed := strings.TrimSpace(p)
|
||||||
|
if trimmed != "" {
|
||||||
|
metrics = append(metrics, trimmed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return metrics
|
||||||
|
}
|
||||||
222
internal/config/schema.go
Normal file
222
internal/config/schema.go
Normal file
@@ -0,0 +1,222 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package config
|
||||||
|
|
||||||
|
var configSchema = `
|
||||||
|
{
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"addr": {
|
||||||
|
"description": "Address where the http (or https) server will listen on (for example: 'localhost:80').",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"apiAllowedIPs": {
|
||||||
|
"description": "Addresses from which secured API endpoints can be reached",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"user": {
|
||||||
|
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"group": {
|
||||||
|
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"disable-authentication": {
|
||||||
|
"description": "Disable authentication (for everything: API, Web-UI, ...).",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"embed-static-files": {
|
||||||
|
"description": "If all files in web/frontend/public should be served from within the binary itself (they are embedded) or not.",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"static-files": {
|
||||||
|
"description": "Folder where static assets can be found, if embed-static-files is false.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"db": {
|
||||||
|
"description": "Path to SQLite database file (e.g., './var/job.db')",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"disable-archive": {
|
||||||
|
"description": "Keep all metric data in the metric data repositories, do not write to the job-archive.",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"enable-job-taggers": {
|
||||||
|
"description": "Turn on automatic application and jobclass taggers",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"validate": {
|
||||||
|
"description": "Validate all input json documents against json schema.",
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
"session-max-age": {
|
||||||
|
"description": "Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"https-cert-file": {
|
||||||
|
"description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"https-key-file": {
|
||||||
|
"description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"redirect-http-to": {
|
||||||
|
"description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"stop-jobs-exceeding-walltime": {
|
||||||
|
"description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"short-running-jobs-duration": {
|
||||||
|
"description": "Do not show running jobs shorter than X seconds.",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"emission-constant": {
|
||||||
|
"description": ".",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"cron-frequency": {
|
||||||
|
"description": "Frequency of cron job workers.",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"duration-worker": {
|
||||||
|
"description": "Duration Update Worker [Defaults to '5m']",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"footprint-worker": {
|
||||||
|
"description": "Metric-Footprint Update Worker [Defaults to '10m']",
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"enable-resampling": {
|
||||||
|
"description": "Enable dynamic zoom in frontend metric plots.",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"minimumPoints": {
|
||||||
|
"description": "Minimum points to trigger resampling of time-series data.",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"trigger": {
|
||||||
|
"description": "Trigger next zoom level at less than this many visible datapoints.",
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"resolutions": {
|
||||||
|
"description": "Array of resampling target resolutions, in seconds.",
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["trigger", "resolutions"]
|
||||||
|
},
|
||||||
|
"upstreamMetricRepository": {
|
||||||
|
"description": "Global upstream metric repository configuration for metric pull workers",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"kind": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"]
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"token": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["kind"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["apiAllowedIPs"]
|
||||||
|
}`
|
||||||
|
|
||||||
|
var clustersSchema = `
|
||||||
|
{
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"name": {
|
||||||
|
"description": "The name of the cluster.",
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"metricDataRepository": {
|
||||||
|
"description": "Type of the metric data repository for this cluster",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"kind": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"]
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"token": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["kind"]
|
||||||
|
},
|
||||||
|
"filterRanges": {
|
||||||
|
"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"numNodes": {
|
||||||
|
"description": "UI slider range for number of nodes",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"from": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"to": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["from", "to"]
|
||||||
|
},
|
||||||
|
"duration": {
|
||||||
|
"description": "UI slider range for duration",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"from": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"to": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["from", "to"]
|
||||||
|
},
|
||||||
|
"startTime": {
|
||||||
|
"description": "UI slider range for start time",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"from": {
|
||||||
|
"type": "string",
|
||||||
|
"format": "date-time"
|
||||||
|
},
|
||||||
|
"to": {
|
||||||
|
"type": "null"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["from", "to"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["numNodes", "duration", "startTime"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["name", "filterRanges"],
|
||||||
|
"minItems": 1
|
||||||
|
}
|
||||||
|
}`
|
||||||
29
internal/config/validate.go
Normal file
29
internal/config/validate.go
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package config
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/santhosh-tekuri/jsonschema/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Validate(schema string, instance json.RawMessage) {
|
||||||
|
sch, err := jsonschema.CompileString("schema.json", schema)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Fatalf("%#v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var v any
|
||||||
|
if err := json.Unmarshal([]byte(instance), &v); err != nil {
|
||||||
|
cclog.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = sch.Validate(v); err != nil {
|
||||||
|
cclog.Fatalf("%#v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,6 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package model
|
package model
|
||||||
|
|||||||
@@ -3,24 +3,50 @@
|
|||||||
package model
|
package model
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type ClusterMetricWithName struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Unit *schema.Unit `json:"unit,omitempty"`
|
||||||
|
Timestep int `json:"timestep"`
|
||||||
|
Data []schema.Float `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ClusterMetrics struct {
|
||||||
|
NodeCount int `json:"nodeCount"`
|
||||||
|
Metrics []*ClusterMetricWithName `json:"metrics"`
|
||||||
|
}
|
||||||
|
|
||||||
type Count struct {
|
type Count struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Count int `json:"count"`
|
Count int `json:"count"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type EnergyFootprintValue struct {
|
||||||
|
Hardware string `json:"hardware"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Value float64 `json:"value"`
|
||||||
|
}
|
||||||
|
|
||||||
type FloatRange struct {
|
type FloatRange struct {
|
||||||
From float64 `json:"from"`
|
From float64 `json:"from"`
|
||||||
To float64 `json:"to"`
|
To float64 `json:"to"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type FootprintValue struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Stat string `json:"stat"`
|
||||||
|
Value float64 `json:"value"`
|
||||||
|
}
|
||||||
|
|
||||||
type Footprints struct {
|
type Footprints struct {
|
||||||
TimeWeights *TimeWeights `json:"timeWeights"`
|
TimeWeights *TimeWeights `json:"timeWeights"`
|
||||||
Metrics []*MetricFootprints `json:"metrics"`
|
Metrics []*MetricFootprints `json:"metrics"`
|
||||||
@@ -38,6 +64,7 @@ type IntRangeOutput struct {
|
|||||||
|
|
||||||
type JobFilter struct {
|
type JobFilter struct {
|
||||||
Tags []string `json:"tags,omitempty"`
|
Tags []string `json:"tags,omitempty"`
|
||||||
|
DbID []string `json:"dbId,omitempty"`
|
||||||
JobID *StringInput `json:"jobId,omitempty"`
|
JobID *StringInput `json:"jobId,omitempty"`
|
||||||
ArrayJobID *int `json:"arrayJobId,omitempty"`
|
ArrayJobID *int `json:"arrayJobId,omitempty"`
|
||||||
User *StringInput `json:"user,omitempty"`
|
User *StringInput `json:"user,omitempty"`
|
||||||
@@ -45,18 +72,16 @@ type JobFilter struct {
|
|||||||
JobName *StringInput `json:"jobName,omitempty"`
|
JobName *StringInput `json:"jobName,omitempty"`
|
||||||
Cluster *StringInput `json:"cluster,omitempty"`
|
Cluster *StringInput `json:"cluster,omitempty"`
|
||||||
Partition *StringInput `json:"partition,omitempty"`
|
Partition *StringInput `json:"partition,omitempty"`
|
||||||
Duration *schema.IntRange `json:"duration,omitempty"`
|
Duration *config.IntRange `json:"duration,omitempty"`
|
||||||
|
Energy *FloatRange `json:"energy,omitempty"`
|
||||||
MinRunningFor *int `json:"minRunningFor,omitempty"`
|
MinRunningFor *int `json:"minRunningFor,omitempty"`
|
||||||
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
|
NumNodes *config.IntRange `json:"numNodes,omitempty"`
|
||||||
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
|
NumAccelerators *config.IntRange `json:"numAccelerators,omitempty"`
|
||||||
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
|
NumHWThreads *config.IntRange `json:"numHWThreads,omitempty"`
|
||||||
StartTime *schema.TimeRange `json:"startTime,omitempty"`
|
StartTime *config.TimeRange `json:"startTime,omitempty"`
|
||||||
State []schema.JobState `json:"state,omitempty"`
|
State []schema.JobState `json:"state,omitempty"`
|
||||||
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg,omitempty"`
|
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
|
||||||
MemBwAvg *FloatRange `json:"memBwAvg,omitempty"`
|
Shared *string `json:"shared,omitempty"`
|
||||||
LoadAvg *FloatRange `json:"loadAvg,omitempty"`
|
|
||||||
MemUsedMax *FloatRange `json:"memUsedMax,omitempty"`
|
|
||||||
Exclusive *int `json:"exclusive,omitempty"`
|
|
||||||
Node *StringInput `json:"node,omitempty"`
|
Node *StringInput `json:"node,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -78,29 +103,45 @@ type JobMetricWithName struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type JobResultList struct {
|
type JobResultList struct {
|
||||||
Items []*schema.Job `json:"items"`
|
Items []*schema.Job `json:"items"`
|
||||||
Offset *int `json:"offset,omitempty"`
|
Offset *int `json:"offset,omitempty"`
|
||||||
Limit *int `json:"limit,omitempty"`
|
Limit *int `json:"limit,omitempty"`
|
||||||
Count *int `json:"count,omitempty"`
|
Count *int `json:"count,omitempty"`
|
||||||
|
HasNextPage *bool `json:"hasNextPage,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobStats struct {
|
||||||
|
ID int `json:"id"`
|
||||||
|
JobID string `json:"jobId"`
|
||||||
|
StartTime int `json:"startTime"`
|
||||||
|
Duration int `json:"duration"`
|
||||||
|
Cluster string `json:"cluster"`
|
||||||
|
SubCluster string `json:"subCluster"`
|
||||||
|
NumNodes int `json:"numNodes"`
|
||||||
|
NumHWThreads *int `json:"numHWThreads,omitempty"`
|
||||||
|
NumAccelerators *int `json:"numAccelerators,omitempty"`
|
||||||
|
Stats []*NamedStats `json:"stats"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type JobsStatistics struct {
|
type JobsStatistics struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
TotalJobs int `json:"totalJobs"`
|
TotalUsers int `json:"totalUsers"`
|
||||||
RunningJobs int `json:"runningJobs"`
|
TotalJobs int `json:"totalJobs"`
|
||||||
ShortJobs int `json:"shortJobs"`
|
RunningJobs int `json:"runningJobs"`
|
||||||
TotalWalltime int `json:"totalWalltime"`
|
ShortJobs int `json:"shortJobs"`
|
||||||
TotalNodes int `json:"totalNodes"`
|
TotalWalltime int `json:"totalWalltime"`
|
||||||
TotalNodeHours int `json:"totalNodeHours"`
|
TotalNodes int `json:"totalNodes"`
|
||||||
TotalCores int `json:"totalCores"`
|
TotalNodeHours int `json:"totalNodeHours"`
|
||||||
TotalCoreHours int `json:"totalCoreHours"`
|
TotalCores int `json:"totalCores"`
|
||||||
TotalAccs int `json:"totalAccs"`
|
TotalCoreHours int `json:"totalCoreHours"`
|
||||||
TotalAccHours int `json:"totalAccHours"`
|
TotalAccs int `json:"totalAccs"`
|
||||||
HistDuration []*HistoPoint `json:"histDuration"`
|
TotalAccHours int `json:"totalAccHours"`
|
||||||
HistNumNodes []*HistoPoint `json:"histNumNodes"`
|
HistDuration []*HistoPoint `json:"histDuration"`
|
||||||
HistNumCores []*HistoPoint `json:"histNumCores"`
|
HistNumNodes []*HistoPoint `json:"histNumNodes"`
|
||||||
HistNumAccs []*HistoPoint `json:"histNumAccs"`
|
HistNumCores []*HistoPoint `json:"histNumCores"`
|
||||||
|
HistNumAccs []*HistoPoint `json:"histNumAccs"`
|
||||||
|
HistMetrics []*MetricHistoPoints `json:"histMetrics"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricFootprints struct {
|
type MetricFootprints struct {
|
||||||
@@ -108,14 +149,83 @@ type MetricFootprints struct {
|
|||||||
Data []schema.Float `json:"data"`
|
Data []schema.Float `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MetricHistoPoint struct {
|
||||||
|
Bin *int `json:"bin,omitempty"`
|
||||||
|
Count int `json:"count"`
|
||||||
|
Min *int `json:"min,omitempty"`
|
||||||
|
Max *int `json:"max,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricHistoPoints struct {
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Unit string `json:"unit"`
|
||||||
|
Stat *string `json:"stat,omitempty"`
|
||||||
|
Data []*MetricHistoPoint `json:"data,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricStatItem struct {
|
||||||
|
MetricName string `json:"metricName"`
|
||||||
|
Range *FloatRange `json:"range"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Mutation struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
type NamedStats struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Data *schema.MetricStatistics `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NamedStatsWithScope struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
Scope schema.MetricScope `json:"scope"`
|
||||||
|
Stats []*ScopedStats `json:"stats"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeFilter struct {
|
||||||
|
Hostname *StringInput `json:"hostname,omitempty"`
|
||||||
|
Cluster *StringInput `json:"cluster,omitempty"`
|
||||||
|
Subcluster *StringInput `json:"subcluster,omitempty"`
|
||||||
|
SchedulerState *schema.SchedulerState `json:"schedulerState,omitempty"`
|
||||||
|
HealthState *string `json:"healthState,omitempty"`
|
||||||
|
TimeStart *int `json:"timeStart,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type NodeMetrics struct {
|
type NodeMetrics struct {
|
||||||
Host string `json:"host"`
|
Host string `json:"host"`
|
||||||
|
State string `json:"state"`
|
||||||
SubCluster string `json:"subCluster"`
|
SubCluster string `json:"subCluster"`
|
||||||
Metrics []*JobMetricWithName `json:"metrics"`
|
Metrics []*JobMetricWithName `json:"metrics"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type NodeStateResultList struct {
|
||||||
|
Items []*schema.Node `json:"items"`
|
||||||
|
Count *int `json:"count,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStates struct {
|
||||||
|
State string `json:"state"`
|
||||||
|
Count int `json:"count"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodeStatesTimed struct {
|
||||||
|
State string `json:"state"`
|
||||||
|
Counts []int `json:"counts"`
|
||||||
|
Times []int `json:"times"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type NodesResultList struct {
|
||||||
|
Items []*NodeMetrics `json:"items"`
|
||||||
|
Offset *int `json:"offset,omitempty"`
|
||||||
|
Limit *int `json:"limit,omitempty"`
|
||||||
|
Count *int `json:"count,omitempty"`
|
||||||
|
TotalNodes *int `json:"totalNodes,omitempty"`
|
||||||
|
HasNextPage *bool `json:"hasNextPage,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
type OrderByInput struct {
|
type OrderByInput struct {
|
||||||
Field string `json:"field"`
|
Field string `json:"field"`
|
||||||
|
Type string `json:"type"`
|
||||||
Order SortDirectionEnum `json:"order"`
|
Order SortDirectionEnum `json:"order"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,6 +234,12 @@ type PageRequest struct {
|
|||||||
Page int `json:"page"`
|
Page int `json:"page"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type ScopedStats struct {
|
||||||
|
Hostname string `json:"hostname"`
|
||||||
|
ID *string `json:"id,omitempty"`
|
||||||
|
Data *schema.MetricStatistics `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
type StringInput struct {
|
type StringInput struct {
|
||||||
Eq *string `json:"eq,omitempty"`
|
Eq *string `json:"eq,omitempty"`
|
||||||
Neq *string `json:"neq,omitempty"`
|
Neq *string `json:"neq,omitempty"`
|
||||||
@@ -134,8 +250,9 @@ type StringInput struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type TimeRangeOutput struct {
|
type TimeRangeOutput struct {
|
||||||
From time.Time `json:"from"`
|
Range *string `json:"range,omitempty"`
|
||||||
To time.Time `json:"to"`
|
From time.Time `json:"from"`
|
||||||
|
To time.Time `json:"to"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type TimeWeights struct {
|
type TimeWeights struct {
|
||||||
@@ -153,20 +270,22 @@ type User struct {
|
|||||||
type Aggregate string
|
type Aggregate string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
AggregateUser Aggregate = "USER"
|
AggregateUser Aggregate = "USER"
|
||||||
AggregateProject Aggregate = "PROJECT"
|
AggregateProject Aggregate = "PROJECT"
|
||||||
AggregateCluster Aggregate = "CLUSTER"
|
AggregateCluster Aggregate = "CLUSTER"
|
||||||
|
AggregateSubcluster Aggregate = "SUBCLUSTER"
|
||||||
)
|
)
|
||||||
|
|
||||||
var AllAggregate = []Aggregate{
|
var AllAggregate = []Aggregate{
|
||||||
AggregateUser,
|
AggregateUser,
|
||||||
AggregateProject,
|
AggregateProject,
|
||||||
AggregateCluster,
|
AggregateCluster,
|
||||||
|
AggregateSubcluster,
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e Aggregate) IsValid() bool {
|
func (e Aggregate) IsValid() bool {
|
||||||
switch e {
|
switch e {
|
||||||
case AggregateUser, AggregateProject, AggregateCluster:
|
case AggregateUser, AggregateProject, AggregateCluster, AggregateSubcluster:
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@@ -176,7 +295,7 @@ func (e Aggregate) String() string {
|
|||||||
return string(e)
|
return string(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Aggregate) UnmarshalGQL(v interface{}) error {
|
func (e *Aggregate) UnmarshalGQL(v any) error {
|
||||||
str, ok := v.(string)
|
str, ok := v.(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("enums must be strings")
|
return fmt.Errorf("enums must be strings")
|
||||||
@@ -193,11 +312,26 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
|
|||||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
fmt.Fprint(w, strconv.Quote(e.String()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *Aggregate) UnmarshalJSON(b []byte) error {
|
||||||
|
s, err := strconv.Unquote(string(b))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return e.UnmarshalGQL(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e Aggregate) MarshalJSON() ([]byte, error) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
e.MarshalGQL(&buf)
|
||||||
|
return buf.Bytes(), nil
|
||||||
|
}
|
||||||
|
|
||||||
type SortByAggregate string
|
type SortByAggregate string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
|
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
|
||||||
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
|
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
|
||||||
|
SortByAggregateTotalusers SortByAggregate = "TOTALUSERS"
|
||||||
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
|
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
|
||||||
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
|
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
|
||||||
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
|
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
|
||||||
@@ -209,6 +343,7 @@ const (
|
|||||||
var AllSortByAggregate = []SortByAggregate{
|
var AllSortByAggregate = []SortByAggregate{
|
||||||
SortByAggregateTotalwalltime,
|
SortByAggregateTotalwalltime,
|
||||||
SortByAggregateTotaljobs,
|
SortByAggregateTotaljobs,
|
||||||
|
SortByAggregateTotalusers,
|
||||||
SortByAggregateTotalnodes,
|
SortByAggregateTotalnodes,
|
||||||
SortByAggregateTotalnodehours,
|
SortByAggregateTotalnodehours,
|
||||||
SortByAggregateTotalcores,
|
SortByAggregateTotalcores,
|
||||||
@@ -219,7 +354,7 @@ var AllSortByAggregate = []SortByAggregate{
|
|||||||
|
|
||||||
func (e SortByAggregate) IsValid() bool {
|
func (e SortByAggregate) IsValid() bool {
|
||||||
switch e {
|
switch e {
|
||||||
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
|
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@@ -229,7 +364,7 @@ func (e SortByAggregate) String() string {
|
|||||||
return string(e)
|
return string(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *SortByAggregate) UnmarshalGQL(v interface{}) error {
|
func (e *SortByAggregate) UnmarshalGQL(v any) error {
|
||||||
str, ok := v.(string)
|
str, ok := v.(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("enums must be strings")
|
return fmt.Errorf("enums must be strings")
|
||||||
@@ -246,6 +381,20 @@ func (e SortByAggregate) MarshalGQL(w io.Writer) {
|
|||||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
fmt.Fprint(w, strconv.Quote(e.String()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *SortByAggregate) UnmarshalJSON(b []byte) error {
|
||||||
|
s, err := strconv.Unquote(string(b))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return e.UnmarshalGQL(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e SortByAggregate) MarshalJSON() ([]byte, error) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
e.MarshalGQL(&buf)
|
||||||
|
return buf.Bytes(), nil
|
||||||
|
}
|
||||||
|
|
||||||
type SortDirectionEnum string
|
type SortDirectionEnum string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -270,7 +419,7 @@ func (e SortDirectionEnum) String() string {
|
|||||||
return string(e)
|
return string(e)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
|
func (e *SortDirectionEnum) UnmarshalGQL(v any) error {
|
||||||
str, ok := v.(string)
|
str, ok := v.(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fmt.Errorf("enums must be strings")
|
return fmt.Errorf("enums must be strings")
|
||||||
@@ -286,3 +435,17 @@ func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
|
|||||||
func (e SortDirectionEnum) MarshalGQL(w io.Writer) {
|
func (e SortDirectionEnum) MarshalGQL(w io.Writer) {
|
||||||
fmt.Fprint(w, strconv.Quote(e.String()))
|
fmt.Fprint(w, strconv.Quote(e.String()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *SortDirectionEnum) UnmarshalJSON(b []byte) error {
|
||||||
|
s, err := strconv.Unquote(string(b))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return e.UnmarshalGQL(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e SortDirectionEnum) MarshalJSON() ([]byte, error) {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
e.MarshalGQL(&buf)
|
||||||
|
return buf.Bytes(), nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,15 +1,39 @@
|
|||||||
package graph
|
package graph
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/jmoiron/sqlx"
|
"github.com/jmoiron/sqlx"
|
||||||
)
|
)
|
||||||
|
|
||||||
// This file will not be regenerated automatically.
|
// This file will not be regenerated automatically.
|
||||||
//
|
//
|
||||||
// It serves as dependency injection for your app, add any dependencies you require here.
|
// It serves as dependency injection for your app, add any dependencies you require here.
|
||||||
|
var (
|
||||||
|
initOnce sync.Once
|
||||||
|
resolverInstance *Resolver
|
||||||
|
)
|
||||||
|
|
||||||
type Resolver struct {
|
type Resolver struct {
|
||||||
DB *sqlx.DB
|
DB *sqlx.DB
|
||||||
Repo *repository.JobRepository
|
Repo *repository.JobRepository
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Init() {
|
||||||
|
initOnce.Do(func() {
|
||||||
|
db := repository.GetConnection()
|
||||||
|
resolverInstance = &Resolver{
|
||||||
|
DB: db.DB, Repo: repository.GetJobRepository(),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetResolverInstance() *Resolver {
|
||||||
|
if resolverInstance == nil {
|
||||||
|
cclog.Fatal("Authentication module not initialized!")
|
||||||
|
}
|
||||||
|
|
||||||
|
return resolverInstance
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,23 +1,29 @@
|
|||||||
package graph
|
package graph
|
||||||
|
|
||||||
// This file will be automatically regenerated based on the schema, any resolver implementations
|
// This file will be automatically regenerated based on the schema, any resolver
|
||||||
|
// implementations
|
||||||
// will be copied through when generating and any unknown code will be moved to the end.
|
// will be copied through when generating and any unknown code will be moved to the end.
|
||||||
// Code generated by github.com/99designs/gqlgen version v0.17.36
|
// Code generated by github.com/99designs/gqlgen version v0.17.84
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"regexp"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricdispatcher"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Partitions is the resolver for the partitions field.
|
// Partitions is the resolver for the partitions field.
|
||||||
@@ -25,26 +31,93 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
|
|||||||
return r.Repo.Partitions(obj.Name)
|
return r.Repo.Partitions(obj.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// StartTime is the resolver for the startTime field.
|
||||||
|
func (r *jobResolver) StartTime(ctx context.Context, obj *schema.Job) (*time.Time, error) {
|
||||||
|
timestamp := time.Unix(obj.StartTime, 0)
|
||||||
|
return ×tamp, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Tags is the resolver for the tags field.
|
// Tags is the resolver for the tags field.
|
||||||
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
|
||||||
return r.Repo.GetTags(&obj.ID)
|
return r.Repo.GetTags(repository.GetUserFromContext(ctx), obj.ID)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
// ConcurrentJobs is the resolver for the concurrentJobs field.
|
||||||
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
|
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
|
||||||
if obj.State == schema.JobStateRunning {
|
// FIXME: Make the hardcoded duration configurable
|
||||||
obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
|
if obj.Shared != "none" && obj.Duration > 600 {
|
||||||
}
|
|
||||||
|
|
||||||
if obj.Exclusive != 1 && obj.Duration > 600 {
|
|
||||||
return r.Repo.FindConcurrentJobs(ctx, obj)
|
return r.Repo.FindConcurrentJobs(ctx, obj)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Footprint is the resolver for the footprint field.
|
||||||
|
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
|
||||||
|
rawFootprint, err := r.Repo.FetchFootprint(obj)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while fetching job footprint data")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*model.FootprintValue{}
|
||||||
|
for name, value := range rawFootprint {
|
||||||
|
|
||||||
|
parts := strings.Split(name, "_")
|
||||||
|
statPart := parts[len(parts)-1]
|
||||||
|
nameParts := parts[:len(parts)-1]
|
||||||
|
|
||||||
|
res = append(res, &model.FootprintValue{
|
||||||
|
Name: strings.Join(nameParts, "_"),
|
||||||
|
Stat: statPart,
|
||||||
|
Value: value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnergyFootprint is the resolver for the energyFootprint field.
|
||||||
|
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
|
||||||
|
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while fetching job energy footprint data")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*model.EnergyFootprintValue{}
|
||||||
|
for name, value := range rawEnergyFootprint {
|
||||||
|
// Suboptimal: Nearly hardcoded metric name expectations
|
||||||
|
matchCPU := regexp.MustCompile(`cpu|Cpu|CPU`)
|
||||||
|
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
|
||||||
|
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
|
||||||
|
matchCore := regexp.MustCompile(`core|Core|CORE`)
|
||||||
|
|
||||||
|
hwType := ""
|
||||||
|
switch test := name; { // NOtice ';' for var declaration
|
||||||
|
case matchCPU.MatchString(test):
|
||||||
|
hwType = "CPU"
|
||||||
|
case matchAcc.MatchString(test):
|
||||||
|
hwType = "Accelerator"
|
||||||
|
case matchMem.MatchString(test):
|
||||||
|
hwType = "Memory"
|
||||||
|
case matchCore.MatchString(test):
|
||||||
|
hwType = "Core"
|
||||||
|
default:
|
||||||
|
hwType = "Other"
|
||||||
|
}
|
||||||
|
|
||||||
|
res = append(res, &model.EnergyFootprintValue{
|
||||||
|
Hardware: hwType,
|
||||||
|
Metric: name,
|
||||||
|
Value: value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
// MetaData is the resolver for the metaData field.
|
// MetaData is the resolver for the metaData field.
|
||||||
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
|
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
|
||||||
return r.Repo.FetchMetadata(obj)
|
return r.Repo.FetchMetadata(obj)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -53,41 +126,82 @@ func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.Use
|
|||||||
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
|
return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Name is the resolver for the name field.
|
||||||
|
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: Name - name"))
|
||||||
|
}
|
||||||
|
|
||||||
// CreateTag is the resolver for the createTag field.
|
// CreateTag is the resolver for the createTag field.
|
||||||
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
|
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
|
||||||
id, err := r.Repo.CreateTag(typeArg, name)
|
user := repository.GetUserFromContext(ctx)
|
||||||
if err != nil {
|
if user == nil {
|
||||||
log.Warn("Error while creating tag")
|
return nil, fmt.Errorf("no user in context")
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
|
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
||||||
|
if user.HasRole(schema.RoleAdmin) && scope == "admin" ||
|
||||||
|
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && scope == "global" ||
|
||||||
|
user.Username == scope {
|
||||||
|
// Create in DB
|
||||||
|
id, err := r.Repo.CreateTag(typeArg, name, scope)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while creating tag")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
|
||||||
|
} else {
|
||||||
|
cclog.Warnf("Not authorized to create tag with scope: %s", scope)
|
||||||
|
return nil, fmt.Errorf("not authorized to create tag with scope: %s", scope)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteTag is the resolver for the deleteTag field.
|
// DeleteTag is the resolver for the deleteTag field.
|
||||||
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
|
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
|
||||||
|
// This Uses ID string <-> ID string, removeTagFromList uses []string <-> []int
|
||||||
panic(fmt.Errorf("not implemented: DeleteTag - deleteTag"))
|
panic(fmt.Errorf("not implemented: DeleteTag - deleteTag"))
|
||||||
}
|
}
|
||||||
|
|
||||||
// AddTagsToJob is the resolver for the addTagsToJob field.
|
// AddTagsToJob is the resolver for the addTagsToJob field.
|
||||||
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||||
|
user := repository.GetUserFromContext(ctx)
|
||||||
|
if user == nil {
|
||||||
|
return nil, fmt.Errorf("no user in context")
|
||||||
|
}
|
||||||
|
|
||||||
jid, err := strconv.ParseInt(job, 10, 64)
|
jid, err := strconv.ParseInt(job, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while adding tag to job")
|
cclog.Warn("Error while adding tag to job")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tags := []*schema.Tag{}
|
tags := []*schema.Tag{}
|
||||||
for _, tagId := range tagIds {
|
for _, tagID := range tagIds {
|
||||||
tid, err := strconv.ParseInt(tagId, 10, 64)
|
// Get ID
|
||||||
|
tid, err := strconv.ParseInt(tagID, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while parsing tag id")
|
cclog.Warn("Error while parsing tag id")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if tags, err = r.Repo.AddTag(jid, tid); err != nil {
|
// Test Exists
|
||||||
log.Warn("Error while adding tag")
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
return nil, err
|
if !exists {
|
||||||
|
cclog.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
||||||
|
if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
|
||||||
|
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
|
||||||
|
user.Username == tscope {
|
||||||
|
// Add to Job
|
||||||
|
if tags, err = r.Repo.AddTag(user, jid, tid); err != nil {
|
||||||
|
cclog.Warn("Error while adding tag")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cclog.Warnf("Not authorized to add tag: %d", tid)
|
||||||
|
return nil, fmt.Errorf("not authorized to add tag: %d", tid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,39 +210,127 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
|
|||||||
|
|
||||||
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
|
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
|
||||||
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
|
||||||
|
user := repository.GetUserFromContext(ctx)
|
||||||
|
if user == nil {
|
||||||
|
return nil, fmt.Errorf("no user in context")
|
||||||
|
}
|
||||||
|
|
||||||
jid, err := strconv.ParseInt(job, 10, 64)
|
jid, err := strconv.ParseInt(job, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while parsing job id")
|
cclog.Warn("Error while parsing job id")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tags := []*schema.Tag{}
|
tags := []*schema.Tag{}
|
||||||
for _, tagId := range tagIds {
|
for _, tagID := range tagIds {
|
||||||
tid, err := strconv.ParseInt(tagId, 10, 64)
|
// Get ID
|
||||||
|
tid, err := strconv.ParseInt(tagID, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while parsing tag id")
|
cclog.Warn("Error while parsing tag id")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil {
|
// Test Exists
|
||||||
log.Warn("Error while removing tag")
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
return nil, err
|
if !exists {
|
||||||
|
cclog.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test Access: Admins && Admin Tag OR Support/Admin and Global Tag OR Everyone && Private Tag
|
||||||
|
if user.HasRole(schema.RoleAdmin) && tscope == "admin" ||
|
||||||
|
user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) && tscope == "global" ||
|
||||||
|
user.Username == tscope {
|
||||||
|
// Remove from Job
|
||||||
|
if tags, err = r.Repo.RemoveTag(user, jid, tid); err != nil {
|
||||||
|
cclog.Warn("Error while removing tag")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cclog.Warnf("Not authorized to remove tag: %d", tid)
|
||||||
|
return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return tags, nil
|
return tags, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RemoveTagFromList is the resolver for the removeTagFromList field.
|
||||||
|
func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []string) ([]int, error) {
|
||||||
|
// Needs Contextuser
|
||||||
|
user := repository.GetUserFromContext(ctx)
|
||||||
|
if user == nil {
|
||||||
|
return nil, fmt.Errorf("no user in context")
|
||||||
|
}
|
||||||
|
|
||||||
|
tags := []int{}
|
||||||
|
for _, tagID := range tagIds {
|
||||||
|
// Get ID
|
||||||
|
tid, err := strconv.ParseInt(tagID, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while parsing tag id for removal")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test Exists
|
||||||
|
_, _, tscope, exists := r.Repo.TagInfo(tid)
|
||||||
|
if !exists {
|
||||||
|
cclog.Warnf("Tag does not exist (ID): %d", tid)
|
||||||
|
return nil, fmt.Errorf("tag does not exist (ID): %d", tid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test Access: Admins && Admin Tag OR Everyone && Private Tag
|
||||||
|
if user.HasRole(schema.RoleAdmin) && (tscope == "global" || tscope == "admin") || user.Username == tscope {
|
||||||
|
// Remove from DB
|
||||||
|
if err = r.Repo.RemoveTagById(tid); err != nil {
|
||||||
|
cclog.Warn("Error while removing tag")
|
||||||
|
return nil, err
|
||||||
|
} else {
|
||||||
|
tags = append(tags, int(tid))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cclog.Warnf("Not authorized to remove tag: %d", tid)
|
||||||
|
return nil, fmt.Errorf("not authorized to remove tag: %d", tid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tags, nil
|
||||||
|
}
|
||||||
|
|
||||||
// UpdateConfiguration is the resolver for the updateConfiguration field.
|
// UpdateConfiguration is the resolver for the updateConfiguration field.
|
||||||
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
|
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
|
||||||
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, repository.GetUserFromContext(ctx)); err != nil {
|
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, repository.GetUserFromContext(ctx)); err != nil {
|
||||||
log.Warn("Error while updating user config")
|
cclog.Warn("Error while updating user config")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ID is the resolver for the id field.
|
||||||
|
func (r *nodeResolver) ID(ctx context.Context, obj *schema.Node) (string, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: ID - id"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// SchedulerState is the resolver for the schedulerState field.
|
||||||
|
func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (schema.SchedulerState, error) {
|
||||||
|
if obj.NodeState != "" {
|
||||||
|
return obj.NodeState, nil
|
||||||
|
} else {
|
||||||
|
return "", fmt.Errorf("no SchedulerState (NodeState) on Object")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// HealthState is the resolver for the healthState field.
|
||||||
|
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (string, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: HealthState - healthState"))
|
||||||
|
}
|
||||||
|
|
||||||
|
// MetaData is the resolver for the metaData field.
|
||||||
|
func (r *nodeResolver) MetaData(ctx context.Context, obj *schema.Node) (any, error) {
|
||||||
|
panic(fmt.Errorf("not implemented: MetaData - metaData"))
|
||||||
|
}
|
||||||
|
|
||||||
// Clusters is the resolver for the clusters field.
|
// Clusters is the resolver for the clusters field.
|
||||||
func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) {
|
func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) {
|
||||||
return archive.Clusters, nil
|
return archive.Clusters, nil
|
||||||
@@ -136,7 +338,20 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
|
|||||||
|
|
||||||
// Tags is the resolver for the tags field.
|
// Tags is the resolver for the tags field.
|
||||||
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
|
||||||
return r.Repo.GetTags(nil)
|
return r.Repo.GetTags(repository.GetUserFromContext(ctx), nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GlobalMetrics is the resolver for the globalMetrics field.
|
||||||
|
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
|
||||||
|
user := repository.GetUserFromContext(ctx)
|
||||||
|
|
||||||
|
if user != nil {
|
||||||
|
if user.HasRole(schema.RoleUser) || user.HasRole(schema.RoleManager) {
|
||||||
|
return archive.GlobalUserMetricList, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return archive.GlobalMetricList, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// User is the resolver for the user field.
|
// User is the resolver for the user field.
|
||||||
@@ -148,7 +363,7 @@ func (r *queryResolver) User(ctx context.Context, username string) (*model.User,
|
|||||||
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
|
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
|
||||||
data, err := r.Repo.AllocatedNodes(cluster)
|
data, err := r.Repo.AllocatedNodes(cluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while fetching allocated nodes")
|
cclog.Warn("Error while fetching allocated nodes")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,17 +378,82 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
|
|||||||
return counts, nil
|
return counts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Node is the resolver for the node field.
|
||||||
|
func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) {
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
numericID, err := strconv.ParseInt(id, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while parsing job id")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return repo.GetNodeByID(numericID, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nodes is the resolver for the nodes field.
|
||||||
|
func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, order *model.OrderByInput) (*model.NodeStateResultList, error) {
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
nodes, err := repo.QueryNodes(ctx, filter, nil, order) // Ignore Paging, Order Unused
|
||||||
|
count := len(nodes)
|
||||||
|
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// NodeStates is the resolver for the nodeStates field.
|
||||||
|
func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) {
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
|
||||||
|
stateCounts, serr := repo.CountStates(ctx, filter, "node_state")
|
||||||
|
if serr != nil {
|
||||||
|
cclog.Warnf("Error while counting nodeStates: %s", serr.Error())
|
||||||
|
return nil, serr
|
||||||
|
}
|
||||||
|
|
||||||
|
healthCounts, herr := repo.CountStates(ctx, filter, "health_state")
|
||||||
|
if herr != nil {
|
||||||
|
cclog.Warnf("Error while counting healthStates: %s", herr.Error())
|
||||||
|
return nil, herr
|
||||||
|
}
|
||||||
|
|
||||||
|
allCounts := append(stateCounts, healthCounts...)
|
||||||
|
|
||||||
|
return allCounts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NodeStatesTimed is the resolver for the nodeStatesTimed field.
|
||||||
|
func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error) {
|
||||||
|
repo := repository.GetNodeRepository()
|
||||||
|
|
||||||
|
if typeArg == "node" {
|
||||||
|
stateCounts, serr := repo.CountStatesTimed(ctx, filter, "node_state")
|
||||||
|
if serr != nil {
|
||||||
|
cclog.Warnf("Error while counting nodeStates in time: %s", serr.Error())
|
||||||
|
return nil, serr
|
||||||
|
}
|
||||||
|
return stateCounts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if typeArg == "health" {
|
||||||
|
healthCounts, herr := repo.CountStatesTimed(ctx, filter, "health_state")
|
||||||
|
if herr != nil {
|
||||||
|
cclog.Warnf("Error while counting healthStates in time: %s", herr.Error())
|
||||||
|
return nil, herr
|
||||||
|
}
|
||||||
|
return healthCounts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, errors.New("unknown Node State Query Type")
|
||||||
|
}
|
||||||
|
|
||||||
// Job is the resolver for the job field.
|
// Job is the resolver for the job field.
|
||||||
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
|
||||||
numericId, err := strconv.ParseInt(id, 10, 64)
|
numericID, err := strconv.ParseInt(id, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while parsing job id")
|
cclog.Warn("Error while parsing job id")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
job, err := r.Repo.FindById(numericId)
|
job, err := r.Repo.FindByID(ctx, numericID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while finding job by id")
|
cclog.Warn("Error while finding job by id")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -187,16 +467,26 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// JobMetrics is the resolver for the jobMetrics field.
|
// JobMetrics is the resolver for the jobMetrics field.
|
||||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
|
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
|
||||||
|
if resolution == nil { // Load from Config
|
||||||
|
if config.Keys.EnableResampling != nil {
|
||||||
|
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||||
|
resolution = &defaultRes
|
||||||
|
} else { // Set 0 (Loads configured metric timestep)
|
||||||
|
defaultRes := 0
|
||||||
|
resolution = &defaultRes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
job, err := r.Query().Job(ctx, id)
|
job, err := r.Query().Job(ctx, id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while querying job for metrics")
|
cclog.Warn("Error while querying job for metrics")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := metricdata.LoadData(job, metrics, scopes, ctx)
|
data, err := metricdispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading job data")
|
cclog.Warn("Error while loading job data")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -214,9 +504,67 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
|
|||||||
return res, err
|
return res, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// JobsFootprints is the resolver for the jobsFootprints field.
|
// JobStats is the resolver for the jobStats field.
|
||||||
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error) {
|
||||||
return r.jobsFootprints(ctx, filter, metrics)
|
job, err := r.Query().Job(ctx, id)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("Error while querying job %s for metadata", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := metricdispatcher.LoadJobStats(job, metrics, ctx)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("Error while loading jobStats data for job id %s", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*model.NamedStats{}
|
||||||
|
for name, md := range data {
|
||||||
|
res = append(res, &model.NamedStats{
|
||||||
|
Name: name,
|
||||||
|
Data: &md,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// ScopedJobStats is the resolver for the scopedJobStats field.
|
||||||
|
func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.NamedStatsWithScope, error) {
|
||||||
|
job, err := r.Query().Job(ctx, id)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("Error while querying job %s for metadata", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := metricdispatcher.LoadScopedJobStats(job, metrics, scopes, ctx)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("Error while loading scopedJobStats data for job id %s", id)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := make([]*model.NamedStatsWithScope, 0)
|
||||||
|
for name, scoped := range data {
|
||||||
|
for scope, stats := range scoped {
|
||||||
|
|
||||||
|
mdlStats := make([]*model.ScopedStats, 0)
|
||||||
|
for _, stat := range stats {
|
||||||
|
mdlStats = append(mdlStats, &model.ScopedStats{
|
||||||
|
Hostname: stat.Hostname,
|
||||||
|
ID: stat.Id,
|
||||||
|
Data: stat.Data,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
res = append(res, &model.NamedStatsWithScope{
|
||||||
|
Name: name,
|
||||||
|
Scope: scope,
|
||||||
|
Stats: mdlStats,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return res, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Jobs is the resolver for the jobs field.
|
// Jobs is the resolver for the jobs field.
|
||||||
@@ -230,25 +578,47 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
|
|||||||
|
|
||||||
jobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
|
jobs, err := r.Repo.QueryJobs(ctx, filter, page, order)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while querying jobs")
|
cclog.Warn("Error while querying jobs")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
count, err := r.Repo.CountJobs(ctx, filter)
|
count, err := r.Repo.CountJobs(ctx, filter)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while counting jobs")
|
cclog.Warn("Error while counting jobs")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return &model.JobResultList{Items: jobs, Count: &count}, nil
|
// Note: Even if App-Default 'config.Keys.UiDefaults["job_list_usePaging"]' is set, always return hasNextPage boolean.
|
||||||
|
// Users can decide in frontend to use continuous scroll, even if app-default is paging!
|
||||||
|
/*
|
||||||
|
Example Page 4 @ 10 IpP : Does item 41 exist?
|
||||||
|
Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 @ 10 IpP exists.
|
||||||
|
*/
|
||||||
|
nextPage := &model.PageRequest{
|
||||||
|
ItemsPerPage: 1,
|
||||||
|
Page: ((page.Page * page.ItemsPerPage) + 1),
|
||||||
|
}
|
||||||
|
nextJobs, err := r.Repo.QueryJobs(ctx, filter, nextPage, order)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while querying next jobs")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
hasNextPage := len(nextJobs) == 1
|
||||||
|
|
||||||
|
return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// JobsStatistics is the resolver for the jobsStatistics field.
|
// JobsStatistics is the resolver for the jobsStatistics field.
|
||||||
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
|
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) ([]*model.JobsStatistics, error) {
|
||||||
var err error
|
var err error
|
||||||
var stats []*model.JobsStatistics
|
var stats []*model.JobsStatistics
|
||||||
|
|
||||||
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
// Top Level Defaults
|
||||||
|
defaultDurationBins := "1h"
|
||||||
|
defaultMetricBins := 10
|
||||||
|
|
||||||
|
if requireField(ctx, "totalJobs") || requireField(ctx, "totalUsers") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
|
||||||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
stats, err = r.Repo.JobsStats(ctx, filter)
|
stats, err = r.Repo.JobsStats(ctx, filter)
|
||||||
@@ -281,8 +651,13 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
}
|
}
|
||||||
|
|
||||||
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
|
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
|
||||||
|
|
||||||
|
if numDurationBins == nil {
|
||||||
|
numDurationBins = &defaultDurationBins
|
||||||
|
}
|
||||||
|
|
||||||
if groupBy == nil {
|
if groupBy == nil {
|
||||||
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
|
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0], numDurationBins)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -291,9 +666,81 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if requireField(ctx, "histMetrics") {
|
||||||
|
|
||||||
|
if numMetricBins == nil {
|
||||||
|
numMetricBins = &defaultMetricBins
|
||||||
|
}
|
||||||
|
|
||||||
|
if groupBy == nil {
|
||||||
|
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0], numMetricBins)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return nil, errors.New("metric histograms only implemented without groupBy argument")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// JobsMetricStats is the resolver for the jobsMetricStats field.
|
||||||
|
func (r *queryResolver) JobsMetricStats(ctx context.Context, filter []*model.JobFilter, metrics []string) ([]*model.JobStats, error) {
|
||||||
|
// No Paging, Fixed Order by StartTime ASC
|
||||||
|
order := &model.OrderByInput{
|
||||||
|
Field: "startTime",
|
||||||
|
Type: "col",
|
||||||
|
Order: "ASC",
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs, err := r.Repo.QueryJobs(ctx, filter, nil, order)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while querying jobs for comparison")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
res := []*model.JobStats{}
|
||||||
|
for _, job := range jobs {
|
||||||
|
data, err := metricdispatcher.LoadJobStats(job, metrics, ctx)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("Error while loading comparison jobStats data for job id %d", job.JobID)
|
||||||
|
continue
|
||||||
|
// return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
sres := []*model.NamedStats{}
|
||||||
|
for name, md := range data {
|
||||||
|
sres = append(sres, &model.NamedStats{
|
||||||
|
Name: name,
|
||||||
|
Data: &md,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
numThreadsInt := int(job.NumHWThreads)
|
||||||
|
numAccsInt := int(job.NumAcc)
|
||||||
|
res = append(res, &model.JobStats{
|
||||||
|
ID: int(*job.ID),
|
||||||
|
JobID: strconv.Itoa(int(job.JobID)),
|
||||||
|
StartTime: int(job.StartTime),
|
||||||
|
Duration: int(job.Duration),
|
||||||
|
Cluster: job.Cluster,
|
||||||
|
SubCluster: job.SubCluster,
|
||||||
|
NumNodes: int(job.NumNodes),
|
||||||
|
NumHWThreads: &numThreadsInt,
|
||||||
|
NumAccelerators: &numAccsInt,
|
||||||
|
Stats: sres,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return res, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// JobsFootprints is the resolver for the jobsFootprints field.
|
||||||
|
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
||||||
|
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!
|
||||||
|
return r.jobsFootprints(ctx, filter, metrics)
|
||||||
|
}
|
||||||
|
|
||||||
// RooflineHeatmap is the resolver for the rooflineHeatmap field.
|
// RooflineHeatmap is the resolver for the rooflineHeatmap field.
|
||||||
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
||||||
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
|
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
|
||||||
@@ -302,8 +749,8 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
|
|||||||
// NodeMetrics is the resolver for the nodeMetrics field.
|
// NodeMetrics is the resolver for the nodeMetrics field.
|
||||||
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
|
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
|
||||||
user := repository.GetUserFromContext(ctx)
|
user := repository.GetUserFromContext(ctx)
|
||||||
if user != nil && !user.HasRole(schema.RoleAdmin) {
|
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||||
return nil, errors.New("you need to be an administrator for this query")
|
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||||
}
|
}
|
||||||
|
|
||||||
if metrics == nil {
|
if metrics == nil {
|
||||||
@@ -312,19 +759,26 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
data, err := metricdispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while loading node data")
|
cclog.Warn("error while loading node data")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nodeRepo := repository.GetNodeRepository()
|
||||||
|
stateMap, _ := nodeRepo.MapNodes(cluster)
|
||||||
|
|
||||||
nodeMetrics := make([]*model.NodeMetrics, 0, len(data))
|
nodeMetrics := make([]*model.NodeMetrics, 0, len(data))
|
||||||
for hostname, metrics := range data {
|
for hostname, metrics := range data {
|
||||||
host := &model.NodeMetrics{
|
host := &model.NodeMetrics{
|
||||||
Host: hostname,
|
Host: hostname,
|
||||||
|
State: stateMap[hostname],
|
||||||
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
||||||
}
|
}
|
||||||
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
|
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("error in nodeMetrics resolver: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
for metric, scopedMetrics := range metrics {
|
for metric, scopedMetrics := range metrics {
|
||||||
for _, scopedMetric := range scopedMetrics {
|
for _, scopedMetric := range scopedMetrics {
|
||||||
@@ -342,6 +796,152 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
|||||||
return nodeMetrics, nil
|
return nodeMetrics, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NodeMetricsList is the resolver for the nodeMetricsList field.
|
||||||
|
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
|
||||||
|
if resolution == nil { // Load from Config
|
||||||
|
if config.Keys.EnableResampling != nil {
|
||||||
|
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||||
|
resolution = &defaultRes
|
||||||
|
} else { // Set 0 (Loads configured metric timestep)
|
||||||
|
defaultRes := 0
|
||||||
|
resolution = &defaultRes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
user := repository.GetUserFromContext(ctx)
|
||||||
|
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||||
|
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeRepo := repository.GetNodeRepository()
|
||||||
|
nodes, stateMap, countNodes, hasNextPage, nerr := nodeRepo.GetNodesForList(ctx, cluster, subCluster, stateFilter, nodeFilter, page)
|
||||||
|
if nerr != nil {
|
||||||
|
return nil, errors.New("could not retrieve node list required for resolving NodeMetricsList")
|
||||||
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
for _, mc := range archive.GetCluster(cluster).MetricConfig {
|
||||||
|
metrics = append(metrics, mc.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data, err := metricdispatcher.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("error while loading node data (Resolver.NodeMetricsList")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetricsList := make([]*model.NodeMetrics, 0, len(data))
|
||||||
|
for hostname, metrics := range data {
|
||||||
|
host := &model.NodeMetrics{
|
||||||
|
Host: hostname,
|
||||||
|
State: stateMap[hostname],
|
||||||
|
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
|
||||||
|
}
|
||||||
|
host.SubCluster, err = archive.GetSubClusterByNode(cluster, hostname)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warnf("error in nodeMetrics resolver: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for metric, scopedMetrics := range metrics {
|
||||||
|
for scope, scopedMetric := range scopedMetrics {
|
||||||
|
host.Metrics = append(host.Metrics, &model.JobMetricWithName{
|
||||||
|
Name: metric,
|
||||||
|
Scope: scope,
|
||||||
|
Metric: scopedMetric,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetricsList = append(nodeMetricsList, host)
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeMetricsListResult := &model.NodesResultList{
|
||||||
|
Items: nodeMetricsList,
|
||||||
|
TotalNodes: &countNodes,
|
||||||
|
HasNextPage: &hasNextPage,
|
||||||
|
}
|
||||||
|
|
||||||
|
return nodeMetricsListResult, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ClusterMetrics is the resolver for the clusterMetrics field.
|
||||||
|
func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metrics []string, from time.Time, to time.Time) (*model.ClusterMetrics, error) {
|
||||||
|
user := repository.GetUserFromContext(ctx)
|
||||||
|
if user != nil && !user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}) {
|
||||||
|
return nil, errors.New("you need to be administrator or support staff for this query")
|
||||||
|
}
|
||||||
|
|
||||||
|
if metrics == nil {
|
||||||
|
for _, mc := range archive.GetCluster(cluster).MetricConfig {
|
||||||
|
metrics = append(metrics, mc.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 'nodes' == nil -> Defaults to all nodes of cluster for existing query workflow
|
||||||
|
scopes := []schema.MetricScope{"node"}
|
||||||
|
data, err := metricdispatcher.LoadNodeData(cluster, metrics, nil, scopes, from, to, ctx)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("error while loading node data")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
clusterMetricData := make([]*model.ClusterMetricWithName, 0)
|
||||||
|
clusterMetrics := model.ClusterMetrics{NodeCount: 0, Metrics: clusterMetricData}
|
||||||
|
|
||||||
|
collectorTimestep := make(map[string]int)
|
||||||
|
collectorUnit := make(map[string]schema.Unit)
|
||||||
|
collectorData := make(map[string][]schema.Float)
|
||||||
|
|
||||||
|
for _, metrics := range data {
|
||||||
|
clusterMetrics.NodeCount += 1
|
||||||
|
for metric, scopedMetrics := range metrics {
|
||||||
|
_, ok := collectorData[metric]
|
||||||
|
if !ok {
|
||||||
|
collectorData[metric] = make([]schema.Float, 0)
|
||||||
|
for _, scopedMetric := range scopedMetrics {
|
||||||
|
// Collect Info
|
||||||
|
collectorTimestep[metric] = scopedMetric.Timestep
|
||||||
|
collectorUnit[metric] = scopedMetric.Unit
|
||||||
|
// Collect Initial Data
|
||||||
|
for _, ser := range scopedMetric.Series {
|
||||||
|
collectorData[metric] = append(collectorData[metric], ser.Data...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Sum up values by index
|
||||||
|
for _, scopedMetric := range scopedMetrics {
|
||||||
|
// For This Purpose (Cluster_Wide-Sum of Node Metrics) OK
|
||||||
|
for _, ser := range scopedMetric.Series {
|
||||||
|
for i, val := range ser.Data {
|
||||||
|
collectorData[metric][i] += val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for metricName, data := range collectorData {
|
||||||
|
cu := collectorUnit[metricName]
|
||||||
|
roundedData := make([]schema.Float, 0)
|
||||||
|
for _, val := range data {
|
||||||
|
roundedData = append(roundedData, schema.Float((math.Round(float64(val)*100.0) / 100.0)))
|
||||||
|
}
|
||||||
|
|
||||||
|
cm := model.ClusterMetricWithName{
|
||||||
|
Name: metricName,
|
||||||
|
Unit: &cu,
|
||||||
|
Timestep: collectorTimestep[metricName],
|
||||||
|
Data: roundedData,
|
||||||
|
}
|
||||||
|
|
||||||
|
clusterMetrics.Metrics = append(clusterMetrics.Metrics, &cm)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &clusterMetrics, nil
|
||||||
|
}
|
||||||
|
|
||||||
// NumberOfNodes is the resolver for the numberOfNodes field.
|
// NumberOfNodes is the resolver for the numberOfNodes field.
|
||||||
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
|
func (r *subClusterResolver) NumberOfNodes(ctx context.Context, obj *schema.SubCluster) (int, error) {
|
||||||
nodeList, err := archive.ParseNodeList(obj.Nodes)
|
nodeList, err := archive.ParseNodeList(obj.Nodes)
|
||||||
@@ -357,17 +957,27 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
|
|||||||
// Job returns generated.JobResolver implementation.
|
// Job returns generated.JobResolver implementation.
|
||||||
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
|
||||||
|
|
||||||
|
// MetricValue returns generated.MetricValueResolver implementation.
|
||||||
|
func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricValueResolver{r} }
|
||||||
|
|
||||||
// Mutation returns generated.MutationResolver implementation.
|
// Mutation returns generated.MutationResolver implementation.
|
||||||
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
|
||||||
|
|
||||||
|
// Node returns generated.NodeResolver implementation.
|
||||||
|
func (r *Resolver) Node() generated.NodeResolver { return &nodeResolver{r} }
|
||||||
|
|
||||||
// Query returns generated.QueryResolver implementation.
|
// Query returns generated.QueryResolver implementation.
|
||||||
func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
|
func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
|
||||||
|
|
||||||
// SubCluster returns generated.SubClusterResolver implementation.
|
// SubCluster returns generated.SubClusterResolver implementation.
|
||||||
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
|
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
|
||||||
|
|
||||||
type clusterResolver struct{ *Resolver }
|
type (
|
||||||
type jobResolver struct{ *Resolver }
|
clusterResolver struct{ *Resolver }
|
||||||
type mutationResolver struct{ *Resolver }
|
jobResolver struct{ *Resolver }
|
||||||
type queryResolver struct{ *Resolver }
|
metricValueResolver struct{ *Resolver }
|
||||||
type subClusterResolver struct{ *Resolver }
|
mutationResolver struct{ *Resolver }
|
||||||
|
nodeResolver struct{ *Resolver }
|
||||||
|
queryResolver struct{ *Resolver }
|
||||||
|
subClusterResolver struct{ *Resolver }
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,20 +1,21 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
package graph
|
package graph
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
"slices"
|
||||||
|
|
||||||
"github.com/99designs/gqlgen/graphql"
|
"github.com/99designs/gqlgen/graphql"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
"github.com/ClusterCockpit/cc-backend/internal/metricdispatcher"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const MAX_JOBS_FOR_ANALYSIS = 500
|
const MAX_JOBS_FOR_ANALYSIS = 500
|
||||||
@@ -24,11 +25,11 @@ func (r *queryResolver) rooflineHeatmap(
|
|||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
filter []*model.JobFilter,
|
filter []*model.JobFilter,
|
||||||
rows int, cols int,
|
rows int, cols int,
|
||||||
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
|
minX float64, minY float64, maxX float64, maxY float64,
|
||||||
|
) ([][]float64, error) {
|
||||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while querying jobs for roofline")
|
cclog.Error("Error while querying jobs for roofline")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
|
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
|
||||||
@@ -47,15 +48,22 @@ func (r *queryResolver) rooflineHeatmap(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
|
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
|
||||||
|
// resolution := 0
|
||||||
|
|
||||||
|
// for _, mc := range metricConfigs {
|
||||||
|
// resolution = max(resolution, mc.Timestep)
|
||||||
|
// }
|
||||||
|
|
||||||
|
jobdata, err := metricdispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
|
cclog.Errorf("Error while loading roofline metrics for job %d", job.ID)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
|
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
|
||||||
if flops_ == nil && membw_ == nil {
|
if flops_ == nil && membw_ == nil {
|
||||||
log.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
cclog.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||||
continue
|
continue
|
||||||
// return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
// return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||||
}
|
}
|
||||||
@@ -63,7 +71,7 @@ func (r *queryResolver) rooflineHeatmap(
|
|||||||
flops, ok1 := flops_["node"]
|
flops, ok1 := flops_["node"]
|
||||||
membw, ok2 := membw_["node"]
|
membw, ok2 := membw_["node"]
|
||||||
if !ok1 || !ok2 {
|
if !ok1 || !ok2 {
|
||||||
log.Info("rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
cclog.Info("rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
||||||
continue
|
continue
|
||||||
// TODO/FIXME:
|
// TODO/FIXME:
|
||||||
// return nil, errors.New("GRAPH/UTIL > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
// return nil, errors.New("GRAPH/UTIL > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
|
||||||
@@ -98,7 +106,7 @@ func (r *queryResolver) rooflineHeatmap(
|
|||||||
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
|
||||||
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error("Error while querying jobs for footprint")
|
cclog.Error("Error while querying jobs for footprint")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
|
if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
|
||||||
@@ -120,8 +128,8 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
if err := metricdispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
|
||||||
log.Error("Error while loading averages for footprint")
|
cclog.Error("Error while loading averages for footprint")
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -179,11 +187,5 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
|
|||||||
func requireField(ctx context.Context, name string) bool {
|
func requireField(ctx context.Context, name string) bool {
|
||||||
fields := graphql.CollectAllFields(ctx)
|
fields := graphql.CollectAllFields(ctx)
|
||||||
|
|
||||||
for _, f := range fields {
|
return slices.Contains(fields, name)
|
||||||
if f == name {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|||||||
132
internal/importer/README.md
Normal file
132
internal/importer/README.md
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
# Importer Package
|
||||||
|
|
||||||
|
The `importer` package provides functionality for importing job data into the ClusterCockpit database from archived job files.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This package supports two primary import workflows:
|
||||||
|
|
||||||
|
1. **Bulk Database Initialization** - Reinitialize the entire job database from archived jobs
|
||||||
|
2. **Individual Job Import** - Import specific jobs from metadata/data file pairs
|
||||||
|
|
||||||
|
Both workflows enrich job metadata by calculating performance footprints and energy consumption metrics before persisting to the database.
|
||||||
|
|
||||||
|
## Main Entry Points
|
||||||
|
|
||||||
|
### InitDB()
|
||||||
|
|
||||||
|
Reinitializes the job database from all archived jobs.
|
||||||
|
|
||||||
|
```go
|
||||||
|
if err := importer.InitDB(); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This function:
|
||||||
|
- Flushes existing job, tag, and jobtag tables
|
||||||
|
- Iterates through all jobs in the configured archive
|
||||||
|
- Enriches each job with calculated metrics
|
||||||
|
- Inserts jobs into the database in batched transactions (100 jobs per batch)
|
||||||
|
- Continues on individual job failures, logging errors
|
||||||
|
|
||||||
|
**Use Case**: Initial database setup or complete database rebuild from archive.
|
||||||
|
|
||||||
|
### HandleImportFlag(flag string)
|
||||||
|
|
||||||
|
Imports jobs from specified file pairs.
|
||||||
|
|
||||||
|
```go
|
||||||
|
// Format: "<meta.json>:<data.json>[,<meta2.json>:<data2.json>,...]"
|
||||||
|
flag := "/path/to/meta.json:/path/to/data.json"
|
||||||
|
if err := importer.HandleImportFlag(flag); err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This function:
|
||||||
|
- Parses the comma-separated file pairs
|
||||||
|
- Validates metadata and job data against schemas (if validation enabled)
|
||||||
|
- Enriches each job with footprints and energy metrics
|
||||||
|
- Imports jobs into both the archive and database
|
||||||
|
- Fails fast on the first error
|
||||||
|
|
||||||
|
**Use Case**: Importing specific jobs from external sources or manual job additions.
|
||||||
|
|
||||||
|
## Job Enrichment
|
||||||
|
|
||||||
|
Both import workflows use `enrichJobMetadata()` to calculate:
|
||||||
|
|
||||||
|
### Performance Footprints
|
||||||
|
|
||||||
|
Performance footprints are calculated from metric averages based on the subcluster configuration:
|
||||||
|
|
||||||
|
```go
|
||||||
|
job.Footprint["mem_used_avg"] = 45.2 // GB
|
||||||
|
job.Footprint["cpu_load_avg"] = 0.87 // percentage
|
||||||
|
```
|
||||||
|
|
||||||
|
### Energy Metrics
|
||||||
|
|
||||||
|
Energy consumption is calculated from power metrics using the formula:
|
||||||
|
|
||||||
|
```
|
||||||
|
Energy (kWh) = (Power (W) × Duration (s) / 3600) / 1000
|
||||||
|
```
|
||||||
|
|
||||||
|
For each energy metric:
|
||||||
|
```go
|
||||||
|
job.EnergyFootprint["acc_power"] = 12.5 // kWh
|
||||||
|
job.Energy = 150.2 // Total energy in kWh
|
||||||
|
```
|
||||||
|
|
||||||
|
**Note**: Energy calculations for metrics with unit "energy" (Joules) are not yet implemented.
|
||||||
|
|
||||||
|
## Data Validation
|
||||||
|
|
||||||
|
### SanityChecks(job *schema.Job)
|
||||||
|
|
||||||
|
Validates job metadata before database insertion:
|
||||||
|
|
||||||
|
- Cluster exists in configuration
|
||||||
|
- Subcluster is valid (assigns if needed)
|
||||||
|
- Job state is valid
|
||||||
|
- Resources and user fields are populated
|
||||||
|
- Node counts and hardware thread counts are positive
|
||||||
|
- Resource count matches declared node count
|
||||||
|
|
||||||
|
## Normalization Utilities
|
||||||
|
|
||||||
|
The package includes utilities for normalizing metric values to appropriate SI prefixes:
|
||||||
|
|
||||||
|
### Normalize(avg float64, prefix string)
|
||||||
|
|
||||||
|
Adjusts values and SI prefixes for readability:
|
||||||
|
|
||||||
|
```go
|
||||||
|
factor, newPrefix := importer.Normalize(2048.0, "M")
|
||||||
|
// Converts 2048 MB → ~2.0 GB
|
||||||
|
// Returns: factor for conversion, "G"
|
||||||
|
```
|
||||||
|
|
||||||
|
This is useful for automatically scaling metrics (e.g., memory, storage) to human-readable units.
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
|
||||||
|
- `github.com/ClusterCockpit/cc-backend/internal/repository` - Database operations
|
||||||
|
- `github.com/ClusterCockpit/cc-backend/pkg/archive` - Job archive access
|
||||||
|
- `github.com/ClusterCockpit/cc-lib/schema` - Job schema definitions
|
||||||
|
- `github.com/ClusterCockpit/cc-lib/ccLogger` - Logging
|
||||||
|
- `github.com/ClusterCockpit/cc-lib/ccUnits` - SI unit handling
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
- **InitDB**: Continues processing on individual job failures, logs errors, returns summary
|
||||||
|
- **HandleImportFlag**: Fails fast on first error, returns immediately
|
||||||
|
- Both functions log detailed error context for debugging
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
- **Transaction Batching**: InitDB processes jobs in batches of 100 for optimal database performance
|
||||||
|
- **Tag Caching**: Tag IDs are cached during import to minimize database queries
|
||||||
|
- **Progress Reporting**: InitDB prints progress updates during bulk operations
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
package importer
|
package importer
|
||||||
@@ -10,16 +10,30 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Import all jobs specified as `<path-to-meta.json>:<path-to-data.json>,...`
|
// HandleImportFlag imports jobs from file pairs specified in a comma-separated flag string.
|
||||||
|
//
|
||||||
|
// The flag format is: "<path-to-meta.json>:<path-to-data.json>[,<path-to-meta2.json>:<path-to-data2.json>,...]"
|
||||||
|
//
|
||||||
|
// For each job pair, this function:
|
||||||
|
// 1. Reads and validates the metadata JSON file (schema.Job)
|
||||||
|
// 2. Reads and validates the job data JSON file (schema.JobData)
|
||||||
|
// 3. Enriches the job with calculated footprints and energy metrics
|
||||||
|
// 4. Validates the job using SanityChecks()
|
||||||
|
// 5. Imports the job into the archive
|
||||||
|
// 6. Inserts the job into the database with associated tags
|
||||||
|
//
|
||||||
|
// Schema validation is performed if config.Keys.Validate is true.
|
||||||
|
//
|
||||||
|
// Returns an error if file reading, validation, enrichment, or database operations fail.
|
||||||
|
// The function stops processing on the first error encountered.
|
||||||
func HandleImportFlag(flag string) error {
|
func HandleImportFlag(flag string) error {
|
||||||
r := repository.GetJobRepository()
|
r := repository.GetJobRepository()
|
||||||
|
|
||||||
@@ -31,7 +45,7 @@ func HandleImportFlag(flag string) error {
|
|||||||
|
|
||||||
raw, err := os.ReadFile(files[0])
|
raw, err := os.ReadFile(files[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while reading metadata file for import")
|
cclog.Warn("Error while reading metadata file for import")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -42,15 +56,18 @@ func HandleImportFlag(flag string) error {
|
|||||||
}
|
}
|
||||||
dec := json.NewDecoder(bytes.NewReader(raw))
|
dec := json.NewDecoder(bytes.NewReader(raw))
|
||||||
dec.DisallowUnknownFields()
|
dec.DisallowUnknownFields()
|
||||||
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
|
job := schema.Job{
|
||||||
if err = dec.Decode(&jobMeta); err != nil {
|
Shared: "none",
|
||||||
log.Warn("Error while decoding raw json metadata for import")
|
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
|
||||||
|
}
|
||||||
|
if err = dec.Decode(&job); err != nil {
|
||||||
|
cclog.Warn("Error while decoding raw json metadata for import")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
raw, err = os.ReadFile(files[1])
|
raw, err = os.ReadFile(files[1])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while reading jobdata file for import")
|
cclog.Warn("Error while reading jobdata file for import")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -63,72 +80,41 @@ func HandleImportFlag(flag string) error {
|
|||||||
dec.DisallowUnknownFields()
|
dec.DisallowUnknownFields()
|
||||||
jobData := schema.JobData{}
|
jobData := schema.JobData{}
|
||||||
if err = dec.Decode(&jobData); err != nil {
|
if err = dec.Decode(&jobData); err != nil {
|
||||||
log.Warn("Error while decoding raw json jobdata for import")
|
cclog.Warn("Error while decoding raw json jobdata for import")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// checkJobData(&jobData)
|
job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||||
|
|
||||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
if err = enrichJobMetadata(&job); err != nil {
|
||||||
|
cclog.Errorf("Error enriching job metadata: %v", err)
|
||||||
// if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
|
|
||||||
// if err != nil {
|
|
||||||
// log.Warn("Error while finding job in jobRepository")
|
|
||||||
// return err
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist")
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
job := schema.Job{
|
|
||||||
BaseJob: jobMeta.BaseJob,
|
|
||||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
|
||||||
StartTimeUnix: jobMeta.StartTime,
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Other metrics...
|
|
||||||
job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
|
|
||||||
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
|
|
||||||
job.MemUsedMax = loadJobStat(&jobMeta, "mem_used")
|
|
||||||
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
|
|
||||||
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
|
|
||||||
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
|
|
||||||
|
|
||||||
job.RawResources, err = json.Marshal(job.Resources)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while marshaling job resources")
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
|
||||||
if err != nil {
|
|
||||||
log.Warn("Error while marshaling job metadata")
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = SanityChecks(&job.BaseJob); err != nil {
|
if err = SanityChecks(&job); err != nil {
|
||||||
log.Warn("BaseJob SanityChecks failed")
|
cclog.Warn("BaseJob SanityChecks failed")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
|
if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
|
||||||
log.Error("Error while importing job")
|
cclog.Error("Error while importing job")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
id, err := r.InsertJob(&job)
|
id, err := r.InsertJob(&job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while job db insert")
|
cclog.Warn("Error while job db insert")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tag := range job.Tags {
|
for _, tag := range job.Tags {
|
||||||
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
|
if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
|
||||||
log.Error("Error while adding or creating tag")
|
cclog.Error("Error while adding or creating tag on import")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
|
cclog.Infof("successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
package importer_test
|
package importer_test
|
||||||
@@ -16,9 +16,12 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
"github.com/ClusterCockpit/cc-backend/internal/importer"
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
ccconf "github.com/ClusterCockpit/cc-lib/ccConfig"
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// copyFile copies a file from source path to destination path.
|
||||||
|
// Used by tests to set up test fixtures.
|
||||||
func copyFile(s string, d string) error {
|
func copyFile(s string, d string) error {
|
||||||
r, err := os.Open(s)
|
r, err := os.Open(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -34,21 +37,29 @@ func copyFile(s string, d string) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// setup initializes a test environment for importer tests.
|
||||||
|
//
|
||||||
|
// Creates a temporary directory with:
|
||||||
|
// - A test job archive with cluster configuration
|
||||||
|
// - A SQLite database initialized with schema
|
||||||
|
// - Configuration files loaded
|
||||||
|
//
|
||||||
|
// Returns a JobRepository instance for test assertions.
|
||||||
func setup(t *testing.T) *repository.JobRepository {
|
func setup(t *testing.T) *repository.JobRepository {
|
||||||
const testconfig = `{
|
const testconfig = `{
|
||||||
|
"main": {
|
||||||
"addr": "0.0.0.0:8080",
|
"addr": "0.0.0.0:8080",
|
||||||
"validate": false,
|
"validate": false,
|
||||||
|
"apiAllowedIPs": [
|
||||||
|
"*"
|
||||||
|
]},
|
||||||
"archive": {
|
"archive": {
|
||||||
"kind": "file",
|
"kind": "file",
|
||||||
"path": "./var/job-archive"
|
"path": "./var/job-archive"
|
||||||
},
|
},
|
||||||
"jwts": {
|
|
||||||
"max-age": "2m"
|
|
||||||
},
|
|
||||||
"clusters": [
|
"clusters": [
|
||||||
{
|
{
|
||||||
"name": "testcluster",
|
"name": "testcluster",
|
||||||
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
|
|
||||||
"filterRanges": {
|
"filterRanges": {
|
||||||
"numNodes": { "from": 1, "to": 64 },
|
"numNodes": { "from": 1, "to": 64 },
|
||||||
"duration": { "from": 0, "to": 86400 },
|
"duration": { "from": 0, "to": 86400 },
|
||||||
@@ -57,7 +68,6 @@ func setup(t *testing.T) *repository.JobRepository {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "fritz",
|
"name": "fritz",
|
||||||
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
|
|
||||||
"filterRanges": {
|
"filterRanges": {
|
||||||
"numNodes": { "from": 1, "to": 944 },
|
"numNodes": { "from": 1, "to": 944 },
|
||||||
"duration": { "from": 0, "to": 86400 },
|
"duration": { "from": 0, "to": 86400 },
|
||||||
@@ -66,7 +76,6 @@ func setup(t *testing.T) *repository.JobRepository {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "taurus",
|
"name": "taurus",
|
||||||
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
|
|
||||||
"filterRanges": {
|
"filterRanges": {
|
||||||
"numNodes": { "from": 1, "to": 4000 },
|
"numNodes": { "from": 1, "to": 4000 },
|
||||||
"duration": { "from": 0, "to": 604800 },
|
"duration": { "from": 0, "to": 604800 },
|
||||||
@@ -75,18 +84,18 @@ func setup(t *testing.T) *repository.JobRepository {
|
|||||||
}
|
}
|
||||||
]}`
|
]}`
|
||||||
|
|
||||||
log.Init("info", true)
|
cclog.Init("info", true)
|
||||||
tmpdir := t.TempDir()
|
tmpdir := t.TempDir()
|
||||||
|
|
||||||
jobarchive := filepath.Join(tmpdir, "job-archive")
|
jobarchive := filepath.Join(tmpdir, "job-archive")
|
||||||
if err := os.Mkdir(jobarchive, 0777); err != nil {
|
if err := os.Mkdir(jobarchive, 0o777); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil {
|
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), fmt.Appendf(nil, "%d", 3), 0o666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
|
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")
|
||||||
if err := os.Mkdir(fritzArchive, 0777); err != nil {
|
if err := os.Mkdir(fritzArchive, 0o777); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
if err := copyFile(filepath.Join("testdata", "cluster-fritz.json"),
|
if err := copyFile(filepath.Join("testdata", "cluster-fritz.json"),
|
||||||
@@ -95,17 +104,29 @@ func setup(t *testing.T) *repository.JobRepository {
|
|||||||
}
|
}
|
||||||
|
|
||||||
dbfilepath := filepath.Join(tmpdir, "test.db")
|
dbfilepath := filepath.Join(tmpdir, "test.db")
|
||||||
err := repository.MigrateDB("sqlite3", dbfilepath)
|
err := repository.MigrateDB(dbfilepath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
cfgFilePath := filepath.Join(tmpdir, "config.json")
|
||||||
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
|
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0o666); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
config.Init(cfgFilePath)
|
ccconf.Init(cfgFilePath)
|
||||||
|
|
||||||
|
// Load and check main configuration
|
||||||
|
if cfg := ccconf.GetPackageConfig("main"); cfg != nil {
|
||||||
|
if clustercfg := ccconf.GetPackageConfig("clusters"); clustercfg != nil {
|
||||||
|
config.Init(cfg, clustercfg)
|
||||||
|
} else {
|
||||||
|
t.Fatal("Cluster configuration must be present")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
t.Fatal("Main configuration must be present")
|
||||||
|
}
|
||||||
|
|
||||||
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
|
||||||
|
|
||||||
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
|
||||||
@@ -116,6 +137,7 @@ func setup(t *testing.T) *repository.JobRepository {
|
|||||||
return repository.GetJobRepository()
|
return repository.GetJobRepository()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Result represents the expected test result for job import verification.
|
||||||
type Result struct {
|
type Result struct {
|
||||||
JobId int64
|
JobId int64
|
||||||
Cluster string
|
Cluster string
|
||||||
@@ -123,6 +145,8 @@ type Result struct {
|
|||||||
Duration int32
|
Duration int32
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// readResult reads the expected test result from a golden file.
|
||||||
|
// Golden files contain the expected job attributes after import.
|
||||||
func readResult(t *testing.T, testname string) Result {
|
func readResult(t *testing.T, testname string) Result {
|
||||||
var r Result
|
var r Result
|
||||||
|
|
||||||
@@ -140,6 +164,13 @@ func readResult(t *testing.T, testname string) Result {
|
|||||||
return r
|
return r
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestHandleImportFlag tests the HandleImportFlag function with various job import scenarios.
|
||||||
|
//
|
||||||
|
// The test uses golden files in testdata/ to verify that jobs are correctly:
|
||||||
|
// - Parsed from metadata and data JSON files
|
||||||
|
// - Enriched with footprints and energy metrics
|
||||||
|
// - Inserted into the database
|
||||||
|
// - Retrievable with correct attributes
|
||||||
func TestHandleImportFlag(t *testing.T) {
|
func TestHandleImportFlag(t *testing.T) {
|
||||||
r := setup(t)
|
r := setup(t)
|
||||||
|
|
||||||
@@ -163,7 +194,7 @@ func TestHandleImportFlag(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
result := readResult(t, testname)
|
result := readResult(t, testname)
|
||||||
job, err := r.Find(&result.JobId, &result.Cluster, &result.StartTime)
|
job, err := r.FindCached(&result.JobId, &result.Cluster, &result.StartTime)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,40 +1,68 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// Package importer provides functionality for importing job data into the ClusterCockpit database.
|
||||||
|
//
|
||||||
|
// The package supports two primary use cases:
|
||||||
|
// 1. Bulk database initialization from archived jobs via InitDB()
|
||||||
|
// 2. Individual job import from file pairs via HandleImportFlag()
|
||||||
|
//
|
||||||
|
// Both operations enrich job metadata by calculating footprints and energy metrics
|
||||||
|
// before persisting to the database.
|
||||||
package importer
|
package importer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
const (
|
||||||
// repopulate them using the jobs found in `archive`.
|
addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
|
||||||
|
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
|
||||||
|
)
|
||||||
|
|
||||||
|
// InitDB reinitializes the job database from archived job data.
|
||||||
|
//
|
||||||
|
// This function performs the following operations:
|
||||||
|
// 1. Flushes existing job, tag, and jobtag tables
|
||||||
|
// 2. Iterates through all jobs in the archive
|
||||||
|
// 3. Enriches each job with calculated footprints and energy metrics
|
||||||
|
// 4. Inserts jobs and tags into the database in batched transactions
|
||||||
|
//
|
||||||
|
// Jobs are processed in batches of 100 for optimal performance. The function
|
||||||
|
// continues processing even if individual jobs fail, logging errors and
|
||||||
|
// returning a summary at the end.
|
||||||
|
//
|
||||||
|
// Returns an error if database initialization, transaction management, or
|
||||||
|
// critical operations fail. Individual job failures are logged but do not
|
||||||
|
// stop the overall import process.
|
||||||
func InitDB() error {
|
func InitDB() error {
|
||||||
r := repository.GetJobRepository()
|
r := repository.GetJobRepository()
|
||||||
if err := r.Flush(); err != nil {
|
if err := r.Flush(); err != nil {
|
||||||
log.Errorf("repository initDB(): %v", err)
|
cclog.Errorf("repository initDB(): %v", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
starttime := time.Now()
|
starttime := time.Now()
|
||||||
log.Print("Building job table...")
|
cclog.Print("Building job table...")
|
||||||
|
|
||||||
t, err := r.TransactionInit()
|
t, err := r.TransactionInit()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Error while initializing SQL transactions")
|
cclog.Warn("Error while initializing SQL transactions")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
tags := make(map[string]int64)
|
tags := make(map[string]int64)
|
||||||
|
|
||||||
// Not using log.Print because we want the line to end with `\r` and
|
// Not using cclog.Print because we want the line to end with `\r` and
|
||||||
// this function is only ever called when a special command line flag
|
// this function is only ever called when a special command line flag
|
||||||
// is passed anyways.
|
// is passed anyways.
|
||||||
fmt.Printf("%d jobs inserted...\r", 0)
|
fmt.Printf("%d jobs inserted...\r", 0)
|
||||||
@@ -46,92 +74,195 @@ func InitDB() error {
|
|||||||
for jobContainer := range ar.Iter(false) {
|
for jobContainer := range ar.Iter(false) {
|
||||||
|
|
||||||
jobMeta := jobContainer.Meta
|
jobMeta := jobContainer.Meta
|
||||||
|
if jobMeta == nil {
|
||||||
|
cclog.Warn("skipping job with nil metadata")
|
||||||
|
errorOccured++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Bundle 100 inserts into one transaction for better performance
|
// Bundle 100 inserts into one transaction for better performance
|
||||||
if i%100 == 0 {
|
if i%100 == 0 {
|
||||||
r.TransactionCommit(t)
|
if i > 0 {
|
||||||
|
if err := t.Commit(); err != nil {
|
||||||
|
cclog.Errorf("transaction commit error: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Start a new transaction for the next batch
|
||||||
|
t, err = r.TransactionInit()
|
||||||
|
if err != nil {
|
||||||
|
cclog.Errorf("transaction init error: %v", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
fmt.Printf("%d jobs inserted...\r", i)
|
fmt.Printf("%d jobs inserted...\r", i)
|
||||||
}
|
}
|
||||||
|
|
||||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||||
job := schema.Job{
|
|
||||||
BaseJob: jobMeta.BaseJob,
|
|
||||||
StartTime: time.Unix(jobMeta.StartTime, 0),
|
|
||||||
StartTimeUnix: jobMeta.StartTime,
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Other metrics...
|
if err := enrichJobMetadata(jobMeta); err != nil {
|
||||||
job.LoadAvg = loadJobStat(jobMeta, "cpu_load")
|
cclog.Errorf("repository initDB(): %v", err)
|
||||||
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
|
|
||||||
job.MemUsedMax = loadJobStat(jobMeta, "mem_used")
|
|
||||||
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
|
|
||||||
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
|
|
||||||
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
|
|
||||||
|
|
||||||
job.RawResources, err = json.Marshal(job.Resources)
|
|
||||||
if err != nil {
|
|
||||||
log.Errorf("repository initDB(): %v", err)
|
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
if err := SanityChecks(jobMeta); err != nil {
|
||||||
if err != nil {
|
cclog.Errorf("repository initDB(): %v", err)
|
||||||
log.Errorf("repository initDB(): %v", err)
|
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := SanityChecks(&job.BaseJob); err != nil {
|
id, jobErr := r.TransactionAddNamed(t,
|
||||||
log.Errorf("repository initDB(): %v", err)
|
repository.NamedJobInsert, jobMeta)
|
||||||
|
if jobErr != nil {
|
||||||
|
cclog.Errorf("repository initDB(): %v", jobErr)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
id, err := r.TransactionAdd(t, job)
|
// Job successfully inserted, increment counter
|
||||||
if err != nil {
|
i += 1
|
||||||
log.Errorf("repository initDB(): %v", err)
|
|
||||||
errorOccured++
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tag := range job.Tags {
|
for _, tag := range jobMeta.Tags {
|
||||||
tagstr := tag.Name + ":" + tag.Type
|
tagstr := tag.Name + ":" + tag.Type
|
||||||
tagId, ok := tags[tagstr]
|
tagID, ok := tags[tagstr]
|
||||||
if !ok {
|
if !ok {
|
||||||
tagId, err = r.TransactionAddTag(t, tag)
|
var err error
|
||||||
|
tagID, err = r.TransactionAdd(t,
|
||||||
|
addTagQuery,
|
||||||
|
tag.Name, tag.Type)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error adding tag: %v", err)
|
cclog.Errorf("Error adding tag: %v", err)
|
||||||
errorOccured++
|
errorOccured++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
tags[tagstr] = tagId
|
tags[tagstr] = tagID
|
||||||
}
|
}
|
||||||
|
|
||||||
r.TransactionSetTag(t, id, tagId)
|
r.TransactionAdd(t,
|
||||||
}
|
setTagQuery,
|
||||||
|
id, tagID)
|
||||||
if err == nil {
|
|
||||||
i += 1
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if errorOccured > 0 {
|
if errorOccured > 0 {
|
||||||
log.Warnf("Error in import of %d jobs!", errorOccured)
|
cclog.Warnf("Error in import of %d jobs!", errorOccured)
|
||||||
}
|
}
|
||||||
|
|
||||||
r.TransactionEnd(t)
|
r.TransactionEnd(t)
|
||||||
log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
|
cclog.Infof("A total of %d jobs have been registered in %.3f seconds.", i, time.Since(starttime).Seconds())
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function also sets the subcluster if necessary!
|
// enrichJobMetadata calculates and populates job footprints, energy metrics, and serialized fields.
|
||||||
func SanityChecks(job *schema.BaseJob) error {
|
//
|
||||||
|
// This function performs the following enrichment operations:
|
||||||
|
// 1. Calculates job footprint metrics based on the subcluster configuration
|
||||||
|
// 2. Computes energy footprint and total energy consumption in kWh
|
||||||
|
// 3. Marshals footprints, resources, and metadata into JSON for database storage
|
||||||
|
//
|
||||||
|
// The function expects the job's MonitoringStatus and SubCluster to be already set.
|
||||||
|
// Energy calculations convert power metrics (Watts) to energy (kWh) using the formula:
|
||||||
|
//
|
||||||
|
// Energy (kWh) = (Power (W) * Duration (s) / 3600) / 1000
|
||||||
|
//
|
||||||
|
// Returns an error if subcluster retrieval, metric indexing, or JSON marshaling fails.
|
||||||
|
func enrichJobMetadata(job *schema.Job) error {
|
||||||
|
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Errorf("cannot get subcluster: %s", err.Error())
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Footprint = make(map[string]float64)
|
||||||
|
|
||||||
|
for _, fp := range sc.Footprint {
|
||||||
|
statType := "avg"
|
||||||
|
|
||||||
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||||
|
statType = sc.MetricConfig[i].Footprint
|
||||||
|
}
|
||||||
|
|
||||||
|
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||||
|
|
||||||
|
job.Footprint[name] = repository.LoadJobStat(job, fp, statType)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while marshaling job footprint")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
job.EnergyFootprint = make(map[string]float64)
|
||||||
|
|
||||||
|
// Total Job Energy Outside Loop
|
||||||
|
totalEnergy := 0.0
|
||||||
|
for _, fp := range sc.EnergyFootprint {
|
||||||
|
// Always Init Metric Energy Inside Loop
|
||||||
|
metricEnergy := 0.0
|
||||||
|
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||||
|
// Note: For DB data, calculate and save as kWh
|
||||||
|
switch sc.MetricConfig[i].Energy {
|
||||||
|
case "energy": // this metric has energy as unit (Joules)
|
||||||
|
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
|
||||||
|
// FIXME: Needs sum as stats type
|
||||||
|
case "power": // this metric has power as unit (Watt)
|
||||||
|
// Energy: Power (in Watts) * Time (in Seconds)
|
||||||
|
// Unit: (W * (s / 3600)) / 1000 = kWh
|
||||||
|
// Round 2 Digits: round(Energy * 100) / 100
|
||||||
|
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
|
||||||
|
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
|
||||||
|
rawEnergy := ((repository.LoadJobStat(job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
|
||||||
|
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
|
||||||
|
}
|
||||||
|
|
||||||
|
job.EnergyFootprint[fp] = metricEnergy
|
||||||
|
totalEnergy += metricEnergy
|
||||||
|
}
|
||||||
|
|
||||||
|
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||||
|
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||||
|
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawResources, err = json.Marshal(job.Resources)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while marshaling job resources")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Warn("Error while marshaling job metadata")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SanityChecks validates job metadata and ensures cluster/subcluster configuration is valid.
|
||||||
|
//
|
||||||
|
// This function performs the following validations:
|
||||||
|
// 1. Verifies the cluster exists in the archive configuration
|
||||||
|
// 2. Assigns and validates the subcluster (may modify job.SubCluster)
|
||||||
|
// 3. Validates job state is a recognized value
|
||||||
|
// 4. Ensures resources and user fields are populated
|
||||||
|
// 5. Validates node counts and hardware thread counts are positive
|
||||||
|
// 6. Verifies the number of resources matches the declared node count
|
||||||
|
//
|
||||||
|
// The function may modify the job's SubCluster field if it needs to be assigned.
|
||||||
|
//
|
||||||
|
// Returns an error if any validation check fails.
|
||||||
|
func SanityChecks(job *schema.Job) error {
|
||||||
if c := archive.GetCluster(job.Cluster); c == nil {
|
if c := archive.GetCluster(job.Cluster); c == nil {
|
||||||
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
||||||
}
|
}
|
||||||
if err := archive.AssignSubCluster(job); err != nil {
|
if err := archive.AssignSubCluster(job); err != nil {
|
||||||
log.Warn("Error while assigning subcluster to job")
|
cclog.Warn("Error while assigning subcluster to job")
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if !job.State.Valid() {
|
if !job.State.Valid() {
|
||||||
@@ -150,18 +281,14 @@ func SanityChecks(job *schema.BaseJob) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func loadJobStat(job *schema.JobMeta, metric string) float64 {
|
// checkJobData normalizes metric units in job data based on average values.
|
||||||
if stats, ok := job.Statistics[metric]; ok {
|
//
|
||||||
if metric == "mem_used" {
|
// NOTE: This function is currently unused and contains incomplete implementation.
|
||||||
return stats.Max
|
// It was intended to normalize byte and file-related metrics to appropriate SI prefixes,
|
||||||
} else {
|
// but the normalization logic is commented out. Consider removing or completing this
|
||||||
return stats.Avg
|
// function based on project requirements.
|
||||||
}
|
//
|
||||||
}
|
// TODO: Either implement the metric normalization or remove this dead code.
|
||||||
|
|
||||||
return 0.0
|
|
||||||
}
|
|
||||||
|
|
||||||
func checkJobData(d *schema.JobData) error {
|
func checkJobData(d *schema.JobData) error {
|
||||||
for _, scopes := range *d {
|
for _, scopes := range *d {
|
||||||
// var newUnit schema.Unit
|
// var newUnit schema.Unit
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
package importer
|
package importer
|
||||||
@@ -7,13 +7,27 @@ package importer
|
|||||||
import (
|
import (
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
ccunits "github.com/ClusterCockpit/cc-units"
|
ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// getNormalizationFactor calculates the scaling factor needed to normalize a value
|
||||||
|
// to a more readable range (typically between 1.0 and 1000.0).
|
||||||
|
//
|
||||||
|
// For values greater than 1000, the function scales down by factors of 1000 (returns negative exponent).
|
||||||
|
// For values less than 1.0, the function scales up by factors of 1000 (returns positive exponent).
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - factor: The multiplicative factor to apply (10^(count*scale))
|
||||||
|
// - exponent: The power of 10 representing the adjustment (multiple of 3 for SI prefixes)
|
||||||
func getNormalizationFactor(v float64) (float64, int) {
|
func getNormalizationFactor(v float64) (float64, int) {
|
||||||
count := 0
|
count := 0
|
||||||
scale := -3
|
scale := -3
|
||||||
|
|
||||||
|
// Prevent infinite loop for zero or negative values
|
||||||
|
if v <= 0.0 {
|
||||||
|
return 1.0, 0
|
||||||
|
}
|
||||||
|
|
||||||
if v > 1000.0 {
|
if v > 1000.0 {
|
||||||
for v > 1000.0 {
|
for v > 1000.0 {
|
||||||
v *= 1e-3
|
v *= 1e-3
|
||||||
@@ -29,9 +43,22 @@ func getNormalizationFactor(v float64) (float64, int) {
|
|||||||
return math.Pow10(count * scale), count * scale
|
return math.Pow10(count * scale), count * scale
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getExponent calculates the SI prefix exponent from a numeric prefix value.
|
||||||
|
//
|
||||||
|
// For example:
|
||||||
|
// - Input: 1000.0 (kilo) returns 3
|
||||||
|
// - Input: 1000000.0 (mega) returns 6
|
||||||
|
// - Input: 1000000000.0 (giga) returns 9
|
||||||
|
//
|
||||||
|
// Returns the exponent representing the power of 10 for the SI prefix.
|
||||||
func getExponent(p float64) int {
|
func getExponent(p float64) int {
|
||||||
count := 0
|
count := 0
|
||||||
|
|
||||||
|
// Prevent infinite loop for infinity or NaN values
|
||||||
|
if math.IsInf(p, 0) || math.IsNaN(p) || p <= 0.0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
for p > 1.0 {
|
for p > 1.0 {
|
||||||
p = p / 1000.0
|
p = p / 1000.0
|
||||||
count++
|
count++
|
||||||
@@ -40,12 +67,42 @@ func getExponent(p float64) int {
|
|||||||
return count * 3
|
return count * 3
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// newPrefixFromFactor computes a new SI unit prefix after applying a normalization factor.
|
||||||
|
//
|
||||||
|
// Given an original prefix and an exponent adjustment, this function calculates
|
||||||
|
// the resulting SI prefix. For example, if normalizing from bytes (no prefix) by
|
||||||
|
// a factor of 10^9, the result would be the "G" (giga) prefix.
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - op: The original SI prefix value
|
||||||
|
// - e: The exponent adjustment to apply
|
||||||
|
//
|
||||||
|
// Returns the new SI prefix after adjustment.
|
||||||
func newPrefixFromFactor(op ccunits.Prefix, e int) ccunits.Prefix {
|
func newPrefixFromFactor(op ccunits.Prefix, e int) ccunits.Prefix {
|
||||||
f := float64(op)
|
f := float64(op)
|
||||||
exp := math.Pow10(getExponent(f) - e)
|
exp := math.Pow10(getExponent(f) - e)
|
||||||
return ccunits.Prefix(exp)
|
return ccunits.Prefix(exp)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Normalize adjusts a metric value and its SI unit prefix to a more readable range.
|
||||||
|
//
|
||||||
|
// This function is useful for automatically scaling metrics to appropriate units.
|
||||||
|
// For example, normalizing 2048 MiB might result in ~2.0 GiB.
|
||||||
|
//
|
||||||
|
// The function analyzes the average value and determines if a different SI prefix
|
||||||
|
// would make the number more human-readable (typically keeping values between 1 and 1000).
|
||||||
|
//
|
||||||
|
// Parameters:
|
||||||
|
// - avg: The metric value to normalize
|
||||||
|
// - p: The current SI prefix as a string (e.g., "K", "M", "G")
|
||||||
|
//
|
||||||
|
// Returns:
|
||||||
|
// - factor: The multiplicative factor to apply to convert the value
|
||||||
|
// - newPrefix: The new SI prefix string to use
|
||||||
|
//
|
||||||
|
// Example:
|
||||||
|
//
|
||||||
|
// factor, newPrefix := Normalize(2048.0, "M") // returns factor for MB->GB conversion, "G"
|
||||||
func Normalize(avg float64, p string) (float64, string) {
|
func Normalize(avg float64, p string) (float64, string) {
|
||||||
f, e := getNormalizationFactor(avg)
|
f, e := getNormalizationFactor(avg)
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
// All rights reserved.
|
// All rights reserved. This file is part of cc-backend.
|
||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
package importer
|
package importer
|
||||||
@@ -8,9 +8,11 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
ccunits "github.com/ClusterCockpit/cc-units"
|
ccunits "github.com/ClusterCockpit/cc-lib/ccUnits"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TestNormalizeFactor tests the normalization of large byte values to gigabyte prefix.
|
||||||
|
// Verifies that values in the billions are correctly scaled to the "G" (giga) prefix.
|
||||||
func TestNormalizeFactor(t *testing.T) {
|
func TestNormalizeFactor(t *testing.T) {
|
||||||
// var us string
|
// var us string
|
||||||
s := []float64{2890031237, 23998994567, 389734042344, 390349424345}
|
s := []float64{2890031237, 23998994567, 389734042344, 390349424345}
|
||||||
@@ -38,6 +40,8 @@ func TestNormalizeFactor(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestNormalizeKeep tests that values already in an appropriate range maintain their prefix.
|
||||||
|
// Verifies that when values don't require rescaling, the original "G" prefix is preserved.
|
||||||
func TestNormalizeKeep(t *testing.T) {
|
func TestNormalizeKeep(t *testing.T) {
|
||||||
s := []float64{3.0, 24.0, 390.0, 391.0}
|
s := []float64{3.0, 24.0, 390.0, 391.0}
|
||||||
|
|
||||||
|
|||||||
1486
internal/importer/testdata/cluster-fritz.json
vendored
1486
internal/importer/testdata/cluster-fritz.json
vendored
File diff suppressed because it is too large
Load Diff
@@ -1 +1 @@
|
|||||||
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n 
WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}
|
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"shared":"none","monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n 
WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"exclusive":1,"jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}
|
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"shared":"none","jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}
|
||||||
|
|||||||
232
internal/memorystore/api.go
Normal file
232
internal/memorystore/api.go
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
ErrInvalidTimeRange = errors.New("[METRICSTORE]> invalid time range: 'from' must be before 'to'")
|
||||||
|
ErrEmptyCluster = errors.New("[METRICSTORE]> cluster name cannot be empty")
|
||||||
|
)
|
||||||
|
|
||||||
|
type APIMetricData struct {
|
||||||
|
Error *string `json:"error,omitempty"`
|
||||||
|
Data schema.FloatArray `json:"data,omitempty"`
|
||||||
|
From int64 `json:"from"`
|
||||||
|
To int64 `json:"to"`
|
||||||
|
Resolution int64 `json:"resolution"`
|
||||||
|
Avg schema.Float `json:"avg"`
|
||||||
|
Min schema.Float `json:"min"`
|
||||||
|
Max schema.Float `json:"max"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type APIQueryRequest struct {
|
||||||
|
Cluster string `json:"cluster"`
|
||||||
|
Queries []APIQuery `json:"queries"`
|
||||||
|
ForAllNodes []string `json:"for-all-nodes"`
|
||||||
|
From int64 `json:"from"`
|
||||||
|
To int64 `json:"to"`
|
||||||
|
WithStats bool `json:"with-stats"`
|
||||||
|
WithData bool `json:"with-data"`
|
||||||
|
WithPadding bool `json:"with-padding"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type APIQueryResponse struct {
|
||||||
|
Queries []APIQuery `json:"queries,omitempty"`
|
||||||
|
Results [][]APIMetricData `json:"results"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type APIQuery struct {
|
||||||
|
Type *string `json:"type,omitempty"`
|
||||||
|
SubType *string `json:"subtype,omitempty"`
|
||||||
|
Metric string `json:"metric"`
|
||||||
|
Hostname string `json:"host"`
|
||||||
|
Resolution int64 `json:"resolution"`
|
||||||
|
TypeIds []string `json:"type-ids,omitempty"`
|
||||||
|
SubTypeIds []string `json:"subtype-ids,omitempty"`
|
||||||
|
ScaleFactor schema.Float `json:"scale-by,omitempty"`
|
||||||
|
Aggregate bool `json:"aggreg"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Optimize this, just like the stats endpoint!
|
||||||
|
func (data *APIMetricData) AddStats() {
|
||||||
|
n := 0
|
||||||
|
sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
|
||||||
|
for _, x := range data.Data {
|
||||||
|
if x.IsNaN() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
n += 1
|
||||||
|
sum += float64(x)
|
||||||
|
min = math.Min(min, float64(x))
|
||||||
|
max = math.Max(max, float64(x))
|
||||||
|
}
|
||||||
|
|
||||||
|
if n > 0 {
|
||||||
|
avg := sum / float64(n)
|
||||||
|
data.Avg = schema.Float(avg)
|
||||||
|
data.Min = schema.Float(min)
|
||||||
|
data.Max = schema.Float(max)
|
||||||
|
} else {
|
||||||
|
data.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (data *APIMetricData) ScaleBy(f schema.Float) {
|
||||||
|
if f == 0 || f == 1 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
data.Avg *= f
|
||||||
|
data.Min *= f
|
||||||
|
data.Max *= f
|
||||||
|
for i := 0; i < len(data.Data); i++ {
|
||||||
|
data.Data[i] *= f
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// PadDataWithNull prepends NaN samples so the series appears to begin at the
// requested 'from' timestamp, aligned to the metric's native frequency.
// Metrics unknown to the store are left untouched.
//
// NOTE(review): the 'to' parameter is currently unused — only the front of
// the series is padded; confirm whether tail padding up to 'to' was intended.
func (data *APIMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {
	minfo, ok := ms.Metrics[metric]
	if !ok {
		return
	}

	// Compare in whole sample intervals so sub-frequency offsets don't
	// trigger spurious padding.
	if (data.From / minfo.Frequency) > (from / minfo.Frequency) {
		padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))
		ndata := make([]schema.Float, 0, padfront+len(data.Data))
		for range padfront {
			ndata = append(ndata, schema.NaN)
		}
		for j := 0; j < len(data.Data); j++ {
			ndata = append(ndata, data.Data[j])
		}
		data.Data = ndata
	}
}
|
||||||
|
|
||||||
|
// FetchData executes all queries of req against the in-memory metric store
// and returns one result slice per query (in request order; ForAllNodes
// expansions are appended after the explicit queries and echoed back in
// response.Queries). Per-selector read errors are reported inside the
// corresponding APIMetricData.Error instead of failing the whole request.
func FetchData(req APIQueryRequest) (*APIQueryResponse, error) {
	if req.From > req.To {
		return nil, ErrInvalidTimeRange
	}
	if req.Cluster == "" && req.ForAllNodes != nil {
		return nil, ErrEmptyCluster
	}

	// NOTE(review): WithData is forced on here, which makes the
	// `if !req.WithData` branch below unreachable — confirm intent.
	req.WithData = true
	ms := GetMemoryStore()
	if ms == nil {
		return nil, fmt.Errorf("memorystore not initialized")
	}

	response := APIQueryResponse{
		Results: make([][]APIMetricData, 0, len(req.Queries)),
	}
	// Expand for-all-nodes metrics into one node-level query per node.
	if req.ForAllNodes != nil {
		nodes := ms.ListChildren([]string{req.Cluster})
		for _, node := range nodes {
			for _, metric := range req.ForAllNodes {
				q := APIQuery{
					Metric:   metric,
					Hostname: node,
				}
				req.Queries = append(req.Queries, q)
				response.Queries = append(response.Queries, q)
			}
		}
	}

	for _, query := range req.Queries {
		sels := make([]util.Selector, 0, 1)
		if query.Aggregate || query.Type == nil {
			// One selector covering all requested instances: either a
			// node-level query or an aggregation over the type ids.
			sel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}
			if query.Type != nil {
				if len(query.TypeIds) == 1 {
					sel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})
				} else {
					ids := make([]string, len(query.TypeIds))
					for i, id := range query.TypeIds {
						ids[i] = *query.Type + id
					}
					sel = append(sel, util.SelectorElement{Group: ids})
				}

				if query.SubType != nil {
					if len(query.SubTypeIds) == 1 {
						sel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
					} else {
						ids := make([]string, len(query.SubTypeIds))
						for i, id := range query.SubTypeIds {
							ids[i] = *query.SubType + id
						}
						sel = append(sel, util.SelectorElement{Group: ids})
					}
				}
			}
			sels = append(sels, sel)
		} else {
			// No aggregation: one selector (and thus one result) per
			// type id (× subtype id, if given).
			for _, typeID := range query.TypeIds {
				if query.SubType != nil {
					for _, subTypeID := range query.SubTypeIds {
						sels = append(sels, util.Selector{
							{String: req.Cluster},
							{String: query.Hostname},
							{String: *query.Type + typeID},
							{String: *query.SubType + subTypeID},
						})
					}
				} else {
					sels = append(sels, util.Selector{
						{String: req.Cluster},
						{String: query.Hostname},
						{String: *query.Type + typeID},
					})
				}
			}
		}

		// log.Printf("query: %#v\n", query)
		// log.Printf("sels: %#v\n", sels)
		var err error
		res := make([]APIMetricData, 0, len(sels))
		for _, sel := range sels {
			data := APIMetricData{}

			data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
			if err != nil {
				// Report the error in-band so other selectors still succeed.
				msg := err.Error()
				data.Error = &msg
				res = append(res, data)
				continue
			}

			if req.WithStats {
				data.AddStats()
			}
			if query.ScaleFactor != 0 {
				data.ScaleBy(query.ScaleFactor)
			}
			if req.WithPadding {
				data.PadDataWithNull(ms, req.From, req.To, query.Metric)
			}
			if !req.WithData {
				data.Data = nil
			}
			res = append(res, data)
		}
		response.Results = append(response.Results, res)
	}

	return &response, nil
}
|
||||||
196
internal/memorystore/archive.go
Normal file
196
internal/memorystore/archive.go
Normal file
@@ -0,0 +1,196 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Archiving starts a background goroutine that periodically moves old
// checkpoint files into the archive (see ArchiveCheckpoints). The interval
// comes from Keys.Archive.Interval; an unparsable interval is fatal, a
// non-positive one disables archiving. The goroutine stops when ctx is
// cancelled and signals completion via wg.
func Archiving(wg *sync.WaitGroup, ctx context.Context) {
	go func() {
		defer wg.Done()
		d, err := time.ParseDuration(Keys.Archive.Interval)
		if err != nil {
			cclog.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
		}
		if d <= 0 {
			// Archiving disabled by configuration.
			return
		}

		ticker := time.NewTicker(d)
		defer ticker.Stop()

		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				// Archive everything older than one interval.
				t := time.Now().Add(-d)
				cclog.Infof("[METRICSTORE]> start archiving checkpoints (older than %s)...", t.Format(time.RFC3339))
				n, err := ArchiveCheckpoints(Keys.Checkpoints.RootDir,
					Keys.Archive.RootDir, t.Unix(), Keys.Archive.DeleteInstead)

				if err != nil {
					cclog.Errorf("[METRICSTORE]> archiving failed: %s", err.Error())
				} else {
					cclog.Infof("[METRICSTORE]> done: %d files zipped and moved to archive", n)
				}
			}
		}
	}()
}
|
||||||
|
|
||||||
|
// ErrNoNewArchiveData signals that a checkpoint level contained nothing
// newer than what has already been archived; callers treat it as a no-op,
// not a failure.
var ErrNoNewArchiveData error = errors.New("all data already archived")
|
||||||
|
|
||||||
|
// ZIP all checkpoint files older than `from` together and write them to the `archiveDir`,
// deleting them from the `checkpointsDir`.
//
// The directory layout is <root>/<cluster>/<host>; one work item per host is
// fanned out to Keys.NumWorkers goroutines. Returns the total number of
// archived files; per-host errors are logged and counted, and a summary
// error is returned if any occurred.
func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) {
	entries1, err := os.ReadDir(checkpointsDir)
	if err != nil {
		return 0, err
	}

	type workItem struct {
		cdir, adir    string // per-host checkpoint and archive directories
		cluster, host string // used only for error reporting
	}

	var wg sync.WaitGroup
	// n counts archived files, errs counts failed hosts; both updated
	// atomically from the worker goroutines.
	n, errs := int32(0), int32(0)
	work := make(chan workItem, Keys.NumWorkers)

	wg.Add(Keys.NumWorkers)
	for worker := 0; worker < Keys.NumWorkers; worker++ {
		go func() {
			defer wg.Done()
			for workItem := range work {
				m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead)
				if err != nil {
					cclog.Errorf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
					atomic.AddInt32(&errs, 1)
				}
				atomic.AddInt32(&n, int32(m))
			}
		}()
	}

	// Walk <cluster>/<host> and enqueue one work item per host directory.
	for _, de1 := range entries1 {
		entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
		if e != nil {
			// Remember the last listing error; remaining clusters are still processed.
			err = e
		}

		for _, de2 := range entries2 {
			cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
			adir := filepath.Join(archiveDir, de1.Name(), de2.Name())
			work <- workItem{
				adir: adir, cdir: cdir,
				cluster: de1.Name(), host: de2.Name(),
			}
		}
	}

	close(work)
	wg.Wait()

	if err != nil {
		return int(n), err
	}

	if errs > 0 {
		return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
	}
	return int(n), nil
}
|
||||||
|
|
||||||
|
// Helper function for `ArchiveCheckpoints`.
|
||||||
|
func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) {
|
||||||
|
entries, err := os.ReadDir(dir)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
extension := Keys.Checkpoints.FileFormat
|
||||||
|
files, err := findFiles(entries, from, extension, false)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if deleteInstead {
|
||||||
|
n := 0
|
||||||
|
for _, checkpoint := range files {
|
||||||
|
filename := filepath.Join(dir, checkpoint)
|
||||||
|
if err = os.Remove(filename); err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
n += 1
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
|
||||||
|
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||||
|
if err != nil && os.IsNotExist(err) {
|
||||||
|
err = os.MkdirAll(archiveDir, CheckpointDirPerms)
|
||||||
|
if err == nil {
|
||||||
|
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
bw := bufio.NewWriter(f)
|
||||||
|
defer bw.Flush()
|
||||||
|
zw := zip.NewWriter(bw)
|
||||||
|
defer zw.Close()
|
||||||
|
|
||||||
|
n := 0
|
||||||
|
for _, checkpoint := range files {
|
||||||
|
// Use closure to ensure file is closed immediately after use,
|
||||||
|
// avoiding file descriptor leak from defer in loop
|
||||||
|
err := func() error {
|
||||||
|
filename := filepath.Join(dir, checkpoint)
|
||||||
|
r, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
w, err := zw.Create(checkpoint)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err = io.Copy(w, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = os.Remove(filename); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}()
|
||||||
|
if err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
n += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return n, nil
|
||||||
|
}
|
||||||
477
internal/memorystore/avroCheckpoint.go
Normal file
477
internal/memorystore/avroCheckpoint.go
Normal file
@@ -0,0 +1,477 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/linkedin/goavro/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NumAvroWorkers is the number of goroutines used to write Avro checkpoints
// concurrently (see AvroStore.ToCheckpoint).
var NumAvroWorkers int = DefaultAvroWorkers

// startUp stays true until the first checkpoint cycle finished; getTimestamp
// uses it to force a fresh checkpoint file right after startup.
var startUp bool = true
|
||||||
|
|
||||||
|
func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
|
||||||
|
levels := make([]*AvroLevel, 0)
|
||||||
|
selectors := make([][]string, 0)
|
||||||
|
as.root.lock.RLock()
|
||||||
|
// Cluster
|
||||||
|
for sel1, l1 := range as.root.children {
|
||||||
|
l1.lock.RLock()
|
||||||
|
// Node
|
||||||
|
for sel2, l2 := range l1.children {
|
||||||
|
l2.lock.RLock()
|
||||||
|
// Frequency
|
||||||
|
for sel3, l3 := range l2.children {
|
||||||
|
levels = append(levels, l3)
|
||||||
|
selectors = append(selectors, []string{sel1, sel2, sel3})
|
||||||
|
}
|
||||||
|
l2.lock.RUnlock()
|
||||||
|
}
|
||||||
|
l1.lock.RUnlock()
|
||||||
|
}
|
||||||
|
as.root.lock.RUnlock()
|
||||||
|
|
||||||
|
type workItem struct {
|
||||||
|
level *AvroLevel
|
||||||
|
dir string
|
||||||
|
selector []string
|
||||||
|
}
|
||||||
|
|
||||||
|
n, errs := int32(0), int32(0)
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(NumAvroWorkers)
|
||||||
|
work := make(chan workItem, NumAvroWorkers*2)
|
||||||
|
for range NumAvroWorkers {
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
for workItem := range work {
|
||||||
|
from := getTimestamp(workItem.dir)
|
||||||
|
|
||||||
|
if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
|
||||||
|
if err == ErrNoNewArchiveData {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cclog.Errorf("error while checkpointing %#v: %s", workItem.selector, err.Error())
|
||||||
|
atomic.AddInt32(&errs, 1)
|
||||||
|
} else {
|
||||||
|
atomic.AddInt32(&n, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range len(levels) {
|
||||||
|
dir := path.Join(dir, path.Join(selectors[i]...))
|
||||||
|
work <- workItem{
|
||||||
|
level: levels[i],
|
||||||
|
dir: dir,
|
||||||
|
selector: selectors[i],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(work)
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
if errs > 0 {
|
||||||
|
return int(n), fmt.Errorf("%d errors happend while creating avro checkpoints (%d successes)", errs, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
startUp = false
|
||||||
|
|
||||||
|
return int(n), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getTimestamp returns the timestamp from the directory name
func getTimestamp(dir string) int64 {
	// Extract the resolution and timestamp from the directory name
	// The existing avro file will be in epoch timestamp format
	// iterate over all the files in the directory and find the maximum timestamp
	// and return it

	// dir ends in the resolution component; the actual files live in its
	// parent and are named "<resolution>_<epoch>.avro".
	resolution := path.Base(dir)
	dir = path.Dir(dir)

	files, err := os.ReadDir(dir)
	if err != nil {
		// No directory yet -> treat as "no existing checkpoint".
		return 0
	}
	var maxTS int64 = 0

	if len(files) == 0 {
		return 0
	}

	for _, file := range files {
		if file.IsDir() {
			continue
		}
		name := file.Name()

		// Only consider "<resolution>_<ts>.avro" files of this resolution.
		if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
			continue
		}

		// Parse the epoch between the first '_' and the ".avro" suffix.
		ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
		if err != nil {
			fmt.Printf("error while parsing timestamp: %s\n", err.Error())
			continue
		}

		if ts > maxTS {
			maxTS = ts
		}
	}

	// A file is only reused while it is still inside its checkpoint window
	// (interval plus buffering slack); otherwise a new file is started.
	interval, _ := time.ParseDuration(Keys.Checkpoints.Interval)
	updateTime := time.Unix(maxTS, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()

	// NOTE(review): on startup a fresh file is always forced; this check
	// could run before scanning the directory — confirm the ordering is
	// intentional.
	if startUp {
		return 0
	}

	if updateTime < time.Now().Unix() {
		return 0
	}

	return maxTS
}
|
||||||
|
|
||||||
|
// toCheckpoint flushes this level's buffered samples older than the
// buffering window into the Avro OCF file "<dir>_<from>.avro", appending to
// an existing file when possible. If the generated schema gained fields
// compared to the file's schema, the existing records are read back, the
// file is deleted, and everything is rewritten under the merged schema.
// Flushed timestamps are removed from l.data. Returns ErrNoNewArchiveData
// when there is nothing to write.
func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
	l.lock.Lock()
	defer l.lock.Unlock()

	// fmt.Printf("Checkpointing directory: %s\n", dir)
	// filepath contains the resolution
	intRes, _ := strconv.Atoi(path.Base(dir))

	// find smallest overall timestamp in l.data map and delete it from l.data
	minTS := int64(1<<63 - 1)
	for ts, dat := range l.data {
		if ts < minTS && len(dat) != 0 {
			minTS = ts
		}
	}

	// Without an existing file (from == 0), start at the oldest buffered sample.
	if from == 0 && minTS != int64(1<<63-1) {
		from = minTS
	}

	if from == 0 {
		return ErrNoNewArchiveData
	}

	var schema string
	var codec *goavro.Codec
	recordList := make([]map[string]any, 0)

	var f *os.File

	filePath := dir + fmt.Sprintf("_%d.avro", from)

	var err error

	// err_ is kept around: it also distinguishes "file existed" later when
	// deciding whether old records must be re-read on a schema change.
	fp_, err_ := os.Stat(filePath)
	if errors.Is(err_, os.ErrNotExist) {
		err = os.MkdirAll(path.Dir(dir), 0o755)
		if err != nil {
			return fmt.Errorf("failed to create directory: %v", err)
		}
	} else if fp_.Size() != 0 {
		// Recover the schema/codec of the existing file so new records stay
		// compatible with it.
		f, err = os.Open(filePath)
		if err != nil {
			return fmt.Errorf("failed to open existing avro file: %v", err)
		}

		br := bufio.NewReader(f)

		reader, err := goavro.NewOCFReader(br)
		if err != nil {
			return fmt.Errorf("failed to create OCF reader: %v", err)
		}
		codec = reader.Codec()
		schema = codec.Schema()

		f.Close()
	}

	// Only samples older than timeRef leave the in-memory buffer.
	timeRef := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()

	if dumpAll {
		timeRef = time.Now().Unix()
	}

	// Empty values
	if len(l.data) == 0 {
		// we checkpoint avro files every 60 seconds
		repeat := 60 / intRes

		// Emit empty placeholder records to keep the time axis contiguous.
		for range repeat {
			recordList = append(recordList, make(map[string]any))
		}
	}

	// readFlag ensures the existing file's records are re-read at most once
	// per call, on the first schema change.
	readFlag := true

	for ts := range l.data {
		flag := false
		if ts < timeRef {
			data := l.data[ts]

			schemaGen, err := generateSchema(data)
			if err != nil {
				return err
			}

			// flag is true when the schemas had to be merged (fields added).
			flag, schema, err = compareSchema(schema, schemaGen)
			if err != nil {
				return fmt.Errorf("failed to compare read and generated schema: %v", err)
			}
			if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {

				// Schema grew: pull all existing records into memory and
				// drop the old file so everything is rewritten below.
				f.Close()

				f, err = os.Open(filePath)
				if err != nil {
					return fmt.Errorf("failed to open Avro file: %v", err)
				}

				br := bufio.NewReader(f)

				ocfReader, err := goavro.NewOCFReader(br)
				if err != nil {
					return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
				}

				for ocfReader.Scan() {
					record, err := ocfReader.Read()
					if err != nil {
						return fmt.Errorf("failed to read record: %v", err)
					}

					recordList = append(recordList, record.(map[string]any))
				}

				f.Close()

				err = os.Remove(filePath)
				if err != nil {
					return fmt.Errorf("failed to delete file: %v", err)
				}

				readFlag = false
			}
			codec, err = goavro.NewCodec(schema)
			if err != nil {
				return fmt.Errorf("failed to create codec after merged schema: %v", err)
			}

			recordList = append(recordList, generateRecord(data))
			delete(l.data, ts)
		}
	}

	if len(recordList) == 0 {
		return ErrNoNewArchiveData
	}

	f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
	if err != nil {
		return fmt.Errorf("failed to append new avro file: %v", err)
	}

	// fmt.Printf("Codec : %#v\n", codec)

	writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
		W:               f,
		Codec:           codec,
		CompressionName: goavro.CompressionDeflateLabel,
	})
	if err != nil {
		return fmt.Errorf("failed to create OCF writer: %v", err)
	}

	// Append the new record
	if err := writer.Append(recordList); err != nil {
		return fmt.Errorf("failed to append record: %v", err)
	}

	f.Close()

	return nil
}
|
||||||
|
|
||||||
|
// compareSchema compares an Avro schema read from an existing file with a
// freshly generated one. It returns (changed, schema, err):
//   - changed == false and the read schema when the generated fields are a
//     (sorted) prefix of the read fields, or when both describe the same set;
//   - changed == true and a merged schema (union of both field sets, sorted
//     by name) when the generated schema introduced new fields.
// An empty schemaRead means "no existing file": the generated schema is
// returned unchanged.
func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
	var genSchema, readSchema AvroSchema

	if schemaRead == "" {
		return false, schemaGen, nil
	}

	// Unmarshal the schema strings into AvroSchema structs
	if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
		return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
	}
	if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
		return false, "", fmt.Errorf("failed to parse read schema: %v", err)
	}

	// Sort both field lists so comparison is positional by name.
	sort.Slice(genSchema.Fields, func(i, j int) bool {
		return genSchema.Fields[i].Name < genSchema.Fields[j].Name
	})

	sort.Slice(readSchema.Fields, func(i, j int) bool {
		return readSchema.Fields[i].Name < readSchema.Fields[j].Name
	})

	// Check if schemas are identical
	schemasEqual := true
	if len(genSchema.Fields) <= len(readSchema.Fields) {

		// The generated fields being a prefix of the read fields counts as
		// "no change": the file's schema already covers them.
		for i := range genSchema.Fields {
			if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
				schemasEqual = false
				break
			}
		}

		// If schemas are identical, return the read schema
		if schemasEqual {
			return false, schemaRead, nil
		}
	}

	// Create a map to hold unique fields from both schemas
	fieldMap := make(map[string]AvroField)

	// Add fields from the read schema
	for _, field := range readSchema.Fields {
		fieldMap[field.Name] = field
	}

	// Add or update fields from the generated schema
	for _, field := range genSchema.Fields {
		fieldMap[field.Name] = field
	}

	// Create a union schema by collecting fields from the map
	var mergedFields []AvroField
	for _, field := range fieldMap {
		mergedFields = append(mergedFields, field)
	}

	// Sort fields by name for consistency
	sort.Slice(mergedFields, func(i, j int) bool {
		return mergedFields[i].Name < mergedFields[j].Name
	})

	// Create the merged schema
	mergedSchema := AvroSchema{
		Type:   "record",
		Name:   genSchema.Name,
		Fields: mergedFields,
	}

	// If the merge added nothing over the read schema, report "no change".
	schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
	if schemasEqual {
		for i := range mergedSchema.Fields {
			if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
				schemasEqual = false
				break
			}
		}

		if schemasEqual {
			return false, schemaRead, nil
		}
	}

	// Marshal the merged schema back to JSON
	mergedSchemaJSON, err := json.Marshal(mergedSchema)
	if err != nil {
		return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
	}

	return true, string(mergedSchemaJSON), nil
}
|
||||||
|
|
||||||
|
func generateSchema(data map[string]schema.Float) (string, error) {
|
||||||
|
// Define the Avro schema structure
|
||||||
|
schema := map[string]any{
|
||||||
|
"type": "record",
|
||||||
|
"name": "DataRecord",
|
||||||
|
"fields": []map[string]any{},
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldTracker := make(map[string]struct{})
|
||||||
|
|
||||||
|
for key := range data {
|
||||||
|
if _, exists := fieldTracker[key]; !exists {
|
||||||
|
key = correctKey(key)
|
||||||
|
|
||||||
|
field := map[string]any{
|
||||||
|
"name": key,
|
||||||
|
"type": "double",
|
||||||
|
"default": -1.0,
|
||||||
|
}
|
||||||
|
schema["fields"] = append(schema["fields"].([]map[string]any), field)
|
||||||
|
fieldTracker[key] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
schemaString, err := json.Marshal(schema)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to marshal schema: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return string(schemaString), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateRecord(data map[string]schema.Float) map[string]any {
|
||||||
|
record := make(map[string]any)
|
||||||
|
|
||||||
|
// Iterate through each map in data
|
||||||
|
for key, value := range data {
|
||||||
|
key = correctKey(key)
|
||||||
|
|
||||||
|
// Set the value in the record
|
||||||
|
// avro only accepts basic types
|
||||||
|
record[key] = value.Double()
|
||||||
|
}
|
||||||
|
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
|
// correctKey escapes characters that are not valid in Avro field names.
// The replacements never produce one of the other target characters, so a
// single simultaneous pass is equivalent to sequential replacement.
func correctKey(key string) string {
	return strings.NewReplacer(
		"_", "_0x5F_",
		":", "_0x3A_",
		".", "_0x2E_",
	).Replace(key)
}
|
||||||
|
|
||||||
|
// ReplaceKey reverses the escaping applied by correctKey, restoring the
// original metric key. Decoding runs in this fixed order, with '_0x5F_'
// last, matching the inverse of the encoding order.
func ReplaceKey(key string) string {
	for _, rep := range [...][2]string{
		{"_0x2E_", "."},
		{"_0x3A_", ":"},
		{"_0x5F_", "_"},
	} {
		key = strings.ReplaceAll(key, rep[0], rep[1])
	}
	return key
}
|
||||||
84
internal/memorystore/avroHelper.go
Normal file
84
internal/memorystore/avroHelper.go
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"slices"
|
||||||
|
"strconv"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
)
|
||||||
|
|
||||||
|
func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||||
|
// AvroPool is a pool of Avro writers.
|
||||||
|
go func() {
|
||||||
|
if Keys.Checkpoints.FileFormat == "json" {
|
||||||
|
wg.Done() // Mark this goroutine as done
|
||||||
|
return // Exit the goroutine
|
||||||
|
}
|
||||||
|
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
var avroLevel *AvroLevel
|
||||||
|
oldSelector := make([]string, 0)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case val := <-LineProtocolMessages:
|
||||||
|
// Fetch the frequency of the metric from the global configuration
|
||||||
|
freq, err := GetMetricFrequency(val.MetricName)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Errorf("Error fetching metric frequency: %s\n", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
metricName := ""
|
||||||
|
|
||||||
|
for _, selectorName := range val.Selector {
|
||||||
|
metricName += selectorName + SelectorDelimiter
|
||||||
|
}
|
||||||
|
|
||||||
|
metricName += val.MetricName
|
||||||
|
|
||||||
|
// Create a new selector for the Avro level
|
||||||
|
// The selector is a slice of strings that represents the path to the
|
||||||
|
// Avro level. It is created by appending the cluster, node, and metric
|
||||||
|
// name to the selector.
|
||||||
|
var selector []string
|
||||||
|
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
|
||||||
|
|
||||||
|
if !stringSlicesEqual(oldSelector, selector) {
|
||||||
|
// Get the Avro level for the metric
|
||||||
|
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
|
||||||
|
|
||||||
|
// If the Avro level is nil, create a new one
|
||||||
|
if avroLevel == nil {
|
||||||
|
cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
|
||||||
|
}
|
||||||
|
oldSelector = slices.Clone(selector)
|
||||||
|
}
|
||||||
|
|
||||||
|
avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringSlicesEqual reports whether a and b have the same length and
// identical elements in the same order (nil and empty compare equal).
func stringSlicesEqual(a, b []string) bool {
	// Idiomatic since Go 1.21; "slices" is already imported in this file.
	return slices.Equal(a, b)
}
|
||||||
167
internal/memorystore/avroStruct.go
Normal file
167
internal/memorystore/avroStruct.go
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// LineProtocolMessages streams parsed metric samples from the line
	// protocol receiver into the Avro staging goroutine (see DataStaging).
	// Unbuffered: senders block until the staging goroutine is ready.
	LineProtocolMessages = make(chan *AvroStruct)
	// SelectorDelimiter separates hierarchical selector components in metric names for Avro encoding
	SelectorDelimiter = "_SEL_"
)

// CheckpointBufferMinutes is how long (in minutes) samples stay buffered in
// memory before becoming eligible for Avro checkpointing.
var CheckpointBufferMinutes = DefaultCheckpointBufferMin
|
||||||
|
|
||||||
|
// AvroStruct is one metric sample as handed from the line protocol receiver
// to the Avro staging goroutine.
type AvroStruct struct {
	MetricName string   // metric identifier
	Cluster    string   // cluster the sample belongs to
	Node       string   // host the sample belongs to
	Selector   []string // extra selector components (e.g. hardware type ids)
	Value      schema.Float
	Timestamp  int64 // unix seconds
}

// AvroStore is the root of the in-memory staging tree
// (cluster -> node -> frequency, see AvroLevel).
type AvroStore struct {
	root AvroLevel
}

// avroStore is the process-wide staging tree instance.
var avroStore AvroStore

// AvroLevel is one node of the staging tree. Leaves hold buffered samples
// keyed by timestamp, then by metric name. lock guards both maps.
type AvroLevel struct {
	children map[string]*AvroLevel
	data     map[int64]map[string]schema.Float
	lock     sync.RWMutex
}

// AvroField mirrors one field entry of an Avro record schema for JSON
// (de)serialization.
type AvroField struct {
	Name    string `json:"name"`
	Type    any    `json:"type"`
	Default any    `json:"default,omitempty"`
}

// AvroSchema mirrors an Avro record schema for JSON (de)serialization.
type AvroSchema struct {
	Type   string      `json:"type"`
	Name   string      `json:"name"`
	Fields []AvroField `json:"fields"`
}
|
||||||
|
|
||||||
|
// findAvroLevelOrCreate walks the selector path down the level tree,
// creating any missing intermediate levels, and returns the AvroLevel
// addressed by the full selector. Safe for concurrent use: it reads
// under RLock and only upgrades to the write lock when a child is
// missing, re-checking after the upgrade (double-checked locking).
func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
	if len(selector) == 0 {
		return l
	}

	// Allow concurrent reads:
	l.lock.RLock()
	var child *AvroLevel
	var ok bool
	if l.children == nil {
		// Children map needs to be created...
		l.lock.RUnlock()
	} else {
		// NOTE: this `:=` deliberately shadows the outer child/ok; the
		// outer pair is only used below under the write lock.
		child, ok := l.children[selector[0]]
		l.lock.RUnlock()
		if ok {
			return child.findAvroLevelOrCreate(selector[1:])
		}
	}

	// The level does not exist, take write lock for unique access:
	l.lock.Lock()
	// While this thread waited for the write lock, another thread
	// could have created the child node.
	if l.children != nil {
		child, ok = l.children[selector[0]]
		if ok {
			l.lock.Unlock()
			return child.findAvroLevelOrCreate(selector[1:])
		}
	}

	child = &AvroLevel{
		data:     make(map[int64]map[string]schema.Float, 0),
		children: nil,
	}

	if l.children != nil {
		l.children[selector[0]] = child
	} else {
		l.children = map[string]*AvroLevel{selector[0]: child}
	}
	l.lock.Unlock()
	return child.findAvroLevelOrCreate(selector[1:])
}
|
||||||
|
|
||||||
|
// addMetric records value for metricName at (approximately) timestamp.
// Freq is the metric's measurement interval in seconds; timestamps are
// pre-allocated on a Freq-spaced grid, and the sample is stored in the
// closest free slot within +/-Freq seconds of the given timestamp.
// Samples with no free slot in range are silently dropped.
func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
	l.lock.Lock()
	defer l.lock.Unlock()

	// Number of grid slots covering the whole checkpoint buffer window.
	KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)

	// Create keys in advance for the given amount of time
	if len(l.data) != KeyCounter {
		if len(l.data) == 0 {
			// First sample at this level: pre-allocate the full window
			// starting at this timestamp.
			for i := range KeyCounter {
				l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
			}
		} else {
			// Get the last timestamp
			var lastTS int64
			for ts := range l.data {
				if ts > lastTS {
					lastTS = ts
				}
			}
			// Extend the grid by one slot past the newest timestamp.
			l.data[lastTS+int64(Freq)] = make(map[string]schema.Float, 0)
		}
	}

	closestTS := int64(0)
	minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
	found := false

	// Iterate over timestamps and choose the one which is within range.
	// Since its epoch time, we check if the difference is less than 60 seconds.
	for ts, dat := range l.data {
		// Check if timestamp is within range
		diff := timestamp - ts
		if diff < -int64(Freq) || diff > int64(Freq) {
			continue
		}

		// Metric already present at this timestamp — skip
		if _, ok := dat[metricName]; ok {
			continue
		}

		// Check if this is the closest timestamp so far
		if Abs(diff) < minDiff {
			minDiff = Abs(diff)
			closestTS = ts
			found = true
		}
	}

	if found {
		l.data[closestTS][metricName] = value
	}
}
|
||||||
|
|
||||||
|
// GetAvroStore returns the package-level AvroStore singleton.
func GetAvroStore() *AvroStore {
	return &avroStore
}
|
||||||
|
|
||||||
|
// Abs returns the absolute value of x.
// (math.Abs only handles float64, hence this integer helper.)
func Abs(x int64) int64 {
	if x >= 0 {
		return x
	}
	return -x
}
|
||||||
190
internal/memorystore/buffer.go
Normal file
190
internal/memorystore/buffer.go
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BufferCap is the default buffer capacity.
// buffer.data will only ever grow up to its capacity and a new link
// in the buffer chain will be created if needed so that no copying
// of data or reallocation needs to happen on writes.
const BufferCap int = DefaultBufferCapacity

// bufferPool recycles buffer structs (and their data slices) so that
// extending a buffer chain does not allocate.
var bufferPool sync.Pool = sync.Pool{
	New: func() any {
		return &buffer{
			data: make([]schema.Float, 0, BufferCap),
		}
	},
}

var (
	// ErrNoData signals that a metric/level holds no stored values.
	ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level")
	// ErrDataDoesNotAlign signals that data from lower granularities
	// cannot be merged because the time grids do not line up.
	ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
)

// Each metric on each level has it's own buffer.
// This is where the actual values go.
// If `cap(data)` is reached, a new buffer is created and
// becomes the new head of a buffer list.
type buffer struct {
	prev      *buffer        // older link in the chain (nil at the oldest end)
	next      *buffer        // newer link in the chain (nil at the head)
	data      []schema.Float // one slot per `frequency` seconds
	frequency int64          // step width in seconds
	start     int64          // slot-0 time minus half a step (see firstWrite)
	archived  bool           // set once this buffer was written to a checkpoint
	closed    bool
}
|
||||||
|
|
||||||
|
func newBuffer(ts, freq int64) *buffer {
|
||||||
|
b := bufferPool.Get().(*buffer)
|
||||||
|
b.frequency = freq
|
||||||
|
b.start = ts - (freq / 2)
|
||||||
|
b.prev = nil
|
||||||
|
b.next = nil
|
||||||
|
b.archived = false
|
||||||
|
b.closed = false
|
||||||
|
b.data = b.data[:0]
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
// If a new buffer was created, the new head is returned.
// Otherwise, the existing buffer is returned.
// Normally, only "newer" data should be written, but if the value would
// end up in the same buffer anyways it is allowed.
func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
	if ts < b.start {
		return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
	}

	// Slot index this timestamp falls into.
	// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
	idx := int((ts - b.start) / b.frequency)
	if idx >= cap(b.data) {
		// This buffer is full: chain a fresh buffer and write there.
		newbuf := newBuffer(ts, b.frequency)
		newbuf.prev = b
		b.next = newbuf
		b = newbuf
		idx = 0
	}

	// Overwriting value or writing value from past
	if idx < len(b.data) {
		b.data[idx] = value
		return b, nil
	}

	// Fill up unwritten slots with NaN
	for i := len(b.data); i < idx; i++ {
		b.data = append(b.data, schema.NaN)
	}

	b.data = append(b.data, value)
	return b, nil
}
|
||||||
|
|
||||||
|
// end returns the first timestamp no longer covered by this buffer.
func (b *buffer) end() int64 {
	return b.firstWrite() + int64(len(b.data))*b.frequency
}
|
||||||
|
|
||||||
|
// firstWrite returns the timestamp of this buffer's first slot
// (start is stored shifted back by half a step, see newBuffer).
func (b *buffer) firstWrite() int64 {
	return b.start + (b.frequency / 2)
}
|
||||||
|
|
||||||
|
// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Simple linear interpolation is done between the two neighboring cells if possible.
// If values at the start or end are missing, instead of NaN values, the second and third
// return values contain the actual `from`/`to`.
// This function goes back the buffer chain if `from` is older than the current buffer's start.
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
// If `data` is not long enough to hold all values, this function will panic!
func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
	if from < b.firstWrite() {
		if b.prev != nil {
			// Older data lives further back in the chain.
			return b.prev.read(from, to, data)
		}
		// No older link — clamp to the earliest available time.
		from = b.firstWrite()
	}

	i := 0
	t := from
	for ; t < to; t += b.frequency {
		idx := int((t - b.start) / b.frequency)
		if idx >= cap(b.data) {
			// Walked past this buffer; continue in the newer link.
			if b.next == nil {
				break
			}
			b = b.next
			idx = 0
		}

		if idx >= len(b.data) {
			if b.next == nil || to <= b.next.start {
				break
			}
			// Gap between this buffer and the next one.
			data[i] += schema.NaN
		} else if t < b.start {
			// Slot lies before this buffer's range.
			data[i] += schema.NaN
		} else {
			// NOTE: values are accumulated into data (+=), so callers
			// must pass a zeroed (or intentionally pre-filled) slice.
			data[i] += b.data[idx]
		}
		i++
	}

	return data[:i], from, t, nil
}
|
||||||
|
|
||||||
|
// Returns true if this buffer needs to be freed.
// free recursively releases all older chain links whose data ends
// before t; n is the number of buffers released. Buffers with
// unmodified capacity are returned to the pool for reuse.
func (b *buffer) free(t int64) (delme bool, n int) {
	if b.prev != nil {
		// The `:=` here deliberately shadows the named return delme:
		// this delme refers to b.prev, not to the receiver.
		delme, m := b.prev.free(t)
		n += m
		if delme {
			b.prev.next = nil
			// Only recycle buffers whose backing slice still has the
			// standard capacity.
			if cap(b.prev.data) == BufferCap {
				bufferPool.Put(b.prev)
			}
			b.prev = nil
		}
	}

	end := b.end()
	if end < t {
		return true, n + 1
	}

	return false, n
}
|
||||||
|
|
||||||
|
// Call `callback` on every buffer that contains data in the range from `from` to `to`.
|
||||||
|
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
|
||||||
|
if b == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := b.prev.iterFromTo(from, to, callback); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if from <= b.end() && b.start <= to {
|
||||||
|
return callback(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *buffer) count() int64 {
|
||||||
|
res := int64(len(b.data))
|
||||||
|
if b.prev != nil {
|
||||||
|
res += b.prev.count()
|
||||||
|
}
|
||||||
|
return res
|
||||||
|
}
|
||||||
761
internal/memorystore/checkpoint.go
Normal file
761
internal/memorystore/checkpoint.go
Normal file
@@ -0,0 +1,761 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"path/filepath"
|
||||||
|
"runtime"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/schema"
|
||||||
|
"github.com/linkedin/goavro/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
	// CheckpointFilePerms are the permissions for checkpoint files.
	CheckpointFilePerms = 0o644
	// CheckpointDirPerms are the permissions for checkpoint directories.
	CheckpointDirPerms = 0o755
	// GCTriggerInterval is the host-count threshold after which forced
	// GC runs start while loading checkpoints (see FromCheckpoint).
	GCTriggerInterval = DefaultGCTriggerInterval
)

// Whenever changed, update MarshalJSON as well!
// CheckpointMetrics is one metric's serialized time series: values on
// a Frequency-spaced grid beginning at Start.
type CheckpointMetrics struct {
	Data      []schema.Float `json:"data"`
	Frequency int64          `json:"frequency"`
	Start     int64          `json:"start"`
}

// CheckpointFile mirrors one JSON checkpoint file: the metrics stored
// at this level plus one nested entry per child level.
type CheckpointFile struct {
	Metrics  map[string]*CheckpointMetrics `json:"metrics"`
	Children map[string]*CheckpointFile    `json:"children"`
	From     int64                         `json:"from"`
	To       int64                         `json:"to"`
}

// lastCheckpoint remembers the start of the last successful JSON
// checkpoint run so the next run continues from there.
var lastCheckpoint time.Time
|
||||||
|
|
||||||
|
// Checkpointing starts the background checkpointing loop and returns
// immediately. The spawned goroutine runs until ctx is cancelled and
// calls wg.Done on exit (the caller must have done wg.Add(1)).
// With the "json" file format, full checkpoints are written every
// Keys.Checkpoints.Interval; with any other format, Avro checkpoints
// are written after an initial CheckpointBufferMinutes delay and then
// every DefaultAvroCheckpointInterval.
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
	lastCheckpoint = time.Now()

	if Keys.Checkpoints.FileFormat == "json" {
		ms := GetMemoryStore()

		go func() {
			defer wg.Done()
			d, err := time.ParseDuration(Keys.Checkpoints.Interval)
			if err != nil {
				cclog.Fatal(err)
			}
			if d <= 0 {
				// A non-positive interval disables checkpointing.
				return
			}

			ticker := time.NewTicker(d)
			defer ticker.Stop()

			for {
				select {
				case <-ctx.Done():
					return
				case <-ticker.C:
					cclog.Infof("[METRICSTORE]> start checkpointing (starting at %s)...", lastCheckpoint.Format(time.RFC3339))
					now := time.Now()
					n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir,
						lastCheckpoint.Unix(), now.Unix())
					if err != nil {
						// Keep lastCheckpoint unchanged so the failed
						// window is retried next tick.
						cclog.Errorf("[METRICSTORE]> checkpointing failed: %s", err.Error())
					} else {
						cclog.Infof("[METRICSTORE]> done: %d checkpoint files created", n)
						lastCheckpoint = now
					}
				}
			}
		}()
	} else {
		go func() {
			defer wg.Done()

			// Wait for the first buffer window to fill before writing
			// the initial Avro checkpoint.
			select {
			case <-ctx.Done():
				return
			case <-time.After(time.Duration(CheckpointBufferMinutes) * time.Minute):
				GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
			}

			ticker := time.NewTicker(DefaultAvroCheckpointInterval)
			defer ticker.Stop()

			for {
				select {
				case <-ctx.Done():
					return
				case <-ticker.C:
					GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
				}
			}
		}()
	}
}
|
||||||
|
|
||||||
|
// As `Float` implements a custom MarshalJSON() function,
// serializing an array of such types has more overhead
// than one would assume (because of extra allocations, interfaces and so on).
// MarshalJSON therefore hand-writes the JSON object into a pre-sized
// byte slice; NaN values are encoded as null.
func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
	// 128 bytes header slack plus ~8 bytes per value.
	buf := make([]byte, 0, 128+len(cm.Data)*8)
	buf = append(buf, `{"frequency":`...)
	buf = strconv.AppendInt(buf, cm.Frequency, 10)
	buf = append(buf, `,"start":`...)
	buf = strconv.AppendInt(buf, cm.Start, 10)
	buf = append(buf, `,"data":[`...)
	for i, x := range cm.Data {
		if i != 0 {
			buf = append(buf, ',')
		}
		if x.IsNaN() {
			// JSON has no NaN literal; use null.
			buf = append(buf, `null`...)
		} else {
			// One decimal place at float32 precision keeps files small.
			buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)
		}
	}
	buf = append(buf, `]}`...)
	return buf, nil
}
|
||||||
|
|
||||||
|
// Metrics stored at the lowest 2 levels are not stored away (root and cluster)!
// On a per-host basis a new JSON file is created. I have no idea if this will scale.
// The good thing: Only a host at a time is locked, so this function can run
// in parallel to writes/reads.
// ToCheckpoint writes one checkpoint file per host, covering the time
// range [from, to], below dir, and returns the number of files written.
func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
	// Snapshot all cluster/host levels under read locks first, so the
	// workers below can run without holding the tree locks.
	levels := make([]*Level, 0)
	selectors := make([][]string, 0)
	m.root.lock.RLock()
	for sel1, l1 := range m.root.children {
		l1.lock.RLock()
		for sel2, l2 := range l1.children {
			levels = append(levels, l2)
			selectors = append(selectors, []string{sel1, sel2})
		}
		l1.lock.RUnlock()
	}
	m.root.lock.RUnlock()

	type workItem struct {
		level    *Level
		dir      string
		selector []string
	}

	// n counts written files, errs counts failed hosts; both are
	// updated atomically by the workers.
	n, errs := int32(0), int32(0)

	var wg sync.WaitGroup
	wg.Add(Keys.NumWorkers)
	work := make(chan workItem, Keys.NumWorkers*2)
	for worker := 0; worker < Keys.NumWorkers; worker++ {
		go func() {
			defer wg.Done()

			for workItem := range work {
				if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {
					if err == ErrNoNewArchiveData {
						// Nothing new for this host — not an error.
						continue
					}

					cclog.Errorf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error())
					atomic.AddInt32(&errs, 1)
				} else {
					atomic.AddInt32(&n, 1)
				}
			}
		}()
	}

	for i := 0; i < len(levels); i++ {
		dir := path.Join(dir, path.Join(selectors[i]...))
		work <- workItem{
			level:    levels[i],
			dir:      dir,
			selector: selectors[i],
		}
	}

	close(work)
	wg.Wait()

	if errs > 0 {
		return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
	}
	return int(n), nil
}
|
||||||
|
|
||||||
|
// toCheckpointFile collects all metric data of this level and,
// recursively, of its children in [from, to] into a CheckpointFile.
// It returns (nil, nil) when there is nothing new to archive.
func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
	l.lock.RLock()
	defer l.lock.RUnlock()

	retval := &CheckpointFile{
		From:     from,
		To:       to,
		Metrics:  make(map[string]*CheckpointMetrics),
		Children: make(map[string]*CheckpointFile),
	}

	for metric, minfo := range m.Metrics {
		b := l.metrics[minfo.offset]
		if b == nil {
			continue
		}

		// Skip metrics whose buffers in this range were all archived
		// by a previous checkpoint run. (The callback never returns an
		// error, so iterFromTo's result can be ignored here.)
		allArchived := true
		b.iterFromTo(from, to, func(b *buffer) error {
			if !b.archived {
				allArchived = false
			}
			return nil
		})

		if allArchived {
			continue
		}

		data := make([]schema.Float, (to-from)/b.frequency+1)
		data, start, end, err := b.read(from, to, data)
		if err != nil {
			return nil, err
		}

		// Pad the unread tail with NaN so the slice spans the window.
		for i := int((end - start) / b.frequency); i < len(data); i++ {
			data[i] = schema.NaN
		}

		retval.Metrics[metric] = &CheckpointMetrics{
			Frequency: b.frequency,
			Start:     start,
			Data:      data,
		}
	}

	for name, child := range l.children {
		val, err := child.toCheckpointFile(from, to, m)
		if err != nil {
			return nil, err
		}

		if val != nil {
			retval.Children[name] = val
		}
	}

	// Completely empty subtree: signal "nothing to archive".
	if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
		return nil, nil
	}

	return retval, nil
}
|
||||||
|
|
||||||
|
// toCheckpoint serializes this level's data in [from, to] into
// dir/<from>.json, creating dir on demand. It returns
// ErrNoNewArchiveData when everything in the range is already archived.
func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
	cf, err := l.toCheckpointFile(from, to, m)
	if err != nil {
		return err
	}

	if cf == nil {
		return ErrNoNewArchiveData
	}

	filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
	f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
	if err != nil && os.IsNotExist(err) {
		// Parent directory is missing — create it and retry once.
		err = os.MkdirAll(dir, CheckpointDirPerms)
		if err == nil {
			f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
		}
	}
	if err != nil {
		return err
	}
	defer f.Close()

	bw := bufio.NewWriter(f)
	if err = json.NewEncoder(bw).Encode(cf); err != nil {
		return err
	}

	// Flush the buffered writer before the deferred Close.
	return bw.Flush()
}
|
||||||
|
|
||||||
|
// FromCheckpoint loads all checkpoint files with the given extension
// below dir (expected layout: dir/<cluster>/<host>/) that contain data
// newer than from, using Keys.NumWorkers parallel loaders. It returns
// the number of files loaded.
func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
	var wg sync.WaitGroup
	work := make(chan [2]string, Keys.NumWorkers)
	// n counts loaded files, errs counts failed hosts; both are
	// updated atomically by the workers.
	n, errs := int32(0), int32(0)

	wg.Add(Keys.NumWorkers)
	for worker := 0; worker < Keys.NumWorkers; worker++ {
		go func() {
			defer wg.Done()
			for host := range work {
				// host is {cluster, hostname}.
				lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))
				nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension)
				if err != nil {
					cclog.Errorf("[METRICSTORE]> error while loading checkpoints for %s/%s: %s", host[0], host[1], err.Error())
					atomic.AddInt32(&errs, 1)
				}
				atomic.AddInt32(&n, int32(nn))
			}
		}()
	}

	i := 0
	clustersDir, err := os.ReadDir(dir)
	for _, clusterDir := range clustersDir {
		if !clusterDir.IsDir() {
			err = errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
			goto done
		}

		hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
		if e != nil {
			err = e
			goto done
		}

		for _, hostDir := range hostsDir {
			if !hostDir.IsDir() {
				err = errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
				goto done
			}

			i++
			if i%Keys.NumWorkers == 0 && i > GCTriggerInterval {
				// Forcing garbage collection runs here regularly during the loading of checkpoints
				// will decrease the total heap size after loading everything back to memory is done.
				// While loading data, the heap will grow fast, so the GC target size will double
				// almost always. By forcing GCs here, we can keep it growing more slowly so that
				// at the end, less memory is wasted.
				runtime.GC()
			}

			work <- [2]string{clusterDir.Name(), hostDir.Name()}
		}
	}
done:
	// Always close the channel and wait, even on early exit via goto,
	// so no worker goroutine is leaked.
	close(work)
	wg.Wait()

	if err != nil {
		return int(n), err
	}

	if errs > 0 {
		return int(n), fmt.Errorf("[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)", errs, n)
	}
	return int(n), nil
}
|
||||||
|
|
||||||
|
// Metrics stored at the lowest 2 levels are not loaded (root and cluster)!
// This function can only be called once and before the very first write or read.
// Different host's data is loaded to memory in parallel.
// FromCheckpointFiles prefers the configured file format and falls back
// to the opposite one (json <-> avro) if no matching files exist.
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
	if _, err := os.Stat(dir); os.IsNotExist(err) {
		// The directory does not exist, so create it using os.MkdirAll()
		err := os.MkdirAll(dir, CheckpointDirPerms) // CheckpointDirPerms sets the permissions for the directory
		if err != nil {
			cclog.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
		}
		cclog.Debugf("[METRICSTORE]> %#v Directory created successfully", dir)
	}

	// Config read (replace with your actual config read)
	fileFormat := Keys.Checkpoints.FileFormat
	if fileFormat == "" {
		fileFormat = "avro"
	}

	// Map to easily get the fallback format
	oppositeFormat := map[string]string{
		"json": "avro",
		"avro": "json",
	}

	// First, attempt to load the specified format
	if found, err := checkFilesWithExtension(dir, fileFormat); err != nil {
		return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
	} else if found {
		cclog.Infof("[METRICSTORE]> Loading %s files because fileformat is %s", fileFormat, fileFormat)
		return m.FromCheckpoint(dir, from, fileFormat)
	}

	// If not found, attempt the opposite format
	altFormat := oppositeFormat[fileFormat]
	if found, err := checkFilesWithExtension(dir, altFormat); err != nil {
		return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
	} else if found {
		cclog.Infof("[METRICSTORE]> Loading %s files but fileformat is %s", altFormat, fileFormat)
		return m.FromCheckpoint(dir, from, altFormat)
	}

	// Neither format present: start with an empty store.
	cclog.Print("[METRICSTORE]> No valid checkpoint files found in the directory")
	return 0, nil
}
|
||||||
|
|
||||||
|
func checkFilesWithExtension(dir string, extension string) (bool, error) {
|
||||||
|
found := false
|
||||||
|
|
||||||
|
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> error accessing path %s: %v", path, err)
|
||||||
|
}
|
||||||
|
if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension {
|
||||||
|
found = true
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("[METRICSTORE]> error walking through directories: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return found, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// loadAvroFile restores one Avro OCF checkpoint file into this level.
// File names encode "<resolution>_<startTimestamp>.avro". Record keys
// may embed a selector path via SelectorDelimiter, in which case child
// levels are created on the fly and the buffer is attached at the leaf.
// Files whose data ends before `from` are skipped.
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
	br := bufio.NewReader(f)

	fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
	resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
	if err != nil {
		return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err)
	}

	// The timestamp sits between '_' and the ".avro" suffix (5 chars).
	fromTimestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)

	// Same logic according to lineprotocol
	fromTimestamp -= (resolution / 2)

	if err != nil {
		return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err)
	}

	// fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)

	var recordCounter int64 = 0

	// Create a new OCF reader from the buffered reader
	ocfReader, err := goavro.NewOCFReader(br)
	if err != nil {
		return fmt.Errorf("[METRICSTORE]> error creating OCF reader: %w", err)
	}

	// Collect all values per metric key across the whole file.
	metricsData := make(map[string]schema.FloatArray)

	for ocfReader.Scan() {
		datum, err := ocfReader.Read()
		if err != nil {
			return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err)
		}

		record, ok := datum.(map[string]any)
		if !ok {
			return fmt.Errorf("[METRICSTORE]> failed to assert datum as map[string]interface{}")
		}

		for key, value := range record {
			metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64)))
		}

		recordCounter += 1
	}

	// End time of the file's data; skip files entirely before `from`.
	to := (fromTimestamp + (recordCounter / (60 / resolution) * 60))
	if to < from {
		return nil
	}

	for key, floatArray := range metricsData {
		metricName := ReplaceKey(key)

		if strings.Contains(metricName, SelectorDelimiter) {
			// Key encodes a selector path: descend (creating levels as
			// needed) and attach the buffer at the leaf level.
			subString := strings.Split(metricName, SelectorDelimiter)

			lvl := l

			for i := 0; i < len(subString)-1; i++ {

				sel := subString[i]

				if lvl.children == nil {
					lvl.children = make(map[string]*Level)
				}

				child, ok := lvl.children[sel]
				if !ok {
					child = &Level{
						metrics:  make([]*buffer, len(m.Metrics)),
						children: nil,
					}
					lvl.children[sel] = child
				}
				lvl = child
			}

			leafMetricName := subString[len(subString)-1]
			err = lvl.createBuffer(m, leafMetricName, floatArray, fromTimestamp, resolution)
			if err != nil {
				return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
			}
		} else {
			// Plain metric name: buffer belongs directly to this level.
			err = l.createBuffer(m, metricName, floatArray, fromTimestamp, resolution)
			if err != nil {
				return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
			}
		}

	}

	return nil
}
|
||||||
|
|
||||||
|
// createBuffer wraps floatArray in an already-archived buffer starting
// at `from` with the given resolution and appends it to the metric's
// buffer chain at this level. Metric names unknown to the store are
// silently ignored.
func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
	n := len(floatArray)
	b := &buffer{
		frequency: resolution,
		start:     from,
		// Three-index slice: later appends must not grow into the
		// source array.
		data:     floatArray[0:n:n],
		prev:     nil,
		next:     nil,
		archived: true,
	}

	minfo, ok := m.Metrics[metricName]
	if !ok {
		// Metric not configured — nothing to store.
		return nil
	}

	prev := l.metrics[minfo.offset]
	if prev == nil {
		l.metrics[minfo.offset] = b
	} else {
		if prev.start > b.start {
			return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
		}

		b.prev = prev
		prev.next = b

		// Pad any time gap between the previous buffer's end and this
		// buffer's start with NaN slots.
		missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
		if missingCount > 0 {
			missingCount /= int(b.frequency)

			for range missingCount {
				prev.data = append(prev.data, schema.NaN)
			}

			// Re-cap the slice so future appends allocate fresh space.
			prev.data = prev.data[0:len(prev.data):len(prev.data)]
		}
	}
	// The new buffer becomes the chain head for this metric.
	l.metrics[minfo.offset] = b

	return nil
}
|
||||||
|
|
||||||
|
func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {
|
||||||
|
br := bufio.NewReader(f)
|
||||||
|
cf := &CheckpointFile{}
|
||||||
|
if err := json.NewDecoder(br).Decode(cf); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if cf.To != 0 && cf.To < from {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := l.loadFile(cf, m); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
|
||||||
|
for name, metric := range cf.Metrics {
|
||||||
|
n := len(metric.Data)
|
||||||
|
b := &buffer{
|
||||||
|
frequency: metric.Frequency,
|
||||||
|
start: metric.Start,
|
||||||
|
data: metric.Data[0:n:n],
|
||||||
|
prev: nil,
|
||||||
|
next: nil,
|
||||||
|
archived: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
minfo, ok := m.Metrics[name]
|
||||||
|
if !ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
prev := l.metrics[minfo.offset]
|
||||||
|
if prev == nil {
|
||||||
|
l.metrics[minfo.offset] = b
|
||||||
|
} else {
|
||||||
|
if prev.start > b.start {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.prev = prev
|
||||||
|
prev.next = b
|
||||||
|
}
|
||||||
|
l.metrics[minfo.offset] = b
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(cf.Children) > 0 && l.children == nil {
|
||||||
|
l.children = make(map[string]*Level)
|
||||||
|
}
|
||||||
|
|
||||||
|
for sel, childCf := range cf.Children {
|
||||||
|
child, ok := l.children[sel]
|
||||||
|
if !ok {
|
||||||
|
child = &Level{
|
||||||
|
metrics: make([]*buffer, len(m.Metrics)),
|
||||||
|
children: nil,
|
||||||
|
}
|
||||||
|
l.children[sel] = child
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := child.loadFile(childCf, m); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) {
|
||||||
|
direntries, err := os.ReadDir(dir)
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return 0, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
|
allFiles := make([]fs.DirEntry, 0)
|
||||||
|
filesLoaded := 0
|
||||||
|
for _, e := range direntries {
|
||||||
|
if e.IsDir() {
|
||||||
|
child := &Level{
|
||||||
|
metrics: make([]*buffer, len(m.Metrics)),
|
||||||
|
children: make(map[string]*Level),
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension)
|
||||||
|
filesLoaded += files
|
||||||
|
if err != nil {
|
||||||
|
return filesLoaded, err
|
||||||
|
}
|
||||||
|
|
||||||
|
l.children[e.Name()] = child
|
||||||
|
} else if strings.HasSuffix(e.Name(), "."+extension) {
|
||||||
|
allFiles = append(allFiles, e)
|
||||||
|
} else {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := findFiles(allFiles, from, extension, true)
|
||||||
|
if err != nil {
|
||||||
|
return filesLoaded, err
|
||||||
|
}
|
||||||
|
|
||||||
|
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
|
||||||
|
"json": l.loadJSONFile,
|
||||||
|
"avro": l.loadAvroFile,
|
||||||
|
}
|
||||||
|
|
||||||
|
loader := loaders[extension]
|
||||||
|
|
||||||
|
for _, filename := range files {
|
||||||
|
// Use a closure to ensure file is closed immediately after use
|
||||||
|
err := func() error {
|
||||||
|
f, err := os.Open(path.Join(dir, filename))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
return loader(m, f, from)
|
||||||
|
}()
|
||||||
|
if err != nil {
|
||||||
|
return filesLoaded, err
|
||||||
|
}
|
||||||
|
|
||||||
|
filesLoaded += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return filesLoaded, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// This will probably get very slow over time!
// A solution could be some sort of an index file in which all other files
// and the timespan they contain is listed.

// findFiles selects checkpoint filenames relative to timestamp t.
// Filenames look like "<prefix>_<unix-ts>.<extension>"; entries are sorted
// ascending by that timestamp (NOTE: sorts the caller's slice in place).
//
// With findMoreRecentFiles, it returns every file starting at or after t
// plus the single file straddling t; otherwise it returns files whose data
// ends (i.e. whose successor starts) before t.
func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) {
	nums := map[string]int64{}
	for _, e := range direntries {
		if !strings.HasSuffix(e.Name(), "."+extension) {
			continue
		}

		// Parse the timestamp between the first '_' and the ".<extension>"
		// suffix. (Was a hard-coded `len(name)-5`, which silently broke for
		// any extension that is not exactly four characters long.)
		ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-len(extension)-1], 10, 64)
		if err != nil {
			return nil, err
		}
		nums[e.Name()] = ts
	}

	sort.Slice(direntries, func(i, j int) bool {
		a, b := direntries[i], direntries[j]
		return nums[a.Name()] < nums[b.Name()]
	})

	filenames := make([]string, 0)
	for i := range direntries {
		e := direntries[i]
		ts1 := nums[e.Name()]

		// Files starting at or after t always carry relevant data.
		if findMoreRecentFiles && t <= ts1 {
			filenames = append(filenames, e.Name())
		}
		if i == len(direntries)-1 {
			continue
		}

		enext := direntries[i+1]
		ts2 := nums[enext.Name()]

		if findMoreRecentFiles {
			// The file straddling t: starts before t, successor starts after.
			if ts1 < t && t < ts2 {
				filenames = append(filenames, e.Name())
			}
		} else {
			// Older-files mode: the file's data ends where its successor
			// starts, so ts2 < t means the whole file is older than t.
			if ts2 < t {
				filenames = append(filenames, e.Name())
			}
		}
	}

	return filenames, nil
}
|
||||||
115
internal/memorystore/config.go
Normal file
115
internal/memorystore/config.go
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Default tuning values for the metric store; each is used when the
// corresponding configuration option is absent or zero.
const (
	// Presumably the worker cap referenced by MetricStoreConfig.NumWorkers
	// ("defaults to min(runtime.NumCPU()/2+1, 10)") — confirm.
	DefaultMaxWorkers             = 10
	DefaultBufferCapacity         = 512
	DefaultGCTriggerInterval      = 100
	DefaultAvroWorkers            = 4
	DefaultCheckpointBufferMin    = 3
	DefaultAvroCheckpointInterval = time.Minute
)
|
||||||
|
|
||||||
|
// MetricStoreConfig is the JSON configuration of the built-in metric store:
// checkpointing, archiving, in-memory retention, debug switches and the
// subscriptions used to ingest measurements.
type MetricStoreConfig struct {
	// Number of concurrent workers for checkpoint and archive operations.
	// If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10)
	NumWorkers int `json:"num-workers"`
	// Checkpoints configures periodic on-disk checkpointing of in-memory metrics.
	Checkpoints struct {
		// Checkpoint file type: "avro" or "json"; "avro" is the default.
		FileFormat string `json:"file-format"`
		// Interval at which the metrics are checkpointed.
		Interval string `json:"interval"`
		// Parent directory in which checkpoint files are placed.
		RootDir string `json:"directory"`
		// On startup, load metrics from checkpoint files younger than this.
		Restore string `json:"restore"`
	} `json:"checkpoints"`
	// Debug holds development/debugging switches.
	Debug struct {
		// Path to dump the store's content to — presumably empty disables it; confirm.
		DumpToFile string `json:"dump-to-file"`
		// NOTE(review): looks like this enables a gops diagnostics agent — confirm.
		EnableGops bool `json:"gops"`
	} `json:"debug"`
	// Keep metrics in memory for this time interval before they are freed.
	RetentionInMemory string `json:"retention-in-memory"`
	// Archive configures what happens to already-checkpointed files.
	Archive struct {
		// Interval at which checkpointed files are archived.
		Interval string `json:"interval"`
		// Parent directory in which archived files are placed.
		RootDir string `json:"directory"`
		// Delete checkpointed files instead of archiving them — confirm semantics.
		DeleteInstead bool `json:"delete-instead"`
	} `json:"archive"`
	// Subscriptions lists the channels measurements are ingested from.
	Subscriptions []struct {
		// Channel name
		SubscribeTo string `json:"subscribe-to"`

		// Allow lines without a cluster tag, use this as default, optional
		ClusterTag string `json:"cluster-tag"`
	} `json:"subscriptions"`
}

// Keys holds the active metric store configuration.
var Keys MetricStoreConfig
|
||||||
|
|
||||||
|
// AggregationStrategy for aggregation over multiple values at different cpus/sockets/..., not time!
type AggregationStrategy int

const (
	NoAggregation AggregationStrategy = iota
	SumAggregation
	AvgAggregation
)

// AssignAggregationStrategy maps the textual configuration value ("", "sum",
// "avg") to its AggregationStrategy; any other value yields an error.
func AssignAggregationStrategy(str string) (AggregationStrategy, error) {
	switch str {
	case "":
		return NoAggregation, nil
	case "sum":
		return SumAggregation, nil
	case "avg":
		return AvgAggregation, nil
	default:
		return NoAggregation, fmt.Errorf("[METRICSTORE]> unknown aggregation strategy: %s", str)
	}
}

// MetricConfig describes one metric held by the store.
type MetricConfig struct {
	// Interval in seconds at which measurements are stored
	Frequency int64

	// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
	Aggregation AggregationStrategy

	// Private, used internally...
	offset int
}

// Metrics maps metric names to their configuration.
var Metrics map[string]MetricConfig

// GetMetricFrequency returns the measurement frequency (seconds) of the
// named metric, or an error if the metric is not registered.
func GetMetricFrequency(metricName string) (int64, error) {
	if metric, ok := Metrics[metricName]; ok {
		return metric.Frequency, nil
	}
	return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}

// AddMetric registers a metric. A redundant registration keeps the existing
// entry and only raises its frequency to the maximum of the existing and the
// new value. It currently never fails; the error return is kept for callers.
func AddMetric(name string, metric MetricConfig) error {
	if Metrics == nil {
		// (Was `make(map[string]MetricConfig, 0)` — the zero capacity hint
		// is pointless.)
		Metrics = make(map[string]MetricConfig)
	}

	// Collapsed the previous doubly-nested if: `!=` followed by `<` is just `<`.
	if existing, ok := Metrics[name]; ok {
		if existing.Frequency < metric.Frequency {
			existing.Frequency = metric.Frequency
			Metrics[name] = existing
		}
		return nil
	}

	Metrics[name] = metric
	return nil
}
|
||||||
95
internal/memorystore/configSchema.go
Normal file
95
internal/memorystore/configSchema.go
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
// configSchema is the JSON schema validating the "metric-store" section of
// the configuration. Fixed user-facing typos in the descriptions
// ("directy" -> "directory", "subscibe" -> "subscribe").
const configSchema = `{
	"type": "object",
	"description": "Configuration specific to built-in metric-store.",
	"properties": {
		"checkpoints": {
			"description": "Configuration for checkpointing the metrics within metric-store",
			"type": "object",
			"properties": {
				"file-format": {
					"description": "Specify the type of checkpoint file. There are 2 variants: 'avro' and 'json'. If nothing is specified, 'avro' is default.",
					"type": "string"
				},
				"interval": {
					"description": "Interval at which the metrics should be checkpointed.",
					"type": "string"
				},
				"directory": {
					"description": "Specify the parent directory in which the checkpointed files should be placed.",
					"type": "string"
				},
				"restore": {
					"description": "When cc-backend starts up, look for checkpointed files that are less than X hours old and load metrics from these selected checkpoint files.",
					"type": "string"
				}
			}
		},
		"archive": {
			"description": "Configuration for archiving the already checkpointed files.",
			"type": "object",
			"properties": {
				"interval": {
					"description": "Interval at which the checkpointed files should be archived.",
					"type": "string"
				},
				"directory": {
					"description": "Specify the parent directory in which the archived files should be placed.",
					"type": "string"
				}
			}
		},
		"retention-in-memory": {
			"description": "Keep the metrics within memory for given time interval. Retention for X hours, then the metrics would be freed.",
			"type": "string"
		},
		"nats": {
			"description": "Configuration for accepting published data through NATS.",
			"type": "array",
			"items": {
				"type": "object",
				"properties": {
					"address": {
						"description": "Address of the NATS server.",
						"type": "string"
					},
					"username": {
						"description": "Optional: If configured with username/password method.",
						"type": "string"
					},
					"password": {
						"description": "Optional: If configured with username/password method.",
						"type": "string"
					},
					"creds-file-path": {
						"description": "Optional: If configured with Credential File method. Path to your NATS cred file.",
						"type": "string"
					},
					"subscriptions": {
						"description": "Array of various subscriptions. Allows to subscribe to different subjects and publishers.",
						"type": "array",
						"items": {
							"type": "object",
							"properties": {
								"subscribe-to": {
									"description": "Channel name",
									"type": "string"
								},
								"cluster-tag": {
									"description": "Optional: Allow lines without a cluster tag, use this as default",
									"type": "string"
								}
							}
						}
					}
				}
			}
		}
	}
}`
|
||||||
112
internal/memorystore/debug.go
Normal file
112
internal/memorystore/debug.go
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package memorystore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"fmt"
|
||||||
|
"strconv"
|
||||||
|
)
|
||||||
|
|
||||||
|
func (b *buffer) debugDump(buf []byte) []byte {
|
||||||
|
if b.prev != nil {
|
||||||
|
buf = b.prev.debugDump(buf)
|
||||||
|
}
|
||||||
|
|
||||||
|
start, len, end := b.start, len(b.data), b.start+b.frequency*int64(len(b.data))
|
||||||
|
buf = append(buf, `{"start":`...)
|
||||||
|
buf = strconv.AppendInt(buf, start, 10)
|
||||||
|
buf = append(buf, `,"len":`...)
|
||||||
|
buf = strconv.AppendInt(buf, int64(len), 10)
|
||||||
|
buf = append(buf, `,"end":`...)
|
||||||
|
buf = strconv.AppendInt(buf, end, 10)
|
||||||
|
if b.archived {
|
||||||
|
buf = append(buf, `,"saved":true`...)
|
||||||
|
}
|
||||||
|
if b.next != nil {
|
||||||
|
buf = append(buf, `},`...)
|
||||||
|
} else {
|
||||||
|
buf = append(buf, `}`...)
|
||||||
|
}
|
||||||
|
return buf
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) {
|
||||||
|
l.lock.RLock()
|
||||||
|
defer l.lock.RUnlock()
|
||||||
|
for i := 0; i < depth; i++ {
|
||||||
|
buf = append(buf, '\t')
|
||||||
|
}
|
||||||
|
buf = append(buf, '"')
|
||||||
|
buf = append(buf, lvlname...)
|
||||||
|
buf = append(buf, "\":{\n"...)
|
||||||
|
depth += 1
|
||||||
|
objitems := 0
|
||||||
|
for name, mc := range m.Metrics {
|
||||||
|
if b := l.metrics[mc.offset]; b != nil {
|
||||||
|
for i := 0; i < depth; i++ {
|
||||||
|
buf = append(buf, '\t')
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = append(buf, '"')
|
||||||
|
buf = append(buf, name...)
|
||||||
|
buf = append(buf, `":[`...)
|
||||||
|
buf = b.debugDump(buf)
|
||||||
|
buf = append(buf, "],\n"...)
|
||||||
|
objitems++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for name, lvl := range l.children {
|
||||||
|
_, err := w.Write(buf)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = buf[0:0]
|
||||||
|
buf, err = lvl.debugDump(m, w, name, buf, depth)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = append(buf, ',', '\n')
|
||||||
|
objitems++
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove final `,`:
|
||||||
|
if objitems > 0 {
|
||||||
|
buf = append(buf[0:len(buf)-1], '\n')
|
||||||
|
}
|
||||||
|
|
||||||
|
depth -= 1
|
||||||
|
for i := 0; i < depth; i++ {
|
||||||
|
buf = append(buf, '\t')
|
||||||
|
}
|
||||||
|
buf = append(buf, '}')
|
||||||
|
return buf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *MemoryStore) DebugDump(w *bufio.Writer, selector []string) error {
|
||||||
|
lvl := m.root.findLevel(selector)
|
||||||
|
if lvl == nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
|
||||||
|
}
|
||||||
|
|
||||||
|
buf := make([]byte, 0, 2048)
|
||||||
|
buf = append(buf, "{"...)
|
||||||
|
|
||||||
|
buf, err := lvl.debugDump(m, w, "data", buf, 0)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = append(buf, "}\n"...)
|
||||||
|
if _, err = w.Write(buf); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return w.Flush()
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user