742 Commits

Author SHA1 Message Date
Jan Eitzinger
d98d998106 Merge pull request #315 from ClusterCockpit/hotfix
Prepare Bugfix release 1.4.1
2024-12-10 16:54:17 +01:00
212c45e070 Prepare bug fix release 1.4.1 2024-12-10 16:45:05 +01:00
143fa9b6ed Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2024-12-10 16:36:22 +01:00
4849928288 Rename old column name for user
Fixes #314
2024-12-10 16:35:43 +01:00
Christoph Kluge
9248ee8868 fix: fix renamed column reference in searchbar workflow 2024-12-09 11:06:12 +01:00
Jan Eitzinger
1616d96732 Merge pull request #312 from ClusterCockpit/hotfix
Remove obsolete archive migration from build list
2024-12-05 10:43:38 +01:00
0bbedd1600 Remove obsolete archive migration from build list 2024-12-05 10:41:54 +01:00
Jan Eitzinger
c7e49644d8 Merge pull request #311 from ClusterCockpit/hotfix
Hotfix
2024-12-05 08:41:04 +01:00
010c903c74 Add known issues section to release notes 2024-12-05 08:35:10 +01:00
e4d12e3537 Merge branch 'master' into hotfix 2024-12-05 07:50:48 +01:00
051cc8384e Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2024-12-05 07:50:38 +01:00
49a94170d2 Add Fixme note for Energy calculation 2024-12-05 07:49:52 +01:00
Jan Eitzinger
42e8e37bd4 Merge pull request #309 from ClusterCockpit/devel
fix: Update to resampler handling different resolutions
2024-12-04 18:37:02 +01:00
Jan Eitzinger
5d2c350ce2 Merge pull request #310 from ClusterCockpit/hotfix
Hotfix
2024-12-04 18:12:19 +01:00
Aditya Ujeniya
85dc0362c1 fix: SimpleResampler fixed 2024-12-04 17:54:54 +01:00
Christoph Kluge
01c06728eb review footprint iconography and messages 2024-12-04 16:09:06 +01:00
Christoph Kluge
257250714d review polar plot component, adds min dataset 2024-12-04 15:22:19 +01:00
Aditya Ujeniya
3b769c3059 fix: Update to resampler handling different resolutions 2024-12-04 14:19:56 +01:00
Christoph Kluge
a7395ed45b remove config for polarPlotMetrics 2024-12-04 13:57:05 +01:00
Christoph Kluge
ab07c7928f fix: fix footprint logic, do not scale thresholds on multi node jobs 2024-12-04 13:56:00 +01:00
Christoph Kluge
b0c0d15505 fix stat filter url write 2024-12-04 10:55:29 +01:00
Jan Eitzinger
fcf50790da Merge pull request #307 from ClusterCockpit/hotfix
Prepare release v1.4.0
2024-12-04 06:39:35 +01:00
Christoph Kluge
1e43654607 Merge pull request #308 from ClusterCockpit/dev
Move to open Release PR
2024-12-03 17:19:09 +01:00
Christoph Kluge
4fecbe820d change order to match docs 2024-12-03 17:11:32 +01:00
Christoph Kluge
763c9dfa6b fix schema definition of apiAllowedIPs 2024-12-03 15:22:34 +01:00
9de5879786 Prepare release v1.4.0 2024-12-03 09:01:21 +01:00
Jan Eitzinger
9396e7492c Merge pull request #306 from ClusterCockpit/dev
fix: fix job list render for continuous mode on filter or sort changes
2024-12-03 07:47:46 +01:00
3ac3415178 Mark new ui options as required 2024-12-03 07:41:23 +01:00
1aae1c59d0 Make continous scroll the default 2024-12-03 07:27:10 +01:00
907e80a01c Update config json schema
Fixes #256
2024-12-03 07:26:36 +01:00
Christoph Kluge
8a10b69716 review findThresholds logic in metricPlot 2024-12-02 17:27:41 +01:00
Christoph Kluge
1a3cf7edd6 fix wrong var insert 2024-12-02 17:02:04 +01:00
Christoph Kluge
76d0fc979b fix: fix job list render for continuous mode on filter or sort changes 2024-12-02 12:49:43 +01:00
Jan Eitzinger
a42d8ece35 Merge pull request #305 from ClusterCockpit/dev
Fix Perl Skript to generate subCluster Config
2024-11-30 06:43:46 +01:00
Christoph Kluge
93377f53fc add lastThreshold to jobListRow 2024-11-29 14:15:15 +01:00
Christoph Kluge
c853d74ba0 Update frontend dependencies 2024-11-29 12:57:34 +01:00
Christoph Kluge
0b9f74f4f4 fix: fix plot render for summed metrics on scope change 2024-11-29 12:56:53 +01:00
Christoph Kluge
5da6baf828 fix: prevent jump to table head on continuous scroll load 2024-11-29 12:00:28 +01:00
5766945006 Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev 2024-11-28 17:12:21 +01:00
a53d473b58 Update subcluster-generate Perl Skript
Fixes #278
2024-11-28 17:12:05 +01:00
Jan Eitzinger
d1207ad80e Merge pull request #304 from ClusterCockpit/dev
Dev
2024-11-28 15:23:02 +01:00
Christoph Kluge
e2efe71b33 Merge branch 'dev' of https://github.com/ClusterCockpit/cc-backend into dev 2024-11-28 15:18:14 +01:00
Christoph Kluge
2aef6ed9c0 fix: oversight error on redirect target 2024-11-28 15:18:07 +01:00
Jan Eitzinger
fcb6db0603 Merge pull request #303 from ClusterCockpit/dev
Fix Cookie settings, upgrade packages Fixes #301
2024-11-28 15:02:05 +01:00
01b1136316 Fix Cookie settings, upgrade packages 2024-11-28 14:58:33 +01:00
Jan Eitzinger
2512fe9e75 Merge pull request #302 from ClusterCockpit/dev
fix: solve inconsistencies with filters, fixes #280
2024-11-28 09:29:40 +01:00
Christoph Kluge
f89b5cd2ec fix: solve inconsistencies with filters, fixes #280 2024-11-27 18:43:56 +01:00
Jan Eitzinger
ab284ed208 Merge pull request #299 from ClusterCockpit/dev
feat: Add buffered channel with worker thread for job start API
2024-11-27 14:45:26 +01:00
Christoph Kluge
00a578657c feat: add edit of notice box content to admin settings 2024-11-27 10:50:11 +01:00
Christoph Kluge
38ce40ae7d feat: redirect to requested page after login, solves #281 2024-11-26 16:21:16 +01:00
e1be6c7138 Remove UpdateEnergy from UpdateFootprint Task
Conputing total energy for running jobs does not make any sense
2024-11-26 10:49:44 +01:00
28539e60b0 Regenerate Swagger, fix tests, cleanup 2024-11-26 07:02:53 +01:00
adb11b3ed0 Re-enable Footprint worker 2024-11-25 17:35:22 +01:00
Jan Eitzinger
f1e6dedd44 Merge pull request #300 from ClusterCockpit/improve_footprint_transactions
Improve footprint transactions
2024-11-25 17:08:46 +01:00
Christoph Kluge
8ea1454c06 improve transaction init error handling 2024-11-25 17:03:59 +01:00
81b8d578f2 feat: Add buffered channel with worker thread for job start API
Fixes #293
Refactoring on the way
2024-11-25 16:44:50 +01:00
Jan Eitzinger
16b11db39c Merge pull request #298 from ClusterCockpit/dev
Database migration porting and keyword cleanup
2024-11-24 08:24:09 +01:00
0d923cc920 Ignore generated test artefacts 2024-11-24 07:49:26 +01:00
c523e93564 Update to new db schema 2024-11-24 07:48:30 +01:00
d588798ea1 Update test sqlite db 2024-11-24 07:41:39 +01:00
a11f165f2a Cleanup 2024-11-24 07:09:31 +01:00
Christoph Kluge
d4f487d554 comment debug logging 2024-11-22 17:56:55 +01:00
Christoph Kluge
93d5a0e532 correct input for check 2024-11-22 16:59:18 +01:00
Christoph Kluge
00ddc462d2 expand check, change to zero init 2024-11-22 16:31:35 +01:00
Christoph Kluge
5f4a74f8ba add check on returned stats 2024-11-22 15:57:28 +01:00
Christoph Kluge
a8eff6fbd1 small logging changes 2024-11-22 15:08:53 +01:00
Christoph Kluge
baa7367ebe change array init to empty array 2024-11-22 13:39:59 +01:00
Christoph Kluge
69f8a34aac more logging 2024-11-22 13:36:26 +01:00
Christoph Kluge
21b3a67988 add timers, add else case for transaction add 2024-11-22 13:13:43 +01:00
Christoph Kluge
d89574ce73 Use repo.loadStats, move transaction init 2024-11-22 12:42:49 +01:00
ddeac6b9d9 Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev 2024-11-21 15:54:58 +01:00
17906ec0eb Add down migrations for documentation 2024-11-21 15:54:46 +01:00
Christoph Kluge
311c088d3d removes debug logging 2024-11-21 15:47:09 +01:00
a2584d6083 Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev 2024-11-21 15:03:55 +01:00
35bd7739c6 fix: Replace reserved keywords in database schemas
Port migration to mariadb
2024-11-21 15:02:30 +01:00
7f43c88a39 Add example config for mariadb backend 2024-11-21 14:54:04 +01:00
Christoph Kluge
fc1c54a141 fix: use left join to keep unmatched stats query result rows 2024-11-21 14:39:03 +01:00
Jan Eitzinger
2af111c584 Merge pull request #297 from ClusterCockpit/hotfix
Update README
2024-11-16 07:52:52 +01:00
c093cca8b1 Update README 2024-11-16 07:45:18 +01:00
Jan Eitzinger
2bb1b78ba4 Merge pull request #296 from ClusterCockpit/hotfix
Hotfix
2024-11-16 07:42:15 +01:00
3ab26172c4 Port tests to new job archive version 2024-11-16 07:03:29 +01:00
cdd45ce88b Fix importers and add Energy footprint to import 2024-11-16 06:36:55 +01:00
210a7d3136 Debugging initDB archive import
Footprint working
EnergyFootprint still missing
2024-11-14 19:13:07 +01:00
92ec64d80f Update demo config file 2024-11-14 19:10:55 +01:00
ff37f71fdb Increase job archive required version 2024-11-14 19:10:37 +01:00
6056341525 Remove obsolete Archive Migration Tool 2024-11-14 19:09:56 +01:00
Jan Eitzinger
075612f5bd Merge pull request #294 from ClusterCockpit/hotfix
Disable UpdateFootprint service for debugging
2024-11-12 06:37:44 +01:00
1a87ed8210 Disable UpdateFootprint service for debugging 2024-11-09 09:24:51 +01:00
Jan Eitzinger
c05ffeb16d Merge pull request #289 from ClusterCockpit/dev
Update Q4 2024
2024-11-09 09:05:25 +01:00
ee3710c5ed Merge branch 'master' into dev 2024-11-09 09:01:04 +01:00
4327c4b1f7 Start archive worker 2024-11-08 19:44:11 +01:00
492e56a098 Put privilege drop to previous location 2024-11-08 19:23:54 +01:00
f0257a2784 Drop privileges after server start 2024-11-08 19:16:56 +01:00
ec1ead89ab Switch back to previous meaning of energy metric attribute 2024-11-08 06:27:27 +01:00
Christoph Kluge
ae53e87aba Merge pull request #292 from ClusterCockpit/hotfix
Hotfix: add orderBy param to jobRepo.Find
2024-10-31 15:51:32 +01:00
Christoph Kluge
939dd2320a Cleanup debug logging, keep orderBy param for repo.Find 2024-10-31 15:47:45 +01:00
Christoph Kluge
2c8b73e2e2 add logged timing to homeroute calls 2024-10-31 14:34:32 +01:00
Christoph Kluge
eabc6212ea add debug logging for user context and web render 2024-10-31 13:36:27 +01:00
Christoph Kluge
c120d6517f change logging key, add args, add orderby id job.Find() 2024-10-30 16:24:58 +01:00
Christoph Kluge
597ee1dad7 change log to request and sql prints 2024-10-29 18:39:23 +01:00
Christoph Kluge
c4a901504d change debug format key 2024-10-29 18:25:41 +01:00
Christoph Kluge
f5cc5d07fd add more logging to rest api stopJobByRequest 2024-10-29 17:01:05 +01:00
Christoph Kluge
8a0e6c921c Merge pull request #291 from ClusterCockpit/hotfix
add logging to rest stopJobHandler
2024-10-29 08:21:33 +01:00
Christoph Kluge
bf1bff9ace fix tagManagement condition 2024-10-28 16:42:19 +01:00
Christoph Kluge
06f24e988f fix incorrect config conditions 2024-10-28 11:56:34 +01:00
Christoph Kluge
ae327f545e add logging to rest stopJobHandler 2024-10-25 15:23:49 +02:00
Christoph Kluge
01102cb9b0 feat: add updateUserOnLogin config option for oidc, jwt 2024-10-23 16:17:47 +02:00
Christoph Kluge
934d1a6114 fix: use configured footprint statType for update 2024-10-23 16:16:28 +02:00
Christoph Kluge
6f74c8cb77 feat: make cron worker frequency configurable 2024-10-23 16:15:44 +02:00
Christoph Kluge
63b9e619a4 fix: fixed and changed to footprint update by transactions 2024-10-22 14:37:22 +02:00
Christoph Kluge
82e28f26d7 feedback: add jobID copy btn to jobInfo 2024-10-21 15:45:27 +02:00
Christoph Kluge
ca9fd96baa update frontend dependencies and save them 2024-10-18 10:08:43 +02:00
Christoph Kluge
2f0460d6ec feat: make quick select starttimes url copyable 2024-10-10 18:35:53 +02:00
Christoph Kluge
37f4ed7770 add additional indices for sorting performance 2024-10-09 17:52:46 +02:00
Christoph Kluge
e3104c61cb filter taglist scope visibility by role, add global tag handling to support role 2024-10-09 13:23:06 +02:00
Christoph Kluge
bc434ee8cb add managed projects, update navbar layout, fix small issues 2024-10-09 11:08:14 +02:00
Christoph Kluge
f4102b948e rework clientwidth binds and size defaults for histograms 2024-10-08 18:46:59 +02:00
Christoph Kluge
ed991de11a fix: ad dmissing resampleConfig handling to scope select 2024-10-08 17:54:12 +02:00
Christoph Kluge
322e161064 cleanup leftover 2024-10-08 17:36:28 +02:00
Christoph Kluge
1adc741cc2 remove dev logging 2024-10-08 17:32:51 +02:00
Christoph Kluge
4eff87bbf7 update frontend dependency manager version, adds license info 2024-10-08 17:31:47 +02:00
Christoph Kluge
fc6970d08a fix plotgrid display error, use plotheight default 2024-10-08 17:31:15 +02:00
Christoph Kluge
f616c7e1c6 remove width tags from slot defs 2024-10-08 15:26:09 +02:00
Christoph Kluge
89ec749172 experimental rework of metricplot render and resize handling 2024-10-08 15:25:31 +02:00
Christoph Kluge
182f0f2c64 fix: add missing default resolution case 2024-10-08 10:42:13 +02:00
Christoph Kluge
e3681495ce update frontend dependencies 2024-10-07 17:40:21 +02:00
Christoph Kluge
37415fa261 improve job list toolbar layouting, smaller layout fixes 2024-10-07 17:36:40 +02:00
Christoph Kluge
7243dbe763 replace plotTable with new bootstrap plotGrid component
- helps with narrow window sizes
- plotTable kept for now
2024-10-02 17:48:46 +02:00
Christoph Kluge
0ff5c4bedd Make global searchfield adaptive to screensize 2024-10-02 15:43:46 +02:00
Christoph Kluge
f047f89ad5 fix column count and add margins 2024-10-02 14:48:21 +02:00
Christoph Kluge
0eb0aa1d3b change default range to 12h, rework layout in system node views 2024-10-02 14:37:32 +02:00
Christoph Kluge
6019891591 add energy filterr in new component 2024-10-01 16:25:09 +02:00
Christoph Kluge
615281601c fix wrong flag labelling, change to kWh energy calculation 2024-10-01 14:58:19 +02:00
Christoph Kluge
82baf5d384 fix deepCopy of statisticsSeries for archived jobs 2024-10-01 12:48:32 +02:00
Christoph Kluge
6fe93ecb7e fix adaptive legend title 2024-10-01 11:42:46 +02:00
Christoph Kluge
b3222f3523 fix: archived statisticsSeries with mean data now shown again 2024-09-30 18:31:49 +02:00
Christoph Kluge
3b94863521 add sorting for job energy column 2024-09-30 18:30:26 +02:00
Christoph Kluge
582dc8bf46 add energy column index 2024-09-30 18:29:46 +02:00
Christoph Kluge
a9868fd275 display energySumary only if energy data is present 2024-09-30 16:43:38 +02:00
Christoph Kluge
218e56576a round calculated updateFootprint values to two digits 2024-09-30 16:33:28 +02:00
Christoph Kluge
c50e79375a fix ccb side of unintentionally added endpoint format change in ccms 2024-09-30 15:27:49 +02:00
Christoph Kluge
dcb8308f35 add icons to energySummary component 2024-09-30 12:27:32 +02:00
Christoph Kluge
183b310696 add base constant to tooltip 2024-09-27 13:48:14 +02:00
Christoph Kluge
c7d0c86d52 add missing template changes 2024-09-27 13:46:19 +02:00
Christoph Kluge
48225662b1 feat: display energy usage in job view
- optional emission constant config line added
2024-09-27 13:45:44 +02:00
Christoph Kluge
f53fc088ec fix bugs in autoupdater query builder returns 2024-09-25 18:05:04 +02:00
Christoph Kluge
05517fcbcd use direct db execution for autoupdaters
- transactions need to be reinvestigated
2024-09-25 18:04:29 +02:00
Christoph Kluge
18af51b0a4 improve tag list template 2024-09-25 13:24:01 +02:00
Christoph Kluge
ede3da7a87 improve tag scope clarity 2024-09-25 12:23:21 +02:00
Christoph Kluge
8e3327ef6a Merge branch 'sample_resolution_select' into dev 2024-09-24 17:43:15 +02:00
Christoph Kluge
827f6daabc Merge branch '275_tag_scope_jobview_rework' into dev 2024-09-24 17:25:20 +02:00
Christoph Kluge
2567442321 Merge branch 'master' into dev 2024-09-24 17:22:14 +02:00
Christoph Kluge
9cf5478519 Merge pull request #288 from ClusterCockpit/hotfix
fix: fix crashing job view if roofline metrics missing
2024-09-24 14:47:51 +02:00
Christoph Kluge
e5275311c2 fix: fix crashing job view if roofline metrics missing 2024-09-24 14:37:39 +02:00
Christoph Kluge
21e4870e4c feat: add configurability to frontend plot zoom 2024-09-24 11:13:39 +02:00
Christoph Kluge
beba7c8d2e fix tag count bug if names non-unique, set global as default scope if none entered 2024-09-19 15:21:32 +02:00
Christoph Kluge
fe35313305 handle tag management based on role 2024-09-19 11:15:46 +02:00
Christoph Kluge
d7a8bbf40b Rework tag and tag edit placement, add other feedback
- admin message shown primarily if exists
- comment demo summary tab
2024-09-18 17:23:29 +02:00
Aditya Ujeniya
f1893c596e Versioning to query endpoint 2024-09-17 14:36:42 +02:00
Christoph Kluge
6367c1ab4d Merge branch 'dev' into 275_tag_scope_jobview_rework 2024-09-17 14:32:06 +02:00
Christoph Kluge
9579887fc4 Merge branch '275_add_tag_scope' into 275_tag_scope_jobview_rework 2024-09-16 15:04:01 +02:00
Christoph Kluge
e29be2f140 fix missing scope field request for jobview 2024-09-16 15:03:38 +02:00
Christoph Kluge
2736b5d1ef change background color for tag listitems 2024-09-16 15:00:42 +02:00
Christoph Kluge
ff52fb16b6 Merge branch '275_add_tag_scope' into 275_tag_scope_jobview_rework 2024-09-16 13:55:17 +02:00
Christoph Kluge
ccbf3867e1 change global tag color from gray to magenta 2024-09-16 13:54:40 +02:00
Christoph Kluge
f0de422c6e rework tagManagement modal render 2024-09-11 11:28:11 +02:00
Christoph Kluge
64cc19b252 remove icon from metric select, change color 2024-09-10 16:53:34 +02:00
Christoph Kluge
26226009f0 Merge branch 'rework_jobview_header' into 275_tag_scope_jobview_rework 2024-09-10 16:44:56 +02:00
Christoph Kluge
d10e09da02 button width to tablecolumn width 2024-09-10 16:43:43 +02:00
Christoph Kluge
00a2e58fee Merge branch 'rework_jobview_header' into 275_tag_scope_jobview_rework 2024-09-10 12:35:21 +02:00
Christoph Kluge
b1cb45dfe6 add overflow-x to statsTable, use sveltestrap input 2024-09-10 12:14:34 +02:00
Christoph Kluge
a2951d1f05 Add message to tagManegement 2024-09-10 09:45:47 +02:00
Christoph Kluge
c0b1e97602 adds message if no tags attached to job 2024-09-10 09:23:01 +02:00
Christoph Kluge
71621a9dc4 Wrap plottable in job view 2024-09-09 19:01:07 +02:00
Christoph Kluge
b3ed2afebe feat: move tag management to new job view header 2024-09-09 18:06:13 +02:00
Christoph Kluge
704620baff Remove unnecessary bind 2024-09-09 11:41:14 +02:00
Christoph Kluge
8feb805167 Merge branch 'rework_jobview_header' into 275_tag_scope_jobview_rework 2024-09-09 11:39:52 +02:00
Christoph Kluge
065b32755a small size correction if footprint not shown 2024-09-09 11:09:21 +02:00
Christoph Kluge
1b5f4bff2c feat: SyncUserOnLogin now updates name of token logged user 2024-09-09 10:32:26 +02:00
Christoph Kluge
8e1c5a485f Improve grid scaling 2024-09-06 12:00:33 +02:00
5fa6c9db35 Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev 2024-09-06 11:25:03 +02:00
5482b9be2c Add debug output 2024-09-06 11:24:54 +02:00
Christoph Kluge
7400273b0a Manual merge changes not staged last time ... 2024-09-05 17:27:18 +02:00
Christoph Kluge
0b7cdde4a0 Merge branch 'dev' into sample_resolution_select
- Moved resample changes to metricDataDispatcher
- Added res argument to archiver, updateFootprintService
2024-09-05 17:26:43 +02:00
Christoph Kluge
d5382aec4f Merge branch 'dev' into 275_add_tag_scope 2024-09-05 16:44:41 +02:00
Christoph Kluge
df484dc816 rework job view header, change footprint to summary component 2024-09-05 16:44:03 +02:00
Christoph Kluge
7ea4086807 Rework sqlite indices in v8 migration 2024-09-05 15:06:38 +02:00
Christoph Kluge
b04bf6a951 fix missing condition in migration 2024-09-05 15:00:43 +02:00
7c33dcf630 Bugfix in footprint update 2024-09-05 14:58:08 +02:00
5e65e21f0b Add quotes in duration query 2024-09-05 12:38:39 +02:00
53ca38ce53 Add debug output to duration query 2024-09-05 11:18:00 +02:00
Christoph Kluge
398e3c1b91 feat: split concurrent jobs list to own scrollable component 2024-09-04 10:23:23 +02:00
508978d586 Initial attempt to update footprints in transaction 2024-09-03 15:59:01 +02:00
e267481f71 Cleanup transaction api 2024-09-03 15:40:02 +02:00
Christoph Kluge
193bee5ac8 fix: prevent addition of existing scopes to table 2024-09-03 14:16:16 +02:00
f58efa2871 Allow to combine job update queries 2024-09-03 13:41:00 +02:00
6568b6d723 Prepare transaction API for general usage 2024-09-03 13:40:11 +02:00
Christoph Kluge
4b1b34d8a7 remove logging, remove forced change to node scope 2024-09-03 13:10:44 +02:00
39c09f8565 Introduce job duration update task 2024-09-03 10:03:38 +02:00
Christoph Kluge
275a77807e fix typo in migration 2024-09-03 09:40:00 +02:00
Christoph Kluge
6443541a79 fix SQL migration syntax 2024-09-03 09:34:45 +02:00
Christoph Kluge
5eb6f7d307 fix: user name join not required for normal jobStats 2024-09-02 18:45:33 +02:00
Christoph Kluge
bce2a66177 Merge branch 'change_resolution_on_zoom' into sample_resolution_select 2024-09-02 18:23:13 +02:00
Christoph Kluge
7602641909 feat: change to resolution increase on zoom 2024-09-02 18:22:34 +02:00
Christoph Kluge
54f3a261c5 Rewrite sqlite indices from scratch for v8 migration 2024-09-02 18:20:32 +02:00
Christoph Kluge
906bac965f feat: add dropdown to user and project list navbar 2024-09-02 17:55:12 +02:00
Christoph Kluge
4ec1de6900 fix constant gql query 2024-09-02 17:54:45 +02:00
Christoph Kluge
8ded131666 Change user list name lookup to join 2024-09-02 17:54:25 +02:00
47b14f932e Start footprint service 2024-09-02 12:07:44 +02:00
Aditya Ujeniya
838ebb3f69 Updates res 2024-09-01 22:54:43 +02:00
c459724114 Resolve build errors 2024-08-30 13:50:49 +02:00
b0c9d1164d Add initial version of footprint update service
Not tested yet
2024-08-30 07:22:40 +02:00
7c51d88501 Add stub for Footprint update service 2024-08-29 08:45:04 +02:00
5b03cf826b feat: Add total energy and energy footprint 2024-08-29 07:26:49 +02:00
f305863616 Bugs fixed in unit tests and archiver init 2024-08-28 12:26:35 +02:00
db5809d522 Move rest of archiveing code into new archive package 2024-08-28 11:13:54 +02:00
Jan Eitzinger
83df6f015c Merge pull request #287 from ClusterCockpit/refactor-archiving
Refactor archiving
2024-08-28 10:14:46 +02:00
e7231b0e13 Finish refactoring
Add new packages:
- metricDataDispatcher
- archiver
2024-08-28 10:03:04 +02:00
Christoph Kluge
cff60eb51c increase server timeout limit, improve and add db indices
- change energy footprint key to string
2024-08-27 17:43:48 +02:00
f914a312f5 Introduce metricDataDispatcher
Does not compile yet
2024-08-27 16:44:16 +02:00
56ebb301ca Start to restructure
Does not compile
2024-08-27 10:14:33 +02:00
Christoph Kluge
a59df12595 init basic proof of concept 2024-08-26 17:37:23 +02:00
Christoph Kluge
5cc7fc6ccb Merge branch 'sample_resolution_select' of https://github.com/ClusterCockpit/cc-backend into sample_resolution_select 2024-08-26 09:55:36 +02:00
Christoph Kluge
55027cb630 fix: add resolution 60 default to ccms nodeData query 2024-08-26 09:55:33 +02:00
Aditya Ujeniya
036eba68e1 Fix for resampler 2024-08-25 16:13:43 +02:00
Christoph Kluge
d34e0d9348 fix: omit resources prop from metricPlot, use series for legend instead 2024-08-23 16:59:45 +02:00
Christoph Kluge
31765ce0ef Merge branch 'dev' into 275_add_tag_scope 2024-08-23 14:52:42 +02:00
Christoph Kluge
9fe7cdca92 fix: fix plot labeling if specific host selected, hide loadall if only node returned 2024-08-23 13:53:15 +02:00
Christoph Kluge
adc3502b6b cleanup dev logline 2024-08-23 13:37:42 +02:00
Christoph Kluge
95fe369648 fix: add additionally loaded scopes to statsTable again 2024-08-23 13:26:56 +02:00
Christoph Kluge
01845a0cb7 add comment regarding metric data load 2024-08-22 18:33:18 +02:00
Christoph Kluge
708eaf4178 fix dev leftovers 2024-08-22 17:55:21 +02:00
Christoph Kluge
d629a58712 Merge branch 'dev' into sample_resolution_select 2024-08-22 17:33:16 +02:00
Christoph Kluge
90886b63d6 Merge pull request #286 from ClusterCockpit/devel
Sampling Feature for archived and fresh data
2024-08-22 17:16:28 +02:00
Christoph Kluge
084f89fa32 fix: fix svelte source paths in makefile 2024-08-22 14:46:27 +02:00
Aditya Ujeniya
ceb3a095d8 Sampling Feature for archived and fresh data 2024-08-22 14:29:51 +02:00
Christoph Kluge
1758275f11 fix: fix getMetricConfigDeep util function
- threw error for mismatching metric availability between clusters
2024-08-22 14:01:27 +02:00
Christoph Kluge
e74e506ffe cleanup outdated code 2024-08-20 16:41:35 +02:00
Christoph Kluge
599a36466a fix new data reactivity for accelerators 2024-08-20 14:52:13 +02:00
Christoph Kluge
613e128cab cleanup dev logging 2024-08-20 11:51:38 +02:00
Christoph Kluge
e4f8022b7a change to one reactive metric data load on two variables 2024-08-20 11:39:19 +02:00
Jan Eitzinger
5603c41900 Merge pull request #284 from ClusterCockpit/Refactor-job-footprint
Refactor job footprint
2024-08-19 12:15:59 +02:00
a8a27c9b51 Add project index to job table 2024-08-19 12:11:53 +02:00
Christoph Kluge
b70de5a4be Handle single update data 2024-08-16 16:35:17 +02:00
Christoph Kluge
b1fd07cd30 add single update gql queries to metric wrapper 2024-08-16 14:50:31 +02:00
Christoph Kluge
6ab2e02fe6 Merge branch 'Refactor-job-footprint' into sample_resolution_select 2024-08-16 13:05:09 +02:00
Christoph Kluge
5535c5780c Merge branch 'Refactor-job-footprint' of https://github.com/ClusterCockpit/cc-backend into Refactor-job-footprint 2024-08-15 14:33:08 +02:00
Christoph Kluge
49e0a2c055 fix: add compatibility for footprint metrics without config 2024-08-15 14:33:04 +02:00
5e074dad10 Resolve error in migration 2024-08-15 12:39:14 +02:00
d6a88896d0 Refactor: Reduce struct memory size 2024-08-15 12:36:21 +02:00
5c99f5f8bb Only add footprint columns if not 0 2024-08-15 12:35:11 +02:00
e1faba0ff2 Update cluster json schema 2024-08-15 10:39:32 +02:00
ba2f406bc0 Extend sqlite db migration 2024-08-15 09:41:54 +02:00
9b6db4684a Refactor: Remove redundant code 2024-08-15 08:53:49 +02:00
Christoph Kluge
561fd41d5d fix: add accelerator scope to to-be archived scopes
- if numAcc > 0
- fixes Add accelerator scope to archive requests #282
2024-08-13 17:49:28 +02:00
Christoph Kluge
ce9995dac7 fix: fix wrongly inserted gql request and import path error 2024-08-08 12:29:45 +02:00
Christoph Kluge
0afaea9513 initial commit with example event dispatch 2024-08-08 12:28:36 +02:00
Christoph Kluge
9b5c6e3164 fix StartJobTest, add tag_scope to migration 2024-08-05 10:37:42 +02:00
Christoph Kluge
e6ebec8c1e fix TestGetTags test, was missing scope and ctx 2024-08-05 10:19:00 +02:00
Christoph Kluge
2551921ed6 fix: wrong display of tag after filter select
- exitent pills were non-updated on change of key
2024-08-02 18:14:24 +02:00
Christoph Kluge
e02575aad7 adds comments 2024-08-02 16:42:55 +02:00
Christoph Kluge
ff3502c87a fix: fix tag filter results
- displayed multiple identical entries before
- job count was incorrect before
2024-08-02 16:11:47 +02:00
Christoph Kluge
017f9b2140 feat: Add tag scopes to front and backend, initial commit 2024-08-01 18:59:24 +02:00
Christoph Kluge
c80d3a6958 fix: errors in import paths 2024-08-01 16:11:23 +02:00
Christoph Kluge
3ca1127685 Restructure frontend svelte file src folder
- Goal: Dependency structure mirrored in file structure
2024-07-26 12:34:18 +02:00
Christoph Kluge
18369da5bc Fix small oversight. remove wip plot component 2024-07-26 10:46:13 +02:00
Christoph Kluge
e65100cdc8 Add vscode @component comment to every svelte file, remove unused js exports 2024-07-25 17:10:00 +02:00
Christoph Kluge
6a1cb51c2f Refactor svelte frontend
- Adapt to new metricConfig logic
- Footprint-Metrics generalized for bar card
- Footprint-Metrics in stats filter and sorting
- Frontend always uses GQL, except adminOptions
- Job View will load scopes for all metrics on request
2024-07-22 15:41:33 +02:00
c4d93e492b Remove bugs in main init 2024-07-20 10:03:14 +02:00
c2f72f72ac Update go dependencies 2024-07-20 08:59:51 +02:00
721b6b2afa Change footprint variabel from bool to string
The footprint variable also indicates the type of statistic used now
2024-07-20 08:59:07 +02:00
b6f011c669 Move footprint update task placeholder to taskmanager 2024-07-16 12:34:27 +02:00
801607fc16 Refactor main
Convert components to Singletons
Restructure main package
Reduce dependencies
2024-07-16 12:08:10 +02:00
01a4d33514 Refactor: Archive workers and Tasks
Work in progress
2024-07-14 11:18:38 +02:00
e348ec74fd Fix bugs in stats.go 2024-07-12 14:08:48 +02:00
0458675608 Convert histogram query to json keys 2024-07-12 13:42:12 +02:00
c61ffce0e9 Make job query on metric stats generic 2024-07-12 13:21:19 +02:00
68a97dc980 Add footprint to global metric list 2024-07-12 13:20:54 +02:00
a07d167390 Fix build error with updated prometheus client 2024-07-12 09:17:31 +02:00
Christoph Kluge
a8721dcc69 Regenerate gql after internal merge 2024-07-11 17:37:53 +02:00
Christoph Kluge
68cf952ac6 Merge branch 'Refactor-job-footprint' of https://github.com/ClusterCockpit/cc-backend into Refactor-job-footprint 2024-07-11 17:33:21 +02:00
Christoph Kluge
e14d6a81fe fix: fix db migration to v8, changes key name to cpu_load 2024-07-11 17:24:33 +02:00
Christoph Kluge
a4912893a8 Frontend refactor backend changes 2024-07-11 17:23:59 +02:00
0adfb631ef Update go version to 1.22 for Github test workflow 2024-07-11 17:11:01 +02:00
b64ce1f67f Add LowerIsBetter Metric boolean. Upgrade dependencies. 2024-07-11 16:58:12 +02:00
e8e3b1595d Switch to Go 1.22 to get rid of global loop variable bug 2024-07-11 16:12:20 +02:00
f1427d5272 Add global metric list including graphQL query 2024-07-11 11:09:14 +02:00
Christoph Kluge
bf6b87d65c Fix circular import after merge 2024-07-09 09:50:32 +02:00
Christoph Kluge
0240997257 Merge branch '263_use_median_for_statsseries' into Refactor-job-footprint 2024-07-09 09:28:21 +02:00
Christoph Kluge
f1e341f0b9 Initial commit for frontend refactor 2024-07-09 09:17:50 +02:00
a54acb8c42 Merge branch '264_user_api_access' into Refactor-job-footprint 2024-07-05 16:17:57 +02:00
c6ede67589 Add energy footprint 2024-07-05 16:16:01 +02:00
Christoph Kluge
11176da5d8 Merge branch 'Refactor-job-footprint' into 264_user_api_access 2024-07-05 16:11:42 +02:00
Christoph Kluge
0a604336c4 Fix other apitest subtests 2024-07-05 15:42:08 +02:00
Christoph Kluge
be9df7649f fix: setup user in api test config 2024-07-05 15:25:24 +02:00
Christoph Kluge
63fb923995 fix: fix api test router init 2024-07-05 13:16:21 +02:00
Christoph Kluge
3afe40083d rename api userconfig to frontend, return json on api auth error 2024-07-05 11:48:06 +02:00
Christoph Kluge
9d4767539c Restructure config frontend, add user jwt request 2024-07-04 17:30:16 +02:00
ac9bba8b5b Restructure and simplify job repo 2024-07-04 15:05:24 +02:00
80c46bea7f Fix bugs and failed testcases 2024-07-04 14:14:27 +02:00
Christoph Kluge
614f694777 fix typo in api url 2024-07-04 11:41:17 +02:00
Christoph Kluge
1072d7b449 Improve auth handling of rest apis used in frontend for compatibility 2024-07-04 11:16:45 +02:00
1b70596735 Fix and test subcluster Config 2024-07-04 06:49:59 +02:00
Christoph Kluge
61eebc9fbd Rework initial commit
- moved frontend configuration api to new subrouter for compatibility
2024-07-03 17:24:26 +02:00
b05909969f Add test for clusterConfig 2024-07-03 12:11:43 +02:00
bd89ce7cc9 Extend schema and start Unit test implementation
Does not compile and work yet
2024-07-02 10:13:11 +02:00
130613b717 Fix build errors
Code not yet functional
2024-06-28 17:08:28 +02:00
b3c1f39a0e Merge branch 'master' into Refactor-job-footprint 2024-06-28 16:50:04 +02:00
97c807cd33 Add migration for footprint 2024-06-28 16:49:24 +02:00
aede5f71ec Introduce adapted graphql schema 2024-06-28 16:49:02 +02:00
786770f56a Start to convert to new footprint layout 2024-06-28 16:48:10 +02:00
Jan Eitzinger
74d4f00784 Merge pull request #276 from ClusterCockpit/hotfix
Hotfix
2024-06-28 15:43:54 +02:00
d61c4235dc Merge branch 'master' into hotfix 2024-06-28 15:41:52 +02:00
e8794b8c79 Add graphql generation target to Makefile 2024-06-28 15:41:11 +02:00
552da005dc Add make target for swagger UI generator 2024-06-26 05:41:42 +02:00
Jan Eitzinger
51452d2e68 Merge pull request #272 from ClusterCockpit/hotfix
Export package runtimeEnv
2024-06-25 07:16:22 +02:00
5c5484b4d2 Export package runtimeEnv 2024-06-25 07:12:46 +02:00
Jan Eitzinger
9974a851e8 Merge pull request #271 from ClusterCockpit/hotfix
Prepare release 1.3.1
2024-06-22 08:59:35 +02:00
6c0bfc6c35 Prepare release 1.3.1 2024-06-22 08:55:37 +02:00
Christoph Kluge
41bbd203cc Merge pull request #270 from ClusterCockpit/hotfix
fix: make foorprint from statsSeries nullsafe
2024-06-21 09:38:16 +02:00
Christoph Kluge
4344c26bef fix: make foorprint from statsSeries nullsafe 2024-06-19 13:12:51 +02:00
Jan Eitzinger
e1c1c06fb2 Merge pull request #268 from ClusterCockpit/hotfix
Hotfix
2024-06-14 14:27:13 +02:00
Christoph Kluge
70e63764ff fix: allow single partial errors on otherwise non-empty returned metric array 2024-06-13 12:38:29 +02:00
Christoph Kluge
d10f3e3af6 add maxwidth to projects column 2024-05-27 15:00:00 +02:00
Christoph Kluge
a4397d5447 fix: add scramble to textfilter component 2024-05-27 12:09:55 +02:00
Christoph Kluge
320c87a1db fix: add additional 30d fitler to searchbar fallback handling 2024-05-27 11:11:25 +02:00
Christoph Kluge
8d1228c9e8 feat: rework list searchbar, adds project-specific mode, add to user-joblist 2024-05-23 15:43:09 +02:00
Christoph Kluge
420bec7c46 fix: fix jobname and arrayjobid timeouts by adding additional 30d filter
- improve archive worker logs
- add arrayjobid filter to url if used
2024-05-23 11:53:23 +02:00
Christoph Kluge
ba1658beac fix: correct selectable histogram placement in status view 2024-05-22 18:50:52 +02:00
Christoph Kluge
575753038b feat: add jobname filter to joblist textfilter
- allows combination of filters now including jobname
- rename component
2024-05-22 18:22:35 +02:00
Christoph Kluge
061c9f0979 fix: deselected metrics were marked as missing on new jobview load 2024-05-22 15:57:22 +02:00
Christoph Kluge
b48d1b8ad6 fix: correct status view columns on mobile displays 2024-05-22 14:21:54 +02:00
dff7aeefb8 Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2024-05-16 11:19:00 +02:00
54f7980162 fix: Add required key to init config file 2024-05-16 11:18:57 +02:00
Christoph Kluge
684cb5a376 feat: change statistics render of metric plot to min/max/median
- #263
2024-05-08 16:17:42 +02:00
Christoph Kluge
597bccc080 fix: add SQL JSON validity check to meta_data query 2024-05-06 13:15:15 +02:00
Christoph Kluge
72557fd0bf feat: add statistics series render to job view metric plots 2024-05-02 16:32:01 +02:00
Jan Eitzinger
0b2f2214f9 Merge pull request #259 from ClusterCockpit/hotfix
Hotfix: Improve hasNextPage and jobName Queries
2024-04-26 12:20:52 +02:00
Christoph Kluge
ef51e69ffb feat: Add roofline color scale for time information 2024-04-26 11:11:55 +02:00
Christoph Kluge
c9eb40f455 fix: fix metricPlot y zoom reset 2024-04-25 16:59:27 +02:00
Christoph Kluge
b66750339d add default value, remove unused argument 2024-04-25 16:59:04 +02:00
Christoph Kluge
136460567c Feat: Add by-user setting for paging type
- Solves Add User-Configuration for Infinite Scroll #262
2024-04-25 15:00:53 +02:00
Christoph Kluge
f80123c85d Fix: Add missing nullsafe for admin user table 2024-04-24 13:47:29 +02:00
Christoph Kluge
a22340196f Fix: Improve jobName query by parsing DB field as JSON
- No DB mirgration required
- SQLite internal EXTRACT function used
2024-04-22 12:14:40 +02:00
Christoph Kluge
cbaeffde2c fix: improve speed of hasNextPage query for infinite scroll 2024-04-22 11:29:31 +02:00
b67f5436f8 Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2024-04-21 15:04:09 +02:00
b637ddeb28 Refactor and reformat userConfig 2024-04-21 15:04:00 +02:00
Jan Eitzinger
a20b7eacd6 Merge pull request #258 from ClusterCockpit/hotfix
Hotfix
2024-04-15 12:58:52 +02:00
6df639a0c3 Prepare Release 1.3.0 2024-04-15 12:54:50 +02:00
Christoph Kluge
d4a9887532 Merge branch 'master' into hotfix 2024-04-15 10:39:00 +02:00
Christoph Kluge
79b08a181d fix: trigger continuous load condition earlier 2024-04-15 10:36:26 +02:00
Christoph Kluge
758cef1bd3 Merge pull request #257 from ClusterCockpit/hotfix
Hotfix
2024-04-12 15:43:25 +02:00
fb8bbea99d Remove year in copyright notice 2024-04-11 23:04:30 +02:00
9b261a4778 Merge branch 'master' into hotfix 2024-04-10 14:24:12 +02:00
Christoph Kluge
aafa29db8b fix: add acc scope to job query if acc >= 1 2024-04-03 14:15:04 +02:00
Jan Eitzinger
896c39f9bc Merge pull request #255 from ClusterCockpit/dependabot/go_modules/github.com/go-jose/go-jose/v3-3.0.3
Bump github.com/go-jose/go-jose/v3 from 3.0.1 to 3.0.3
2024-03-29 06:09:01 +01:00
dependabot[bot]
3a97ff7f57 Bump github.com/go-jose/go-jose/v3 from 3.0.1 to 3.0.3
Bumps [github.com/go-jose/go-jose/v3](https://github.com/go-jose/go-jose) from 3.0.1 to 3.0.3.
- [Release notes](https://github.com/go-jose/go-jose/releases)
- [Changelog](https://github.com/go-jose/go-jose/blob/v3.0.3/CHANGELOG.md)
- [Commits](https://github.com/go-jose/go-jose/compare/v3.0.1...v3.0.3)

---
updated-dependencies:
- dependency-name: github.com/go-jose/go-jose/v3
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-29 05:06:00 +00:00
Jan Eitzinger
7518c41fab Merge pull request #254 from ClusterCockpit/dependabot/go_modules/github.com/containerd/containerd-1.6.26
Bump github.com/containerd/containerd from 1.6.18 to 1.6.26
2024-03-29 06:04:58 +01:00
dependabot[bot]
8cb00a5340 Bump github.com/containerd/containerd from 1.6.18 to 1.6.26
Bumps [github.com/containerd/containerd](https://github.com/containerd/containerd) from 1.6.18 to 1.6.26.
- [Release notes](https://github.com/containerd/containerd/releases)
- [Changelog](https://github.com/containerd/containerd/blob/main/RELEASES.md)
- [Commits](https://github.com/containerd/containerd/compare/v1.6.18...v1.6.26)

---
updated-dependencies:
- dependency-name: github.com/containerd/containerd
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-03-28 16:45:31 +00:00
Jan Eitzinger
baa51db26c Merge pull request #252 from ClusterCockpit/20_infinite_scroll
20 infinite scroll
2024-03-28 17:43:02 +01:00
Christoph Kluge
fc260b2291 fix number of cols to prevent uplot overflow
-relates to Broken layout  in status view for roofline plot #225
2024-03-28 17:26:31 +01:00
Christoph Kluge
43ebb01b63 fix: move scroll event behind condition 2024-03-28 15:57:24 +01:00
Jan Eitzinger
067dc0df5d feat: Add OpenID Connect Authentication support
236 user authentication using keycloak or any openid client for using external auth providers such as ldap GitHub google
2024-03-28 14:36:58 +01:00
6828c97415 Add central function to persist users on Login 2024-03-28 14:22:23 +01:00
50401e0030 Fix conditional rendering of OIDC button in login 2024-03-28 13:18:25 +01:00
c3d2508693 Update package deps after merge 2024-03-28 12:09:08 +01:00
642fd5cc91 Merge branch 'master' into 236-user-authentication-using-keycloak-or-any-openid-client-for-using-external-auth-providers-such-as-ldap-github-google 2024-03-28 12:07:58 +01:00
e8fb5a0030 Add OpenID Connect authentication
Fixes #236
Template conditional not yet working
Needs more testing
2024-03-28 12:01:13 +01:00
Christoph Kluge
0dee5073c6 fix: make hasnextpage optional parameter, use only if inf scroll configured 2024-03-26 16:27:04 +01:00
Christoph Kluge
b9b452f043 feat: prototype infinite scroll implementation 2024-03-26 15:56:07 +01:00
Jan Eitzinger
ddd3fad1c6 Merge pull request #251 from ClusterCockpit/hotfix
Accelerator ID Display Bugs and Footprint
2024-03-25 10:35:48 +01:00
Christoph Kluge
1f5723a97e Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2024-03-22 16:10:35 +01:00
Christoph Kluge
5a177c952d fix: multiple accs with identical label, cloned data for single acc
- GPU id label in job view statistic table is always the same on multi GPU jobs #239
- Multiple accelerators listed in plot despite using only one #241
2024-03-22 16:10:30 +01:00
Jan Eitzinger
86e456d152 Merge pull request #250 from ClusterCockpit/hotfix
Hotfix
2024-03-22 09:54:29 +01:00
03895f9e45 Swag dependency needs at least Go 1.20 2024-03-22 09:41:18 +01:00
5c79f44055 Clarify functionality of gen-keypair tool 2024-03-22 08:59:59 +01:00
83c38e74db Refactoring: Reduze bytesize of structs. 2024-03-22 08:59:35 +01:00
1e5f2944cf Upgrade dependencies. Port to jwt-auth v5. 2024-03-21 22:02:59 +01:00
Jan Eitzinger
e45ecbdef7 Merge pull request #249 from ClusterCockpit/hotfix
Hotfix
2024-03-19 16:21:47 +01:00
c65694b36c Add tags and metadata to job queries. Fix query parameter handling.
Fixes #248
2024-03-19 16:18:43 +01:00
0005469101 Refactor 2024-03-19 16:16:02 +01:00
Christoph Kluge
60b56bd41a Fix: Simplify footprint logic, fix aggregated sum values 2024-03-18 18:57:15 +01:00
Jan Eitzinger
81fe492655 Merge pull request #247 from ClusterCockpit/hotfix
Hotfix
2024-03-15 09:30:48 +01:00
Christoph Kluge
849b7e038d Fix: make footprint display configurable app-wide
- note: requires full ui-defaults object in config
2024-03-14 15:14:19 +01:00
Christoph Kluge
82f5257cf1 fix merge bugs 2024-03-14 14:24:54 +01:00
Christoph Kluge
e347659db4 moved module context script 2024-03-14 11:09:18 +01:00
Christoph Kluge
7940317857 Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2024-03-14 11:08:37 +01:00
Christoph Kluge
58415ab5c3 Adapt for accs in shared threshold s 2024-03-14 10:35:14 +01:00
Jan Eitzinger
1176974a78 Merge pull request #245 from pc2/master
Disable foreign key check while updating database
2024-03-14 09:42:24 +01:00
Michael Schwarz
ce792426e6 Disable foreign key check while updating database 2024-03-14 09:15:15 +01:00
e92e727279 Extend oidc auth provider 2024-03-13 17:09:36 +01:00
f761900a3e Add initial code for oidc authentication support 2024-03-13 09:37:12 +01:00
32a57661fd Upgrade frontend dependencies
Change to most recent @sveltestrap/sveltestrap
Reformat with Svelte LSP
2024-03-09 10:30:40 +01:00
Jan Eitzinger
5004e44934 feat: Add jobs endpoint to retrieve job meta and all job metric data
203 make full jobarchive available per simple api call
2024-03-08 16:41:57 +01:00
99d55f05f8 feat: Add cluster config endpoint to rest api 2024-03-08 16:35:30 +01:00
9fd839fad8 Add rest endpoint to get all job data
Fixes #203
2024-03-08 15:31:34 +01:00
Jan Eitzinger
1c7cc9e16f fix: Adapt tag db queries to also work with mysql/mariadb
231 sql statement syntax
2024-03-08 11:32:30 +01:00
06d01962a6 feat: Allow to revert db to previous version 2024-03-08 11:28:26 +01:00
2c2c1accb5 Allow up and down migration of database 2024-03-08 10:58:45 +01:00
105b7eabf0 Add migration and introduce dirty flag handling 2024-03-08 10:47:32 +01:00
Jan Eitzinger
de1d83e1a9 Merge branch 'master' into 231-sql-statement-syntax 2024-03-08 09:05:13 +01:00
Jan Eitzinger
ce97780741 feat: Add rest endpoint to add/edit Metadata entry
Add rest endpoint edit_meta including helper routines
2024-03-08 09:01:39 +01:00
e81e56ea1d Add rest endpoint edit_meta including helper routines
Fixes #219
2024-03-08 08:51:05 +01:00
aa6336ea1e Refactor
Reformat.
Convert to query builder.
Add descriptive error log messages.
2024-03-06 14:50:08 +01:00
dd887cbb1f Do all tag queries with query builder
Fix bug in mysql database initialization.
Fixes #231
2024-03-06 13:47:52 +01:00
Jan Eitzinger
860053be67 Merge pull request #240 from ClusterCockpit/hotfix
fix: Return on error from metricstore
2024-02-23 10:17:45 +01:00
5a4671b7b1 Always return on metricstore error. 2024-02-23 10:08:41 +01:00
Christoph Kluge
ec581e3509 Adapt normal marker line for shared jobs 2024-02-16 16:57:02 +01:00
e7ae9dd06d Cleanup README 2024-02-13 11:55:00 +01:00
0c7f55ff8d Remove obsolete package build rule
In case of cc-backend it is recommended to setup by hand or use the prebuild binary.
2024-02-13 11:54:50 +01:00
bcd7f47409 Transfer docs to dedicated doc webpage 2024-02-13 11:54:02 +01:00
476caebe7f Merge branch 'master' into hotfix 2024-02-13 11:05:09 +01:00
Christoph Kluge
dadc81c026 Add title to logout btn 2024-02-13 10:06:52 +01:00
Christoph Kluge
cc719d0ae5 Add Docs to Navbartools, move logout btn 2024-02-13 09:59:38 +01:00
Christoph Kluge
53af79cf0d Match mem_used color logic for footprint with plots 2024-02-12 17:12:04 +01:00
Christoph Kluge
f534ad66e1 Merge pull request #238 from ClusterCockpit/hotfix
Hotfix
2024-02-09 17:26:33 +01:00
Christoph Kluge
2d8cf02296 Add nullsafe to footprint mean gather 2024-02-09 17:19:58 +01:00
Christoph Kluge
71386f8466 Fix footprint logic for metrics equal zero 2024-02-09 17:09:08 +01:00
Christoph Kluge
c897c8e56b Add missing rounding func 2024-02-09 17:06:46 +01:00
Christoph Kluge
2036069051 Remove unresponsive histogram selections 2024-02-09 16:49:56 +01:00
Christoph Kluge
be6c63e526 Merge pull request #237 from ClusterCockpit/hotfix
Fix pageload block due to missing nullsafe
2024-02-09 16:26:16 +01:00
Christoph Kluge
a2af9c152a Fix pageload block due to missing nullsafe 2024-02-09 16:21:11 +01:00
Jan Eitzinger
63f3dc926c Merge pull request #233 from ClusterCockpit/214_user_status_histograms
214 user status histograms
2024-02-08 12:28:13 +01:00
Christoph Kluge
21dde870c6 Merge branch 'master' into 214_user_status_histograms 2024-02-08 12:26:07 +01:00
Jan Eitzinger
04f37a85ce Merge pull request #232 from ClusterCockpit/196_add_footprint
196 add footprint
2024-02-08 12:07:42 +01:00
10a332083b Merge branch 'master' into 196_add_footprint 2024-02-08 12:03:00 +01:00
Christoph Kluge
6818d1de62 Resolve pullrequest comments 2024-02-07 13:26:13 +01:00
Jan Eitzinger
1b10b75e25 Merge pull request #230 from ClusterCockpit/hotfix
Intermediate Hotfix Merge
2024-01-18 14:54:02 +01:00
Christoph Kluge
b829a5aafe Improve binned data histogram legends 2023-12-13 11:58:14 +01:00
Christoph Kluge
07073e290a feat: add selectable histograms to status view 2023-12-12 16:46:03 +01:00
Christoph Kluge
ee6d286cd7 Small corrections 2023-12-12 15:42:14 +01:00
Christoph Kluge
119637cb9b Fix using crossjoin arguments not used 2023-12-12 15:07:23 +01:00
Christoph Kluge
ee4097a2dd Add missing filters to crossjoinquery 2023-12-11 13:55:56 +01:00
Christoph Kluge
1185737eaa Add metrics to histoselect, add userfilters
- edit struct to make only count return required
2023-12-08 12:03:04 +01:00
Christoph Kluge
7d14086e54 Rework histogramselection, fix reactivity 2023-12-06 12:58:03 +01:00
Christoph Kluge
78494cd30e fix selection, add zero default 2023-12-05 17:33:30 +01:00
Christoph Kluge
ead5c54bcb Prototype completed 2023-12-05 15:30:40 +01:00
Christoph Kluge
b5b355c16c Finished backend sql query and gql resolve 2023-12-05 11:59:01 +01:00
Christoph Kluge
3067d7b250 fix: Use peak threshold for render limit maxy 2023-12-01 14:18:37 +01:00
Christoph Kluge
9bc36152d9 intermediate save
- DOES NOT COMPILE
2023-12-01 13:22:01 +01:00
Christoph Kluge
c1b944b838 sec: update dependencies 2023-11-29 14:25:12 +01:00
Christoph Kluge
175a88f1c4 Merge branch 'hotfix' into 196_add_footprint 2023-11-29 10:42:56 +01:00
Christoph Kluge
aac3e7d2f4 fix: fix scope autoselect on jobview statstable 2023-11-29 10:42:32 +01:00
Christoph Kluge
c0488b8cbe Update comments roofline 2023-11-29 10:40:59 +01:00
Christoph Kluge
d66703c4d0 update pckagelock 2023-11-29 10:40:25 +01:00
Christoph Kluge
173975aadd Add footprint select to user jobList 2023-11-28 09:58:36 +01:00
Christoph Kluge
d97fa37d2c feat: add footprint card displaying basic metrics 2023-11-27 10:07:13 +01:00
Christoph Kluge
782262b52e add missing package date-fns 2023-11-27 10:04:36 +01:00
Christoph Kluge
b8213ef6be Remove logs, reduce code 2023-11-24 17:22:06 +01:00
Christoph Kluge
e34623b1ce Add db average stats to gql, use in footprint 2023-11-24 15:11:38 +01:00
Christoph Kluge
4e375ff32b Handle accelerated and shared jobs 2023-11-24 10:36:22 +01:00
Christoph Kluge
f7529be3ea Add threshold scaling based on used resources
- required for shared jobs
2023-11-23 12:15:35 +01:00
Christoph Kluge
1aa9720405 Switch from title to sveltestrap tooltip 2023-11-22 12:12:36 +01:00
Christoph Kluge
709880ff5a Use html tag for metadata message
- remove old footprint version based on chartjs pie
2023-11-22 10:53:18 +01:00
Christoph Kluge
6b78b4e12b Adds message display in jobView 2023-11-21 15:38:57 +01:00
Christoph Kluge
f342a65aba Adds persistance to showfootprint selection 2023-11-21 15:38:28 +01:00
Christoph Kluge
dc860f8fd9 Handle artifacts, fix single node footprint flops 2023-11-21 10:27:16 +01:00
Christoph Kluge
f8f900151a Fix width, spacing, render 2023-11-20 18:08:33 +01:00
Christoph Kluge
8d409eed0f Footprint in jobList as selectable 2023-11-20 17:53:12 +01:00
Christoph Kluge
dc86523cce Add alternative ver with progress bars 2023-11-20 14:16:01 +01:00
Christoph Kluge
506d112cce Wording changes 2023-11-17 16:47:07 +01:00
Christoph Kluge
eb7f92282d add log, force node scope 2023-11-17 11:34:17 +01:00
Christoph Kluge
3468e987b6 Reformat footprintData mapping 2023-11-17 11:18:30 +01:00
Christoph Kluge
5acd9ece7f Adds messages to footprint 2023-11-16 18:31:45 +01:00
Christoph Kluge
8bc43baf2c Fix units and labels 2023-11-16 16:45:29 +01:00
Christoph Kluge
a2c99fb56d Add colors based on thresholds 2023-11-16 15:07:17 +01:00
Christoph Kluge
9689f95ea1 Initial implementaion 2023-11-16 12:49:20 +01:00
Christoph Kluge
84d6b48353 Fix: default values and new option for time filter 2023-11-15 15:03:58 +01:00
Christoph Kluge
bf64fc5213 Add completed state indicator 2023-11-13 13:43:44 +01:00
Christoph Kluge
d9f9c8aaf5 fix: retrigger gql api at manual refresh
- solves #221
2023-11-03 17:09:16 +01:00
Jan Eitzinger
280b16c11c Merge pull request #218 from ClusterCockpit/hotfix
Prepare bugfix release
2023-09-15 16:02:19 +02:00
4b922c575e Prepare bugfix release 2023-09-15 15:59:54 +02:00
Jan Eitzinger
09528ed6b9 Merge pull request #217 from ClusterCockpit/hotfix
fix: adapt roofline render to browser zoomlevel
2023-09-15 12:34:47 +02:00
Christoph Kluge
e61ff01518 fix: adapt roofline render to browser zoomlevel
- make roofline linewidth configurable
2023-09-15 11:09:01 +02:00
Jan Eitzinger
a4c68bf7fe Merge pull request #215 from ClusterCockpit/hotfix
Hotfix
2023-09-08 12:17:49 +02:00
bb1c8cc25d fix: Move name extract from token in else branch 2023-09-08 12:11:49 +02:00
4b06fa788d fix: Fix buggy logic and simplify code if ValidateUser enabled 2023-09-08 11:50:28 +02:00
Jan Eitzinger
ab08600486 Merge pull request #213 from ClusterCockpit/hotfix
Hotfix
2023-09-07 16:39:01 +02:00
7a5ccff6da fix: Remove port before IP check 2023-09-07 16:36:47 +02:00
a407a5cf01 Add note on apiAllowedIPs to Release Notes 2023-09-07 15:27:46 +02:00
2b3e2f25ec fix: Add correct duration string for max-age option 2023-09-07 15:25:22 +02:00
ed5ecbd914 fix: Restructure swagger docs 2023-09-07 15:14:09 +02:00
2d4759114e Add Release Notes link to release page 2023-09-07 14:33:22 +02:00
c68b9fec42 fix: Add documentation for apiAllowedIPs option 2023-09-07 14:03:41 +02:00
Jan Eitzinger
0f34c8cac6 Merge pull request #212 from ClusterCockpit/moebiusband73-patch-1
Update README.md
2023-09-06 14:02:16 +02:00
Jan Eitzinger
d388a45630 Update README.md 2023-09-06 13:56:55 +02:00
Jan Eitzinger
84b63af080 Merge pull request #211 from ClusterCockpit/uplot_roofline_scatter
Uplot roofline scatter
2023-09-05 15:23:06 +02:00
20902f842d Merge branch 'uplot_roofline_scatter' of github.com:ClusterCockpit/cc-backend into uplot_roofline_scatter 2023-09-05 15:18:38 +02:00
df7217f79c Change margins 2023-09-05 15:17:39 +02:00
Christoph Kluge
bd6f38b4f3 job view layout fixes, fix polar plot axis scales 2023-09-05 15:15:09 +02:00
Christoph Kluge
827a85412e Fix polar and roofline sizes in jobView
- add option for adaptable roofline sizing
2023-09-05 14:55:36 +02:00
Christoph Kluge
c4a9fcc1ca feat: Implemented rooflineplot with uPlot 2023-09-05 12:00:58 +02:00
Christoph Kluge
0993549cac Remove in-dev uplot scatter from this branch 2023-09-05 11:51:50 +02:00
Christoph Kluge
1b8c4e293c Change to prod data, allow and handle null data
- fix errors regarding render timing
- always collect time info in transFormData function
- remove size from polar plot
2023-09-05 11:46:34 +02:00
Christoph Kluge
b449b77b95 Rename dev component, separate rooflineHeatmap
- moved roofline helper functions to utils
2023-09-05 10:01:34 +02:00
Christoph Kluge
f235b1a99c Allow render of time information as color gradient 2023-09-05 09:19:43 +02:00
Christoph Kluge
b2b4beaeaa Finish direct data render roofplot demo 2023-09-04 16:31:47 +02:00
Christoph Kluge
8d7f942de4 Render loglog scatter, fix data format, start draw 2023-09-04 12:53:38 +02:00
Christoph Kluge
c1b5134627 Reduce uplot example code to common
denominator
2023-09-04 10:37:20 +02:00
Christoph Kluge
f5c43d60d3 initial commit for rooflineuplot 2023-09-01 13:12:55 +02:00
Christoph Kluge
69ee19bed0 fix: include running jobs case in statsQueries 2023-09-01 10:23:14 +02:00
Christoph Kluge
4d7819802d fix typo 2023-09-01 10:13:26 +02:00
Jan Eitzinger
07acbf673a Merge pull request #210 from ClusterCockpit/hotfix
Hotfix
2023-08-31 16:20:40 +02:00
47a82bf843 Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2023-08-31 16:17:17 +02:00
cdb66365bf Prepare release 2023-08-31 16:17:15 +02:00
Christoph Kluge
a2a4b2e6c2 fix variable 2023-08-31 15:19:54 +02:00
Christoph Kluge
cffdd055c9 change: use continue for ccms.loadStats errors 2023-08-31 15:17:40 +02:00
Christoph Kluge
64796519c6 change: use continue for rooflineHeatmap errors
- hard errors blocked rendering in frontend
2023-08-31 15:10:57 +02:00
Christoph Kluge
98f1255d4f Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2023-08-31 14:48:22 +02:00
Christoph Kluge
b52330ebf0 simplify bins return 2023-08-31 14:48:19 +02:00
Jan Eitzinger
f857ac0c4e Merge pull request #209 from ClusterCockpit/hotfix
Fix Navbar layout
2023-08-31 14:43:38 +02:00
369757b35b Fix Navbar layout 2023-08-31 14:18:33 +02:00
Jan Eitzinger
68f5b0bba4 Merge pull request #204 from ClusterCockpit/137-improve-layout-for-smaller-screen-sizes
137 improve layout for smaller screen sizes
2023-08-31 13:15:08 +02:00
1da0e3a747 Reformat 2023-08-31 13:10:01 +02:00
Jan Eitzinger
a0b8f36dbb Merge branch 'master' into 137-improve-layout-for-smaller-screen-sizes 2023-08-31 13:05:27 +02:00
Christoph Kluge
c401e195f6 Merge pull request #208 from ClusterCockpit/hotfix
fix plot timestamp format in systems/node view
2023-08-31 12:08:16 +02:00
Jan Eitzinger
b836eee1e7 Merge pull request #205 from ClusterCockpit/166_add_scopes_analysis
166 add scopes analysis
2023-08-31 12:03:37 +02:00
Christoph Kluge
b623092721 feat: persist analysis and status pie selections 2023-08-30 15:15:53 +02:00
9533f06eaf Refactor repository tests
Add context to tests.
Remove special test routines
2023-08-30 15:04:50 +02:00
Jan Eitzinger
8143ca1741 Merge pull request #207 from giesselmann/ldap_name_config
Ldap name config
2023-08-30 09:24:03 +02:00
Christoph Kluge
59c749a164 feat: add select to analysis view pie chart
- 'Walltime' as generic default value for top list
-  Change from nodes distribution to cores distribution
2023-08-29 17:38:17 +02:00
Christoph Kluge
1771883754 fix missing key tag on piecharts 2023-08-29 15:58:25 +02:00
Christoph Kluge
3014f59cc2 feat: add new distribution plots to status view
- numCores and numAccs
2023-08-29 14:02:23 +02:00
Christoph Kluge
f933cad87f feat: add select to status view pie charts
- 'Jobs' as generic default value for top lists
- Prepare histograms for cores and accs in schema
2023-08-29 14:01:01 +02:00
Pay Giesselmann
fd94d30a8e make ldap username attribute configurable 2023-08-29 09:30:57 +02:00
Pay Giesselmann
81d9015d59 :fix delete user logging 2023-08-29 09:29:56 +02:00
Christoph Kluge
5772f38deb fix plot timestamp format in systems/node view 2023-08-28 12:53:09 +02:00
4f9b7b4e52 Merge branch 'master' into 137-improve-layout-for-smaller-screen-sizes 2023-08-28 11:00:55 +02:00
Jan Eitzinger
73a6f6c13c Merge pull request #206 from ClusterCockpit/hotfix
Hotfix
2023-08-28 11:00:27 +02:00
Christoph Kluge
69519ec040 Add requireField cases to resolver 2023-08-28 10:19:26 +02:00
Christoph Kluge
c84b819212 Fix frontend errors
- todo: debug backend handling
2023-08-28 10:00:20 +02:00
Christoph Kluge
ce758610b6 change: implement topX query in jobsStatistics 2023-08-25 17:38:25 +02:00
Christoph Kluge
08bfd3edff change: move searchbar and buttons into burger 2023-08-25 14:32:46 +02:00
1d0db276e8 Merge branch '166_add_scopes_analysis' of github.com:ClusterCockpit/cc-backend into 166_add_scopes_analysis 2023-08-25 13:23:53 +02:00
d7117f3d49 Add sorting and paging to JobStatsGrouped 2023-08-25 13:14:34 +02:00
Christoph Kluge
4a622aae41 fix link typo 2023-08-25 09:51:33 +02:00
Christoph Kluge
c84a0fb8c3 fix: push bootstrap to v5.3.1 and icons to v1.10.5 2023-08-24 16:36:59 +02:00
Christoph Kluge
ba42f4efc0 del obsolete enum 2023-08-24 16:20:52 +02:00
13d99a6ae0 Fix typo in Jobstats resolver 2023-08-24 14:55:49 +02:00
3b8bcf7b32 Remove obsolete jobsCount resolver 2023-08-24 14:51:26 +02:00
a7dd3fbc0b fix bug in stats AddJobCount 2023-08-24 14:26:23 +02:00
Christoph Kluge
77677a9f1b specify label 2023-08-24 13:11:46 +02:00
Christoph Kluge
28609a3372 adapt core timeweight to sqlite name logic 2023-08-24 12:56:35 +02:00
Christoph Kluge
4eceab4dc7 fix: change analysis top users to core hours 2023-08-24 12:51:55 +02:00
Christoph Kluge
6a1e35107f fix: analysis metric histogram normalized by scope
- native acc metrics normalized by accHours
- native core metrics normalized by coreHours
2023-08-24 11:52:36 +02:00
4aa9c4831f Make columns responsive 2023-08-24 09:49:19 +02:00
69b3f767f6 Make menu flat if collapsed 2023-08-24 09:38:12 +02:00
204901189d Add NavbarLinks component 2023-08-24 09:22:12 +02:00
80be78604f fix: Responsive Navbar
Missing Burger menu icon
2023-08-23 21:15:49 +02:00
Christoph Kluge
61c83c375d enable forNode special handling 2023-08-23 16:29:46 +02:00
Jan Eitzinger
b7aacd1b33 Merge pull request #194 from ClusterCockpit/105_modify_user_via_api
first iteraton of implementing ip-secured enpoint
2023-08-23 08:57:40 +02:00
Christoph Kluge
2f35482aff Merge branch 'master' into 166_add_scopes_analysis 2023-08-22 15:26:20 +02:00
Christoph Kluge
f36f62fb47 Improve user endpoint swagger docs 2023-08-21 12:12:28 +02:00
Jan Eitzinger
2ddc24b7ee Merge pull request #202 from ClusterCockpit/hotfix
Update frontend and backend dependencies
2023-08-21 09:41:39 +02:00
dc67a1f103 Update frontend and backend dependencies 2023-08-21 08:49:42 +02:00
Christoph Kluge
f6c4c963ec feat: Add users rest endpoint swagger docs 2023-08-18 17:18:31 +02:00
Jan Eitzinger
987f77170f Merge pull request #201 from ClusterCockpit/hotfix
Hotfix
2023-08-18 16:01:04 +02:00
ebcae32e23 Update docs and cleanup 2023-08-18 15:56:11 +02:00
8a0977561f Merge branch 'master' into hotfix 2023-08-18 15:00:17 +02:00
Jan Eitzinger
3b9b37a0f3 Merge pull request #198 from ClusterCockpit/189-refactor-authentication-module
refactor auth module
2023-08-18 14:35:02 +02:00
9fec8a4822 Change to html output for jwt-login 2023-08-18 14:29:24 +02:00
c87db1dfe6 login for all Methods 2023-08-18 14:03:52 +02:00
e99d1a1e90 Add endpoint for jwt session login 2023-08-18 14:02:21 +02:00
cda46141cc Cleanup and add wildcard for IP Filter 2023-08-18 13:03:11 +02:00
3028f60807 Reformat and add debug output 2023-08-18 11:59:16 +02:00
Christoph Kluge
734e818b19 Merge branch '189-refactor-authentication-module' of https://github.com/ClusterCockpit/cc-backend into 189-refactor-authentication-module 2023-08-18 11:17:33 +02:00
Christoph Kluge
57bda63506 Cleanup some error strings 2023-08-18 11:17:31 +02:00
da551a0bb4 Repair broken error handlng 2023-08-18 11:00:13 +02:00
32b0c8bdd7 Refactor and cleanup Auth configuration 2023-08-18 10:43:06 +02:00
Christoph Kluge
129e6a69b8 fix: metric y-range render limit for data outliers 2023-08-18 10:15:07 +02:00
14c487c9e4 Update test inputs 2023-08-18 09:31:57 +02:00
6185635aa9 Extend config schema 2023-08-18 09:19:55 +02:00
56d559fdd7 Fix bug with jwt max-age option 2023-08-18 09:19:30 +02:00
cfcf939339 Add config to jwt again 2023-08-18 08:57:56 +02:00
d51be5c308 Formatting and minor fixes 2023-08-18 08:49:25 +02:00
29552fadc3 Cleanup SyncOnLogin Handling 2023-08-17 14:02:04 +02:00
15231bc683 Cleanup and adapt to new structure 2023-08-17 12:34:30 +02:00
c7a04328d9 Fix schema and tests 2023-08-17 10:35:16 +02:00
87ce4f63d4 Refactor auth module
Separate parts
Add user repository
Add user schema
2023-08-17 10:29:00 +02:00
80aed87415 Retry fetching user after CanLogin 2023-08-16 17:21:12 +02:00
65cf86586a Merge branch '105_modify_user_via_api' into 189-refactor-authentication-module 2023-08-16 09:46:41 +02:00
4f6d1fec68 Fix errors in ldap auth 2023-08-16 09:19:41 +02:00
202521cbfd Restructure routing and security check 2023-08-14 18:38:30 +02:00
90bdfcfbb6 Add secured subrouter for REST API
Rename IP filter option
Add array helper in util
2023-08-14 14:33:05 +02:00
42e05fc999 Merge branch '189-refactor-authentication-module' into 105_modify_user_via_api 2023-08-14 14:00:27 +02:00
Christoph Kluge
fe6de5bc68 Merge branch '189-refactor-authentication-module' of https://github.com/ClusterCockpit/cc-backend into 189-refactor-authentication-module 2023-08-14 13:52:29 +02:00
Christoph Kluge
e550e57ac0 Fix Java/Grails issued token parsing
- Tested locally until successfull login
- Initialize empty projects array
2023-08-14 13:52:26 +02:00
e69f2c4253 Update Release notes 2023-08-14 13:35:32 +02:00
4a2afc7a5a Add LDAPSyncOnLogin option
Cleanup
Extend docs
Remove obsolete Expiration attribute
2023-08-14 12:40:21 +02:00
Jan Eitzinger
cf91563912 Merge pull request #200 from ClusterCockpit/hotfix
Cleanup and fixes on new plots
2023-08-14 08:15:19 +02:00
9e3ba41746 Correct jwt docs 2023-08-12 09:30:33 +02:00
19d645f65c Readd URL token and cleanup
Fix session values.
2023-08-12 09:02:41 +02:00
Christoph Kluge
05b43c0f21 Cleanup and fixes on new plots 2023-08-11 13:34:30 +02:00
Jan Eitzinger
cf04f420e0 Merge pull request #199 from ClusterCockpit/197_apply_chartjs_update
197 apply chartjs update
2023-08-11 11:44:22 +02:00
Christoph Kluge
f758e52ccd Disable chart.js animations 2023-08-11 11:42:30 +02:00
Jan Eitzinger
ea0c0de687 Merge pull request #193 from ClusterCockpit/71_improve_systemsview
71 improve systemsview
2023-08-11 10:54:04 +02:00
Christoph Kluge
a2cc1bd226 Merge branch 'master' into 71_improve_systemsview 2023-08-11 10:34:05 +02:00
Jan Eitzinger
8910a612ac Merge pull request #191 from ClusterCockpit/138_show_core_accelerator_scope_statstable
138 show core accelerator scope statstable
2023-08-11 10:27:52 +02:00
Christoph Kluge
c04344bfde Remove unused css styles 2023-08-11 10:25:35 +02:00
Jan Eitzinger
6834f07df3 Merge pull request #192 from ClusterCockpit/31_141_apply_uplot_update
31 141 apply uplot update
2023-08-11 10:24:32 +02:00
b8273a9b02 refactor auth module
Restructure module
Separate JWT auth variants
Cleanup code
Fixes #189
2023-08-11 10:00:23 +02:00
Christoph Kluge
32420fb531 Add pie color legend to Top-Tables 2023-08-11 09:41:39 +02:00
Christoph Kluge
e91cdf6b79 Add dependencies 2023-08-11 09:15:43 +02:00
Christoph Kluge
e80ce7a474 feat: Rework analysis view top to contain piechart 2023-08-10 18:06:19 +02:00
Christoph Kluge
da8cefe153 feat: Change histogram to piechart in status view 2023-08-10 18:05:16 +02:00
Christoph Kluge
bbd8637ca6 Fix Polar sizing
- Note: Not adaptive to viewport size, but will snap to grid
2023-08-10 15:10:06 +02:00
Christoph Kluge
b42a11d30e feat: Use chart.js for polarplot n jobview 2023-08-10 12:05:02 +02:00
Christoph Kluge
ed056b065e feat: add sorting in sub-node scopes in statsTable 2023-08-09 17:24:01 +02:00
Christoph Kluge
163462b29c pre-sort numerical ids in statstable
- sorting still fixed if not nodescope
2023-08-09 16:34:00 +02:00
Christoph Kluge
f286872a33 fix: hover legend display now depends on datasize 2023-08-09 12:43:58 +02:00
Christoph Kluge
e32042204b Change histograms to uplot where x-axis is number 2023-08-09 12:42:25 +02:00
Jan Eitzinger
bc6e6250e1 Merge pull request #195 from giesselmann/prometheus_missing_metrics
Prometheus backend: omit metrics with empty series
2023-08-09 08:56:38 +02:00
Christoph Kluge
423e800d9e feat: add hover-legend to histograms & metricplots 2023-08-08 13:27:01 +02:00
Pay Giesselmann
d929bdc9a1 omit metrics with empty series 2023-08-08 10:19:37 +02:00
Christoph Kluge
298051c334 add fallback for acc metrics only on node level 2023-08-04 10:54:18 +02:00
Christoph Kluge
8a473de793 fix: core/accelerator scope in statstable on load 2023-08-03 19:09:15 +02:00
Christoph Kluge
6393035e55 first iteraton of implementing ip-secured enpoint 2023-08-03 17:47:09 +02:00
Christoph Kluge
4244a37440 fix: correct timestamp logic in node-view 2023-08-03 12:38:00 +02:00
Christoph Kluge
6e0c13df89 Correct negative relative timestamps for node view 2023-08-03 12:27:03 +02:00
Christoph Kluge
1921be661b Fix x-axis order for relative timestamps
- WIP: Will change to full times, as tick location is incorrect
2023-08-02 14:20:39 +02:00
Christoph Kluge
284a7079d6 Invert time x-axis for node view metric plots 2023-08-01 18:37:50 +02:00
Christoph Kluge
36abed2093 feat: add auto-reloading to system and node views 2023-08-01 15:30:21 +02:00
Christoph Kluge
742c2e399e feat: Add uplot histogram, implemented in userview
- For testing
- add conversion function to utils
2023-07-26 13:44:06 +02:00
Christoph Kluge
2655bda644 feat: enable uplot XY-Zoom for metrics
- Disable jobView manual zoom
2023-07-24 11:12:22 +02:00
Christoph Kluge
f7571211fd initial branch commit 2023-07-21 16:33:53 +02:00
Jan Eitzinger
38f58047f2 Merge pull request #190 from ClusterCockpit/hotfix
Hotfix
2023-07-20 09:33:48 +02:00
3e73df76dd Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2023-07-20 09:31:27 +02:00
284258fbc6 Document notification banner 2023-07-20 09:31:25 +02:00
Christoph Kluge
968434be49 Add arrayJobId to seachbar docs 2023-07-20 08:43:46 +02:00
Jan Eitzinger
ae79f3e98a Merge pull request #188 from ClusterCockpit/hotfix
Prepare minor release 1.1.0
2023-07-20 08:39:29 +02:00
f81ffbe83d Prepare minor release 1.1.0 2023-07-20 08:33:42 +02:00
Jan Eitzinger
c0ab5de2f1 Merge pull request #182 from ClusterCockpit/179_fix_frontend_apiusers
Fix frontend render for users with api role
2023-07-20 07:42:15 +02:00
Jan Eitzinger
25f5a889d0 Merge pull request #183 from ClusterCockpit/180_fix_render_acc_nodescope
fix: check if acc metrics are acc scope by default
2023-07-20 07:41:09 +02:00
Jan Eitzinger
71739e301c Merge pull request #187 from fodinabor/feature/arrayJobIdSearch
Add arrayJobId searchbar option.
2023-07-20 07:39:31 +02:00
Joachim Meyer
650bcae6be Add arrayJobId searchbar option. 2023-07-19 09:46:48 +02:00
Jan Eitzinger
1062989686 Merge pull request #184 from ClusterCockpit/177_add_scrambling
fix: add scrambling to user names and projectIds
2023-07-19 09:30:09 +02:00
Jan Eitzinger
536a51b93f Merge pull request #186 from ClusterCockpit/185-add-notification-banner
185 add notification banner
2023-07-19 09:13:15 +02:00
Jan Eitzinger
19f2e16bae Merge pull request #178 from ClusterCockpit/hotfix
Hotfix
2023-07-19 09:12:30 +02:00
5923070191 make distclean target phony 2023-07-19 09:04:46 +02:00
04e8279ae4 Change log level for JWT Cross login warning to debug 2023-07-19 09:04:27 +02:00
aab50775d8 Improve message layout and styling 2023-07-19 08:47:42 +02:00
c6a0d442cc feat: Add optional notification banner on homepage
Fixes #185
2023-07-19 08:25:14 +02:00
2674f2a769 Add message banner to template 2023-07-19 07:51:04 +02:00
Christoph Kluge
eed8bb2d44 fix: add scrambling to user names and projectIds 2023-07-17 15:45:40 +02:00
Christoph Kluge
58c7b0d1b4 fix: check if acc metrics are acc scope by default
- Fixes #180
2023-07-17 14:26:24 +02:00
Christoph Kluge
55943cacbf Fix frontend render for users with api role 2023-07-17 12:19:49 +02:00
b25ceccae9 Minor typos 2023-07-05 10:15:12 +02:00
c5633e9e6d Remove typos 2023-07-05 10:01:46 +02:00
df9fd77d06 Refactor auth and add docs
Cleanup and reformat
2023-07-05 09:50:44 +02:00
Jan Eitzinger
56f66aa706 Merge pull request #176 from ClusterCockpit/hotfix
Switch release changelog to include filter
2023-07-03 13:34:57 +02:00
e7ecc260f8 Switch release changelog to include filter 2023-07-03 13:24:03 +02:00
Jan Eitzinger
c935af2ba2 Merge pull request #175 from ClusterCockpit/hotfix
Prepare release
2023-07-03 12:34:36 +02:00
cd5cc9bc2e Prepare release 2023-07-03 12:32:05 +02:00
Jan Eitzinger
07a6d113a7 Merge pull request #174 from ClusterCockpit/hotfix
Hotfix
2023-07-03 12:17:30 +02:00
fd38e1ddc0 Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2023-07-03 11:49:07 +02:00
07a3fa00a9 Update db version tags 2023-07-03 11:49:05 +02:00
Jan Eitzinger
5362164970 Merge pull request #173 from ClusterCockpit/hotfix
Fix keyword in node view jobs query
2023-07-03 09:52:29 +02:00
Christoph Kluge
5a67569fd3 Fix keyword in node view jobs query 2023-07-03 09:45:09 +02:00
Jan Eitzinger
185f7144b0 Merge pull request #172 from ClusterCockpit/hotfix
Revert check for null in concurrent job list
2023-07-03 09:40:15 +02:00
8fee8fcab2 Revert check for null in concurrent job list 2023-07-03 09:37:25 +02:00
Jan Eitzinger
9bd5468089 Merge pull request #171 from ClusterCockpit/hotfix
Make state filter array
2023-07-03 09:11:41 +02:00
120567269d Make state filter array 2023-07-03 09:01:28 +02:00
Jan Eitzinger
036685cbe7 Merge pull request #170 from ClusterCockpit/hotfix
Hotfix
2023-07-01 07:14:46 +02:00
Christoph Kluge
2f471dc192 Change listQuery to jobId array, adapt filter pipe 2023-06-30 16:55:34 +02:00
Christoph Kluge
4729905322 Forgot to adapt link 2023-06-30 15:06:26 +02:00
Christoph Kluge
aacb7489e6 StartTime not required for jobs running on node 2023-06-30 15:04:01 +02:00
Christoph Kluge
c04aea89c9 Implement node filter in frontend, fix backend
- Add running job count and link to list to single node view
2023-06-30 12:01:27 +02:00
b5a7249ad5 Fix sqlite migration for configuration 2023-06-30 09:15:58 +02:00
Jan Eitzinger
ae543447b8 Merge pull request #169 from ClusterCockpit/moebiusband73-patch-1
Update README.md
2023-06-29 15:18:47 +02:00
Jan Eitzinger
2c56bfd89e Update README.md 2023-06-29 15:16:21 +02:00
Christoph Kluge
b6be76eb07 Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2023-06-29 14:59:54 +02:00
Christoph Kluge
08063feef2 Add listQuery link to job view concurrentJobs 2023-06-29 14:59:51 +02:00
Jan Eitzinger
affcba441b Merge pull request #168 from ClusterCockpit/hotfix
Hotfix
2023-06-29 14:59:08 +02:00
dd80d5af9e Change config value datatype
Fixes #167
2023-06-29 14:55:38 +02:00
7174f27a89 Add node filter and concurrent job list query 2023-06-28 13:35:41 +02:00
3828c138b8 Add init flag 2023-06-28 12:41:27 +02:00
Jan Eitzinger
e3f195dad0 Merge pull request #165 from ClusterCockpit/hotfix
Hotfix
2023-06-28 08:33:52 +02:00
6d8a3aa256 Fix bug in fsBackend and add tests for file utils 2023-06-28 07:39:39 +02:00
ab1a9fa781 Update goreleaser config 2023-06-27 15:35:57 +02:00
cf1b7e2db4 Fix typo in schema id 2023-06-27 15:35:29 +02:00
34050c8ce0 Fix argument dashes 2023-06-27 14:30:09 +02:00
eb626db9c2 Debug compression service 2023-06-27 14:29:56 +02:00
Jan Eitzinger
6cd98693c3 Merge pull request #164 from ClusterCockpit/155-improve-tag-schema
Add insert timestamp to tag, jobtag
2023-06-27 09:43:17 +02:00
d1e3e06b10 Add insert timestamp to tag, jobtag
Fixes #43
2023-06-27 09:41:06 +02:00
Jan Eitzinger
60a3b653af Merge pull request #163 from ClusterCockpit/hotfix
Hotfix
2023-06-26 16:52:46 +02:00
8aaa5722e9 Fix errors in query logic 2023-06-26 16:48:13 +02:00
99a2888431 Fix typo in Query 2023-06-26 14:45:49 +02:00
f3a8061dfc Merge branch 'master' into hotfix 2023-06-26 14:40:44 +02:00
Jan Eitzinger
7d85c0e9ad Merge pull request #162 from ClusterCockpit/refactor-concurrent-job-resolver
Refactor concurrent job resolver
2023-06-26 14:28:00 +02:00
c0a319ba7e Add tool binaries 2023-06-26 14:15:52 +02:00
24678ff952 Rename convert pubkey folder 2023-06-26 12:47:21 +02:00
d289360cb2 Add login overwrite docu 2023-06-26 12:46:45 +02:00
463b60acb6 Update doc files 2023-06-26 12:39:08 +02:00
Christoph Kluge
af67022a5d Merge pull request #161 from ClusterCockpit/hotfix
Hotfix
2023-06-23 16:20:58 +02:00
Christoph Kluge
fe78c8b851 Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2023-06-23 16:09:36 +02:00
Christoph Kluge
f992ffc2da Fix SSL timing issues with best guess search 2023-06-23 16:09:33 +02:00
Jan Eitzinger
981551e9c6 Merge pull request #160 from ClusterCockpit/hotfix
Hotfix
2023-06-23 13:42:52 +02:00
c1397ef7d5 Make img directory configurable 2023-06-23 13:34:57 +02:00
Christoph Kluge
45a3346783 Merge branch 'master' into hotfix: Bring branch up to date 2023-06-23 10:41:59 +02:00
Jan Eitzinger
2f6c16a7d8 Merge pull request #156 from giesselmann/log_levels
lower log level for frequent messages
2023-06-23 10:33:44 +02:00
Jan Eitzinger
76d3018b5c Merge pull request #159 from ClusterCockpit/158_fix_searchbar
158 fix searchbar
2023-06-23 10:31:19 +02:00
Christoph Kluge
95b170e542 Merge branch '158_fix_searchbar' of https://github.com/ClusterCockpit/cc-backend into 158_fix_searchbar 2023-06-23 10:21:06 +02:00
Christoph Kluge
26875e06f5 Update docs 2023-06-23 10:21:03 +02:00
34f15cc407 Convert styling and layout to bootstrap 2023-06-23 10:20:48 +02:00
Christoph Kluge
3c26deda30 Format message.tmpl 2023-06-23 09:42:31 +02:00
Christoph Kluge
1761754865 Forgot tmpl 2023-06-23 09:03:36 +02:00
Christoph Kluge
9afb6dc933 Add Header Requirements and BuildInfos to message 2023-06-22 19:56:21 +02:00
8eda4b306d Unify and cleanup message template 2023-06-22 18:09:40 +02:00
82b8e8c284 Fix bug in SearchBar Handler
Introduce Message boxes
Incomplete and needs cleanup
2023-06-22 16:26:09 +02:00
Christoph Kluge
bcadb1adda Remove errorcase from single searchterm logic 2023-06-22 10:58:36 +02:00
eb867528e3 feat - Allow to overwrite login template 2023-06-22 07:01:29 +02:00
feba722a53 Refactor and Cleanup
Add SecurityCheck
2023-06-22 06:26:19 +02:00
bb7c1005c9 Refactor query logic and move to job.go 2023-06-20 15:52:16 +02:00
Pay Giesselmann
a9544f5609 lower log level for frequent messages 2023-06-20 15:47:38 +02:00
Christoph Kluge
fc33bfb47b Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2023-06-20 13:16:04 +02:00
Christoph Kluge
b450cdd20f Add logic step to findThresholds
- fixes reference lines for subclusters w/o extra config
2023-06-20 13:16:00 +02:00
Jan Eitzinger
d0516f12b0 Merge pull request #154 from ClusterCockpit/hotfix
Hotfix
2023-06-20 13:00:00 +02:00
c973a29734 Restore api role access in SecurityCheck 2023-06-20 12:54:26 +02:00
33613cdda0 Sync commit 2023-06-20 10:38:53 +02:00
25acb2eaa5 Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2023-06-20 07:55:59 +02:00
9cd65bb20c Add hook for frontend build 2023-06-20 07:55:57 +02:00
Christoph Kluge
5ba84efab6 User and Project List with 30d filter plus label
- Readable labels for other quick select ranges also
- Note: URL not reformatted for copy-bility
2023-06-19 17:59:44 +02:00
Jan Eitzinger
1f60963cbb Merge pull request #152 from ClusterCockpit/hotfix
Rework disabled metrics, fix systems
2023-06-19 16:23:11 +02:00
Christoph Kluge
c8068f45eb Rework disabled metrics, fix systems
- Unify warning card texts
- Broader use of checkMetricDisabled function
2023-06-19 16:11:16 +02:00
Jan Eitzinger
fb78b3f1e2 Merge pull request #151 from ClusterCockpit/hotfix
Hotfix
2023-06-19 11:57:43 +02:00
Christoph Kluge
10ca86e583 Handle removed metrics in isMissing check 2023-06-19 10:25:34 +02:00
Christoph Kluge
c5834e69d3 Fix Plottable removed metrics filter
- excluded all plots where bool was not explicitly set
2023-06-19 09:51:50 +02:00
Jan Eitzinger
984cf46bf0 Merge pull request #150 from ClusterCockpit/hotfix
Hotfix
2023-06-16 20:17:23 +02:00
Christoph Kluge
4c708dba3f Improve alignment of notice cards 2023-06-16 16:49:19 +02:00
Christoph Kluge
f466312015 Skip render of disabled metrics in job view
-Add function to check if metric is disabled
-Relates to  #119
2023-06-16 16:27:30 +02:00
Jan Eitzinger
34fc6b1541 Merge pull request #149 from ClusterCockpit/hotfix
Hotfix
2023-06-16 16:24:56 +02:00
Christoph Kluge
19ad462abf Match fixed scopeGranularity in frontend 2023-06-16 15:21:10 +02:00
Christoph Kluge
9dc2e9c679 Merge branch 'hotfix' of https://github.com/ClusterCockpit/cc-backend into hotfix 2023-06-16 15:09:56 +02:00
Christoph Kluge
4cfe52e7c9 Add warning card if no data to plot
- series.data array empty, but toplevel data return itself OK
2023-06-16 15:09:23 +02:00
Jan Eitzinger
07f8950838 Merge pull request #148 from ClusterCockpit/hotfix
Hotfix
2023-06-16 14:37:10 +02:00
369d20930b Merge branch 'hotfix' of github.com:ClusterCockpit/cc-backend into hotfix 2023-06-16 14:31:12 +02:00
9b7b3812d9 Introduce goreleaser support 2023-06-16 14:31:09 +02:00
Christoph Kluge
e577e086a6 Fix Format for nodes status 2023-06-16 13:01:41 +02:00
Christoph Kluge
ece57bf65e Introduce units.js, centralizes value normalizing 2023-06-16 12:44:34 +02:00
Christoph Kluge
dbd2b491ed Correct default values when switching clusters 2023-06-15 16:11:51 +02:00
Christoph Kluge
2146fccaae Improve resources filter 2023-06-15 15:40:24 +02:00
f0685919fd Streamline auth error handling 2023-06-15 12:00:45 +02:00
288 changed files with 35501 additions and 17514 deletions

View File

@@ -7,7 +7,7 @@ jobs:
- name: Install Go - name: Install Go
uses: actions/setup-go@v4 uses: actions/setup-go@v4
with: with:
go-version: 1.19.x go-version: 1.22.x
- name: Checkout code - name: Checkout code
uses: actions/checkout@v3 uses: actions/checkout@v3
- name: Build, Vet & Test - name: Build, Vet & Test

5
.gitignore vendored
View File

@@ -14,3 +14,8 @@
/archive-manager /archive-manager
var/job.db-shm var/job.db-shm
var/job.db-wal var/job.db-wal
dist/
*.db
internal/repository/testdata/job.db-shm
internal/repository/testdata/job.db-wal

93
.goreleaser.yaml Normal file
View File

@@ -0,0 +1,93 @@
before:
hooks:
- go mod tidy
builds:
- env:
- CGO_ENABLED=1
goos:
- linux
goarch:
- amd64
goamd64:
- v3
id: "cc-backend"
binary: cc-backend
main: ./cmd/cc-backend
ldflags:
- -s -w -X main.version={{.Version}}
- -X main.commit={{.Commit}} -X main.date={{.Date}}
- -linkmode external -extldflags -static
tags:
- static_build
hooks:
pre: make frontend
- env:
- CGO_ENABLED=0
goos:
- linux
goarch:
- amd64
goamd64:
- v3
id: "archive-manager"
binary: archive-manager
main: ./tools/archive-manager
tags:
- static_build
- env:
- CGO_ENABLED=0
goos:
- linux
goarch:
- amd64
goamd64:
- v3
id: "gen-keypair"
binary: gen-keypair
main: ./tools/gen-keypair
tags:
- static_build
archives:
- format: tar.gz
# this name template makes the OS and Arch compatible with the results of uname.
name_template: >-
{{ .ProjectName }}_
{{- title .Os }}_
{{- if eq .Arch "amd64" }}x86_64
{{- else }}{{ .Arch }}{{ end }}
{{- if .Arm }}v{{ .Arm }}{{ end }}
checksum:
name_template: "checksums.txt"
snapshot:
name_template: "{{ incpatch .Version }}-next"
changelog:
sort: asc
filters:
include:
- "^feat:"
- "^fix:"
- "^sec:"
- "^docs:"
groups:
- title: "Dependency updates"
regexp: '^.*?(feat|fix)\(deps\)!?:.+$'
order: 300
- title: "New Features"
regexp: '^.*?feat(\([[:word:]]+\))??!?:.+$'
order: 100
- title: "Security updates"
regexp: '^.*?sec(\([[:word:]]+\))??!?:.+$'
order: 150
- title: "Bug fixes"
regexp: '^.*?fix(\([[:word:]]+\))??!?:.+$'
order: 200
- title: "Documentation updates"
regexp: ^.*?doc(\([[:word:]]+\))??!?:.+$
order: 400
release:
draft: false
footer: |
Supports job archive version 2 and database version 8.
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
# vim: set ts=2 sw=2 tw=0 fo=cnqoj

View File

@@ -1,6 +1,6 @@
MIT License MIT License
Copyright (c) 2022 NHR@FAU, University Erlangen-Nuremberg Copyright (c) NHR@FAU, University Erlangen-Nuremberg
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal

126
Makefile
View File

@@ -2,10 +2,10 @@ TARGET = ./cc-backend
VAR = ./var VAR = ./var
CFG = config.json .env CFG = config.json .env
FRONTEND = ./web/frontend FRONTEND = ./web/frontend
VERSION = 1.0.0 VERSION = 1.4.1
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development') GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S") CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
LD_FLAGS = '-s -X main.buildTime=${CURRENT_TIME} -X main.version=${VERSION} -X main.hash=${GIT_HASH}' LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'
EXECUTABLES = go npm EXECUTABLES = go npm
K := $(foreach exec,$(EXECUTABLES),\ K := $(foreach exec,$(EXECUTABLES),\
@@ -22,13 +22,23 @@ SVELTE_COMPONENTS = status \
header header
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS))) SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \ SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/*.js) \ $(wildcard $(FRONTEND)/src/*.js) \
$(wildcard $(FRONTEND)/src/filters/*.svelte) \ $(wildcard $(FRONTEND)/src/analysis/*.svelte) \
$(wildcard $(FRONTEND)/src/plots/*.svelte) \ $(wildcard $(FRONTEND)/src/config/*.svelte) \
$(wildcard $(FRONTEND)/src/joblist/*.svelte) $(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/*.js) \
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
$(wildcard $(FRONTEND)/src/header/*.svelte) \
$(wildcard $(FRONTEND)/src/job/*.svelte)
.PHONY: clean test tags $(TARGET) .PHONY: clean distclean test tags frontend swagger graphql $(TARGET)
.NOTPARALLEL: .NOTPARALLEL:
@@ -36,6 +46,19 @@ $(TARGET): $(VAR) $(CFG) $(SVELTE_TARGETS)
$(info ===> BUILD cc-backend) $(info ===> BUILD cc-backend)
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend @go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
frontend:
$(info ===> BUILD frontend)
cd web/frontend && npm install && npm run build
swagger:
$(info ===> GENERATE swagger)
@go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api
@mv ./api/docs.go ./internal/api/docs.go
graphql:
$(info ===> GENERATE graphql)
@go run github.com/99designs/gqlgen
clean: clean:
$(info ===> CLEAN) $(info ===> CLEAN)
@go clean @go clean
@@ -72,90 +95,3 @@ config.json:
$(SVELTE_TARGETS): $(SVELTE_SRC) $(SVELTE_TARGETS): $(SVELTE_SRC)
$(info ===> BUILD frontend) $(info ===> BUILD frontend)
cd web/frontend && npm install && npm run build cd web/frontend && npm install && npm run build
install: $(TARGET)
@WORKSPACE=$(PREFIX)
@if [ -z "$${WORKSPACE}" ]; then exit 1; fi
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
@install -Dpm 755 $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
@install -Dpm 600 configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
.ONESHELL:
.PHONY: RPM
RPM: build/package/cc-backend.spec
@WORKSPACE="$${PWD}"
@SPECFILE="$${WORKSPACE}/build/package/cc-backend.spec"
# Setup RPM build tree
@eval $$(rpm --eval "ARCH='%{_arch}' RPMDIR='%{_rpmdir}' SOURCEDIR='%{_sourcedir}' SPECDIR='%{_specdir}' SRPMDIR='%{_srcrpmdir}' BUILDDIR='%{_builddir}'")
@mkdir --parents --verbose "$${RPMDIR}" "$${SOURCEDIR}" "$${SPECDIR}" "$${SRPMDIR}" "$${BUILDDIR}"
# Create source tarball
@COMMITISH="HEAD"
@VERS=$$(git describe --tags $${COMMITISH})
@VERS=$${VERS#v}
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
@eval $$(rpmspec --query --queryformat "NAME='%{name}' VERSION='%{version}' RELEASE='%{release}' NVR='%{NVR}' NVRA='%{NVRA}'" --define="VERS $${VERS}" "$${SPECFILE}")
@PREFIX="$${NAME}-$${VERSION}"
@FORMAT="tar.gz"
@SRCFILE="$${SOURCEDIR}/$${PREFIX}.$${FORMAT}"
@git archive --verbose --format "$${FORMAT}" --prefix="$${PREFIX}/" --output="$${SRCFILE}" $${COMMITISH}
# Build RPM and SRPM
@rpmbuild -ba --define="VERS $${VERS}" --rmsource --clean "$${SPECFILE}"
# Report RPMs and SRPMs when in GitHub Workflow
@if [ "$${GITHUB_ACTIONS}" = true ]; then
@ RPMFILE="$${RPMDIR}/$${ARCH}/$${NVRA}.rpm"
@ SRPMFILE="$${SRPMDIR}/$${NVR}.src.rpm"
@ echo "RPM: $${RPMFILE}"
@ echo "SRPM: $${SRPMFILE}"
@ echo "::set-output name=SRPM::$${SRPMFILE}"
@ echo "::set-output name=RPM::$${RPMFILE}"
@fi
.ONESHELL:
.PHONY: DEB
DEB: build/package/cc-backend.deb.control
@BASEDIR=$${PWD}
@WORKSPACE=$${PWD}/.dpkgbuild
@DEBIANDIR=$${WORKSPACE}/debian
@DEBIANBINDIR=$${WORKSPACE}/DEBIAN
@mkdir --parents --verbose $$WORKSPACE $$DEBIANBINDIR
#@mkdir --parents --verbose $$DEBIANDIR
@CONTROLFILE="$${BASEDIR}/build/package/cc-backend.deb.control"
@COMMITISH="HEAD"
@VERS=$$(git describe --tags --abbrev=0 $${COMMITISH})
@VERS=$${VERS#v}
@VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g)
@if [ "$${VERS}" = "" ]; then VERS="$(VERSION)"; fi
@ARCH=$$(uname -m)
@ARCH=$$(echo $$ARCH | sed -e s+'_'+'-'+g)
@if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi
@PREFIX="$${NAME}-$${VERSION}_$${ARCH}"
@SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$${WORKSPACE}"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//')
@SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')"
#@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control
@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control
@mkdir --parents --verbose "$${WORKSPACE}"/$(VAR)
@touch "$${WORKSPACE}"/$(VAR)/job.db
@cd web/frontend && yarn install && yarn build && cd -
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
@mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR)
@cp $(TARGET) $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
@chmod 0755 $${WORKSPACE}/usr/$(BINDIR)/$(TARGET)
@mkdir --parents --verbose $${WORKSPACE}/etc/$(TARGET)
@cp configs/config.json $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
@chmod 0600 $${WORKSPACE}/etc/$(TARGET)/$(TARGET).json
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/systemd/system
@cp build/package/$(TARGET).service $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
@chmod 0644 $${WORKSPACE}/usr/lib/systemd/system/$(TARGET).service
@mkdir --parents --verbose $${WORKSPACE}/etc/default
@cp build/package/$(TARGET).config $${WORKSPACE}/etc/default/$(TARGET)
@chmod 0600 $${WORKSPACE}/etc/default/$(TARGET)
@mkdir --parents --verbose $${WORKSPACE}/usr/lib/sysusers.d
@cp build/package/$(TARGET).sysusers $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
@chmod 0644 $${WORKSPACE}/usr/lib/sysusers.d/$(TARGET).conf
@DEB_FILE="cc-metric-store_$${VERS}_$${ARCH}.deb"
@dpkg-deb -b $${WORKSPACE} "$$DEB_FILE"
@rm -r "$${WORKSPACE}"
@if [ "$${GITHUB_ACTIONS}" = "true" ]; then
@ echo "::set-output name=DEB::$${DEB_FILE}"
@fi

238
README.md
View File

@@ -1,67 +1,101 @@
# NOTE
Please have a look at the [Release
Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md)
for breaking changes!
# ClusterCockpit REST and GraphQL API backend # ClusterCockpit REST and GraphQL API backend
[![Build](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml) [![Build](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)
This is a Golang backend implementation for a REST and GraphQL API according to the [ClusterCockpit specifications](https://github.com/ClusterCockpit/cc-specifications). This is a Golang backend implementation for a REST and GraphQL API according to
It also includes a web interface for ClusterCockpit. the [ClusterCockpit
While there is a backend for the InfluxDB timeseries database, the only tested and supported setup is using cc-metric-store as a mtric data backend. specifications](https://github.com/ClusterCockpit/cc-specifications). It also
We will add documentation how to integrate ClusterCockpit with other timeseries databases in the future. includes a web interface for ClusterCockpit. This implementation replaces the
This implementation replaces the previous PHP Symfony based ClusterCockpit web-interface. previous PHP Symfony based ClusterCockpit web interface. The reasons for
[Here](https://github.com/ClusterCockpit/ClusterCockpit/wiki/Why-we-switched-from-PHP-Symfony-to-a-Golang-based-solution) is a discussion of the reasons why we switched from PHP Symfony to a Golang based solution. switching from PHP Symfony to a Golang based solution are explained
[here](https://github.com/ClusterCockpit/ClusterCockpit/wiki/Why-we-switched-from-PHP-Symfony-to-a-Golang-based-solution).
## Overview ## Overview
This is a golang web backend for the ClusterCockpit job-specific performance monitoring framework. This is a Golang web backend for the ClusterCockpit job-specific performance
It provides a REST API for integrating ClusterCockpit with a HPC cluster batch system and external analysis scripts. monitoring framework. It provides a REST API for integrating ClusterCockpit with
Data exchange between the web frontend and backend is based on a GraphQL API. an HPC cluster batch system and external analysis scripts. Data exchange between
The web frontend is also served by the backend using [Svelte](https://svelte.dev/) components. the web front-end and the back-end is based on a GraphQL API. The web frontend
Layout and styling is based on [Bootstrap 5](https://getbootstrap.com/) using [Bootstrap Icons](https://icons.getbootstrap.com/). is also served by the backend using [Svelte](https://svelte.dev/) components.
The backend uses [SQLite 3](https://sqlite.org/) as relational SQL database by default. Layout and styling are based on [Bootstrap 5](https://getbootstrap.com/) using
It can optionally use a MySQL/MariaDB database server. [Bootstrap Icons](https://icons.getbootstrap.com/).
Finished batch jobs are stored in a file-based job archive following [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
The backend supports authentication using local accounts or an external LDAP directory.
Authorization for APIs is implemented using [JWT](https://jwt.io/) tokens created with public/private key encryption.
You find more detailed information here: The backend uses [SQLite 3](https://sqlite.org/) as a relational SQL database by
* `./configs/README.md`: Infos about configuration and setup of cc-backend. default. Optionally it can use a MySQL/MariaDB database server. While there are
* `./init/README.md`: Infos on how to setup cc-backend as systemd service on Linux. metric data backends for the InfluxDB and Prometheus time series databases, the
* `./tools/README.md`: Infos on the JWT authorizatin token workflows in ClusterCockpit. only tested and supported setup is to use cc-metric-store as the metric data
* `./docs`: You can find further documentation here. There is also a Hands-on tutorial that is recommended to get familiar with the ClusterCockpit setup. backend. Documentation on how to integrate ClusterCockpit with other time series
databases will be added in the future.
**NOTICE** Completed batch jobs are stored in a file-based job archive according to
[this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive).
The backend supports authentication via local accounts, an external LDAP
directory, and JWT tokens. Authorization for APIs is implemented with
[JWT](https://jwt.io/) tokens created with public/private key encryption.
ClusterCockpit requires a recent version of the golang toolchain and node.js. You find a detailed documentation on the [ClusterCockpit
You can check in `go.mod` what is the current minimal golang version required. Webpage](https://clustercockpit.org).
Homebrew and Archlinux usually have up to date golang versions. For other Linux
distros this often means you have to install the golang compiler yourself.
Fortunatly this is easy with golang. Since a lot of functionality is based on
the go standard library it is crucial for security and performance to use a
recent golang version. Also an old golang tool chain may restrict the supported
versions of third party packages.
## Demo Setup ## Build requirements
We provide a shell skript that downloads demo data and automatically builds and ClusterCockpit requires a current version of the golang toolchain and node.js.
starts cc-backend. You need `wget`, `go`, `node`, `npm` in your path to start You can check `go.mod` to see what is the current minimal golang version needed.
the demo. The demo will download 32MB of data (223MB on disk). Homebrew and Archlinux usually have current golang versions. For other Linux
distros this often means that you have to install the golang compiler yourself.
Fortunately, this is easy with golang. Since much of the functionality is based
on the Go standard library, it is crucial for security and performance to use a
current version of golang. In addition, an old golang toolchain may limit the supported
versions of third-party packages.
## How to try ClusterCockpit with a demo setup
We provide a shell script that downloads demo data and automatically starts the
cc-backend. You will need `wget`, `go`, `node`, `npm` in your path to
start the demo. The demo downloads 32MB of data (223MB on disk).
```sh ```sh
git clone https://github.com/ClusterCockpit/cc-backend.git git clone https://github.com/ClusterCockpit/cc-backend.git
cd ./cc-backend cd ./cc-backend
./startDemo.sh ./startDemo.sh
``` ```
You can access the web interface at http://localhost:8080.
Credentials for login: `demo:demo`.
Please note that some views do not work without a metric backend (e.g., the Systems and Status view).
## Howto Build and Run You can also try the demo using the latest release binary.
Create a folder and put the release binary `cc-backend` into this folder.
Execute the following steps:
There is a Makefile to automate the build of cc-backend. The Makefile supports the following targets: ``` shell
* `$ make`: Initialize `var` directory and build svelte frontend and backend binary. Please note that there is no proper prerequesite handling. Any change of frontend source files will trigger a complete rebuild. ./cc-backend -init
* `$ make clean`: Clean go build cache and remove binary vim config.json (Add a second cluster entry and name the clusters alex and fritz)
* `$ make test`: Run the tests that are also run in the GitHub workflow setup. wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
tar xf job-archive-demo.tar
./cc-backend -init-db -add-user demo:admin:demo -loglevel info
./cc-backend -server -dev -loglevel info
```
You can access the web interface at [http://localhost:8080](http://localhost:8080).
Credentials for login are `demo:demo`.
Please note that some views do not work without a metric backend (e.g., the
Analysis, Systems and Status views).
## How to build and run
There is a Makefile to automate the build of cc-backend. The Makefile supports
the following targets:
* `make`: Initialize `var` directory and build svelte frontend and backend
binary. Note that there is no proper prerequisite handling. Any change of
frontend source files will result in a complete rebuild.
* `make clean`: Clean go build cache and remove binary.
* `make test`: Run the tests that are also run in the GitHub workflow setup.
A common workflow for setting up cc-backend from scratch is:
A common workflow to setup cc-backend fron scratch is:
```sh ```sh
git clone https://github.com/ClusterCockpit/cc-backend.git git clone https://github.com/ClusterCockpit/cc-backend.git
@@ -72,87 +106,63 @@ make
# EDIT THE .env FILE BEFORE YOU DEPLOY (Change the secrets)! # EDIT THE .env FILE BEFORE YOU DEPLOY (Change the secrets)!
# If authentication is disabled, it can be empty. # If authentication is disabled, it can be empty.
cp configs/env-template.txt .env cp configs/env-template.txt .env
vim ./.env vim .env
cp configs/config.json ./ cp configs/config.json .
vim ./config.json vim config.json
#Optional: Link an existing job archive: #Optional: Link an existing job archive:
ln -s <your-existing-job-archive> ./var/job-archive ln -s <your-existing-job-archive> ./var/job-archive
# This will first initialize the job.db database by traversing all # This will first initialize the job.db database by traversing all
# `meta.json` files in the job-archive and add a new user. `--no-server` will cause the # `meta.json` files in the job-archive and add a new user.
# executable to stop once it has done that instead of starting a server. ./cc-backend -init-db -add-user <your-username>:admin:<your-password>
./cc-backend --init-db --add-user <your-username>:admin:<your-password>
# Start a HTTP server (HTTPS can be enabled, the default port is 8080). # Start a HTTP server (HTTPS can be enabled in the configuration, the default port is 8080).
# The --dev flag enables GraphQL Playground (http://localhost:8080/playground) and Swagger UI (http://localhost:8080/swagger). # The --dev flag enables GraphQL Playground (http://localhost:8080/playground) and Swagger UI (http://localhost:8080/swagger).
./cc-backend --server --dev ./cc-backend -server -dev
# Show other options: # Show other options:
./cc-backend --help ./cc-backend -help
``` ```
### Run as systemd daemon
In order to run this program as a daemon, cc-backend ships with an [example systemd setup](./init/README.md). ## Project file structure
## Configuration and Setup * [`api/`](https://github.com/ClusterCockpit/cc-backend/tree/master/api)
contains the API schema files for the REST and GraphQL APIs. The REST API is
cc-backend can be used as a local web-interface for an existing job archive or as a general web-interface server for a live ClusterCockpit Monitoring framework. documented in the OpenAPI 3.0 format in
[./api/openapi.yaml](./api/openapi.yaml).
Create your job-archive according to [this specification](https://github.com/ClusterCockpit/cc-specifications/tree/master/job-archive). * [`cmd/cc-backend`](https://github.com/ClusterCockpit/cc-backend/tree/master/cmd/cc-backend)
At least one cluster with a valid `cluster.json` file is required. contains `main.go` for the main application.
Having no jobs in the job-archive at all is fine. * [`configs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/configs)
contains documentation about configuration and command line options and required
### Configuration environment variables. A sample configuration file is provided.
* [`docs/`](https://github.com/ClusterCockpit/cc-backend/tree/master/docs)
A config file in the JSON format has to be provided using `--config` to override the defaults. contains more in-depth documentation.
By default, if there is a `config.json` file in the current directory of the `cc-backend` process, it will be loaded even without the `--config` flag. * [`init/`](https://github.com/ClusterCockpit/cc-backend/tree/master/init)
You find documentation of all supported configuration and command line options [here](./configs/README.md). contains an example of setting up systemd for production use.
* [`internal/`](https://github.com/ClusterCockpit/cc-backend/tree/master/internal)
## Database initialization and migration contains library source code that is not intended for use by others.
* [`pkg/`](https://github.com/ClusterCockpit/cc-backend/tree/master/pkg)
Every cc-backend version supports a specific database version. contains Go packages that can be used by other projects.
On startup the version of the sqlite database is validated and cc-backend will terminate if the version does not match. * [`tools/`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools)
cc-backend supports to migrate the database schema up to the required version using the `--migrate-db` command line option. Additional command line helper tools.
In case the database file does not yet exist it is created and initialized by the `--migrate-db` command line option. * [`archive-manager`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/archive-manager)
In case you want to use a newer database version with an older version of cc-backend you can downgrade a database using the external [migrate](https://github.com/golang-migrate/migrate) tool. Commands for getting infos about and existing job archive.
In this case you have to provide the path to the migration files in a recent source tree: `./internal/repository/migrations/`. * [`convert-pem-pubkey`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/convert-pem-pubkey)
Tool to convert external pubkey for use in `cc-backend`.
## Development * [`gen-keypair`](https://github.com/ClusterCockpit/cc-backend/tree/master/tools/gen-keypair)
In case the REST or GraphQL API is changed the according code generators have to be used. contains a small application to generate a compatible JWT keypair. You find
documentation on how to use it
### Update GraphQL schema [here](https://github.com/ClusterCockpit/cc-backend/blob/master/docs/JWT-Handling.md).
* [`web/`](https://github.com/ClusterCockpit/cc-backend/tree/master/web)
This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL API. Server-side templates and frontend-related files:
The schema can be found in `./api/schema.graphqls`. * [`frontend`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/frontend)
After changing it, you need to run `go run github.com/99designs/gqlgen` which will update `./internal/graph/model`. Svelte components and static assets for the frontend UI
In case new resolvers are needed, they will be inserted into `./internal/graph/schema.resolvers.go`, where you will need to implement them. * [`templates`](https://github.com/ClusterCockpit/cc-backend/tree/master/web/templates)
If you start cc-backend with flag `--dev` the GraphQL Playground UI is available at http://localhost:8080/playground . Server-side Go templates
* [`gqlgen.yml`](https://github.com/ClusterCockpit/cc-backend/blob/master/gqlgen.yml)
### Update Swagger UI Configures the behaviour and generation of
[gqlgen](https://github.com/99designs/gqlgen).
This project integrates [swagger ui](https://swagger.io/tools/swagger-ui/) to document and test its REST API. * [`startDemo.sh`](https://github.com/ClusterCockpit/cc-backend/blob/master/startDemo.sh)
The swagger doc files can be found in `./api/`. is a shell script that sets up demo data, and builds and starts `cc-backend`.
You can generate the configuration of swagger-ui by running `go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api `.
You need to move the generated `./api/doc.go` to `./internal/api/doc.go`.
If you start cc-backend with flag `--dev` the Swagger UI is available at http://localhost:8080/swagger/ .
You have to enter a JWT key for a user with role API.
**NOTICE** The user owning the JWT token must not be logged in the same browser (have a running session), otherwise Swagger requests will not work. It is recommended to create a separate user that has just the API role.
## Project Structure
- `api/` contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in [./api/openapi.yaml](./api/openapi.yaml).
- `cmd/cc-backend` contains `main.go` for the main application.
- `cmd/gen-keypair` contains is a small application to generate a compatible JWT keypair includin a README about JWT setup in ClusterCockpit.
- `configs/` contains documentation about configuration and command line options and required environment variables. An example configuration file is provided.
- `init/` contains an example systemd setup for production use.
- `internal/` contains library source code that is not intended to be used by others.
- `pkg/` contains go packages that can also be used by other projects.
- `test/` Test apps and test data.
- `web/` Server side templates and frontend related files:
- `templates` Serverside go templates
- `frontend` Svelte components and static assets for frontend UI
- `gqlgen.yml` configures the behaviour and generation of [gqlgen](https://github.com/99designs/gqlgen).
- `startDemo.sh` is a shell script that sets up demo data, and builds and starts cc-backend.

View File

@@ -1,27 +1,45 @@
# `cc-backend` version 1.0.0 # `cc-backend` version 1.4.1
Supports job archive version 1 and database version 4. Supports job archive version 2 and database version 8.
This is the initial release of `cc-backend`, the API backend and frontend This is a small bug fix release of `cc-backend`, the API backend and frontend
implementation of ClusterCockpit. implementation of ClusterCockpit.
For release specific notes visit the [ClusterCockpit Documentation](https://clusterockpit.org/docs/release/).
**Breaking changes** ## Breaking changes
The aggregate job statistic core hours is now computed using the job table - You need to perform a database migration. Depending on your database size the
column `num_hwthreads`. In a the future release this column will be renamed to migration might require several hours!
`num_cores`. For correct display of core hours `num_hwthreads` must be correctly - You need to adapt the `cluster.json` configuration files in the job-archive,
filled on job start. If your existing jobs do not provide the correct value in add new required attributes to the metric list and after that edit
this column then you can set this with one SQL INSERT statement. This only applies `./job-archive/version.txt` to version 2.
if you have exclusive jobs, only. Please be aware that we treat this column as - Continuous scrolling is default now in all job lists. You can change this back
it is the number of cores. In case you have SMT enabled and `num_hwthreads` to paging globally, also every user can configure to use paging or continuous
is not the number of cores the core hours will be too high by a factor! scrolling individually.
- Tags have a scope now. Existing tags will get global scope in the database
migration.
**Features** ## New features
* Supports user roles admin, support, manager, user, and api.
* Unified search bar supports job id, job name, project id, user name, and name
* Performance improvements for sqlite db backend
* Extended REST api supports to query job metrics
* Better support for shared jobs
* More flexible metric list configuration
* Versioning and migration for database and job archive
- Tags have a scope now. Tags created by a basic user are only visible by that
user. Tags created by an admin/support role can be configured to be visible by
all users (global scope) or only be admin/support role.
- Re-sampling support for running (requires a recent `cc-metric-store`) and
archived jobs. This greatly speeds up loading of large or very long jobs. You
need to add the new configuration key `enable-resampling` to the `config.json`
file.
- For finished jobs a total job energy is shown in the job view.
- Continuous scrolling in job lists is default now.
- All database queries (especially for sqlite) were optimized resulting in
dramatically faster load times.
- A performance and energy footprint can be freely configured on a per
subcluster base. One can filter for footprint statistics for running and
finished jobs.
## Known issues
- Currently energy footprint metrics of type energy are ignored for calculating
total energy.
- Resampling for running jobs only works with cc-metric-store
- With energy footprint metrics of type power the unit is ignored and it is
assumed the metric has the unit Watt.

View File

@@ -18,6 +18,7 @@ type Job {
numNodes: Int! numNodes: Int!
numHWThreads: Int! numHWThreads: Int!
numAcc: Int! numAcc: Int!
energy: Float!
SMT: Int! SMT: Int!
exclusive: Int! exclusive: Int!
partition: String! partition: String!
@@ -27,7 +28,8 @@ type Job {
tags: [Tag!]! tags: [Tag!]!
resources: [Resource!]! resources: [Resource!]!
concurrentJobs: JobLinkResultList concurrentJobs: JobLinkResultList
footprint: [FootprintValue]
energyFootprint: [EnergyFootprintValue]
metaData: Any metaData: Any
userData: User userData: User
} }
@@ -40,7 +42,6 @@ type JobLink {
type Cluster { type Cluster {
name: String! name: String!
partitions: [String!]! # Slurm partitions partitions: [String!]! # Slurm partitions
metricConfig: [MetricConfig!]!
subClusters: [SubCluster!]! # Hardware partitions/subclusters subClusters: [SubCluster!]! # Hardware partitions/subclusters
} }
@@ -56,9 +57,24 @@ type SubCluster {
flopRateSimd: MetricValue! flopRateSimd: MetricValue!
memoryBandwidth: MetricValue! memoryBandwidth: MetricValue!
topology: Topology! topology: Topology!
metricConfig: [MetricConfig!]!
footprint: [String!]!
}
type FootprintValue {
name: String!
stat: String!
value: Float!
}
type EnergyFootprintValue {
hardware: String!
metric: String!
value: Float!
} }
type MetricValue { type MetricValue {
name: String
unit: Unit! unit: Unit!
value: Float! value: Float!
} }
@@ -97,6 +113,7 @@ type MetricConfig {
normal: Float normal: Float
caution: Float! caution: Float!
alert: Float! alert: Float!
lowerIsBetter: Boolean
subClusters: [SubClusterConfig!]! subClusters: [SubClusterConfig!]!
} }
@@ -104,6 +121,7 @@ type Tag {
id: ID! id: ID!
type: String! type: String!
name: String! name: String!
scope: String!
} }
type Resource { type Resource {
@@ -145,9 +163,10 @@ type MetricStatistics {
} }
type StatsSeries { type StatsSeries {
mean: [NullableFloat!]! mean: [NullableFloat!]!
min: [NullableFloat!]! median: [NullableFloat!]!
max: [NullableFloat!]! min: [NullableFloat!]!
max: [NullableFloat!]!
} }
type MetricFootprints { type MetricFootprints {
@@ -156,12 +175,18 @@ type MetricFootprints {
} }
type Footprints { type Footprints {
nodehours: [NullableFloat!]! timeWeights: TimeWeights!
metrics: [MetricFootprints!]! metrics: [MetricFootprints!]!
} }
type TimeWeights {
nodeHours: [NullableFloat!]!
accHours: [NullableFloat!]!
coreHours: [NullableFloat!]!
}
enum Aggregate { USER, PROJECT, CLUSTER } enum Aggregate { USER, PROJECT, CLUSTER }
enum Weights { NODE_COUNT, NODE_HOURS } enum SortByAggregate { TOTALWALLTIME, TOTALJOBS, TOTALNODES, TOTALNODEHOURS, TOTALCORES, TOTALCOREHOURS, TOTALACCS, TOTALACCHOURS }
type NodeMetrics { type NodeMetrics {
host: String! host: String!
@@ -169,6 +194,19 @@ type NodeMetrics {
metrics: [JobMetricWithName!]! metrics: [JobMetricWithName!]!
} }
type ClusterSupport {
cluster: String!
subClusters: [String!]!
}
type GlobalMetricListItem {
name: String!
unit: Unit!
scope: MetricScope!
footprint: String
availability: [ClusterSupport!]!
}
type Count { type Count {
name: String! name: String!
count: Int! count: Int!
@@ -180,20 +218,25 @@ type User {
email: String! email: String!
} }
input MetricStatItem {
metricName: String!
range: FloatRange!
}
type Query { type Query {
clusters: [Cluster!]! # List of all clusters clusters: [Cluster!]! # List of all clusters
tags: [Tag!]! # List of all tags tags: [Tag!]! # List of all tags
globalMetrics: [GlobalMetricListItem!]!
user(username: String!): User user(username: String!): User
allocatedNodes(cluster: String!): [Count!]! allocatedNodes(cluster: String!): [Count!]!
job(id: ID!): Job job(id: ID!): Job
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]! jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList! jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]! jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, weight: Weights, limit: Int): [Count!]!
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]! rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
@@ -201,7 +244,7 @@ type Query {
} }
type Mutation { type Mutation {
createTag(type: String!, name: String!): Tag! createTag(type: String!, name: String!, scope: String!): Tag!
deleteTag(id: ID!): ID! deleteTag(id: ID!): ID!
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]! addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]! removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
@@ -210,7 +253,7 @@ type Mutation {
} }
type IntRangeOutput { from: Int!, to: Int! } type IntRangeOutput { from: Int!, to: Int! }
type TimeRangeOutput { from: Time!, to: Time! } type TimeRangeOutput { range: String, from: Time!, to: Time! }
input JobFilter { input JobFilter {
tags: [ID!] tags: [ID!]
@@ -222,6 +265,7 @@ input JobFilter {
cluster: StringInput cluster: StringInput
partition: StringInput partition: StringInput
duration: IntRange duration: IntRange
energy: FloatRange
minRunningFor: Int minRunningFor: Int
@@ -231,20 +275,14 @@ input JobFilter {
startTime: TimeRange startTime: TimeRange
state: [JobState!] state: [JobState!]
flopsAnyAvg: FloatRange metricStats: [MetricStatItem!]
memBwAvg: FloatRange
loadAvg: FloatRange
memUsedMax: FloatRange
exclusive: Int exclusive: Int
sharedNode: StringInput node: StringInput
selfJobId: StringInput
selfStartTime: Time
selfDuration: Int
} }
input OrderByInput { input OrderByInput {
field: String! field: String!
type: String!,
order: SortDirectionEnum! = ASC order: SortDirectionEnum! = ASC
} }
@@ -262,18 +300,24 @@ input StringInput {
in: [String!] in: [String!]
} }
input IntRange { from: Int!, to: Int! } input IntRange { from: Int!, to: Int! }
input FloatRange { from: Float!, to: Float! } input TimeRange { range: String, from: Time, to: Time }
input TimeRange { from: Time, to: Time }
input FloatRange {
from: Float!
to: Float!
}
type JobResultList { type JobResultList {
items: [Job!]! items: [Job!]!
offset: Int offset: Int
limit: Int limit: Int
count: Int count: Int
hasNextPage: Boolean
} }
type JobLinkResultList { type JobLinkResultList {
listQuery: String
items: [JobLink!]! items: [JobLink!]!
count: Int count: Int
} }
@@ -283,6 +327,20 @@ type HistoPoint {
value: Int! value: Int!
} }
type MetricHistoPoints {
metric: String!
unit: String!
stat: String
data: [MetricHistoPoint!]
}
type MetricHistoPoint {
bin: Int
count: Int!
min: Int
max: Int
}
type JobsStatistics { type JobsStatistics {
id: ID! # If `groupBy` was used, ID of the user/project/cluster id: ID! # If `groupBy` was used, ID of the user/project/cluster
name: String! # if User-Statistics: Given Name of Account (ID) Owner name: String! # if User-Statistics: Given Name of Account (ID) Owner
@@ -290,11 +348,17 @@ type JobsStatistics {
runningJobs: Int! # Number of running jobs runningJobs: Int! # Number of running jobs
shortJobs: Int! # Number of jobs with a duration of less than duration shortJobs: Int! # Number of jobs with a duration of less than duration
totalWalltime: Int! # Sum of the duration of all matched jobs in hours totalWalltime: Int! # Sum of the duration of all matched jobs in hours
totalNodes: Int! # Sum of the nodes of all matched jobs
totalNodeHours: Int! # Sum of the node hours of all matched jobs totalNodeHours: Int! # Sum of the node hours of all matched jobs
totalCores: Int! # Sum of the cores of all matched jobs
totalCoreHours: Int! # Sum of the core hours of all matched jobs totalCoreHours: Int! # Sum of the core hours of all matched jobs
totalAccs: Int! # Sum of the accs of all matched jobs
totalAccHours: Int! # Sum of the gpu hours of all matched jobs totalAccHours: Int! # Sum of the gpu hours of all matched jobs
histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value histDuration: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
histNumCores: [HistoPoint!]! # value: number of cores, count: number of jobs with that number of cores
histNumAccs: [HistoPoint!]! # value: number of accs, count: number of jobs with that number of accs
histMetrics: [MetricHistoPoints!]! # metric: metricname, data array of histopoints: value: metric average bin, count: number of jobs with that metric average
} }
input PageRequest { input PageRequest {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,17 +0,0 @@
CC_USER=clustercockpit
CC_GROUP=clustercockpit
CC_HOME=/tmp
LOG_DIR=/var/log
DATA_DIR=/var/run/cc-backend
MAX_OPEN_FILES=10000
CONF_DIR=/etc/cc-backend
CONF_FILE=/etc/cc-backend/cc-backend.json
RESTART_ON_UPGRADE=true

View File

@@ -1,12 +0,0 @@
Package: cc-backend
Version: {VERSION}
Installed-Size: {INSTALLED_SIZE}
Architecture: {ARCH}
Maintainer: thomas.gruber@fau.de
Depends: libc6 (>= 2.2.1)
Build-Depends: debhelper-compat (= 13), git, golang-go, npm, yarn
Description: ClusterCockpit backend and web frontend
Homepage: https://github.com/ClusterCockpit/cc-backend
Source: cc-backend
Rules-Requires-Root: no

View File

@@ -1,18 +0,0 @@
[Unit]
Description=ClusterCockpit backend and web frontend (cc-backend)
Documentation=https://github.com/ClusterCockpit/cc-backend
Wants=network-online.target
After=network-online.target
[Service]
EnvironmentFile=/etc/default/cc-backend
Type=simple
User=clustercockpit
Group=clustercockpit
Restart=on-failure
TimeoutStopSec=100
LimitNOFILE=infinity
ExecStart=/usr/bin/cc-backend --config ${CONF_FILE}
[Install]
WantedBy=multi-user.target

View File

@@ -1,70 +0,0 @@
Name: cc-backend
Version: %{VERS}
Release: 1%{?dist}
Summary: ClusterCockpit backend and web frontend
License: MIT
Source0: %{name}-%{version}.tar.gz
#BuildRequires: go-toolset
#BuildRequires: systemd-rpm-macros
#BuildRequires: npm
Provides: %{name} = %{version}
%description
ClusterCockpit backend and web frontend
%global debug_package %{nil}
%prep
%autosetup
%build
#CURRENT_TIME=$(date +%Y-%m-%d:T%H:%M:\%S)
#LD_FLAGS="-s -X main.buildTime=${CURRENT_TIME} -X main.version=%{VERS}"
mkdir ./var
touch ./var/job.db
cd web/frontend && yarn install && yarn build && cd -
go build -ldflags="-s -X main.version=%{VERS}" ./cmd/cc-backend
%install
# Install cc-backend
#make PREFIX=%{buildroot} install
install -Dpm 755 cc-backend %{buildroot}/%{_bindir}/%{name}
install -Dpm 0600 configs/config.json %{buildroot}%{_sysconfdir}/%{name}/%{name}.json
# Integrate into system
install -Dpm 0644 build/package/%{name}.service %{buildroot}%{_unitdir}/%{name}.service
install -Dpm 0600 build/package/%{name}.config %{buildroot}%{_sysconfdir}/default/%{name}
install -Dpm 0644 build/package/%{name}.sysusers %{buildroot}%{_sysusersdir}/%{name}.conf
%check
# go test should be here... :)
%pre
%sysusers_create_package scripts/%{name}.sysusers
%post
%systemd_post %{name}.service
%preun
%systemd_preun %{name}.service
%files
# Binary
%attr(-,clustercockpit,clustercockpit) %{_bindir}/%{name}
# Config
%dir %{_sysconfdir}/%{name}
%attr(0600,clustercockpit,clustercockpit) %config(noreplace) %{_sysconfdir}/%{name}/%{name}.json
# Systemd
%{_unitdir}/%{name}.service
%{_sysconfdir}/default/%{name}
%{_sysusersdir}/%{name}.conf
%changelog
* Mon Mar 07 2022 Thomas Gruber - 0.1
- Initial metric store implementation

View File

@@ -1,2 +0,0 @@
#Type Name ID GECOS Home directory Shell
u clustercockpit - "User for ClusterCockpit" /run/cc-backend /sbin/nologin

33
cmd/cc-backend/cli.go Normal file
View File

@@ -0,0 +1,33 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import "flag"
var (
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB, flagForceDB, flagDev, flagVersion, flagLogDateTime bool
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
)
func cliInit() {
flag.BoolVar(&flagInit, "init", false, "Setup var directory, initialize swlite database file, config.json and .env")
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'hpc_user' table with ldap")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
flag.BoolVar(&flagRevertDB, "revert-db", false, "Migrate database to previous version and exit")
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
flag.Parse()
}

85
cmd/cc-backend/init.go Normal file
View File

@@ -0,0 +1,85 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"fmt"
"os"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
const envString = `
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using the gen-keypair tool
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)
SESSION_KEY="67d829bf61dc5f87a73fd814e2c9f629"
`
const configString = `
{
"addr": "127.0.0.1:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2000h"
},
"clusters": [
{
"name": "name",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2023-01-01T00:00:00Z",
"to": null
}
}
}
]
}
`
func initEnv() {
if util.CheckFileExists("var") {
fmt.Print("Directory ./var already exists. Exiting!\n")
os.Exit(0)
}
if err := os.WriteFile("config.json", []byte(configString), 0o666); err != nil {
log.Fatalf("Writing config.json failed: %s", err.Error())
}
if err := os.WriteFile(".env", []byte(envString), 0o666); err != nil {
log.Fatalf("Writing .env failed: %s", err.Error())
}
if err := os.Mkdir("var", 0o777); err != nil {
log.Fatalf("Mkdir var failed: %s", err.Error())
}
err := repository.MigrateDB("sqlite3", "./var/job.db")
if err != nil {
log.Fatalf("Initialize job.db failed: %s", err.Error())
}
}

View File

@@ -1,93 +1,58 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package main package main
import ( import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"flag"
"fmt" "fmt"
"io"
"net"
"net/http"
"os" "os"
"os/signal" "os/signal"
"runtime"
"runtime/debug" "runtime/debug"
"strings" "strings"
"sync" "sync"
"syscall" "syscall"
"time"
"github.com/99designs/gqlgen/graphql/handler" "github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/99designs/gqlgen/graphql/playground"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/importer" "github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig" "github.com/ClusterCockpit/cc-backend/internal/taskManager"
"github.com/ClusterCockpit/cc-backend/internal/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/web"
"github.com/go-co-op/gocron"
"github.com/google/gops/agent" "github.com/google/gops/agent"
"github.com/gorilla/handlers"
"github.com/gorilla/mux"
httpSwagger "github.com/swaggo/http-swagger"
_ "github.com/go-sql-driver/mysql" _ "github.com/go-sql-driver/mysql"
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"
) )
const logoString = ` const logoString = `
____ _ _ ____ _ _ _ _____ _ _ ____ _ _ _
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_ / ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __| | | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_ | |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__| \_____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|_| |_|
` `
var ( var (
buildTime string date string
hash string commit string
version string version string
) )
func main() { func main() {
var flagReinitDB, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagDev, flagVersion, flagLogDateTime bool cliInit()
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
flag.BoolVar(&flagMigrateDB, "migrate-db", false, "Migrate database to supported version and exit")
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,manager,api,user]:<password>`")
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
flag.StringVar(&flagLogLevel, "loglevel", "warn", "Sets the logging level: `[debug,info,warn (default),err,fatal,crit]`")
flag.Parse()
if flagVersion { if flagVersion {
fmt.Print(logoString) fmt.Print(logoString)
fmt.Printf("Version:\t%s\n", version) fmt.Printf("Version:\t%s\n", version)
fmt.Printf("Git hash:\t%s\n", hash) fmt.Printf("Git hash:\t%s\n", commit)
fmt.Printf("Build time:\t%s\n", buildTime) fmt.Printf("Build time:\t%s\n", date)
fmt.Printf("SQL db version:\t%d\n", repository.Version) fmt.Printf("SQL db version:\t%d\n", repository.Version)
fmt.Printf("Job archive version:\t%d\n", archive.Version) fmt.Printf("Job archive version:\t%d\n", archive.Version)
os.Exit(0) os.Exit(0)
@@ -127,22 +92,35 @@ func main() {
os.Exit(0) os.Exit(0)
} }
if flagRevertDB {
err := repository.RevertDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
log.Fatal(err)
}
os.Exit(0)
}
if flagForceDB {
err := repository.ForceDB(config.Keys.DBDriver, config.Keys.DB)
if err != nil {
log.Fatal(err)
}
os.Exit(0)
}
repository.Connect(config.Keys.DBDriver, config.Keys.DB) repository.Connect(config.Keys.DBDriver, config.Keys.DB)
db := repository.GetConnection()
var authentication *auth.Authentication if flagInit {
initEnv()
fmt.Print("Succesfully setup environment!\n")
fmt.Print("Please review config.json and .env and adjust it to your needs.\n")
fmt.Print("Add your job-archive at ./var/job-archive.\n")
os.Exit(0)
}
if !config.Keys.DisableAuthentication { if !config.Keys.DisableAuthentication {
var err error
if authentication, err = auth.Init(db.DB, map[string]interface{}{
"ldap": config.Keys.LdapConfig,
"jwt": config.Keys.JwtConfig,
}); err != nil {
log.Fatalf("auth initialization failed: %v", err)
}
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil { auth.Init()
authentication.SessionMaxAge = d
}
if flagNewUser != "" { if flagNewUser != "" {
parts := strings.SplitN(flagNewUser, ":", 3) parts := strings.SplitN(flagNewUser, ":", 3)
@@ -150,46 +128,52 @@ func main() {
log.Fatal("invalid argument format for user creation") log.Fatal("invalid argument format for user creation")
} }
if err := authentication.AddUser(&auth.User{ ur := repository.GetUserRepository()
if err := ur.AddUser(&schema.User{
Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","), Username: parts[0], Projects: make([]string, 0), Password: parts[2], Roles: strings.Split(parts[1], ","),
}); err != nil { }); err != nil {
log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err) log.Fatalf("adding '%s' user authentication failed: %v", parts[0], err)
} }
} }
if flagDelUser != "" { if flagDelUser != "" {
if err := authentication.DelUser(flagDelUser); err != nil { ur := repository.GetUserRepository()
if err := ur.DelUser(flagDelUser); err != nil {
log.Fatalf("deleting user failed: %v", err) log.Fatalf("deleting user failed: %v", err)
} }
} }
authHandle := auth.GetAuthInstance()
if flagSyncLDAP { if flagSyncLDAP {
if authentication.LdapAuth == nil { if authHandle.LdapAuth == nil {
log.Fatal("cannot sync: LDAP authentication is not configured") log.Fatal("cannot sync: LDAP authentication is not configured")
} }
if err := authentication.LdapAuth.Sync(); err != nil { if err := authHandle.LdapAuth.Sync(); err != nil {
log.Fatalf("LDAP sync failed: %v", err) log.Fatalf("LDAP sync failed: %v", err)
} }
log.Info("LDAP sync successfull") log.Info("LDAP sync successfull")
} }
if flagGenJWT != "" { if flagGenJWT != "" {
user, err := authentication.GetUser(flagGenJWT) ur := repository.GetUserRepository()
user, err := ur.GetUser(flagGenJWT)
if err != nil { if err != nil {
log.Fatalf("could not get user from JWT: %v", err) log.Fatalf("could not get user from JWT: %v", err)
} }
if !user.HasRole(auth.RoleApi) { if !user.HasRole(schema.RoleApi) {
log.Warnf("user '%s' does not have the API role", user.Username) log.Warnf("user '%s' does not have the API role", user.Username)
} }
jwt, err := authentication.JwtAuth.ProvideJWT(user) jwt, err := authHandle.JwtAuth.ProvideJWT(user)
if err != nil { if err != nil {
log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err) log.Fatalf("failed to provide JWT to user '%s': %v", user.Username, err)
} }
fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt) fmt.Printf("MAIN > JWT for '%s': %s\n", user.Username, jwt)
} }
} else if flagNewUser != "" || flagDelUser != "" { } else if flagNewUser != "" || flagDelUser != "" {
log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled") log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
} }
@@ -198,7 +182,7 @@ func main() {
log.Fatalf("failed to initialize archive: %s", err.Error()) log.Fatalf("failed to initialize archive: %s", err.Error())
} }
if err := metricdata.Init(config.Keys.DisableArchive); err != nil { if err := metricdata.Init(); err != nil {
log.Fatalf("failed to initialize metricdata repository: %s", err.Error()) log.Fatalf("failed to initialize metricdata repository: %s", err.Error())
} }
@@ -218,190 +202,16 @@ func main() {
return return
} }
// Setup the http.Handler/Router used by the server archiver.Start(repository.GetJobRepository())
jobRepo := repository.GetJobRepository() taskManager.Start()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo} serverInit()
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
if os.Getenv("DEBUG") != "1" {
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
// The problem with this is that then, no more stacktrace is printed to stderr.
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
switch e := err.(type) {
case string:
return fmt.Errorf("MAIN > Panic: %s", e)
case error:
return fmt.Errorf("MAIN > Panic caused by: %w", e)
}
return errors.New("MAIN > Internal server error (panic)")
})
}
api := &api.RestApi{
JobRepository: jobRepo,
Resolver: resolver,
MachineStateDir: config.Keys.MachineStateDir,
Authentication: authentication,
}
r := mux.NewRouter()
buildInfo := web.Build{Version: version, Hash: hash, Buildtime: buildTime}
r.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo})
}).Methods(http.MethodGet)
r.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, r, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
})
r.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, r, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
})
// Some routes, such as /login or /query, should only be accessible to a user that is logged in.
// Those should be mounted to this subrouter. If authentication is enabled, a middleware will prevent
// any unauthenticated accesses.
secured := r.PathPrefix("/").Subrouter()
if !config.Keys.DisableAuthentication {
r.Handle("/login", authentication.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
Error: err.Error(),
Build: buildInfo,
})
})).Methods(http.MethodPost)
r.Handle("/logout", authentication.Logout(http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusOK)
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{
Title: "Bye - ClusterCockpit",
Info: "Logout sucessful",
Build: buildInfo,
})
}))).Methods(http.MethodPost)
secured.Use(func(next http.Handler) http.Handler {
return authentication.Auth(
// On success;
next,
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, r, "login.tmpl", &web.Page{
Title: "Authentication failed - ClusterCockpit",
Error: err.Error(),
Build: buildInfo,
})
})
})
}
if flagDev {
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
}
secured.Handle("/query", graphQLEndpoint)
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
routerConfig.HandleSearchBar(rw, r, api)
})
// Mount all /monitoring/... and /api/... routes.
routerConfig.SetupRoutes(secured, version, hash, buildTime)
api.MountRoutes(secured)
if config.Keys.EmbedStaticFiles {
r.PathPrefix("/").Handler(web.ServeFiles())
} else {
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
}
r.Use(handlers.CompressHandler)
r.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
r.Use(handlers.CORS(
handlers.AllowCredentials(),
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
handlers.AllowedOrigins([]string{"*"})))
handler := handlers.CustomLoggingHandler(io.Discard, r, func(_ io.Writer, params handlers.LogFormatterParams) {
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
log.Infof("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
} else {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
}
})
var wg sync.WaitGroup var wg sync.WaitGroup
server := http.Server{
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
Handler: handler,
Addr: config.Keys.Addr,
}
// Start http or https server
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
log.Fatalf("starting http listener failed: %v", err)
}
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
go func() {
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
}()
}
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
if err != nil {
log.Fatalf("loading X509 keypair failed: %v", err)
}
listener = tls.NewListener(listener, &tls.Config{
Certificates: []tls.Certificate{cert},
CipherSuites: []uint16{
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
},
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
})
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
} else {
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
}
// Because this program will want to bind to a privileged port (like 80), the listener must
// be established first, then the user can be changed, and after that,
// the actual http server can be started.
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
log.Fatalf("error while preparing server start: %s", err.Error())
}
wg.Add(1) wg.Add(1)
go func() { go func() {
defer wg.Done() defer wg.Done()
if err := server.Serve(listener); err != nil && err != http.ErrServerClosed { serverStart()
log.Fatalf("starting server failed: %v", err)
}
}() }()
wg.Add(1) wg.Add(1)
@@ -412,117 +222,15 @@ func main() {
<-sigs <-sigs
runtimeEnv.SystemdNotifiy(false, "Shutting down ...") runtimeEnv.SystemdNotifiy(false, "Shutting down ...")
// First shut down the server gracefully (waiting for all ongoing requests) serverShutdown()
server.Shutdown(context.Background())
// Then, wait for any async archivings still pending... taskManager.Shutdown()
api.JobRepository.WaitForArchiving()
}() }()
s := gocron.NewScheduler(time.Local)
if config.Keys.StopJobsExceedingWalltime > 0 {
log.Info("Register undead jobs service")
s.Every(1).Day().At("3:00").Do(func() {
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
if err != nil {
log.Warnf("Error while looking for jobs exceeding their walltime: %s", err.Error())
}
runtime.GC()
})
}
var cfg struct {
Compression int `json:"compression"`
Retention schema.Retention `json:"retention"`
}
cfg.Retention.IncludeDB = true
if err := json.Unmarshal(config.Keys.Archive, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
}
switch cfg.Retention.Policy {
case "delete":
log.Info("Register retention delete service")
s.Every(1).Day().At("4:00").Do(func() {
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().CleanUp(jobs)
if cfg.Retention.IncludeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %s", err.Error())
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %s", err.Error())
}
}
})
case "move":
log.Info("Register retention move service")
s.Every(1).Day().At("4:00").Do(func() {
startTime := time.Now().Unix() - int64(cfg.Retention.Age*24*3600)
jobs, err := jobRepo.FindJobsBetween(0, startTime)
if err != nil {
log.Warnf("Error while looking for retention jobs: %s", err.Error())
}
archive.GetHandle().Move(jobs, cfg.Retention.Location)
if cfg.Retention.IncludeDB {
cnt, err := jobRepo.DeleteJobsBefore(startTime)
if err != nil {
log.Errorf("Error while deleting retention jobs from db: %v", err)
} else {
log.Infof("Retention: Removed %d jobs from db", cnt)
}
if err = jobRepo.Optimize(); err != nil {
log.Errorf("Error occured in db optimization: %v", err)
}
}
})
}
if cfg.Compression > 0 {
log.Info("Register compression service")
s.Every(1).Day().At("5:00").Do(func() {
var jobs []*schema.Job
ar := archive.GetHandle()
startTime := time.Now().Unix() - int64(cfg.Compression*24*3600)
lastTime := ar.CompressLast(startTime)
if startTime == lastTime {
log.Info("Compression Service - Complete archive run")
jobs, err = jobRepo.FindJobsBetween(0, startTime)
} else {
jobs, err = jobRepo.FindJobsBetween(lastTime, startTime)
}
if err != nil {
log.Warnf("Error while looking for retention jobs: %v", err)
}
ar.Compress(jobs)
})
}
s.StartAsync()
if os.Getenv("GOGC") == "" { if os.Getenv("GOGC") == "" {
debug.SetGCPercent(25) debug.SetGCPercent(25)
} }
runtimeEnv.SystemdNotifiy(true, "running") runtimeEnv.SystemdNotifiy(true, "running")
wg.Wait() wg.Wait()
log.Print("Gracefull shutdown completed!") log.Print("Graceful shutdown completed!")
} }

322
cmd/cc-backend/server.go Normal file
View File

@@ -0,0 +1,322 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"strings"
"time"
"github.com/99designs/gqlgen/graphql/handler"
"github.com/99designs/gqlgen/graphql/playground"
"github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/web"
"github.com/gorilla/handlers"
"github.com/gorilla/mux"
httpSwagger "github.com/swaggo/http-swagger"
)
var (
router *mux.Router
server *http.Server
apiHandle *api.RestApi
)
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusUnauthorized)
json.NewEncoder(rw).Encode(map[string]string{
"status": http.StatusText(http.StatusUnauthorized),
"error": err.Error(),
})
}
func serverInit() {
// Setup the http.Handler/Router used by the server
graph.Init()
resolver := graph.GetResolverInstance()
graphQLEndpoint := handler.NewDefaultServer(
generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
if os.Getenv("DEBUG") != "1" {
// Having this handler means that a error message is returned via GraphQL instead of the connection simply beeing closed.
// The problem with this is that then, no more stacktrace is printed to stderr.
graphQLEndpoint.SetRecoverFunc(func(ctx context.Context, err interface{}) error {
switch e := err.(type) {
case string:
return fmt.Errorf("MAIN > Panic: %s", e)
case error:
return fmt.Errorf("MAIN > Panic caused by: %w", e)
}
return errors.New("MAIN > Internal server error (panic)")
})
}
authHandle := auth.GetAuthInstance()
apiHandle = api.New()
router = mux.NewRouter()
buildInfo := web.Build{Version: version, Hash: commit, Buildtime: date}
info := map[string]interface{}{}
info["hasOpenIDConnect"] = false
if config.Keys.OpenIDConfig != nil {
openIDConnect := auth.NewOIDC(authHandle)
openIDConnect.RegisterEndpoints(router)
info["hasOpenIDConnect"] = true
}
router.HandleFunc("/login", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
log.Debugf("##%v##", info)
web.RenderTemplate(rw, "login.tmpl", &web.Page{Title: "Login", Build: buildInfo, Infos: info})
}).Methods(http.MethodGet)
router.HandleFunc("/imprint", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "imprint.tmpl", &web.Page{Title: "Imprint", Build: buildInfo})
})
router.HandleFunc("/privacy", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
web.RenderTemplate(rw, "privacy.tmpl", &web.Page{Title: "Privacy", Build: buildInfo})
})
secured := router.PathPrefix("/").Subrouter()
securedapi := router.PathPrefix("/api").Subrouter()
userapi := router.PathPrefix("/userapi").Subrouter()
configapi := router.PathPrefix("/config").Subrouter()
frontendapi := router.PathPrefix("/frontend").Subrouter()
if !config.Keys.DisableAuthentication {
router.Handle("/login", authHandle.Login(
// On success: Handled within Login()
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
})).Methods(http.MethodPost)
router.Handle("/jwt-login", authHandle.Login(
// On success: Handled within Login()
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Login failed - ClusterCockpit",
MsgType: "alert-warning",
Message: err.Error(),
Build: buildInfo,
Infos: info,
})
}))
router.Handle("/logout", authHandle.Logout(
http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Add("Content-Type", "text/html; charset=utf-8")
rw.WriteHeader(http.StatusOK)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Bye - ClusterCockpit",
MsgType: "alert-info",
Message: "Logout successful",
Build: buildInfo,
Infos: info,
})
}))).Methods(http.MethodPost)
secured.Use(func(next http.Handler) http.Handler {
return authHandle.Auth(
// On success;
next,
// On failure:
func(rw http.ResponseWriter, r *http.Request, err error) {
rw.WriteHeader(http.StatusUnauthorized)
web.RenderTemplate(rw, "login.tmpl", &web.Page{
Title: "Authentication failed - ClusterCockpit",
MsgType: "alert-danger",
Message: err.Error(),
Build: buildInfo,
Infos: info,
Redirect: r.RequestURI,
})
})
})
securedapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
userapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthUserApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
configapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthConfigApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
frontendapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthFrontendApi(
// On success;
next,
// On failure: JSON Response
onFailureResponse)
})
}
if flagDev {
router.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
router.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
httpSwagger.URL("http://" + config.Keys.Addr + "/swagger/doc.json"))).Methods(http.MethodGet)
}
secured.Handle("/query", graphQLEndpoint)
// Send a searchId and then reply with a redirect to a user, or directly send query to job table for jobid and project.
secured.HandleFunc("/search", func(rw http.ResponseWriter, r *http.Request) {
routerConfig.HandleSearchBar(rw, r, buildInfo)
})
// Mount all /monitoring/... and /api/... routes.
routerConfig.SetupRoutes(secured, buildInfo)
apiHandle.MountApiRoutes(securedapi)
apiHandle.MountUserApiRoutes(userapi)
apiHandle.MountConfigApiRoutes(configapi)
apiHandle.MountFrontendApiRoutes(frontendapi)
if config.Keys.EmbedStaticFiles {
if i, err := os.Stat("./var/img"); err == nil {
if i.IsDir() {
log.Info("Use local directory for static images")
router.PathPrefix("/img/").Handler(http.StripPrefix("/img/", http.FileServer(http.Dir("./var/img"))))
}
}
router.PathPrefix("/").Handler(web.ServeFiles())
} else {
router.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
}
router.Use(handlers.CompressHandler)
router.Use(handlers.RecoveryHandler(handlers.PrintRecoveryStack(true)))
router.Use(handlers.CORS(
handlers.AllowCredentials(),
handlers.AllowedHeaders([]string{"X-Requested-With", "Content-Type", "Authorization", "Origin"}),
handlers.AllowedMethods([]string{"GET", "POST", "HEAD", "OPTIONS"}),
handlers.AllowedOrigins([]string{"*"})))
}
func serverStart() {
handler := handlers.CustomLoggingHandler(io.Discard, router, func(_ io.Writer, params handlers.LogFormatterParams) {
if strings.HasPrefix(params.Request.RequestURI, "/api/") {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
} else {
log.Debugf("%s %s (%d, %.02fkb, %dms)",
params.Request.Method, params.URL.RequestURI(),
params.StatusCode, float32(params.Size)/1024,
time.Since(params.TimeStamp).Milliseconds())
}
})
server = &http.Server{
ReadTimeout: 20 * time.Second,
WriteTimeout: 20 * time.Second,
Handler: handler,
Addr: config.Keys.Addr,
}
// Start http or https server
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
log.Fatalf("starting http listener failed: %v", err)
}
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
go func() {
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
}()
}
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(
config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
if err != nil {
log.Fatalf("loading X509 keypair failed: %v", err)
}
listener = tls.NewListener(listener, &tls.Config{
Certificates: []tls.Certificate{cert},
CipherSuites: []uint16{
tls.TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
tls.TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
},
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
})
fmt.Printf("HTTPS server listening at %s...", config.Keys.Addr)
} else {
fmt.Printf("HTTP server listening at %s...", config.Keys.Addr)
}
//
// Because this program will want to bind to a privileged port (like 80), the listener must
// be established first, then the user can be changed, and after that,
// the actual http server can be started.
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
log.Fatalf("error while preparing server start: %s", err.Error())
}
if err = server.Serve(listener); err != nil && err != http.ErrServerClosed {
log.Fatalf("starting server failed: %v", err)
}
}
func serverShutdown() {
// First shut down the server gracefully (waiting for all ongoing requests)
server.Shutdown(context.Background())
// Then, wait for any async jobStarts still pending...
repository.WaitForJobStart()
// Then, wait for any async archivings still pending...
archiver.WaitForArchiving()
}

View File

@@ -1,76 +0,0 @@
## Intro
cc-backend requires a configuration file specifying the cluster systems to be used. Still many default
options documented below are used. cc-backend tries to load a config.json from the working directory per default.
To overwrite the default specify a json config file location using the command line option `--config <filepath>`.
All security relevant configuration. e.g., keys and passwords, are set using environment variables.
It is supported to specify these by means of an `.env` file located in the project root.
## Configuration Options
* `addr`: Type string. Address where the http (or https) server will listen on (for example: 'localhost:80'). Default `:8080`.
* `user`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
* `group`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
* `disable-authentication`: Type bool. Disable authentication (for everything: API, Web-UI, ...). Default `false`.
* `embed-static-files`: Type bool. If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not. Default `true`.
* `static-files`: Type string. Folder where static assets can be found, if `embed-static-files` is `false`. No default.
* `db-driver`: Type string. 'sqlite3' or 'mysql' (mysql will work for mariadb as well). Default `sqlite3`.
* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
* `job-archive`: Type string. Path to the job-archive. Default: `./var/job-archive`.
* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
* `validate`: Type bool. Validate all input json documents against json schema.
* `"session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
* `"jwt-max-age`: Type string. Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `0`.
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
* `redirect-http-to`: Type string. If not the empty string and `addr` does not end in ":80", redirect every request incoming at port 80 to that url.
* `machine-state-dir`: Type string. Where to store MachineState files. TODO: Explain in more detail!
* `"stop-jobs-exceeding-walltime`: Type int. If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job. Default `0`.
* `short-running-jobs-duration`: Type int. Do not show running jobs shorter than X seconds. Default `300`.
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
- `url`: Type string. URL of LDAP directory server.
- `user_base`: Type string. Base DN of user tree root.
- `search_dn`: Type string. DN for authenticating LDAP admin account with general read rights.
- `user_bind`: Type string. Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
- `user_filter`: Type string. Filter to extract users for syncing.
- `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.
- `sync_del_old_users`: Type bool. Delete obsolete users in database.
* `clusters`: Type array of objects
- `name`: Type string. The name of the cluster.
- `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string)
- `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example:
```
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
```
* `ui-defaults`: Type object. Default configuration for ui views. If overwritten, all options must be provided! Most options can be overwritten by the user via the web interface.
- `analysis_view_histogramMetrics`: Type string array. Metrics to show as job count histograms in analysis view. Default `["flops_any", "mem_bw", "mem_used"]`.
- `analysis_view_scatterPlotMetrics`: Type array of string array. Initial
scatter plot configuration in analysis view. Default `[["flops_any", "mem_bw"], ["flops_any", "cpu_load"], ["cpu_load", "mem_bw"]]`.
- `job_view_nodestats_selectedMetrics`: Type string array. Initial metrics shown in node statistics table of single job view. Default `["flops_any", "mem_bw", "mem_used"]`.
- `job_view_polarPlotMetrics`: Type string array. Metrics shown in polar plot of single job view. Default `["flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"]`.
- `job_view_selectedMetrics`: Type string array. Default `["flops_any", "mem_bw", "mem_used"]`.
- `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`.
- `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`.
- `plot_general_lineWidth`: Type int. Initial linewidth. Default `3`.
- `plot_list_jobsPerPage`: Type int. Jobs shown per page in job lists. Default `50`.
- `plot_list_selectedMetrics`: Type string array. Initial metric plots shown in jobs lists. Default `"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"`.
- `plot_view_plotsPerRow`: Type int. Number of plots per row in single job view. Default `3`.
- `plot_view_showPolarplot`: Type bool. Option to toggle polar plot in single job view. Default `true`.
- `plot_view_showRoofline`: Type bool. Option to toggle roofline plot in single job view. Default `true`.
- `plot_view_showStatTable`: Type bool. Option to toggle the node statistic table in single job view. Default `true`.
- `system_view_selectedMetric`: Type string. Initial metric shown in system view. Default `cpu_load`.
Some of the `ui-defaults` values can be appended by `:<clustername>` in order to have different settings depending on the current cluster. Those are notably `job_view_nodestats_selectedMetrics`, `job_view_polarPlotMetrics`, `job_view_selectedMetrics` and `plot_list_selectedMetrics`.
## Environment Variables
An example env file is found in this directory. Copy it to `.env` in the project root and adapt it for your needs.
* `JWT_PUBLIC_KEY` and `JWT_PRIVATE_KEY`: Base64 encoded Ed25519 keys used for JSON Web Token (JWT) authentication. You can generate your own keypair using `go run ./cmd/gen-keypair/gen-keypair.go`. More information in [README_TOKENS.md](./README_TOKENS.md).
* `SESSION_KEY`: Some random bytes used as secret for cookie-based sessions.
* `LDAP_ADMIN_PASSWORD`: The LDAP admin user password (optional).
* `CROSS_LOGIN_JWT_HS512_KEY`: Used for token based logins via another authentication service.
* `LOGLEVEL`: Can be `err`, `warn`, `info` or `debug` (optional, `debug` by default). Can be used to reduce logging.

View File

@@ -1,51 +0,0 @@
## Introduction
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs.
JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.
This information can be verified and trusted because it is digitally signed.
In ClusterCockpit JWTs are signed using a public/private key pair using ECDSA.
Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it.
Expiration of the generated tokens as well as the max. length of a browser session can be configured in the `config.json` file described [here](./README.md).
The [Ed25519](https://ed25519.cr.yp.to/) algorithm for signatures was used because it is compatible with other tools that require authentication, such as NATS.io, and because these elliptic-curve methods provide simillar security with smaller keys compared to something like RSA. They are sligthly more expensive to validate, but that effect is negligible.
## JWT Payload
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
Currently ClusterCockpit sets the following claims:
* `iat`: Issued at claim. The “iat” claim is used to identify the the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
* `roles`: An array of strings specifying the roles set for the subject.
* `exp`: Expiration date of the token (only if explicitly configured)
It is important to know that JWTs are not encrypted, only signed. This means that outsiders cannot create new JWTs or modify existing ones, but they are able to read out the username.
## Workflow
1. Create a new ECDSA Public/private keypair:
```
$ go build ./cmd/gen-keypair/
$ ./gen-keypair
```
2. Add keypair in your `.env` file. A template can be found in `./configs`.
When a user logs in via the `/login` page using a browser, a session cookie (secured using the random bytes in the `SESSION_KEY` env. variable you shoud change as well) is used for all requests after the successfull login. The JWTs make it easier to use the APIs of ClusterCockpit using scripts or other external programs. The token is specified n the `Authorization` HTTP header using the [Bearer schema](https://datatracker.ietf.org/doc/html/rfc6750) (there is an example below). Tokens can be issued to users from the configuration view in the Web-UI or the command line. In order to use the token for API endpoints such as `/api/jobs/start_job/`, the user that executes it needs to have the `api` role. Regular users can only perform read-only queries and only look at data connected to jobs they started themselves.
## cc-metric-store
The [cc-metric-store](https://github.com/ClusterCockpit/cc-metric-store) also uses JWTs for authentication. As it does not issue new tokens, it does not need to kown the private key. The public key of the keypair that is used to generate the JWTs that grant access to the `cc-metric-store` can be specified in its `config.json`. When configuring the `metricDataRepository` object in the `cluster.json` file, you can put a token issued by ClusterCockpit itself.
## Setup user and JWT token for REST API authorization
1. Create user:
```
$ ./cc-backend --add-user <username>:api:<password> --no-server
```
2. Issue token for user:
```
$ ./cc-backend --jwt <username> --no-server
```
3. Use issued token token on client side:
```
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
```

View File

@@ -1,53 +1,67 @@
{ {
"addr": "127.0.0.1:8080", "addr": "127.0.0.1:8080",
"archive": { "short-running-jobs-duration": 300,
"kind": "file", "archive": {
"path": "./var/job-archive" "kind": "file",
}, "path": "./var/job-archive"
"clusters": [ },
{ "jwts": {
"name": "fritz", "max-age": "2000h"
"metricDataRepository": { },
"kind": "cc-metric-store", "enable-resampling": {
"url": "http://localhost:8082", "trigger": 30,
"token": "" "resolutions": [
}, 600,
"filterRanges": { 300,
"numNodes": { 120,
"from": 1, 60
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
},
{
"name": "alex",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
}
] ]
},
"emission-constant": 317,
"clusters": [
{
"name": "fritz",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
},
{
"name": "alex",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
}
]
} }

View File

@@ -0,0 +1,69 @@
{
"addr": "127.0.0.1:8080",
"short-running-jobs-duration": 300,
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2000h"
},
"db-driver": "mysql",
"db": "clustercockpit:demo@tcp(127.0.0.1:3306)/clustercockpit",
"enable-resampling": {
"trigger": 30,
"resolutions": [
600,
300,
120,
60
]
},
"emission-constant": 317,
"clusters": [
{
"name": "fritz",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
},
{
"name": "alex",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": ""
},
"filterRanges": {
"numNodes": {
"from": 1,
"to": 64
},
"duration": {
"from": 0,
"to": 86400
},
"startTime": {
"from": "2022-01-01T00:00:00Z",
"to": null
}
}
}
]
}

View File

@@ -5,7 +5,7 @@
"user_base": "ou=people,ou=hpc,dc=test,dc=de", "user_base": "ou=people,ou=hpc,dc=test,dc=de",
"search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de", "search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=test,dc=de",
"user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de", "user_bind": "uid={username},ou=people,ou=hpc,dc=test,dc=de",
"user_filter": "(&(objectclass=posixAccount)(uid=*))" "user_filter": "(&(objectclass=posixAccount))"
}, },
"https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem", "https-cert-file": "/etc/letsencrypt/live/url/fullchain.pem",
"https-key-file": "/etc/letsencrypt/live/url/privkey.pem", "https-key-file": "/etc/letsencrypt/live/url/privkey.pem",
@@ -42,9 +42,9 @@
], ],
"jwts": { "jwts": {
"cookieName": "", "cookieName": "",
"forceJWTValidationViaDatabase": false, "validateUser": false,
"max-age": 0, "max-age": "2000h",
"trustedExternalIssuer": "" "trustedIssuer": ""
}, },
"short-running-jobs-duration": 300 "short-running-jobs-duration": 300
} }

View File

@@ -1,10 +1,10 @@
# Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!) # Base64 encoded Ed25519 keys (DO NOT USE THESE TWO IN PRODUCTION!)
# You can generate your own keypair using `go run utils/gen-keypair.go` # You can generate your own keypair using `go run tools/gen-keypair/main.go`
JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0=" JWT_PUBLIC_KEY="kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q==" JWT_PRIVATE_KEY="dtPC/6dWJFKZK7KZ78CvWuynylOmjBFyMsUWArwmodOTN9itjL5POlqdZkcnmpJ0yPm4pRaCrvgFaFAbpyik/Q=="
# Base64 encoded Ed25519 public key for accepting externally generated JWTs # Base64 encoded Ed25519 public key for accepting externally generated JWTs
# Keys in PEM format can be converted, see `tools/convert-pem-pubkey-for-cc/Readme.md` # Keys in PEM format can be converted, see `tools/convert-pem-pubkey/Readme.md`
CROSS_LOGIN_JWT_PUBLIC_KEY="" CROSS_LOGIN_JWT_PUBLIC_KEY=""
# Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION) # Some random bytes used as secret for cookie-based sessions (DO NOT USE THIS ONE IN PRODUCTION)

View File

@@ -117,10 +117,12 @@ foreach my $ln (split("\n", $topo)) {
my $node; my $node;
my @sockets; my @sockets;
my @nodeCores;
foreach my $socket ( @{$DOMAINS{socket}} ) { foreach my $socket ( @{$DOMAINS{socket}} ) {
push @sockets, "[".join(",", @{$socket})."]"; push @sockets, "[".join(",", @{$socket})."]";
$node .= join(",", @{$socket}) push @nodeCores, join(",", @{$socket});
} }
$node = join(",", @nodeCores);
$INFO{sockets} = join(",\n", @sockets); $INFO{sockets} = join(",\n", @sockets);
my @memDomains; my @memDomains;
@@ -212,9 +214,27 @@ print <<"END";
"socketsPerNode": $INFO{socketsPerNode}, "socketsPerNode": $INFO{socketsPerNode},
"coresPerSocket": $INFO{coresPerSocket}, "coresPerSocket": $INFO{coresPerSocket},
"threadsPerCore": $INFO{threadsPerCore}, "threadsPerCore": $INFO{threadsPerCore},
"flopRateScalar": $flopsScalar, "flopRateScalar": {
"flopRateSimd": $flopsSimd, "unit": {
"memoryBandwidth": $memBw, "base": "F/s",
"prefix": "G"
},
"value": $flopsScalar
},
"flopRateSimd": {
"unit": {
"base": "F/s",
"prefix": "G"
},
"value": $flopsSimd
},
"memoryBandwidth": {
"unit": {
"base": "B/s",
"prefix": "G"
},
"value": $memBw
},
"nodes": "<FILL IN NODE RANGES>", "nodes": "<FILL IN NODE RANGES>",
"topology": { "topology": {
"node": [$node], "node": [$node],

View File

@@ -1,37 +0,0 @@
# Release versioning
Releases are numbered with an integer ID, starting with 1.
Each release embeds the following assets in the binary:
* Web front-end with Javascript files and all static assets.
* Golang template files for server-side rendering.
* JSON schema files for validation.
* Database migration files
The remaining external assets are:
* The SQL database used
* The job archive
* The configuration file `config.json`
Both external assets are also versioned with integer IDs.
This means that each release binary is bound to specific versions of the SQL
database and the job archive.
The configuration file is validated against the current schema on startup.
The command line switch `-migrate-db` can be used to upgrade the SQL database
to migrate from a previous version to the latest one.
We offer a separate tool `archive-migration` to migrate an existing job archive
archive from the previous to the latest version.
# Versioning of APIs
cc-backend provides two API backends:
* A REST API for querying jobs
* A GraphQL API for data exchange between web frontend and cc-backend
Both APIs will also be versioned. We still need to decide wether we will also support
older REST API version by versioning the endpoint URLs.
# How to build
Please always build `cc-backend` with the supplied Makefile. This will ensure
that the frontend is also built correctly and that the version in the binary file is coded
in the binary.

View File

@@ -1,239 +0,0 @@
# CC-HANDSON - Setup ClusterCockpit from scratch (w/o docker)
## Prerequisites
* Perl
* Yarn
* Go
* Optional: curl
* Script migrateTimestamp.pl
## Documentation
You find READMEs or api docs in
* ./cc-backend/configs
* ./cc-backend/init
* ./cc-backend/api
## ClusterCockpit configuration files
### cc-backend
* `./.env` Passwords and Tokens set in the environment
* `./config.json` Configuration options for cc-backend
### cc-metric-store
* `./config.json` Optional to overwrite configuration options
### cc-metric-collector
Not yet included in the hands-on setup.
## Setup Components
Start by creating a base folder for all of the following steps.
* `mkdir clustercockpit`
* `cd clustercockpit`
### Setup cc-backend
* Clone Repository
- `git clone https://github.com/ClusterCockpit/cc-backend.git`
- `cd cc-backend`
* Setup Frontend
- `cd ./web/frontend`
- `yarn install`
- `yarn build`
- `cd ../..`
* Build Go Executable
- `go build ./cmd/cc-backend/`
* Activate & Config environment for cc-backend
- `cp configs/env-template.txt .env`
- Optional: Have a look via `vim ./.env`
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
* Back to toplevel `clustercockpit`
- `cd ..`
* Prepare Datafolder and Database file
- `mkdir var`
- `./cc-backend --migrate-db`
### Setup cc-metric-store
* Clone Repository
- `git clone https://github.com/ClusterCockpit/cc-metric-store.git`
- `cd cc-metric-store`
* Build Go Executable
- `go get`
- `go build`
* Prepare Datafolders
- `mkdir -p var/checkpoints`
- `mkdir -p var/archive`
* Update Config
- `vim config.json`
- Exchange existing setting in `metrics` with the following:
```
"clock": { "frequency": 60, "aggregation": null },
"cpi": { "frequency": 60, "aggregation": null },
"cpu_load": { "frequency": 60, "aggregation": null },
"flops_any": { "frequency": 60, "aggregation": null },
"flops_dp": { "frequency": 60, "aggregation": null },
"flops_sp": { "frequency": 60, "aggregation": null },
"ib_bw": { "frequency": 60, "aggregation": null },
"lustre_bw": { "frequency": 60, "aggregation": null },
"mem_bw": { "frequency": 60, "aggregation": null },
"mem_used": { "frequency": 60, "aggregation": null },
"rapl_power": { "frequency": 60, "aggregation": null }
```
* Back to toplevel `clustercockpit`
- `cd ..`
### Setup Demo Data
* `mkdir source-data`
* `cd source-data`
* Download JobArchive-Source:
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz`
- `tar xJf job-archive-dev.tar.xz`
- `mv ./job-archive ./job-archive-source`
- `rm ./job-archive-dev.tar.xz`
* Download CC-Metric-Store Checkpoints:
- `mkdir -p cc-metric-store-source/checkpoints`
- `cd cc-metric-store-source/checkpoints`
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz`
- `tar xf cc-metric-store-checkpoints.tar.xz`
- `rm cc-metric-store-checkpoints.tar.xz`
* Back to `source-data`
- `cd ../..`
* Run timestamp migration script. This may take tens of minutes!
- `cp ../migrateTimestamps.pl .`
- `./migrateTimestamps.pl`
- Expected output:
```
Starting to update start- and stoptimes in job-archive for emmy
Starting to update start- and stoptimes in job-archive for woody
Done for job-archive
Starting to update checkpoint filenames and data starttimes for emmy
Starting to update checkpoint filenames and data starttimes for woody
Done for checkpoints
```
* Copy `cluster.json` files from source to migrated folders
- `cp source-data/job-archive-source/emmy/cluster.json cc-backend/var/job-archive/emmy/`
- `cp source-data/job-archive-source/woody/cluster.json cc-backend/var/job-archive/woody/`
* Initialize Job-Archive in SQLite3 job.db and add demo user
- `cd cc-backend`
- `./cc-backend --init-db --add-user demo:admin:AdminDev`
- Expected output:
```
<6>[INFO] new user "demo" created (roles: ["admin"], auth-source: 0)
<6>[INFO] Building job table...
<6>[INFO] A total of 3936 jobs have been registered in 1.791 seconds.
```
* Back to toplevel `clustercockpit`
- `cd ..`
### Startup both Apps
* In cc-backend root: `$./cc-backend --server --dev`
- Starts Clustercockpit at `http:localhost:8080`
- Log: `<6>[INFO] HTTP server listening at :8080...`
- Use local internet browser to access interface
- You should see and be able to browse finished Jobs
- Metadata is read from SQLite3 database
- Metricdata is read from job-archive/JSON-Files
- Create User in settings (top-right corner)
- Name `apiuser`
- Username `apiuser`
- Role `API`
- Submit & Refresh Page
- Create JTW for `apiuser`
- In Userlist, press `Gen. JTW` for `apiuser`
- Save JWT for later use
* In cc-metric-store root: `$./cc-metric-store`
- Start the cc-metric-store on `http:localhost:8081`, Log:
```
2022/07/15 17:17:42 Loading checkpoints newer than 2022-07-13T17:17:42+02:00
2022/07/15 17:17:45 Checkpoints loaded (5621 files, 319 MB, that took 3.034652s)
2022/07/15 17:17:45 API http endpoint listening on '0.0.0.0:8081'
```
- Does *not* have a graphical interface
- Otpional: Test function by executing:
```
$ curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -D - "http://localhost:8081/api/query" -d "{ \"cluster\": \"emmy\", \"from\": $(expr $(date +%s) - 60), \"to\": $(date +%s), \"queries\": [{
\"metric\": \"flops_any\",
\"host\": \"e1111\"
}] }"
HTTP/1.1 200 OK
Content-Type: application/json
Date: Fri, 15 Jul 2022 13:57:22 GMT
Content-Length: 119
{"results":[[JSON-DATA-ARRAY]]}
```
### Development API web interfaces
The `--dev` flag enables web interfaces to document and test the apis:
* http://localhost:8080/playground - A GraphQL playground. To use it you must have a authenticated session in the same browser.
* http://localhost:8080/swagger - A Swagger UI. To use it you have to be logged out, so no user session in the same browser. Use the JWT token with role Api generate previously to authenticate via http header.
### Use cc-backend API to start job
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
* Click the `/job/start_job` endpoint and click the Try it out button.
* Enter the following json into the request body text area and fill in a recent start timestamp by executing `date +%s`.:
```
{
"jobId": 100000,
"arrayJobId": 0,
"user": "ccdemouser",
"subCluster": "main",
"cluster": "emmy",
"startTime": <date +%s>,
"project": "ccdemoproject",
"resources": [
{"hostname": "e0601"},
{"hostname": "e0823"},
{"hostname": "e0337"},
{"hostname": "e1111"}],
"numNodes": 4,
"numHwthreads": 80,
"walltime": 86400
}
```
* The response body should be the database id of the started job, for example:
```
{
"id": 3937
}
```
* Check in ClusterCockpit
- User `ccdemouser` should appear in Users-Tab with one running job
- It could take up to 5 Minutes until the Job is displayed with some current data (5 Min Short-Job Filter)
- Job then is marked with a green `running` tag
- Metricdata displayed is read from cc-metric-store!
### Use cc-backend API to stop job
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
* Click the `/job/stop_job/{id}` endpoint and click the Try it out button.
* Enter the database id at id that was returned by `start_job` and copy the following into the request body. Replace the timestamp with a recent one:
```
{
"cluster": "emmy",
"jobState": "completed",
"stopTime": <RECENT TS>
}
```
* On success a json document with the job meta data is returned.
* Check in ClusterCockpit
- User `ccdemouser` should appear in Users-Tab with one completed job
- Job is no longer marked with a green `running` tag -> Completed!
- Metricdata displayed is now read from job-archive!
* Check in job-archive
- `cd ./cc-backend/var/job-archive/emmy/100/000`
- `cd $STARTTIME`
- Inspect `meta.json` and `data.json`
## Helper scripts
* In this tarball you can find the perl script `generate_subcluster.pl` that helps to generate the subcluster section for your system.
Usage:
* Log into an exclusive cluster node.
* The LIKWID tools likwid-topology and likwid-bench must be in the PATH!
* `$./generate_subcluster.pl` outputs the subcluster section on `stdout`
Please be aware that
* You have to enter the name and node list for the subCluster manually.
* GPU detection only works if LIKWID was build with Cuda avalable and you run likwid-topology also with Cuda loaded.
* Do not blindly trust the measured peakflops values.
* Because the script blindly relies on the CSV format output by likwid-topology this is a fragile undertaking!

View File

@@ -1,82 +0,0 @@
## Introduction
ClusterCockpit uses JSON Web Tokens (JWT) for authorization of its APIs.
JSON Web Token (JWT) is an open standard (RFC 7519) that defines a compact and self-contained way for securely transmitting information between parties as a JSON object.
This information can be verified and trusted because it is digitally signed.
In ClusterCockpit JWTs are signed using a public/private key pair using ECDSA.
Because tokens are signed using public/private key pairs, the signature also certifies that only the party holding the private key is the one that signed it.
Currently JWT tokens in ClusterCockpit not yet expire.
## JWT Payload
You may view the payload of a JWT token at [https://jwt.io/#debugger-io](https://jwt.io/#debugger-io).
Currently ClusterCockpit sets the following claims:
* `iat`: Issued at claim. The “iat” claim is used to identify the the time at which the JWT was issued. This claim can be used to determine the age of the JWT.
* `sub`: Subject claim. Identifies the subject of the JWT, in our case this is the username.
* `roles`: An array of strings specifying the roles set for the subject.
## Workflow
1. Create a new ECDSA Public/private keypair:
```
$ go build ./tools/gen-keypair.go
$ ./gen-keypair
```
2. Add keypair in your `.env` file. A template can be found in `./configs`.
There are two usage scenarios:
* The APIs are used during a browser session. In this case on login a JWT token is issued on login, that is used by the web frontend to authorize against the GraphQL and REST APIs.
* The REST API is used outside a browser session, e.g. by scripts. In this case you have to issue a token manually. This possible from within the configuration view or on the command line. It is recommended to issue a JWT token in this case for a special user that only has the `api` role. By using different users for different purposes a fine grained access control and access revocation management is possible.
The token is commonly specified in the Authorization HTTP header using the Bearer schema.
## Setup user and JWT token for REST API authorization
1. Create user:
```
$ ./cc-backend --add-user <username>:api:<Password> --no-server
```
2. Issue token for user:
```
$ ./cc-backend -jwt <username> -no-server
```
3. Use issued token token on client side:
```
$ curl -X GET "<API ENDPOINT>" -H "accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer <JWT TOKEN>"
```
## Accept externally generated JWTs provided via cookie
If there is an external service like an AuthAPI that can generate JWTs and hand them over to ClusterCockpit via cookies, CC can be configured to accept them:
1. `.env`: CC needs a public ed25519 key to verify foreign JWT signatures. Public keys in PEM format can be converted with the instructions in [/tools/convert-pem-pubkey-for-cc](../tools/convert-pem-pubkey-for-cc/Readme.md) .
```
CROSS_LOGIN_JWT_PUBLIC_KEY="+51iXX8BdLFocrppRxIw52xCOf8xFSH/eNilN5IHVGc="
```
2. `config.json`: Insert a name for the cookie (set by the external service) containing the JWT so that CC knows where to look at. Define a trusted issuer (JWT claim 'iss'), otherwise it will be rejected.
If you want usernames and user roles from JWTs ('sub' and 'roles' claim) to be validated against CC's internal database, you need to enable it here. Unknown users will then be rejected and roles set via JWT will be ignored.
```json
"jwts": {
"cookieName": "access_cc",
"forceJWTValidationViaDatabase": true,
"trustedExternalIssuer": "auth.example.com"
}
```
3. Make sure your external service includes the same issuer (`iss`) in its JWTs. Example JWT payload:
```json
{
"iat": 1668161471,
"nbf": 1668161471,
"exp": 1668161531,
"sub": "alice",
"roles": [
"user"
],
"jti": "a1b2c3d4-1234-5678-abcd-a1b2c3d4e5f6",
"iss": "auth.example.com"
}
```

View File

@@ -1,78 +0,0 @@
The job archive specifies an exchange format for job meta and performance metric
data. It consists of two parts:
* a [SQLite database schema](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#sqlite-database-schema) for job meta data and performance statistics
* a [Json file format](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#json-file-format) together with a [Directory hierarchy specification](https://github.com/ClusterCockpit/cc-backend/wiki/Job-Archive#directory-hierarchy-specification)
By using an open, portable and simple specification based on files it is
possible to exchange job performance data for research and analysis purposes as
well as use it as a robust way for archiving job performance data to disk.
# SQLite database schema
## Introduction
A SQLite 3 database schema is provided to standardize the job meta data
information in a portable way. The schema also includes optional columns for job
performance statistics (called a job performance footprint). The database acts
as a front end to filter and select subsets of job IDs, that are the keys to get
the full job performance data in the job performance tree hierarchy.
## Database schema
The schema includes 3 tables: the job table, a tag table and a jobtag table
representing the MANY-TO-MANY relation between jobs and tags. The SQL schema is
specified
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/schemas/jobs-sqlite.sql).
Explanation of the various columns including the JSON datatypes is documented
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/job-meta.schema.json).
# Directory hierarchy specification
## Specification
To manage the number of directories within a single directory a tree approach is
used splitting the integer job ID. The job id is split in junks of 1000 each.
Usually 2 layers of directories is sufficient but the concept can be used for an
arbitrary number of layers.
For a 2 layer schema this can be achieved with (code example in Perl):
``` perl
$level1 = $jobID/1000;
$level2 = $jobID%1000;
$dstPath = sprintf("%s/%s/%d/%03d", $trunk, $destdir, $level1, $level2);
```
## Example
For the job ID 1034871 the directory path is `./1034/871/`.
# Json file format
## Overview
Every cluster must be configured in a `cluster.json` file.
The job data consists of two files:
* `meta.json`: Contains job meta information and job statistics.
* `data.json`: Contains complete job data with time series
The description of the json format specification is available as [[json
schema|https://json-schema.org/]] format file. The latest version of the json
schema is part of the `cc-backend` source tree. For external reference it is
also available in a separate repository.
## Specification `cluster.json`
The json schema specification is available
[here](https://github.com/ClusterCockpit/cc-specifications/blob/master/datastructures/cluster.schema.json).
## Specification `meta.json`
The json schema specification is available
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-meta.schema.json).
## Specification `data.json`
The json schema specification is available
[here](https://github.com/RRZE-HPC/HPCJobDatabase/blob/master/json-schema/job-data.schema.json).
Metric time series data is stored for a fixed time step. The time step is set
per metric. If no value is available for a metric time series data timestamp
`null` is entered.

View File

@@ -1,13 +0,0 @@
# Overview
Customizing `cc-backend` means changing the logo and certain legal texts
instead of the placeholders. To change the logo displayed in the navigation bar, the
file `web/frontend/public/img/logo.png` in the source tree must be replaced
and cc-backend must be rebuild.
# Replace legal texts
To replace the `imprint.tmpl` and `privacy.tmpl` legal texts, you can place your
version in `./var/`. At startup `cc-backend` will check if `./var/imprint.tmpl` and/or
`./var/privacy.tmpl` exist and use them instead of the built-in placeholders.
You can use the placeholders in `web/templates` as a blueprint.

View File

@@ -1,78 +0,0 @@
In general, an upgrade is nothing more than a replacement of the binary file.
All the necessary files, except the database file, the configuration file and
the job archive, are embedded in the binary file. It is recommended to use a
directory where the file names of the binary files are named with a version
indicator. This can be, for example, the date or the Unix epoch time. A symbolic
link points to the version to be used. This makes it easier to switch to earlier
versions.
The database and the job archive are versioned. Each release binary supports
specific versions of the database and job archive. If a version mismatch is
detected, the application is terminated and migration is required.
**IMPORTANT NOTE**
It is recommended to make a backup copy of the database before each update. This
is mandatory in case the database needs to be migrated. In the case of sqlite,
this means to stopping `cc-backend` and copying the sqlite database file
somewhere.
# Migrating the database
After you have backed up the database, run the following command to migrate the
database to the latest version:
```
$ ./cc-backend -migrate-db
```
The migration files are embedded in the binary and can also be viewed in the cc
backend [source tree](https://github.com/ClusterCockpit/cc-backend/tree/master/internal/repository/migrations).
There are separate migration files for both supported
database backends.
We use the [migrate library](https://github.com/golang-migrate/migrate).
If something goes wrong, you can check the status and get the current schema
(here for sqlite):
```
$ sqlite3 var/job.db
```
In the sqlite console execute:
```
.schema
```
to get the current databse schema.
You can query the current version and whether the migration failed with:
```
SELECT * FROM schema_migrations;
```
The first column indicates the current database version and the second column is
a dirty flag indicating whether the migration was successful.
# Migrating the job archive
Job archive migration requires a separate tool (`archive-migration`), which is
part of the cc-backend source tree (build with `go build ./tools/archive-migration`)
and is also provided as part of the releases.
Migration is supported only between two successive releases. The migration tool
migrates the existing job archive to a new job archive. This means that there
must be enough disk space for two complete job archives. If the tool is called
without options:
```
$ ./archive-migration
```
it is assumed that a job archive exists in `./var/job-archive`. The new job
archive is written to `./var/job-archive-new`. Since execution is threaded in case
of a fatal error, it is impossible to determine in which job the error occurred.
In this case, you can run the tool in debug mode (with the `-debug` flag). In
debug mode, threading is disabled and the job ID of each migrated job is output.
Jobs with empty files will be skipped. Between multiple runs of the tools, the
`job-archive-new` directory must be moved or deleted.
The `cluster.json` files in `job-archive-new` must be checked for errors, especially
whether the aggregation attribute is set correctly for all metrics.
Migration takes several hours for relatively large job archives (several hundred
GB). A versioned job archive contains a version.txt file in the root directory
of the job archive. This file contains the version as an unsigned integer.

View File

@@ -1,33 +0,0 @@
## Tips for frontend development
The frontend assets including the Svelte js files are per default embedded in
the bgo binary. To enable a quick turnaround cycle for web development of the
frontend disable embedding of static assets in `config.json`:
```
"embed-static-files": false,
"static-files": "./web/frontend/public/",
```
Start the node build process (in directory `./web/frontend`) in development mode:
```
$ npm run dev
```
This will start the build process in listen mode. Whenever you change a source
files the depending javascript targets will be automatically rebuild.
In case the javascript files are minified you may need to set the production
flag by hand to false in `./web/frontend/rollup.config.mjs`:
```
const production = false
```
Usually this should work automatically.
Because the files are still served by ./cc-backend you have to reload the view
explicitly in your browser.
A common setup is to have three terminals open:
* One running cc-backend (working directory repository root): `./cc-backend -server -dev`
* Another running npm in developer mode (working directory `./web/frontend`): `npm run dev`
* And the last one editing the frontend source files

View File

@@ -1,34 +0,0 @@
## Overview
We use the standard golang testing environment.
The following conventions are used:
* *White box unit tests*: Tests for internal functionality are placed in files
* *Black box unit tests*: Tests for public interfaces are placed in files
with `<package name>_test.go` and belong to the package `<package_name>_test`.
There only exists one package test file per package.
* *Integration tests*: Tests that use multiple componenents are placed in a
package test file. These are named `<package name>_test.go` and belong to the
package `<package_name>_test`.
* *Test assets*: Any required files are placed in a directory `./testdata`
within each package directory.
## Executing tests
Visual Studio Code has a very good golang test integration.
For debugging a test this is the recommended solution.
The Makefile provided by us has a `test` target that executes:
```
$ go clean -testcache
$ go build ./...
$ go vet ./...
$ go test ./...
```
Of course the commands can also be used on the command line.
For details about golang testing refer to the standard documentation:
* [Testing package](https://pkg.go.dev/testing)
* [go test command](https://pkg.go.dev/cmd/go#hdr-Test_packages)

View File

@@ -1,229 +0,0 @@
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use JSON::PP; # from Perl default install
use Time::Local qw( timelocal ); # from Perl default install
use Time::Piece; # from Perl default install
### JSON
my $json = JSON::PP->new->allow_nonref;
### TIME AND DATE
# now
my $localtime = localtime;
my $epochtime = $localtime->epoch;
# 5 days ago: Via epoch due to possible reverse month borders
my $epochlessfive = $epochtime - (86400 * 5);
my $locallessfive = localtime($epochlessfive);
# Calc like `date --date 'TZ="Europe/Berlin" 0:00 5 days ago' +%s`)
my ($day, $month, $year) = ($locallessfive->mday, $locallessfive->_mon, $locallessfive->year);
my $checkpointStart = timelocal(0, 0, 0, $day, $month, $year);
# for checkpoints
my $halfday = 43200;
### JOB-ARCHIVE
my $archiveTarget = './cc-backend/var/job-archive';
my $archiveSrc = './source-data/job-archive-source';
my @ArchiveClusters;
# Gen folder
if ( not -d $archiveTarget ){
mkdir( $archiveTarget ) or die "Couldn't create $archiveTarget directory, $!";
}
# Get clusters by job-archive/$subfolder
opendir my $dh, $archiveSrc or die "can't open directory: $!";
while ( readdir $dh ) {
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
my $cluster = $_;
push @ArchiveClusters, $cluster;
}
# start for jobarchive
foreach my $cluster ( @ArchiveClusters ) {
print "Starting to update start- and stoptimes in job-archive for $cluster\n";
my $clusterTarget = "$archiveTarget/$cluster";
if ( not -d $clusterTarget ){
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
}
opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
while ( readdir $dhLevel1 ) {
chomp; next if $_ eq '.' or $_ eq '..';
my $level1 = $_;
if ( -d "$archiveSrc/$cluster/$level1" ) {
opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
while ( readdir $dhLevel2 ) {
chomp; next if $_ eq '.' or $_ eq '..';
my $level2 = $_;
my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
my $jobOrigin = "$jobSource";
my $jobTargetL1 = "$clusterTarget/$level1";
my $jobTargetL2 = "$jobTargetL1/$level2";
# check if files are directly accessible (old format) else get subfolders as file and update path
if ( ! -e "$jobSource/meta.json") {
opendir(D, "$jobSource") || die "Can't open directory $jobSource: $!\n";
my @folders = readdir(D);
closedir(D);
if (!@folders) {
next;
}
foreach my $folder ( @folders ) {
next if $folder eq '.' or $folder eq '..';
$jobSource = "$jobSource/".$folder;
}
}
# check if subfolder contains file, else skip
if ( ! -e "$jobSource/meta.json") {
print "$jobSource skipped\n";
next;
}
open my $metafh, '<', "$jobSource/meta.json" or die "Can't open file $!";
my $rawstr = do { local $/; <$metafh> };
close($metafh);
my $metadata = $json->decode($rawstr);
# NOTE Start meta.json iteration here
# my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT;
# Set new startTime: Between 5 days and 1 day before now
# Remove id from attributes
$metadata->{startTime} = $epochtime - (int(rand(432000)) + 86400);
$metadata->{stopTime} = $metadata->{startTime} + $metadata->{duration};
# Add starttime subfolder to target path
my $jobTargetL3 = "$jobTargetL2/".$metadata->{startTime};
if ( not -d $jobTargetL1 ){
mkdir( $jobTargetL1 ) or die "Couldn't create $jobTargetL1 directory, $!";
}
if ( not -d $jobTargetL2 ){
mkdir( $jobTargetL2 ) or die "Couldn't create $jobTargetL2 directory, $!";
}
# target is not directory
if ( not -d $jobTargetL3 ){
mkdir( $jobTargetL3 ) or die "Couldn't create $jobTargetL3 directory, $!";
my $outstr = $json->encode($metadata);
open my $metaout, '>', "$jobTargetL3/meta.json" or die "Can't write to file $!";
print $metaout $outstr;
close($metaout);
open my $datafh, '<', "$jobSource/data.json" or die "Can't open file $!";
my $datastr = do { local $/; <$datafh> };
close($datafh);
open my $dataout, '>', "$jobTargetL3/data.json" or die "Can't write to file $!";
print $dataout $datastr;
close($dataout);
}
}
}
}
}
print "Done for job-archive\n";
sleep(1);
exit;
## CHECKPOINTS
my $checkpTarget = './cc-metric-store/var/checkpoints';
my $checkpSource = './source-data/cc-metric-store-source/checkpoints';
my @CheckpClusters;
# Gen folder
if ( not -d $checkpTarget ){
mkdir( $checkpTarget ) or die "Couldn't create $checkpTarget directory, $!";
}
# Get clusters by cc-metric-store/$subfolder
opendir my $dhc, $checkpSource or die "can't open directory: $!";
while ( readdir $dhc ) {
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
my $cluster = $_;
push @CheckpClusters, $cluster;
}
closedir($dhc);
# start for checkpoints
foreach my $cluster ( @CheckpClusters ) {
print "Starting to update checkpoint filenames and data starttimes for $cluster\n";
my $clusterTarget = "$checkpTarget/$cluster";
if ( not -d $clusterTarget ){
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
}
opendir my $dhLevel1, "$checkpSource/$cluster" or die "can't open directory: $!";
while ( readdir $dhLevel1 ) {
chomp; next if $_ eq '.' or $_ eq '..';
# Nodename as level1-folder
my $level1 = $_;
if ( -d "$checkpSource/$cluster/$level1" ) {
my $nodeSource = "$checkpSource/$cluster/$level1/";
my $nodeOrigin = "$nodeSource";
my $nodeTarget = "$clusterTarget/$level1";
my @files;
if ( -e "$nodeSource/1609459200.json") { # 1609459200 == First Checkpoint time in latest dump
opendir(D, "$nodeSource") || die "Can't open directory $nodeSource: $!\n";
while ( readdir D ) {
chomp; next if $_ eq '.' or $_ eq '..';
my $nodeFile = $_;
push @files, $nodeFile;
}
closedir(D);
my $length = @files;
if (!@files || $length != 14) { # needs 14 files == 7 days worth of data
next;
}
} else {
next;
}
# sort for integer timestamp-filename-part (moduleless): Guarantees start with index == 0 == 1609459200.json
my @sortedFiles = sort { ($a =~ /^([0-9]{10}).json$/)[0] <=> ($b =~ /^([0-9]{10}).json$/)[0] } @files;
if ( not -d $nodeTarget ){
mkdir( $nodeTarget ) or die "Couldn't create $nodeTarget directory, $!";
while (my ($index, $file) = each(@sortedFiles)) {
open my $checkfh, '<', "$nodeSource/$file" or die "Can't open file $!";
my $rawstr = do { local $/; <$checkfh> };
close($checkfh);
my $checkpdata = $json->decode($rawstr);
my $newTimestamp = $checkpointStart + ($index * $halfday);
# Get Diff from old Timestamp
my $timeDiff = $newTimestamp - $checkpdata->{from};
# Set new timestamp
$checkpdata->{from} = $newTimestamp;
foreach my $metric (keys %{$checkpdata->{metrics}}) {
$checkpdata->{metrics}->{$metric}->{start} += $timeDiff;
}
my $outstr = $json->encode($checkpdata);
open my $checkout, '>', "$nodeTarget/$newTimestamp.json" or die "Can't write to file $!";
print $checkout $outstr;
close($checkout);
}
}
}
}
closedir($dhLevel1);
}
print "Done for checkpoints\n";

View File

@@ -1,35 +0,0 @@
# Docs for ClusterCockpit Searchbar
## Usage
* Searchtags are implemented as `type:<query>` search-string
* Types `jobId, jobName, projectId, username, name` for roles `admin` and `support`
* `jobName` is jobName as persisted in `job.meta_data` table-column
* `username` is actual account identifier as persisted in `job.user` table-column
* `name` is account owners name as persisted in `user.name` table-column
* Types `jobId, jobName, projectId` for role `user`
* Examples:
* `jobName:myJob12`
* `jobId:123456`
* `username:abcd100`
* `name:Paul`
* If no searchTag used: Best guess search with the following hierarchy
* `jobId -> username -> name -> projectId -> jobName`
* Destinations:
* JobId: Job-Table (Allows multiple identical matches, e.g. JobIds from different clusters)
* JobName: Job-Table (Allows multiple identical matches, e.g. JobNames from different clusters)
* ProjectId: Job-Table
* Username: Users-Table
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table.
* Name: Users-Table
* **Please Note**: Only users with jobs will be shown in table! I.e., Users without jobs will be missing in table.
* Best guess search always redirects to Job-Table or `/monitoring/user/$USER` (first username match)
* Unprocessable queries will redirect to `/monitoring/jobs/?`
* Spaces trimmed (both for searchTag and queryString)
* ` job12` == `job12`
* `projectID : abcd ` == `projectId:abcd`
* `jobName`- and `name-`queries work with a part of the target-string
* `jobName:myjob` for jobName "myjob_cluster1"
* `name:Paul` for name "Paul Atreides"
* JobName GQL Query is resolved as matching the query as a part of the whole metaData-JSON in the SQL DB.

121
go.mod
View File

@@ -1,91 +1,90 @@
module github.com/ClusterCockpit/cc-backend module github.com/ClusterCockpit/cc-backend
go 1.18 go 1.23
require ( require (
github.com/99designs/gqlgen v0.17.24 github.com/99designs/gqlgen v0.17.57
github.com/ClusterCockpit/cc-units v0.4.0 github.com/ClusterCockpit/cc-units v0.4.0
github.com/Masterminds/squirrel v1.5.3 github.com/Masterminds/squirrel v1.5.4
github.com/go-ldap/ldap/v3 v3.4.4 github.com/coreos/go-oidc/v3 v3.11.0
github.com/go-sql-driver/mysql v1.7.0 github.com/go-co-op/gocron/v2 v2.9.0
github.com/golang-jwt/jwt/v4 v4.5.0 github.com/go-ldap/ldap/v3 v3.4.8
github.com/golang-migrate/migrate/v4 v4.15.2 github.com/go-sql-driver/mysql v1.8.1
github.com/google/gops v0.3.27 github.com/golang-jwt/jwt/v5 v5.2.1
github.com/gorilla/handlers v1.5.1 github.com/golang-migrate/migrate/v4 v4.17.1
github.com/gorilla/mux v1.8.0 github.com/google/gops v0.3.28
github.com/gorilla/sessions v1.2.1 github.com/gorilla/handlers v1.5.2
github.com/influxdata/influxdb-client-go/v2 v2.12.2 github.com/gorilla/mux v1.8.1
github.com/jmoiron/sqlx v1.3.5 github.com/gorilla/sessions v1.4.0
github.com/mattn/go-sqlite3 v1.14.16 github.com/influxdata/influxdb-client-go/v2 v2.13.0
github.com/prometheus/client_golang v1.14.0 github.com/jmoiron/sqlx v1.4.0
github.com/prometheus/common v0.40.0 github.com/mattn/go-sqlite3 v1.14.22
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/common v0.55.0
github.com/qustavo/sqlhooks/v2 v2.1.0 github.com/qustavo/sqlhooks/v2 v2.1.0
github.com/santhosh-tekuri/jsonschema/v5 v5.2.0 github.com/santhosh-tekuri/jsonschema/v5 v5.3.1
github.com/swaggo/http-swagger v1.3.3 github.com/swaggo/http-swagger v1.3.4
github.com/swaggo/swag v1.8.10 github.com/swaggo/swag v1.16.4
github.com/vektah/gqlparser/v2 v2.5.1 github.com/vektah/gqlparser/v2 v2.5.20
golang.org/x/crypto v0.6.0 golang.org/x/crypto v0.29.0
golang.org/x/exp v0.0.0-20240707233637-46b078467d37
golang.org/x/oauth2 v0.21.0
) )
require ( require (
filippo.io/edwards25519 v1.1.0 // indirect
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect github.com/KyleBanks/depth v1.2.1 // indirect
github.com/agnivade/levenshtein v1.1.1 // indirect github.com/agnivade/levenshtein v1.2.0 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/containerd v1.6.18 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/deepmap/oapi-codegen v1.12.4 // indirect github.com/go-asn1-ber/asn1-ber v1.5.7 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect github.com/go-jose/go-jose/v4 v4.0.3 // indirect
github.com/ghodss/yaml v1.0.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect
github.com/go-co-op/gocron v1.25.0 // indirect github.com/go-openapi/spec v0.21.0 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/go-openapi/spec v0.20.8 // indirect github.com/google/uuid v1.6.0 // indirect
github.com/go-openapi/swag v0.22.3 // indirect github.com/gorilla/securecookie v1.1.2 // indirect
github.com/golang/protobuf v1.5.2 // indirect github.com/gorilla/websocket v1.5.3 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
github.com/jonboulle/clockwork v0.4.0 // indirect
github.com/josharian/intern v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/kr/pretty v0.3.0 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect github.com/oapi-codegen/runtime v1.1.1 // indirect
github.com/pkg/errors v0.9.1 // indirect github.com/opencontainers/image-spec v1.1.0-rc2.0.20221005185240-3a7f492d3f1b // indirect
github.com/prometheus/client_model v0.3.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/procfs v0.9.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/rogpeppe/go-internal v1.8.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/stretchr/testify v1.8.2 // indirect github.com/sosodev/duration v1.3.1 // indirect
github.com/swaggo/files v1.0.0 // indirect github.com/swaggo/files v1.0.1 // indirect
github.com/urfave/cli/v2 v2.24.4 // indirect github.com/urfave/cli/v2 v2.27.5 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
go.uber.org/atomic v1.10.0 // indirect go.uber.org/atomic v1.11.0 // indirect
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea // indirect golang.org/x/mod v0.22.0 // indirect
golang.org/x/mod v0.8.0 // indirect golang.org/x/net v0.31.0 // indirect
golang.org/x/net v0.7.0 // indirect golang.org/x/sync v0.9.0 // indirect
golang.org/x/oauth2 v0.5.0 // indirect golang.org/x/sys v0.27.0 // indirect
golang.org/x/sys v0.5.0 // indirect golang.org/x/text v0.20.0 // indirect
golang.org/x/text v0.7.0 // indirect golang.org/x/tools v0.27.0 // indirect
golang.org/x/tools v0.6.0 // indirect google.golang.org/protobuf v1.35.2 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect
) )

2072
go.sum

File diff suppressed because it is too large Load Diff

View File

@@ -61,23 +61,50 @@ models:
fields: fields:
partitions: partitions:
resolver: true resolver: true
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" } NullableFloat:
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" } { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
MetricValue: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" } MetricScope:
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" } { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
MetricValue:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricValue" }
JobStatistics:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
GlobalMetricListItem:
{
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.GlobalMetricListItem",
}
ClusterSupport:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.ClusterSupport" }
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" } Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" } Resource:
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" } { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" } JobState:
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" } { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" } TimeRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
IntRange:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
JobMetric:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" } Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" } MetricStatistics:
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" } {
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" } model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics",
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" } }
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" } MetricConfig:
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" } { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" } SubClusterConfig:
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" } {
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig",
}
Accelerator:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
Topology:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
FilterRanges:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
SubCluster:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
StatsSeries:
{ model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }
Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" } Unit: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Unit" }

View File

@@ -1,71 +1,79 @@
# How to run this as a systemd service # How to run `cc-backend` as a systemd service.
The files in this directory assume that you install ClusterCockpit to `/opt/monitoring`. The files in this directory assume that you install ClusterCockpit to
Of course you can choose any other location, but make sure to replace all paths that begin with `/opt/monitoring` in the `clustercockpit.service` file! `/opt/monitoring/cc-backend`.
Of course you can choose any other location, but make sure you replace all paths
starting with `/opt/monitoring/cc-backend` in the `clustercockpit.service` file!
If you have not installed [yarn](https://yarnpkg.com/getting-started/install) and [go](https://go.dev/doc/install) already, do that (Golang is available in most package managers). The `config.json` may contain the optional fields *user* and *group*. If
It is recommended and easy to install the most recent stable version of Golang as every version also improves the Golang standard library. specified, the application will call
[setuid](https://man7.org/linux/man-pages/man2/setuid.2.html) and
The `config.json` can have the optional fields *user* and *group*. [setgid](https://man7.org/linux/man-pages/man2/setgid.2.html) after reading the
If provided, the application will call [setuid](https://man7.org/linux/man-pages/man2/setuid.2.html) and [setgid](https://man7.org/linux/man-pages/man2/setgid.2.html) after having read the config file and having bound to a TCP port (so that it can take a privileged port), but before it starts accepting any connections. config file and binding to a TCP port (so it can take a privileged port), but
This is good for security, but means that the directories `web/frontend/public`, `var/` and `web/templates/` must be readable by that user and `var/` writable as well (All paths relative to the repos root). before it starts accepting any connections. This is good for security, but also
The `.env` and `config.json` files might contain secrets and should not be readable by that user. means that the `var/` directory must be readable and writeable by this user.
If those files are changed, the server has to be restarted. The `.env` and `config.json` files may contain secrets and should not be
readable by this user. If these files are changed, the server must be restarted.
```sh ```sh
# 1.: Clone this repository to /opt/monitoring # 1. Clone this repository somewhere in your home
git clone git@github.com:ClusterCockpit/cc-backend.git /opt/monitoring git clone git@github.com:ClusterCockpit/cc-backend.git <DSTDIR>
# 2.: Install all dependencies and build everything # 2. (Optional) Install dependencies and build. In general it is recommended to use the provided release binaries.
cd /mnt/monitoring cd <DSTDIR>
go get && go build cmd/cc-backend && (cd ./web/frontend && yarn install && yarn build) make
sudo mkdir -p /opt/monitoring/cc-backend/
cp ./cc-backend /opt/monitoring/cc-backend/
# 3.: Modify the `./config.json` and env-template.txt file from the configs directory to your liking and put it in the repo root # 3. Modify the `./config.json` and env-template.txt file from the configs directory to your liking and put it in the target directory
cp ./configs/config.json ./config.json cp ./configs/config.json /opt/monitoring/config.json
cp ./configs/env-template.txt ./.env cp ./configs/env-template.txt /opt/monitoring/.env
vim ./config.json # do your thing... vim /opt/monitoring/config.json # do your thing...
vim ./.env # do your thing... vim /opt/monitoring/.env # do your thing...
# 4.: Add the systemd service unit file (in case /opt/ is mounted on another file system it may be better to copy the file to /etc) # 4. (Optional) Customization: Add your versions of the login view, legal texts, and logo image.
sudo ln -s /mnt/monitoring/init/clustercockpit.service /etc/systemd/system/clustercockpit.service # You may use the templates in `./web/templates` as blueprint. Every overwrite separate.
cp login.tmpl /opt/monitoring/cc-backend/var/
cp imprint.tmpl /opt/monitoring/cc-backend/var/
cp privacy.tmpl /opt/monitoring/cc-backend/var/
# Ensure your logo, and any images you use in your login template has a suitable size.
cp -R img /opt/monitoring/cc-backend/img
# 5.: Enable and start the server # 5. Copy the systemd service unit file. You may adopt it to your needs.
sudo cp ./init/clustercockpit.service /etc/systemd/system/clustercockpit.service
# 6. Enable and start the server
sudo systemctl enable clustercockpit.service # optional (if done, (re-)starts automatically) sudo systemctl enable clustercockpit.service # optional (if done, (re-)starts automatically)
sudo systemctl start clustercockpit.service sudo systemctl start clustercockpit.service
# Check whats going on: # Check whats going on:
sudo systemctl status clustercockpit.service
sudo journalctl -u clustercockpit.service sudo journalctl -u clustercockpit.service
``` ```
# Recommended deployment workflow # Recommended workflow for deployment
It is recommended to install all ClusterCockpit components in a common durectory, this can be something like `/opt/monitoring`, `var/monitoring` or `var/clustercockpit`. It is recommended to install all ClusterCockpit components in a common directory, e.g. `/opt/monitoring`, `var/monitoring` or `var/clustercockpit`.
In the following we are using `/opt/monitoring`. In the following we use `/opt/monitoring`.
Two systemd services are running on the central monitoring server: Two systemd services run on the central monitoring server:
* clustercockpit : binary cc-backend in `/opt/monitoring/cc-backend`.
* cc-metric-store : Binary cc-metric-store in `/opt/monitoring/cc-metric-store`.
clustercockpit : Binary cc-backend in `/opt/monitoring/cc-backend` ClusterCockpit is deployed as a single binary that embeds all static assets.
cc-metric-store: Binary cc-metric-store in `/opt/monitoring/cc-metric-store` We recommend keeping all `cc-backend` binary versions in a folder `archive` and
linking the currently active one from the `cc-backend` root.
ClusterCockpit is deployed as a single file binary that embeds all static assets. This allows for easy roll-back in case something doesn't work.
We recommend to keep all binaries in a folder `archive` and link the currently active from cc-backend root.
This allows to easily roll-back in case something breaks.
## Workflow to deploy new version ## Workflow to deploy new version
This example assumes the DB and job archive did not change. This example assumes the DB and job archive versions did not change.
* Stop systemd service: `$ sudo systemctl stop clustercockpit.service`
* Backup the sqlite DB file and Job archive directory tree! * Backup the sqlite DB file and Job archive directory tree!
* Clone cc-backend source tree (e.g. in your home directory) * Copy `cc-backend` binary to `/opt/monitoring/cc-backend/archive` (Tip: Use a
* Copy the adapted legal text files into the git source tree (./web/templates). date tag like `YYYYMMDD-cc-backend`)
* Build cc-backend: * Link from cc-backend root to current version
``` * Start systemd service: `$ sudo systemctl start clustercockpit.service`
$ cd web/frontend * Check if everything is ok: `$ sudo systemctl status clustercockpit.service`
$ yarn && yarn build
$ cd ../../
$ go build ./cmd/cc-backend
```
* Copy `cc-backend` binary to `/opt/monitoring/cc-backend/archive`
* Link from cc-backend root to recent version
* Restart systemd service: `$ sudo systemctl restart clustercockpit.service`
* Check log for issues: `$ sudo journalctl -u clustercockpit.service` * Check log for issues: `$ sudo journalctl -u clustercockpit.service`
* Check the ClusterCockpit web frontend and your Slurm adapters if anything is broken! * Check the ClusterCockpit web frontend and your Slurm adapters if anything is broken!

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -14,13 +14,16 @@ import (
"os" "os"
"path/filepath" "path/filepath"
"reflect" "reflect"
"strconv"
"strings" "strings"
"testing" "testing"
"time"
"github.com/ClusterCockpit/cc-backend/internal/api" "github.com/ClusterCockpit/cc-backend/internal/api"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph" "github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
@@ -39,6 +42,9 @@ func setup(t *testing.T) *api.RestApi {
"kind": "file", "kind": "file",
"path": "./var/job-archive" "path": "./var/job-archive"
}, },
"jwts": {
"max-age": "2m"
},
"clusters": [ "clusters": [
{ {
"name": "testcluster", "name": "testcluster",
@@ -114,7 +120,7 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err) t.Fatal(err)
} }
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil { if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -141,23 +147,20 @@ func setup(t *testing.T) *api.RestApi {
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive) archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
repository.Connect("sqlite3", dbfilepath) repository.Connect("sqlite3", dbfilepath)
db := repository.GetConnection()
if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil { if err := archive.Init(json.RawMessage(archiveCfg), config.Keys.DisableArchive); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if err := metricdata.Init(config.Keys.DisableArchive); err != nil { if err := metricdata.Init(); err != nil {
t.Fatal(err) t.Fatal(err)
} }
jobRepo := repository.GetJobRepository() archiver.Start(repository.GetJobRepository())
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo} auth.Init()
graph.Init()
return &api.RestApi{ return api.New()
JobRepository: resolver.Repo,
Resolver: resolver,
}
} }
func cleanup() { func cleanup() {
@@ -172,7 +175,6 @@ func cleanup() {
func TestRestApi(t *testing.T) { func TestRestApi(t *testing.T) {
restapi := setup(t) restapi := setup(t)
t.Cleanup(cleanup) t.Cleanup(cleanup)
testData := schema.JobData{ testData := schema.JobData{
"load_one": map[schema.MetricScope]*schema.JobMetric{ "load_one": map[schema.MetricScope]*schema.JobMetric{
schema.MetricScopeNode: { schema.MetricScopeNode: {
@@ -189,12 +191,18 @@ func TestRestApi(t *testing.T) {
}, },
} }
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) { metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
return testData, nil return testData, nil
} }
r := mux.NewRouter() r := mux.NewRouter()
restapi.MountRoutes(r) r.PathPrefix("/api").Subrouter()
r.StrictSlash(true)
restapi.MountApiRoutes(r)
var TestJobId int64 = 123
var TestClusterName string = "testcluster"
var TestStartTime int64 = 123456789
const startJobBody string = `{ const startJobBody string = `{
"jobId": 123, "jobId": 123,
@@ -210,7 +218,7 @@ func TestRestApi(t *testing.T) {
"exclusive": 1, "exclusive": 1,
"monitoringStatus": 1, "monitoringStatus": 1,
"smt": 1, "smt": 1,
"tags": [{ "type": "testTagType", "name": "testTagName" }], "tags": [{ "type": "testTagType", "name": "testTagName", "scope": "testuser" }],
"resources": [ "resources": [
{ {
"hostname": "host123", "hostname": "host123",
@@ -221,28 +229,36 @@ func TestRestApi(t *testing.T) {
"startTime": 123456789 "startTime": 123456789
}` }`
var dbid int64 const contextUserKey repository.ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
Projects: make([]string, 0),
Roles: []string{"user"},
AuthType: 0,
AuthSource: 2,
}
if ok := t.Run("StartJob", func(t *testing.T) { if ok := t.Run("StartJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody))) req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBody)))
recorder := httptest.NewRecorder() recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req) ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result() response := recorder.Result()
if response.StatusCode != http.StatusCreated { if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String()) t.Fatal(response.Status, recorder.Body.String())
} }
var res api.StartJobApiResponse time.Sleep(1 * time.Second)
if err := json.Unmarshal(recorder.Body.Bytes(), &res); err != nil {
t.Fatal(err)
}
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(res.DBID))) resolver := graph.GetResolverInstance()
job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
job.Tags, err = restapi.Resolver.Job().Tags(context.Background(), job) job.Tags, err = resolver.Job().Tags(ctx, job)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -266,11 +282,9 @@ func TestRestApi(t *testing.T) {
t.Fatalf("unexpected job properties: %#v", job) t.Fatalf("unexpected job properties: %#v", job)
} }
if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" { if len(job.Tags) != 1 || job.Tags[0].Type != "testTagType" || job.Tags[0].Name != "testTagName" || job.Tags[0].Scope != "testuser" {
t.Fatalf("unexpected tags: %#v", job.Tags) t.Fatalf("unexpected tags: %#v", job.Tags)
} }
dbid = res.DBID
}); !ok { }); !ok {
return return
} }
@@ -286,17 +300,19 @@ func TestRestApi(t *testing.T) {
var stoppedJob *schema.Job var stoppedJob *schema.Job
if ok := t.Run("StopJob", func(t *testing.T) { if ok := t.Run("StopJob", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody))) req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBody)))
recorder := httptest.NewRecorder() recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req) ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result() response := recorder.Result()
if response.StatusCode != http.StatusOK { if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String()) t.Fatal(response.Status, recorder.Body.String())
} }
restapi.JobRepository.WaitForArchiving() archiver.WaitForArchiving()
job, err := restapi.Resolver.Query().Job(context.Background(), strconv.Itoa(int(dbid))) job, err := restapi.JobRepository.Find(&TestJobId, &TestClusterName, &TestStartTime)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -324,7 +340,7 @@ func TestRestApi(t *testing.T) {
} }
t.Run("CheckArchive", func(t *testing.T) { t.Run("CheckArchive", func(t *testing.T) {
data, err := metricdata.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background()) data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -338,10 +354,12 @@ func TestRestApi(t *testing.T) {
// Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart! // Starting a job with the same jobId and cluster should only be allowed if the startTime is far appart!
body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1) body := strings.Replace(startJobBody, `"startTime": 123456789`, `"startTime": 123456790`, -1)
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(body))) req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(body)))
recorder := httptest.NewRecorder() recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req) ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result() response := recorder.Result()
if response.StatusCode != http.StatusUnprocessableEntity { if response.StatusCode != http.StatusUnprocessableEntity {
t.Fatal(response.Status, recorder.Body.String()) t.Fatal(response.Status, recorder.Body.String())
@@ -368,10 +386,12 @@ func TestRestApi(t *testing.T) {
}` }`
ok := t.Run("StartJobFailed", func(t *testing.T) { ok := t.Run("StartJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed))) req := httptest.NewRequest(http.MethodPost, "/jobs/start_job/", bytes.NewBuffer([]byte(startJobBodyFailed)))
recorder := httptest.NewRecorder() recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req) ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result() response := recorder.Result()
if response.StatusCode != http.StatusCreated { if response.StatusCode != http.StatusCreated {
t.Fatal(response.Status, recorder.Body.String()) t.Fatal(response.Status, recorder.Body.String())
@@ -381,8 +401,10 @@ func TestRestApi(t *testing.T) {
t.Fatal("subtest failed") t.Fatal("subtest failed")
} }
time.Sleep(1 * time.Second)
const stopJobBodyFailed string = `{ const stopJobBodyFailed string = `{
"jobId": 12345, "jobId": 12345,
"cluster": "testcluster", "cluster": "testcluster",
"jobState": "failed", "jobState": "failed",
@@ -390,16 +412,18 @@ func TestRestApi(t *testing.T) {
}` }`
ok = t.Run("StopJobFailed", func(t *testing.T) { ok = t.Run("StopJobFailed", func(t *testing.T) {
req := httptest.NewRequest(http.MethodPost, "/api/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed))) req := httptest.NewRequest(http.MethodPost, "/jobs/stop_job/", bytes.NewBuffer([]byte(stopJobBodyFailed)))
recorder := httptest.NewRecorder() recorder := httptest.NewRecorder()
r.ServeHTTP(recorder, req) ctx := context.WithValue(req.Context(), contextUserKey, contextUserValue)
r.ServeHTTP(recorder, req.WithContext(ctx))
response := recorder.Result() response := recorder.Result()
if response.StatusCode != http.StatusOK { if response.StatusCode != http.StatusOK {
t.Fatal(response.Status, recorder.Body.String()) t.Fatal(response.Status, recorder.Body.String())
} }
restapi.JobRepository.WaitForArchiving() archiver.WaitForArchiving()
jobid, cluster := int64(12345), "testcluster" jobid, cluster := int64(12345), "testcluster"
job, err := restapi.JobRepository.Find(&jobid, &cluster, nil) job, err := restapi.JobRepository.Find(&jobid, &cluster, nil)
if err != nil { if err != nil {

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,94 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver
import (
"context"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
var (
archivePending sync.WaitGroup
archiveChannel chan *schema.Job
jobRepo *repository.JobRepository
)
func Start(r *repository.JobRepository) {
archiveChannel = make(chan *schema.Job, 128)
jobRepo = r
go archivingWorker()
}
// Archiving worker thread
func archivingWorker() {
for {
select {
case job, ok := <-archiveChannel:
if !ok {
break
}
start := time.Now()
// not using meta data, called to load JobMeta into Cache?
// will fail if job meta not in repository
if _, err := jobRepo.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed at check metadata step: %s", job.ID, err.Error())
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
// TODO: Maybe use context with cancel/timeout here
jobMeta, err := ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed at archiving job step: %s", job.ID, err.Error())
jobRepo.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
stmt := sq.Update("job").Where("job.id = ?", job.ID)
if stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta); err != nil {
log.Errorf("archiving job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
continue
}
if stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta); err != nil {
log.Errorf("archiving job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
continue
}
// Update the jobs database entry one last time:
stmt = jobRepo.MarkArchived(stmt, schema.MonitoringStatusArchivingSuccessful)
if err := jobRepo.Execute(stmt); err != nil {
log.Errorf("archiving job (dbid: %d) failed at db execute: %s", job.ID, err.Error())
continue
}
log.Debugf("archiving job %d took %s", job.JobID, time.Since(start))
log.Printf("archiving job (dbid: %d) successful", job.ID)
archivePending.Done()
}
}
}
// Trigger async archiving
func TriggerArchiving(job *schema.Job) {
if archiveChannel == nil {
log.Fatal("Cannot archive without archiving channel. Did you Start the archiver?")
}
archivePending.Add(1)
archiveChannel <- job
}
// Wait for background thread to finish pending archiving operations
func WaitForArchiving() {
// close channel and wait for worker to process remaining jobs
archivePending.Wait()
}

View File

@@ -0,0 +1,82 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archiver
import (
"context"
"math"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
scopes := []schema.MetricScope{schema.MetricScopeNode}
// FIXME: Add a config option for this
if job.NumNodes <= 8 {
// This will add the native scope if core scope is not available
scopes = append(scopes, schema.MetricScopeCore)
}
if job.NumAcc > 0 {
scopes = append(scopes, schema.MetricScopeAccelerator)
}
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// This should never happen ?
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
},
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if config.Keys.DisableArchive {
return jobMeta, nil
}
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -7,289 +7,44 @@ package auth
import ( import (
"context" "context"
"crypto/rand" "crypto/rand"
"database/sql"
"encoding/base64" "encoding/base64"
"errors" "errors"
"fmt"
"net/http" "net/http"
"os" "os"
"strings" "sync"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/gorilla/sessions" "github.com/gorilla/sessions"
"github.com/jmoiron/sqlx"
) )
type AuthSource int
const (
AuthViaLocalPassword AuthSource = iota
AuthViaLDAP
AuthViaToken
)
type User struct {
Username string `json:"username"`
Password string `json:"-"`
Name string `json:"name"`
Roles []string `json:"roles"`
AuthSource AuthSource `json:"via"`
Email string `json:"email"`
Projects []string `json:"projects"`
Expiration time.Time
}
type Role int
const (
RoleAnonymous Role = iota
RoleApi
RoleUser
RoleManager
RoleSupport
RoleAdmin
RoleError
)
func GetRoleString(roleInt Role) string {
return [6]string{"anonymous", "api", "user", "manager", "support", "admin"}[roleInt]
}
func getRoleEnum(roleStr string) Role {
switch strings.ToLower(roleStr) {
case "admin":
return RoleAdmin
case "support":
return RoleSupport
case "manager":
return RoleManager
case "user":
return RoleUser
case "api":
return RoleApi
case "anonymous":
return RoleAnonymous
default:
return RoleError
}
}
func isValidRole(role string) bool {
if getRoleEnum(role) == RoleError {
return false
}
return true
}
func (u *User) HasValidRole(role string) (hasRole bool, isValid bool) {
if isValidRole(role) {
for _, r := range u.Roles {
if r == role {
return true, true
}
}
return false, true
}
return false, false
}
func (u *User) HasRole(role Role) bool {
for _, r := range u.Roles {
if r == GetRoleString(role) {
return true
}
}
return false
}
// Role-Arrays are short: performance not impacted by nested loop
func (u *User) HasAnyRole(queryroles []Role) bool {
for _, ur := range u.Roles {
for _, qr := range queryroles {
if ur == GetRoleString(qr) {
return true
}
}
}
return false
}
// Role-Arrays are short: performance not impacted by nested loop
func (u *User) HasAllRoles(queryroles []Role) bool {
target := len(queryroles)
matches := 0
for _, ur := range u.Roles {
for _, qr := range queryroles {
if ur == GetRoleString(qr) {
matches += 1
break
}
}
}
if matches == target {
return true
} else {
return false
}
}
// Role-Arrays are short: performance not impacted by nested loop
func (u *User) HasNotRoles(queryroles []Role) bool {
matches := 0
for _, ur := range u.Roles {
for _, qr := range queryroles {
if ur == GetRoleString(qr) {
matches += 1
break
}
}
}
if matches == 0 {
return true
} else {
return false
}
}
// Called by API endpoint '/roles/' from frontend: Only required for admin config -> Check Admin Role
func GetValidRoles(user *User) ([]string, error) {
var vals []string
if user.HasRole(RoleAdmin) {
for i := RoleApi; i < RoleError; i++ {
vals = append(vals, GetRoleString(i))
}
return vals, nil
}
return vals, fmt.Errorf("%s: only admins are allowed to fetch a list of roles", user.Username)
}
// Called by routerConfig web.page setup in backend: Only requires known user and/or not API user
func GetValidRolesMap(user *User) (map[string]Role, error) {
named := make(map[string]Role)
if user.HasNotRoles([]Role{RoleApi, RoleAnonymous}) {
for i := RoleApi; i < RoleError; i++ {
named[GetRoleString(i)] = i
}
return named, nil
}
return named, fmt.Errorf("Only known users are allowed to fetch a list of roles")
}
// Find highest role
func (u *User) GetAuthLevel() Role {
if u.HasRole(RoleAdmin) {
return RoleAdmin
} else if u.HasRole(RoleSupport) {
return RoleSupport
} else if u.HasRole(RoleManager) {
return RoleManager
} else if u.HasRole(RoleUser) {
return RoleUser
} else if u.HasRole(RoleApi) {
return RoleApi
} else if u.HasRole(RoleAnonymous) {
return RoleAnonymous
} else {
return RoleError
}
}
func (u *User) HasProject(project string) bool {
for _, p := range u.Projects {
if p == project {
return true
}
}
return false
}
func GetUser(ctx context.Context) *User {
x := ctx.Value(ContextUserKey)
if x == nil {
return nil
}
return x.(*User)
}
type Authenticator interface { type Authenticator interface {
Init(auth *Authentication, config interface{}) error CanLogin(user *schema.User, username string, rw http.ResponseWriter, r *http.Request) (*schema.User, bool)
CanLogin(user *User, rw http.ResponseWriter, r *http.Request) bool Login(user *schema.User, rw http.ResponseWriter, r *http.Request) (*schema.User, error)
Login(user *User, rw http.ResponseWriter, r *http.Request) (*User, error)
Auth(rw http.ResponseWriter, r *http.Request) (*User, error)
} }
type ContextKey string var (
initOnce sync.Once
const ContextUserKey ContextKey = "user" authInstance *Authentication
)
type Authentication struct { type Authentication struct {
db *sqlx.DB sessionStore *sessions.CookieStore
sessionStore *sessions.CookieStore
SessionMaxAge time.Duration
authenticators []Authenticator
LdapAuth *LdapAuthenticator LdapAuth *LdapAuthenticator
JwtAuth *JWTAuthenticator JwtAuth *JWTAuthenticator
LocalAuth *LocalAuthenticator LocalAuth *LocalAuthenticator
} authenticators []Authenticator
SessionMaxAge time.Duration
func Init(db *sqlx.DB,
configs map[string]interface{}) (*Authentication, error) {
auth := &Authentication{}
auth.db = db
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Error("Error while initializing authentication -> failed to generate random bytes for session key")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Error("Error while initializing authentication -> decoding session key failed")
return nil, err
}
auth.sessionStore = sessions.NewCookieStore(bytes)
}
auth.LocalAuth = &LocalAuthenticator{}
if err := auth.LocalAuth.Init(auth, nil); err != nil {
log.Error("Error while initializing authentication -> localAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LocalAuth)
auth.JwtAuth = &JWTAuthenticator{}
if err := auth.JwtAuth.Init(auth, configs["jwt"]); err != nil {
log.Error("Error while initializing authentication -> jwtAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.JwtAuth)
if config, ok := configs["ldap"]; ok {
auth.LdapAuth = &LdapAuthenticator{}
if err := auth.LdapAuth.Init(auth, config); err != nil {
log.Error("Error while initializing authentication -> ldapAuth init failed")
return nil, err
}
auth.authenticators = append(auth.authenticators, auth.LdapAuth)
}
return auth, nil
} }
func (auth *Authentication) AuthViaSession( func (auth *Authentication) AuthViaSession(
rw http.ResponseWriter, rw http.ResponseWriter,
r *http.Request) (*User, error) { r *http.Request,
) (*schema.User, error) {
session, err := auth.sessionStore.Get(r, "session") session, err := auth.sessionStore.Get(r, "session")
if err != nil { if err != nil {
log.Error("Error while getting session store") log.Error("Error while getting session store")
@@ -300,108 +55,350 @@ func (auth *Authentication) AuthViaSession(
return nil, nil return nil, nil
} }
// TODO: Check if session keys exist
username, _ := session.Values["username"].(string) username, _ := session.Values["username"].(string)
projects, _ := session.Values["projects"].([]string) projects, _ := session.Values["projects"].([]string)
roles, _ := session.Values["roles"].([]string) roles, _ := session.Values["roles"].([]string)
return &User{ return &schema.User{
Username: username, Username: username,
Projects: projects, Projects: projects,
Roles: roles, Roles: roles,
AuthType: schema.AuthSession,
AuthSource: -1, AuthSource: -1,
}, nil }, nil
} }
// Handle a POST request that should log the user in, starting a new session. func Init() {
func (auth *Authentication) Login( initOnce.Do(func() {
onsuccess http.Handler, authInstance = &Authentication{}
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error)) http.Handler {
sessKey := os.Getenv("SESSION_KEY")
if sessKey == "" {
log.Warn("environment variable 'SESSION_KEY' not set (will use non-persistent random key)")
bytes := make([]byte, 32)
if _, err := rand.Read(bytes); err != nil {
log.Fatal("Error while initializing authentication -> failed to generate random bytes for session key")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
} else {
bytes, err := base64.StdEncoding.DecodeString(sessKey)
if err != nil {
log.Fatal("Error while initializing authentication -> decoding session key failed")
}
authInstance.sessionStore = sessions.NewCookieStore(bytes)
}
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
authInstance.SessionMaxAge = d
}
if config.Keys.LdapConfig != nil {
ldapAuth := &LdapAuthenticator{}
if err := ldapAuth.Init(); err != nil {
log.Warn("Error while initializing authentication -> ldapAuth init failed")
} else {
authInstance.LdapAuth = ldapAuth
authInstance.authenticators = append(authInstance.authenticators, authInstance.LdapAuth)
}
} else {
log.Info("Missing LDAP configuration: No LDAP support!")
}
if config.Keys.JwtConfig != nil {
authInstance.JwtAuth = &JWTAuthenticator{}
if err := authInstance.JwtAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> jwtAuth init failed")
}
jwtSessionAuth := &JWTSessionAuthenticator{}
if err := jwtSessionAuth.Init(); err != nil {
log.Info("jwtSessionAuth init failed: No JWT login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtSessionAuth)
}
jwtCookieSessionAuth := &JWTCookieSessionAuthenticator{}
if err := jwtCookieSessionAuth.Init(); err != nil {
log.Info("jwtCookieSessionAuth init failed: No JWT cookie login support!")
} else {
authInstance.authenticators = append(authInstance.authenticators, jwtCookieSessionAuth)
}
} else {
log.Info("Missing JWT configuration: No JWT token support!")
}
authInstance.LocalAuth = &LocalAuthenticator{}
if err := authInstance.LocalAuth.Init(); err != nil {
log.Fatal("Error while initializing authentication -> localAuth init failed")
}
authInstance.authenticators = append(authInstance.authenticators, authInstance.LocalAuth)
})
}
func GetAuthInstance() *Authentication {
if authInstance == nil {
log.Fatal("Authentication module not initialized!")
}
return authInstance
}
func handleTokenUser(tokenUser *schema.User) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(tokenUser.Username)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%s': %v", tokenUser.Username, err)
} else if err == sql.ErrNoRows && config.Keys.JwtConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(tokenUser); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", tokenUser.Username, err)
}
} else if err == nil && config.Keys.JwtConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, tokenUser); err != nil {
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
}
}
func handleOIDCUser(OIDCUser *schema.User) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(OIDCUser.Username)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%s': %v", OIDCUser.Username, err)
} else if err == sql.ErrNoRows && config.Keys.OpenIDConfig.SyncUserOnLogin { // Adds New User
if err := r.AddUser(OIDCUser); err != nil {
log.Errorf("Error while adding user '%s' to DB: %v", OIDCUser.Username, err)
}
} else if err == nil && config.Keys.OpenIDConfig.UpdateUserOnLogin { // Update Existing User
if err := r.UpdateUser(dbUser, OIDCUser); err != nil {
log.Errorf("Error while updating user '%s' to DB: %v", dbUser.Username, err)
}
}
}
func (auth *Authentication) SaveSession(rw http.ResponseWriter, r *http.Request, user *schema.User) error {
session, err := auth.sessionStore.New(r, "session")
if err != nil {
log.Errorf("session creation failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return err
}
if auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
if config.Keys.HttpsCertFile == "" && config.Keys.HttpsKeyFile == "" {
session.Options.Secure = false
}
session.Options.SameSite = http.SameSiteStrictMode
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
if err := auth.sessionStore.Save(r, rw, session); err != nil {
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return err
}
return nil
}
func (auth *Authentication) Login(
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
err := errors.New("no authenticator applied")
username := r.FormValue("username") username := r.FormValue("username")
user := (*User)(nil) var dbUser *schema.User
if username != "" { if username != "" {
if user, _ = auth.GetUser(username); err != nil { var err error
// log.Warnf("login of unkown user %v", username) dbUser, err = repository.GetUserRepository().GetUser(username)
_ = err if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%v'", username)
} }
} }
for _, authenticator := range auth.authenticators { for _, authenticator := range auth.authenticators {
if !authenticator.CanLogin(user, rw, r) { var ok bool
var user *schema.User
if user, ok = authenticator.CanLogin(dbUser, username, rw, r); !ok {
continue continue
} else {
log.Debugf("Can login with user %v", user)
} }
user, err = authenticator.Login(user, rw, r) user, err := authenticator.Login(user, rw, r)
if err != nil { if err != nil {
log.Warnf("user login failed: %s", err.Error()) log.Warnf("user login failed: %s", err.Error())
onfailure(rw, r, err) onfailure(rw, r, err)
return return
} }
session, err := auth.sessionStore.New(r, "session") if err := auth.SaveSession(rw, r, user); err != nil {
if err != nil {
log.Errorf("session creation failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
if auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(auth.SessionMaxAge.Seconds())
}
session.Values["username"] = user.Username
session.Values["projects"] = user.Projects
session.Values["roles"] = user.Roles
if err := auth.sessionStore.Save(r, rw, session); err != nil {
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return return
} }
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects) log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), ContextUserKey, user) ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
if r.FormValue("redirect") != "" {
http.RedirectHandler(r.FormValue("redirect"), http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return
}
http.RedirectHandler("/", http.StatusFound).ServeHTTP(rw, r.WithContext(ctx))
return return
} }
log.Warn("login failed: no authenticator applied") log.Debugf("login failed: no authenticator applied")
onfailure(rw, r, err) onfailure(rw, r, errors.New("no authenticator applied"))
}) })
} }
// Authenticate the user and put a User object in the
// context of the request. If authentication fails,
// do not continue but send client to the login screen.
func (auth *Authentication) Auth( func (auth *Authentication) Auth(
onsuccess http.Handler, onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error)) http.Handler { onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
for _, authenticator := range auth.authenticators { user, err := auth.JwtAuth.AuthViaJWT(rw, r)
user, err := authenticator.Auth(rw, r) if err != nil {
log.Infof("auth -> authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized)
return
}
if user == nil {
user, err = auth.AuthViaSession(rw, r)
if err != nil { if err != nil {
log.Warnf("authentication failed: %s", err.Error()) log.Infof("auth -> authentication failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusUnauthorized) http.Error(rw, err.Error(), http.StatusUnauthorized)
return return
} }
if user == nil { }
continue if user != nil {
} ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
ctx := context.WithValue(r.Context(), ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx)) onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return return
} }
log.Warnf("authentication failed: %s", "no authenticator applied") log.Info("auth -> authentication failed")
// http.Error(rw, http.StatusText(http.StatusUnauthorized), http.StatusUnauthorized) onfailure(rw, r, errors.New("unauthorized (please login first)"))
onfailure(rw, r, errors.New("unauthorized (login first or use a token)"))
}) })
} }
// Clears the session cookie func (auth *Authentication) AuthApi(
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler { onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("auth api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasAllRoles([]schema.Role{schema.RoleAdmin, schema.RoleApi}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
log.Info("auth api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
log.Info("auth api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthUserApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
log.Infof("auth user api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
log.Info("auth user api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
log.Info("auth user api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthConfigApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("auth config api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil && user.HasRole(schema.RoleAdmin) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Info("auth config api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthFrontendApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.AuthViaSession(rw, r)
if err != nil {
log.Infof("auth frontend api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
log.Info("auth frontend api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
session, err := auth.sessionStore.Get(r, "session") session, err := auth.sessionStore.Get(r, "session")
if err != nil { if err != nil {

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -6,39 +6,26 @@ package auth
import ( import (
"crypto/ed25519" "crypto/ed25519"
"database/sql"
"encoding/base64" "encoding/base64"
"errors" "errors"
"fmt"
"net/http" "net/http"
"os" "os"
"strings" "strings"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/golang-jwt/jwt/v4" "github.com/golang-jwt/jwt/v5"
) )
type JWTAuthenticator struct { type JWTAuthenticator struct {
auth *Authentication publicKey ed25519.PublicKey
privateKey ed25519.PrivateKey
publicKey ed25519.PublicKey
privateKey ed25519.PrivateKey
publicKeyCrossLogin ed25519.PublicKey // For accepting externally generated JWTs
loginTokenKey []byte // HS256 key
config *schema.JWTAuthConfig
} }
var _ Authenticator = (*JWTAuthenticator)(nil) func (ja *JWTAuthenticator) Init() error {
func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
ja.auth = auth
ja.config = conf.(*schema.JWTAuthConfig)
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY") pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
if pubKey == "" || privKey == "" { if pubKey == "" || privKey == "" {
log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)") log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
@@ -57,192 +44,38 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
ja.privateKey = ed25519.PrivateKey(bytes) ja.privateKey = ed25519.PrivateKey(bytes)
} }
if pubKey = os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
log.Warn("Could not decode cross login JWT HS512 key")
return err
}
ja.loginTokenKey = bytes
}
// Look for external public keys
pubKeyCrossLogin, keyFound := os.LookupEnv("CROSS_LOGIN_JWT_PUBLIC_KEY")
if keyFound && pubKeyCrossLogin != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
if err != nil {
log.Warn("Could not decode cross login JWT public key")
return err
}
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
// Warn if other necessary settings are not configured
if ja.config != nil {
if ja.config.CookieName == "" {
log.Warn("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
}
if !ja.config.ForceJWTValidationViaDatabase {
log.Warn("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
}
if ja.config.TrustedExternalIssuer == "" {
log.Warn("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
}
} else {
log.Warn("cookieName and trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
}
} else {
ja.publicKeyCrossLogin = nil
log.Warn("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
}
return nil return nil
} }
func (ja *JWTAuthenticator) CanLogin( func (ja *JWTAuthenticator) AuthViaJWT(
user *User,
rw http.ResponseWriter, rw http.ResponseWriter,
r *http.Request) bool { r *http.Request,
) (*schema.User, error) {
return (user != nil && user.AuthSource == AuthViaToken) || r.Header.Get("Authorization") != "" || r.URL.Query().Get("login-token") != ""
}
func (ja *JWTAuthenticator) Login(
user *User,
rw http.ResponseWriter,
r *http.Request) (*User, error) {
rawtoken := r.Header.Get("X-Auth-Token")
if rawtoken == "" {
rawtoken = r.Header.Get("Authorization")
rawtoken = strings.TrimPrefix(rawtoken, "Bearer ")
if rawtoken == "" {
rawtoken = r.URL.Query().Get("login-token")
}
}
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
if t.Method == jwt.SigningMethodEdDSA {
return ja.publicKey, nil
}
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
return ja.loginTokenKey, nil
}
return nil, fmt.Errorf("AUTH/JWT > unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
})
if err != nil {
log.Warn("Error while parsing jwt token")
return nil, err
}
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid")
return nil, err
}
claims := token.Claims.(jwt.MapClaims)
sub, _ := claims["sub"].(string)
exp, _ := claims["exp"].(float64)
var roles []string
if rawroles, ok := claims["roles"].([]interface{}); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
if isValidRole(r) {
roles = append(roles, r)
}
}
}
}
if rawrole, ok := claims["roles"].(string); ok {
if isValidRole(rawrole) {
roles = append(roles, rawrole)
}
}
if user == nil {
user, err = ja.auth.GetUser(sub)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%v'", sub)
return nil, err
} else if user == nil {
user = &User{
Username: sub,
Roles: roles,
AuthSource: AuthViaToken,
}
if err := ja.auth.AddUser(user); err != nil {
log.Errorf("Error while adding user '%v' to auth from token", user.Username)
return nil, err
}
}
}
user.Expiration = time.Unix(int64(exp), 0)
return user, nil
}
func (ja *JWTAuthenticator) Auth(
rw http.ResponseWriter,
r *http.Request) (*User, error) {
rawtoken := r.Header.Get("X-Auth-Token") rawtoken := r.Header.Get("X-Auth-Token")
if rawtoken == "" { if rawtoken == "" {
rawtoken = r.Header.Get("Authorization") rawtoken = r.Header.Get("Authorization")
rawtoken = strings.TrimPrefix(rawtoken, "Bearer ") rawtoken = strings.TrimPrefix(rawtoken, "Bearer ")
} }
// If no auth header was found, check for a certain cookie containing a JWT // there is no token
cookieName := ""
cookieFound := false
if ja.config != nil && ja.config.CookieName != "" {
cookieName = ja.config.CookieName
}
// Try to read the JWT cookie
if rawtoken == "" && cookieName != "" {
jwtCookie, err := r.Cookie(cookieName)
if err == nil && jwtCookie.Value != "" {
rawtoken = jwtCookie.Value
cookieFound = true
}
}
// Because a user can also log in via a token, the
// session cookie must be checked here as well:
if rawtoken == "" { if rawtoken == "" {
return ja.auth.AuthViaSession(rw, r) return nil, nil
} }
// Try to parse JWT
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) { token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
if t.Method != jwt.SigningMethodEdDSA { if t.Method != jwt.SigningMethodEdDSA {
return nil, errors.New("only Ed25519/EdDSA supported") return nil, errors.New("only Ed25519/EdDSA supported")
} }
// Is there more than one public key?
if ja.publicKeyCrossLogin != nil && ja.config != nil && ja.config.TrustedExternalIssuer != "" {
// Determine whether to use the external public key
unvalidatedIssuer, success := t.Claims.(jwt.MapClaims)["iss"].(string)
if success && unvalidatedIssuer == ja.config.TrustedExternalIssuer {
// The (unvalidated) issuer seems to be the expected one,
// use public cross login key from config
return ja.publicKeyCrossLogin, nil
}
}
// No cross login key configured or issuer not expected
// Try own key
return ja.publicKey, nil return ja.publicKey, nil
}) })
if err != nil { if err != nil {
log.Warn("Error while parsing token") log.Warn("Error while parsing JWT token")
return nil, err return nil, err
} }
if !token.Valid {
// Check token validity
if err := token.Claims.Valid(); err != nil {
log.Warn("jwt token claims are not valid") log.Warn("jwt token claims are not valid")
return nil, err return nil, errors.New("jwt token claims are not valid")
} }
// Token is valid, extract payload // Token is valid, extract payload
@@ -252,15 +85,14 @@ func (ja *JWTAuthenticator) Auth(
var roles []string var roles []string
// Validate user + roles from JWT against database? // Validate user + roles from JWT against database?
if ja.config != nil && ja.config.ForceJWTValidationViaDatabase { if config.Keys.JwtConfig.ValidateUser {
user, err := ja.auth.GetUser(sub) ur := repository.GetUserRepository()
user, err := ur.GetUser(sub)
// Deny any logins for unknown usernames // Deny any logins for unknown usernames
if err != nil { if err != nil {
log.Warn("Could not find user from JWT in internal database.") log.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user") return nil, errors.New("unknown user")
} }
// Take user roles from database instead of trusting the JWT // Take user roles from database instead of trusting the JWT
roles = user.Roles roles = user.Roles
} else { } else {
@@ -274,48 +106,16 @@ func (ja *JWTAuthenticator) Auth(
} }
} }
if cookieFound { return &schema.User{
// Create a session so that we no longer need the JTW Cookie
session, err := ja.auth.sessionStore.New(r, "session")
if err != nil {
log.Errorf("session creation failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return nil, err
}
if ja.auth.SessionMaxAge != 0 {
session.Options.MaxAge = int(ja.auth.SessionMaxAge.Seconds())
}
session.Values["username"] = sub
session.Values["roles"] = roles
if err := ja.auth.sessionStore.Save(r, rw, session); err != nil {
log.Warnf("session save failed: %s", err.Error())
http.Error(rw, err.Error(), http.StatusInternalServerError)
return nil, err
}
// (Ask browser to) Delete JWT cookie
deletedCookie := &http.Cookie{
Name: cookieName,
Value: "",
Path: "/",
MaxAge: -1,
HttpOnly: true,
}
http.SetCookie(rw, deletedCookie)
}
return &User{
Username: sub, Username: sub,
Roles: roles, Roles: roles,
AuthSource: AuthViaToken, AuthType: schema.AuthToken,
AuthSource: -1,
}, nil }, nil
} }
// Generate a new JWT that can be used for authentication // Generate a new JWT that can be used for authentication
func (ja *JWTAuthenticator) ProvideJWT(user *User) (string, error) { func (ja *JWTAuthenticator) ProvideJWT(user *schema.User) (string, error) {
if ja.privateKey == nil { if ja.privateKey == nil {
return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set") return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
} }
@@ -326,8 +126,12 @@ func (ja *JWTAuthenticator) ProvideJWT(user *User) (string, error) {
"roles": user.Roles, "roles": user.Roles,
"iat": now.Unix(), "iat": now.Unix(),
} }
if ja.config != nil && ja.config.MaxAge != 0 { if config.Keys.JwtConfig.MaxAge != "" {
claims["exp"] = now.Add(time.Duration(ja.config.MaxAge)).Unix() d, err := time.ParseDuration(config.Keys.JwtConfig.MaxAge)
if err != nil {
return "", errors.New("cannot parse max-age config key")
}
claims["exp"] = now.Add(d).Unix()
} }
return jwt.NewWithClaims(jwt.SigningMethodEdDSA, claims).SignedString(ja.privateKey) return jwt.NewWithClaims(jwt.SigningMethodEdDSA, claims).SignedString(ja.privateKey)

View File

@@ -0,0 +1,217 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"crypto/ed25519"
"database/sql"
"encoding/base64"
"errors"
"fmt"
"net/http"
"os"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/golang-jwt/jwt/v5"
)
type JWTCookieSessionAuthenticator struct {
publicKey ed25519.PublicKey
privateKey ed25519.PrivateKey
publicKeyCrossLogin ed25519.PublicKey // For accepting externally generated JWTs
}
var _ Authenticator = (*JWTCookieSessionAuthenticator)(nil)
func (ja *JWTCookieSessionAuthenticator) Init() error {
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
if pubKey == "" || privKey == "" {
log.Warn("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
return errors.New("environment variables 'JWT_PUBLIC_KEY' or 'JWT_PRIVATE_KEY' not set (token based authentication will not work)")
} else {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
log.Warn("Could not decode JWT public key")
return err
}
ja.publicKey = ed25519.PublicKey(bytes)
bytes, err = base64.StdEncoding.DecodeString(privKey)
if err != nil {
log.Warn("Could not decode JWT private key")
return err
}
ja.privateKey = ed25519.PrivateKey(bytes)
}
// Look for external public keys
pubKeyCrossLogin, keyFound := os.LookupEnv("CROSS_LOGIN_JWT_PUBLIC_KEY")
if keyFound && pubKeyCrossLogin != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKeyCrossLogin)
if err != nil {
log.Warn("Could not decode cross login JWT public key")
return err
}
ja.publicKeyCrossLogin = ed25519.PublicKey(bytes)
} else {
ja.publicKeyCrossLogin = nil
log.Debug("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
return errors.New("environment variable 'CROSS_LOGIN_JWT_PUBLIC_KEY' not set (cross login token based authentication will not work)")
}
jc := config.Keys.JwtConfig
// Warn if other necessary settings are not configured
if jc != nil {
if jc.CookieName == "" {
log.Info("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
return errors.New("cookieName for JWTs not configured (cross login via JWT cookie will fail)")
}
if !jc.ValidateUser {
log.Info("forceJWTValidationViaDatabase not set to true: CC will accept users and roles defined in JWTs regardless of its own database!")
}
if jc.TrustedIssuer == "" {
log.Info("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
return errors.New("trustedExternalIssuer for JWTs not configured (cross login via JWT cookie will fail)")
}
} else {
log.Warn("config for JWTs not configured (cross login via JWT cookie will fail)")
return errors.New("config for JWTs not configured (cross login via JWT cookie will fail)")
}
log.Info("JWT Cookie Session authenticator successfully registered")
return nil
}
func (ja *JWTCookieSessionAuthenticator) CanLogin(
user *schema.User,
username string,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, bool) {
jc := config.Keys.JwtConfig
cookieName := ""
if jc.CookieName != "" {
cookieName = jc.CookieName
}
// Try to read the JWT cookie
if cookieName != "" {
jwtCookie, err := r.Cookie(cookieName)
if err == nil && jwtCookie.Value != "" {
return user, true
}
}
return nil, false
}
func (ja *JWTCookieSessionAuthenticator) Login(
user *schema.User,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, error) {
jc := config.Keys.JwtConfig
jwtCookie, err := r.Cookie(jc.CookieName)
var rawtoken string
if err == nil && jwtCookie.Value != "" {
rawtoken = jwtCookie.Value
}
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
if t.Method != jwt.SigningMethodEdDSA {
return nil, errors.New("only Ed25519/EdDSA supported")
}
unvalidatedIssuer, success := t.Claims.(jwt.MapClaims)["iss"].(string)
if success && unvalidatedIssuer == jc.TrustedIssuer {
// The (unvalidated) issuer seems to be the expected one,
// use public cross login key from config
return ja.publicKeyCrossLogin, nil
}
// No cross login key configured or issuer not expected
// Try own key
return ja.publicKey, nil
})
if err != nil {
log.Warn("JWT cookie session: error while parsing token")
return nil, err
}
if !token.Valid {
log.Warn("jwt token claims are not valid")
return nil, errors.New("jwt token claims are not valid")
}
claims := token.Claims.(jwt.MapClaims)
sub, _ := claims["sub"].(string)
var roles []string
projects := make([]string, 0)
if jc.ValidateUser {
var err error
user, err = repository.GetUserRepository().GetUser(sub)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%v'", sub)
}
// Deny any logins for unknown usernames
if user == nil {
log.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user")
}
} else {
var name string
if wrap, ok := claims["name"].(map[string]interface{}); ok {
if vals, ok := wrap["values"].([]interface{}); ok {
if len(vals) != 0 {
name = fmt.Sprintf("%v", vals[0])
for i := 1; i < len(vals); i++ {
name += fmt.Sprintf(" %v", vals[i])
}
}
}
}
// Extract roles from JWT (if present)
if rawroles, ok := claims["roles"].([]interface{}); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
roles = append(roles, r)
}
}
}
user = &schema.User{
Username: sub,
Name: name,
Roles: roles,
Projects: projects,
AuthType: schema.AuthSession,
AuthSource: schema.AuthViaToken,
}
if jc.SyncUserOnLogin || jc.UpdateUserOnLogin {
handleTokenUser(user)
}
}
// (Ask browser to) Delete JWT cookie
deletedCookie := &http.Cookie{
Name: jc.CookieName,
Value: "",
Path: "/",
MaxAge: -1,
HttpOnly: true,
}
http.SetCookie(rw, deletedCookie)
return user, nil
}

147
internal/auth/jwtSession.go Normal file
View File

@@ -0,0 +1,147 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"database/sql"
"encoding/base64"
"errors"
"fmt"
"net/http"
"os"
"strings"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/golang-jwt/jwt/v5"
)
type JWTSessionAuthenticator struct {
loginTokenKey []byte // HS256 key
}
var _ Authenticator = (*JWTSessionAuthenticator)(nil)
func (ja *JWTSessionAuthenticator) Init() error {
if pubKey := os.Getenv("CROSS_LOGIN_JWT_HS512_KEY"); pubKey != "" {
bytes, err := base64.StdEncoding.DecodeString(pubKey)
if err != nil {
log.Warn("Could not decode cross login JWT HS512 key")
return err
}
ja.loginTokenKey = bytes
}
log.Info("JWT Session authenticator successfully registered")
return nil
}
func (ja *JWTSessionAuthenticator) CanLogin(
user *schema.User,
username string,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, bool) {
return user, r.Header.Get("Authorization") != "" ||
r.URL.Query().Get("login-token") != ""
}
func (ja *JWTSessionAuthenticator) Login(
user *schema.User,
rw http.ResponseWriter,
r *http.Request,
) (*schema.User, error) {
rawtoken := strings.TrimPrefix(r.Header.Get("Authorization"), "Bearer ")
if rawtoken == "" {
rawtoken = r.URL.Query().Get("login-token")
}
token, err := jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
if t.Method == jwt.SigningMethodHS256 || t.Method == jwt.SigningMethodHS512 {
return ja.loginTokenKey, nil
}
return nil, fmt.Errorf("unkown signing method for login token: %s (known: HS256, HS512, EdDSA)", t.Method.Alg())
})
if err != nil {
log.Warn("Error while parsing jwt token")
return nil, err
}
if !token.Valid {
log.Warn("jwt token claims are not valid")
return nil, errors.New("jwt token claims are not valid")
}
claims := token.Claims.(jwt.MapClaims)
sub, _ := claims["sub"].(string)
var roles []string
projects := make([]string, 0)
if config.Keys.JwtConfig.ValidateUser {
var err error
user, err = repository.GetUserRepository().GetUser(sub)
if err != nil && err != sql.ErrNoRows {
log.Errorf("Error while loading user '%v'", sub)
}
// Deny any logins for unknown usernames
if user == nil {
log.Warn("Could not find user from JWT in internal database.")
return nil, errors.New("unknown user")
}
} else {
var name string
if wrap, ok := claims["name"].(map[string]interface{}); ok {
if vals, ok := wrap["values"].([]interface{}); ok {
if len(vals) != 0 {
name = fmt.Sprintf("%v", vals[0])
for i := 1; i < len(vals); i++ {
name += fmt.Sprintf(" %v", vals[i])
}
}
}
}
// Extract roles from JWT (if present)
if rawroles, ok := claims["roles"].([]interface{}); ok {
for _, rr := range rawroles {
if r, ok := rr.(string); ok {
if schema.IsValidRole(r) {
roles = append(roles, r)
}
}
}
}
if rawprojs, ok := claims["projects"].([]interface{}); ok {
for _, pp := range rawprojs {
if p, ok := pp.(string); ok {
projects = append(projects, p)
}
}
} else if rawprojs, ok := claims["projects"]; ok {
projects = append(projects, rawprojs.([]string)...)
}
user = &schema.User{
Username: sub,
Name: name,
Roles: roles,
Projects: projects,
AuthType: schema.AuthSession,
AuthSource: schema.AuthViaToken,
}
if config.Keys.JwtConfig.SyncUserOnLogin || config.Keys.JwtConfig.UpdateUserOnLogin {
handleTokenUser(user)
}
}
return user, nil
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -6,76 +6,112 @@ package auth
import ( import (
"errors" "errors"
"fmt"
"net/http" "net/http"
"os" "os"
"strings" "strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/go-ldap/ldap/v3" "github.com/go-ldap/ldap/v3"
) )
type LdapAuthenticator struct { type LdapAuthenticator struct {
auth *Authentication
config *schema.LdapConfig
syncPassword string syncPassword string
UserAttr string
} }
var _ Authenticator = (*LdapAuthenticator)(nil) var _ Authenticator = (*LdapAuthenticator)(nil)
func (la *LdapAuthenticator) Init( func (la *LdapAuthenticator) Init() error {
auth *Authentication,
conf interface{}) error {
la.auth = auth
la.config = conf.(*schema.LdapConfig)
la.syncPassword = os.Getenv("LDAP_ADMIN_PASSWORD") la.syncPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
if la.syncPassword == "" { if la.syncPassword == "" {
log.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)") log.Warn("environment variable 'LDAP_ADMIN_PASSWORD' not set (ldap sync will not work)")
} }
if la.config != nil && la.config.SyncInterval != "" { lc := config.Keys.LdapConfig
interval, err := time.ParseDuration(la.config.SyncInterval)
if err != nil {
log.Warnf("Could not parse duration for sync interval: %v", la.config.SyncInterval)
return err
}
if interval == 0 { if lc.UserAttr != "" {
log.Info("Sync interval is zero") la.UserAttr = lc.UserAttr
return nil } else {
} la.UserAttr = "gecos"
go func() {
ticker := time.NewTicker(interval)
for t := range ticker.C {
log.Printf("sync started at %s", t.Format(time.RFC3339))
if err := la.Sync(); err != nil {
log.Errorf("sync failed: %s", err.Error())
}
log.Print("sync done")
}
}()
} }
return nil return nil
} }
func (la *LdapAuthenticator) CanLogin( func (la *LdapAuthenticator) CanLogin(
user *User, user *schema.User,
username string,
rw http.ResponseWriter, rw http.ResponseWriter,
r *http.Request) bool { r *http.Request,
) (*schema.User, bool) {
lc := config.Keys.LdapConfig
return user != nil && user.AuthSource == AuthViaLDAP if user != nil {
if user.AuthSource == schema.AuthViaLDAP {
return user, true
}
} else {
if lc.SyncUserOnLogin {
l, err := la.getLdapConnection(true)
if err != nil {
log.Error("LDAP connection error")
}
defer l.Close()
// Search for the given username
searchRequest := ldap.NewSearchRequest(
lc.UserBase,
ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
fmt.Sprintf("(&%s(uid=%s))", lc.UserFilter, username),
[]string{"dn", "uid", la.UserAttr}, nil)
sr, err := l.Search(searchRequest)
if err != nil {
log.Warn(err)
return nil, false
}
if len(sr.Entries) != 1 {
log.Warn("LDAP: User does not exist or too many entries returned")
return nil, false
}
entry := sr.Entries[0]
name := entry.GetAttributeValue(la.UserAttr)
var roles []string
roles = append(roles, schema.GetRoleString(schema.RoleUser))
projects := make([]string, 0)
user = &schema.User{
Username: username,
Name: name,
Roles: roles,
Projects: projects,
AuthType: schema.AuthSession,
AuthSource: schema.AuthViaLDAP,
}
if err := repository.GetUserRepository().AddUser(user); err != nil {
log.Errorf("User '%s' LDAP: Insert into DB failed", username)
return nil, false
}
return user, true
}
}
return nil, false
} }
func (la *LdapAuthenticator) Login( func (la *LdapAuthenticator) Login(
user *User, user *schema.User,
rw http.ResponseWriter, rw http.ResponseWriter,
r *http.Request) (*User, error) { r *http.Request,
) (*schema.User, error) {
l, err := la.getLdapConnection(false) l, err := la.getLdapConnection(false)
if err != nil { if err != nil {
log.Warn("Error while getting ldap connection") log.Warn("Error while getting ldap connection")
@@ -83,42 +119,30 @@ func (la *LdapAuthenticator) Login(
} }
defer l.Close() defer l.Close()
userDn := strings.Replace(la.config.UserBind, "{username}", user.Username, -1) userDn := strings.Replace(config.Keys.LdapConfig.UserBind, "{username}", user.Username, -1)
if err := l.Bind(userDn, r.FormValue("password")); err != nil { if err := l.Bind(userDn, r.FormValue("password")); err != nil {
log.Error("Error while binding to ldap connection") log.Errorf("AUTH/LDAP > Authentication for user %s failed: %v",
return nil, err user.Username, err)
return nil, fmt.Errorf("Authentication failed")
} }
return user, nil return user, nil
} }
func (la *LdapAuthenticator) Auth(
rw http.ResponseWriter,
r *http.Request) (*User, error) {
return la.auth.AuthViaSession(rw, r)
}
func (la *LdapAuthenticator) Sync() error { func (la *LdapAuthenticator) Sync() error {
const IN_DB int = 1 const IN_DB int = 1
const IN_LDAP int = 2 const IN_LDAP int = 2
const IN_BOTH int = 3 const IN_BOTH int = 3
ur := repository.GetUserRepository()
lc := config.Keys.LdapConfig
users := map[string]int{} users := map[string]int{}
rows, err := la.auth.db.Query(`SELECT username FROM user WHERE user.ldap = 1`) usernames, err := ur.GetLdapUsernames()
if err != nil { if err != nil {
log.Warn("Error while querying LDAP users")
return err return err
} }
for rows.Next() { for _, username := range usernames {
var username string
if err := rows.Scan(&username); err != nil {
log.Warnf("Error while scanning for user '%s'", username)
return err
}
users[username] = IN_DB users[username] = IN_DB
} }
@@ -130,8 +154,10 @@ func (la *LdapAuthenticator) Sync() error {
defer l.Close() defer l.Close()
ldapResults, err := l.Search(ldap.NewSearchRequest( ldapResults, err := l.Search(ldap.NewSearchRequest(
la.config.UserBase, ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false, lc.UserBase,
la.config.UserFilter, []string{"dn", "uid", "gecos"}, nil)) ldap.ScopeWholeSubtree, ldap.NeverDerefAliases, 0, 0, false,
lc.UserFilter,
[]string{"dn", "uid", la.UserAttr}, nil))
if err != nil { if err != nil {
log.Warn("LDAP search error") log.Warn("LDAP search error")
return err return err
@@ -147,25 +173,34 @@ func (la *LdapAuthenticator) Sync() error {
_, ok := users[username] _, ok := users[username]
if !ok { if !ok {
users[username] = IN_LDAP users[username] = IN_LDAP
newnames[username] = entry.GetAttributeValue("gecos") newnames[username] = entry.GetAttributeValue(la.UserAttr)
} else { } else {
users[username] = IN_BOTH users[username] = IN_BOTH
} }
} }
for username, where := range users { for username, where := range users {
if where == IN_DB && la.config.SyncDelOldUsers { if where == IN_DB && lc.SyncDelOldUsers {
ur.DelUser(username)
log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username) log.Debugf("sync: remove %v (does not show up in LDAP anymore)", username)
if _, err := la.auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username); err != nil {
log.Errorf("User '%s' not in LDAP anymore: Delete from DB failed", username)
return err
}
} else if where == IN_LDAP { } else if where == IN_LDAP {
name := newnames[username] name := newnames[username]
var roles []string
roles = append(roles, schema.GetRoleString(schema.RoleUser))
projects := make([]string, 0)
user := &schema.User{
Username: username,
Name: name,
Roles: roles,
Projects: projects,
AuthSource: schema.AuthViaLDAP,
}
log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name) log.Debugf("sync: add %v (name: %v, roles: [user], ldap: true)", username, name)
if _, err := la.auth.db.Exec(`INSERT INTO user (username, ldap, name, roles) VALUES (?, ?, ?, ?)`, if err := ur.AddUser(user); err != nil {
username, 1, name, "[\""+GetRoleString(RoleUser)+"\"]"); err != nil { log.Errorf("User '%s' LDAP: Insert into DB failed", username)
log.Errorf("User '%s' new in LDAP: Insert into DB failed", username)
return err return err
} }
} }
@@ -174,18 +209,16 @@ func (la *LdapAuthenticator) Sync() error {
return nil return nil
} }
// TODO: Add a connection pool or something like
// that so that connections can be reused/cached.
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) { func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
lc := config.Keys.LdapConfig
conn, err := ldap.DialURL(la.config.Url) conn, err := ldap.DialURL(lc.Url)
if err != nil { if err != nil {
log.Warn("LDAP URL dial failed") log.Warn("LDAP URL dial failed")
return nil, err return nil, err
} }
if admin { if admin {
if err := conn.Bind(la.config.SearchDN, la.syncPassword); err != nil { if err := conn.Bind(lc.SearchDN, la.syncPassword); err != nil {
conn.Close() conn.Close()
log.Warn("LDAP connection bind failed") log.Warn("LDAP connection bind failed")
return nil, err return nil, err

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -8,6 +8,8 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"golang.org/x/crypto/bcrypt" "golang.org/x/crypto/bcrypt"
) )
@@ -17,37 +19,29 @@ type LocalAuthenticator struct {
var _ Authenticator = (*LocalAuthenticator)(nil) var _ Authenticator = (*LocalAuthenticator)(nil)
func (la *LocalAuthenticator) Init( func (la *LocalAuthenticator) Init() error {
auth *Authentication,
_ interface{}) error {
la.auth = auth
return nil return nil
} }
func (la *LocalAuthenticator) CanLogin( func (la *LocalAuthenticator) CanLogin(
user *User, user *schema.User,
username string,
rw http.ResponseWriter, rw http.ResponseWriter,
r *http.Request) bool { r *http.Request) (*schema.User, bool) {
return user != nil && user.AuthSource == AuthViaLocalPassword return user, user != nil && user.AuthSource == schema.AuthViaLocalPassword
} }
func (la *LocalAuthenticator) Login( func (la *LocalAuthenticator) Login(
user *User, user *schema.User,
rw http.ResponseWriter, rw http.ResponseWriter,
r *http.Request) (*User, error) { r *http.Request) (*schema.User, error) {
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil { if e := bcrypt.CompareHashAndPassword([]byte(user.Password),
return nil, fmt.Errorf("AUTH/LOCAL > user '%s' provided the wrong password (%w)", user.Username, e) []byte(r.FormValue("password"))); e != nil {
log.Errorf("AUTH/LOCAL > Authentication for user %s failed!", user.Username)
return nil, fmt.Errorf("Authentication failed")
} }
return user, nil return user, nil
} }
func (la *LocalAuthenticator) Auth(
rw http.ResponseWriter,
r *http.Request) (*User, error) {
return la.auth.AuthViaSession(rw, r)
}

196
internal/auth/oidc.go Normal file
View File

@@ -0,0 +1,196 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"context"
"crypto/rand"
"encoding/base64"
"io"
"net/http"
"os"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/coreos/go-oidc/v3/oidc"
"github.com/gorilla/mux"
"golang.org/x/oauth2"
)
type OIDC struct {
client *oauth2.Config
provider *oidc.Provider
authentication *Authentication
clientID string
}
func randString(nByte int) (string, error) {
b := make([]byte, nByte)
if _, err := io.ReadFull(rand.Reader, b); err != nil {
return "", err
}
return base64.RawURLEncoding.EncodeToString(b), nil
}
func setCallbackCookie(w http.ResponseWriter, r *http.Request, name, value string) {
c := &http.Cookie{
Name: name,
Value: value,
MaxAge: int(time.Hour.Seconds()),
Secure: r.TLS != nil,
HttpOnly: true,
}
http.SetCookie(w, c)
}
func NewOIDC(a *Authentication) *OIDC {
provider, err := oidc.NewProvider(context.Background(), config.Keys.OpenIDConfig.Provider)
if err != nil {
log.Fatal(err)
}
clientID := os.Getenv("OID_CLIENT_ID")
if clientID == "" {
log.Warn("environment variable 'OID_CLIENT_ID' not set (Open ID connect auth will not work)")
}
clientSecret := os.Getenv("OID_CLIENT_SECRET")
if clientSecret == "" {
log.Warn("environment variable 'OID_CLIENT_SECRET' not set (Open ID connect auth will not work)")
}
client := &oauth2.Config{
ClientID: clientID,
ClientSecret: clientSecret,
Endpoint: provider.Endpoint(),
RedirectURL: "oidc-callback",
Scopes: []string{oidc.ScopeOpenID, "profile", "email"},
}
oa := &OIDC{provider: provider, client: client, clientID: clientID, authentication: a}
return oa
}
func (oa *OIDC) RegisterEndpoints(r *mux.Router) {
r.HandleFunc("/oidc-login", oa.OAuth2Login)
r.HandleFunc("/oidc-callback", oa.OAuth2Callback)
}
func (oa *OIDC) OAuth2Callback(rw http.ResponseWriter, r *http.Request) {
c, err := r.Cookie("state")
if err != nil {
http.Error(rw, "state cookie not found", http.StatusBadRequest)
return
}
state := c.Value
c, err = r.Cookie("verifier")
if err != nil {
http.Error(rw, "verifier cookie not found", http.StatusBadRequest)
return
}
codeVerifier := c.Value
_ = r.ParseForm()
if r.Form.Get("state") != state {
http.Error(rw, "State invalid", http.StatusBadRequest)
return
}
code := r.Form.Get("code")
if code == "" {
http.Error(rw, "Code not found", http.StatusBadRequest)
return
}
token, err := oa.client.Exchange(context.Background(), code, oauth2.VerifierOption(codeVerifier))
if err != nil {
http.Error(rw, "Failed to exchange token: "+err.Error(), http.StatusInternalServerError)
return
}
userInfo, err := oa.provider.UserInfo(context.Background(), oauth2.StaticTokenSource(token))
if err != nil {
http.Error(rw, "Failed to get userinfo: "+err.Error(), http.StatusInternalServerError)
return
}
// // Extract the ID Token from OAuth2 token.
// rawIDToken, ok := token.Extra("id_token").(string)
// if !ok {
// http.Error(rw, "Cannot access idToken", http.StatusInternalServerError)
// }
//
// verifier := oa.provider.Verifier(&oidc.Config{ClientID: oa.clientID})
// // Parse and verify ID Token payload.
// idToken, err := verifier.Verify(context.Background(), rawIDToken)
// if err != nil {
// http.Error(rw, "Failed to extract idToken: "+err.Error(), http.StatusInternalServerError)
// }
projects := make([]string, 0)
// Extract custom claims
var claims struct {
Username string `json:"preferred_username"`
Name string `json:"name"`
Profile struct {
Client struct {
Roles []string `json:"roles"`
} `json:"clustercockpit"`
} `json:"resource_access"`
}
if err := userInfo.Claims(&claims); err != nil {
http.Error(rw, "Failed to extract Claims: "+err.Error(), http.StatusInternalServerError)
}
var roles []string
for _, r := range claims.Profile.Client.Roles {
switch r {
case "user":
roles = append(roles, schema.GetRoleString(schema.RoleUser))
case "admin":
roles = append(roles, schema.GetRoleString(schema.RoleAdmin))
}
}
if len(roles) == 0 {
roles = append(roles, schema.GetRoleString(schema.RoleUser))
}
user := &schema.User{
Username: claims.Username,
Name: claims.Name,
Roles: roles,
Projects: projects,
AuthSource: schema.AuthViaOIDC,
}
if config.Keys.OpenIDConfig.SyncUserOnLogin || config.Keys.OpenIDConfig.UpdateUserOnLogin {
handleOIDCUser(user)
}
oa.authentication.SaveSession(rw, r, user)
log.Infof("login successfull: user: %#v (roles: %v, projects: %v)", user.Username, user.Roles, user.Projects)
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
http.RedirectHandler("/", http.StatusTemporaryRedirect).ServeHTTP(rw, r.WithContext(ctx))
}
func (oa *OIDC) OAuth2Login(rw http.ResponseWriter, r *http.Request) {
state, err := randString(16)
if err != nil {
http.Error(rw, "Internal error", http.StatusInternalServerError)
return
}
setCallbackCookie(rw, r, "state", state)
// use PKCE to protect against CSRF attacks
codeVerifier := oauth2.GenerateVerifier()
setCallbackCookie(rw, r, "verifier", codeVerifier)
// Redirect user to consent page to ask for permission
url := oa.client.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.S256ChallengeOption(codeVerifier))
http.Redirect(rw, r, url, http.StatusFound)
}

View File

@@ -1,289 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package auth
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
sq "github.com/Masterminds/squirrel"
"github.com/jmoiron/sqlx"
"golang.org/x/crypto/bcrypt"
)
func (auth *Authentication) GetUser(username string) (*User, error) {
user := &User{Username: username}
var hashedPassword, name, rawRoles, email, rawProjects sql.NullString
if err := sq.Select("password", "ldap", "name", "roles", "email", "projects").From("user").
Where("user.username = ?", username).RunWith(auth.db).
QueryRow().Scan(&hashedPassword, &user.AuthSource, &name, &rawRoles, &email, &rawProjects); err != nil {
log.Warnf("Error while querying user '%v' from database", username)
return nil, err
}
user.Password = hashedPassword.String
user.Name = name.String
user.Email = email.String
if rawRoles.Valid {
if err := json.Unmarshal([]byte(rawRoles.String), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw roles from DB")
return nil, err
}
}
if rawProjects.Valid {
if err := json.Unmarshal([]byte(rawProjects.String), &user.Projects); err != nil {
return nil, err
}
}
return user, nil
}
func (auth *Authentication) AddUser(user *User) error {
rolesJson, _ := json.Marshal(user.Roles)
projectsJson, _ := json.Marshal(user.Projects)
cols := []string{"username", "roles", "projects"}
vals := []interface{}{user.Username, string(rolesJson), string(projectsJson)}
if user.Name != "" {
cols = append(cols, "name")
vals = append(vals, user.Name)
}
if user.Email != "" {
cols = append(cols, "email")
vals = append(vals, user.Email)
}
if user.Password != "" {
password, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost)
if err != nil {
log.Error("Error while encrypting new user password")
return err
}
cols = append(cols, "password")
vals = append(vals, string(password))
}
if _, err := sq.Insert("user").Columns(cols...).Values(vals...).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while inserting new user '%v' into DB", user.Username)
return err
}
log.Infof("new user %#v created (roles: %s, auth-source: %d, projects: %s)", user.Username, rolesJson, user.AuthSource, projectsJson)
return nil
}
func (auth *Authentication) DelUser(username string) error {
_, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
log.Errorf("Error while deleting user '%s' from DB", username)
return err
}
func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
q := sq.Select("username", "name", "email", "roles", "projects").From("user")
if specialsOnly {
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
}
rows, err := q.RunWith(auth.db).Query()
if err != nil {
log.Warn("Error while querying user list")
return nil, err
}
users := make([]*User, 0)
defer rows.Close()
for rows.Next() {
rawroles := ""
rawprojects := ""
user := &User{}
var name, email sql.NullString
if err := rows.Scan(&user.Username, &name, &email, &rawroles, &rawprojects); err != nil {
log.Warn("Error while scanning user list")
return nil, err
}
if err := json.Unmarshal([]byte(rawroles), &user.Roles); err != nil {
log.Warn("Error while unmarshaling raw role list")
return nil, err
}
if err := json.Unmarshal([]byte(rawprojects), &user.Projects); err != nil {
return nil, err
}
user.Name = name.String
user.Email = email.String
users = append(users, user)
}
return users, nil
}
func (auth *Authentication) AddRole(
ctx context.Context,
username string,
queryrole string) error {
newRole := strings.ToLower(queryrole)
user, err := auth.GetUser(username)
if err != nil {
log.Warnf("Could not load user '%s'", username)
return err
}
exists, valid := user.HasValidRole(newRole)
if !valid {
return fmt.Errorf("Supplied role is no valid option : %v", newRole)
}
if exists {
return fmt.Errorf("User %v already has role %v", username, newRole)
}
roles, _ := json.Marshal(append(user.Roles, newRole))
if _, err := sq.Update("user").Set("roles", roles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while adding new role for user '%s'", user.Username)
return err
}
return nil
}
func (auth *Authentication) RemoveRole(ctx context.Context, username string, queryrole string) error {
oldRole := strings.ToLower(queryrole)
user, err := auth.GetUser(username)
if err != nil {
log.Warnf("Could not load user '%s'", username)
return err
}
exists, valid := user.HasValidRole(oldRole)
if !valid {
return fmt.Errorf("Supplied role is no valid option : %v", oldRole)
}
if !exists {
return fmt.Errorf("Role already deleted for user '%v': %v", username, oldRole)
}
if oldRole == GetRoleString(RoleManager) && len(user.Projects) != 0 {
return fmt.Errorf("Cannot remove role 'manager' while user %s still has assigned project(s) : %v", username, user.Projects)
}
var newroles []string
for _, r := range user.Roles {
if r != oldRole {
newroles = append(newroles, r) // Append all roles not matching requested to be deleted role
}
}
var mroles, _ = json.Marshal(newroles)
if _, err := sq.Update("user").Set("roles", mroles).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
log.Errorf("Error while removing role for user '%s'", user.Username)
return err
}
return nil
}
func (auth *Authentication) AddProject(
ctx context.Context,
username string,
project string) error {
user, err := auth.GetUser(username)
if err != nil {
return err
}
if !user.HasRole(RoleManager) {
return fmt.Errorf("user '%s' is not a manager!", username)
}
if user.HasProject(project) {
return fmt.Errorf("user '%s' already manages project '%s'", username, project)
}
projects, _ := json.Marshal(append(user.Projects, project))
if _, err := sq.Update("user").Set("projects", projects).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
return err
}
return nil
}
func (auth *Authentication) RemoveProject(ctx context.Context, username string, project string) error {
user, err := auth.GetUser(username)
if err != nil {
return err
}
if !user.HasRole(RoleManager) {
return fmt.Errorf("user '%#v' is not a manager!", username)
}
if !user.HasProject(project) {
return fmt.Errorf("user '%#v': Cannot remove project '%#v' - Does not match!", username, project)
}
var exists bool
var newprojects []string
for _, p := range user.Projects {
if p != project {
newprojects = append(newprojects, p) // Append all projects not matching requested to be deleted project
} else {
exists = true
}
}
if exists == true {
var result interface{}
if len(newprojects) == 0 {
result = "[]"
} else {
result, _ = json.Marshal(newprojects)
}
if _, err := sq.Update("user").Set("projects", result).Where("user.username = ?", username).RunWith(auth.db).Exec(); err != nil {
return err
}
return nil
} else {
return fmt.Errorf("user %s already does not manage project %s", username, project)
}
}
func FetchUser(ctx context.Context, db *sqlx.DB, username string) (*model.User, error) {
me := GetUser(ctx)
if me != nil && me.Username != username && me.HasNotRoles([]Role{RoleAdmin, RoleSupport, RoleManager}) {
return nil, errors.New("forbidden")
}
user := &model.User{Username: username}
var name, email sql.NullString
if err := sq.Select("name", "email").From("user").Where("user.username = ?", username).
RunWith(db).QueryRow().Scan(&name, &email); err != nil {
if err == sql.ErrNoRows {
/* This warning will be logged *often* for non-local users, i.e. users mentioned only in job-table or archive, */
/* since FetchUser will be called to retrieve full name and mail for every job in query/list */
// log.Warnf("User '%s' Not found in DB", username)
return nil, nil
}
log.Warnf("Error while fetching user '%s'", username)
return nil, err
}
user.Name = name.String
user.Email = email.String
return user, nil
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -22,26 +22,30 @@ var Keys schema.ProgramConfig = schema.ProgramConfig{
Archive: json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`), Archive: json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`),
DisableArchive: false, DisableArchive: false,
Validate: false, Validate: false,
LdapConfig: nil,
SessionMaxAge: "168h", SessionMaxAge: "168h",
StopJobsExceedingWalltime: 0, StopJobsExceedingWalltime: 0,
ShortRunningJobsDuration: 5 * 60, ShortRunningJobsDuration: 5 * 60,
UiDefaults: map[string]interface{}{ UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}}, "analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"}, "job_view_showFootprint": true,
"plot_general_colorBackground": true, "job_list_usePaging": false,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"}, "plot_general_colorBackground": true,
"plot_general_lineWidth": 3, "plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_list_jobsPerPage": 50, "plot_general_lineWidth": 3,
"plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw"}, "plot_list_jobsPerPage": 50,
"plot_view_plotsPerRow": 3, "plot_list_selectedMetrics": []string{"cpu_load", "mem_used", "flops_any", "mem_bw"},
"plot_view_showPolarplot": true, "plot_view_plotsPerRow": 3,
"plot_view_showRoofline": true, "plot_view_showPolarplot": true,
"plot_view_showStatTable": true, "plot_view_showRoofline": true,
"system_view_selectedMetric": "cpu_load", "plot_view_showStatTable": true,
"system_view_selectedMetric": "cpu_load",
"analysis_view_selectedTopEntity": "user",
"analysis_view_selectedTopCategory": "totalWalltime",
"status_view_selectedTopUserCategory": "totalJobs",
"status_view_selectedTopProjectCategory": "totalJobs",
}, },
} }

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.

View File

@@ -16,14 +16,26 @@ type Count struct {
Count int `json:"count"` Count int `json:"count"`
} }
type EnergyFootprintValue struct {
Hardware string `json:"hardware"`
Metric string `json:"metric"`
Value float64 `json:"value"`
}
type FloatRange struct { type FloatRange struct {
From float64 `json:"from"` From float64 `json:"from"`
To float64 `json:"to"` To float64 `json:"to"`
} }
type FootprintValue struct {
Name string `json:"name"`
Stat string `json:"stat"`
Value float64 `json:"value"`
}
type Footprints struct { type Footprints struct {
Nodehours []schema.Float `json:"nodehours"` TimeWeights *TimeWeights `json:"timeWeights"`
Metrics []*MetricFootprints `json:"metrics"` Metrics []*MetricFootprints `json:"metrics"`
} }
type HistoPoint struct { type HistoPoint struct {
@@ -37,30 +49,25 @@ type IntRangeOutput struct {
} }
type JobFilter struct { type JobFilter struct {
Tags []string `json:"tags"` Tags []string `json:"tags,omitempty"`
JobID *StringInput `json:"jobId"` JobID *StringInput `json:"jobId,omitempty"`
ArrayJobID *int `json:"arrayJobId"` ArrayJobID *int `json:"arrayJobId,omitempty"`
User *StringInput `json:"user"` User *StringInput `json:"user,omitempty"`
Project *StringInput `json:"project"` Project *StringInput `json:"project,omitempty"`
JobName *StringInput `json:"jobName"` JobName *StringInput `json:"jobName,omitempty"`
Cluster *StringInput `json:"cluster"` Cluster *StringInput `json:"cluster,omitempty"`
Partition *StringInput `json:"partition"` Partition *StringInput `json:"partition,omitempty"`
Duration *schema.IntRange `json:"duration"` Duration *schema.IntRange `json:"duration,omitempty"`
MinRunningFor *int `json:"minRunningFor"` Energy *FloatRange `json:"energy,omitempty"`
NumNodes *schema.IntRange `json:"numNodes"` MinRunningFor *int `json:"minRunningFor,omitempty"`
NumAccelerators *schema.IntRange `json:"numAccelerators"` NumNodes *schema.IntRange `json:"numNodes,omitempty"`
NumHWThreads *schema.IntRange `json:"numHWThreads"` NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
StartTime *schema.TimeRange `json:"startTime"` NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
State []schema.JobState `json:"state"` StartTime *schema.TimeRange `json:"startTime,omitempty"`
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg"` State []schema.JobState `json:"state,omitempty"`
MemBwAvg *FloatRange `json:"memBwAvg"` MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
LoadAvg *FloatRange `json:"loadAvg"` Exclusive *int `json:"exclusive,omitempty"`
MemUsedMax *FloatRange `json:"memUsedMax"` Node *StringInput `json:"node,omitempty"`
Exclusive *int `json:"exclusive"`
SharedNode *StringInput `json:"sharedNode"`
SelfJobID *StringInput `json:"selfJobId"`
SelfStartTime *time.Time `json:"selfStartTime"`
SelfDuration *int `json:"selfDuration"`
} }
type JobLink struct { type JobLink struct {
@@ -69,8 +76,9 @@ type JobLink struct {
} }
type JobLinkResultList struct { type JobLinkResultList struct {
Items []*JobLink `json:"items"` ListQuery *string `json:"listQuery,omitempty"`
Count *int `json:"count"` Items []*JobLink `json:"items"`
Count *int `json:"count,omitempty"`
} }
type JobMetricWithName struct { type JobMetricWithName struct {
@@ -80,24 +88,31 @@ type JobMetricWithName struct {
} }
type JobResultList struct { type JobResultList struct {
Items []*schema.Job `json:"items"` Items []*schema.Job `json:"items"`
Offset *int `json:"offset"` Offset *int `json:"offset,omitempty"`
Limit *int `json:"limit"` Limit *int `json:"limit,omitempty"`
Count *int `json:"count"` Count *int `json:"count,omitempty"`
HasNextPage *bool `json:"hasNextPage,omitempty"`
} }
type JobsStatistics struct { type JobsStatistics struct {
ID string `json:"id"` ID string `json:"id"`
Name string `json:"name"` Name string `json:"name"`
TotalJobs int `json:"totalJobs"` TotalJobs int `json:"totalJobs"`
RunningJobs int `json:"runningJobs"` RunningJobs int `json:"runningJobs"`
ShortJobs int `json:"shortJobs"` ShortJobs int `json:"shortJobs"`
TotalWalltime int `json:"totalWalltime"` TotalWalltime int `json:"totalWalltime"`
TotalNodeHours int `json:"totalNodeHours"` TotalNodes int `json:"totalNodes"`
TotalCoreHours int `json:"totalCoreHours"` TotalNodeHours int `json:"totalNodeHours"`
TotalAccHours int `json:"totalAccHours"` TotalCores int `json:"totalCores"`
HistDuration []*HistoPoint `json:"histDuration"` TotalCoreHours int `json:"totalCoreHours"`
HistNumNodes []*HistoPoint `json:"histNumNodes"` TotalAccs int `json:"totalAccs"`
TotalAccHours int `json:"totalAccHours"`
HistDuration []*HistoPoint `json:"histDuration"`
HistNumNodes []*HistoPoint `json:"histNumNodes"`
HistNumCores []*HistoPoint `json:"histNumCores"`
HistNumAccs []*HistoPoint `json:"histNumAccs"`
HistMetrics []*MetricHistoPoints `json:"histMetrics"`
} }
type MetricFootprints struct { type MetricFootprints struct {
@@ -105,6 +120,28 @@ type MetricFootprints struct {
Data []schema.Float `json:"data"` Data []schema.Float `json:"data"`
} }
type MetricHistoPoint struct {
Bin *int `json:"bin,omitempty"`
Count int `json:"count"`
Min *int `json:"min,omitempty"`
Max *int `json:"max,omitempty"`
}
type MetricHistoPoints struct {
Metric string `json:"metric"`
Unit string `json:"unit"`
Stat *string `json:"stat,omitempty"`
Data []*MetricHistoPoint `json:"data,omitempty"`
}
type MetricStatItem struct {
MetricName string `json:"metricName"`
Range *FloatRange `json:"range"`
}
type Mutation struct {
}
type NodeMetrics struct { type NodeMetrics struct {
Host string `json:"host"` Host string `json:"host"`
SubCluster string `json:"subCluster"` SubCluster string `json:"subCluster"`
@@ -113,6 +150,7 @@ type NodeMetrics struct {
type OrderByInput struct { type OrderByInput struct {
Field string `json:"field"` Field string `json:"field"`
Type string `json:"type"`
Order SortDirectionEnum `json:"order"` Order SortDirectionEnum `json:"order"`
} }
@@ -121,18 +159,28 @@ type PageRequest struct {
Page int `json:"page"` Page int `json:"page"`
} }
type Query struct {
}
type StringInput struct { type StringInput struct {
Eq *string `json:"eq"` Eq *string `json:"eq,omitempty"`
Neq *string `json:"neq"` Neq *string `json:"neq,omitempty"`
Contains *string `json:"contains"` Contains *string `json:"contains,omitempty"`
StartsWith *string `json:"startsWith"` StartsWith *string `json:"startsWith,omitempty"`
EndsWith *string `json:"endsWith"` EndsWith *string `json:"endsWith,omitempty"`
In []string `json:"in"` In []string `json:"in,omitempty"`
} }
type TimeRangeOutput struct { type TimeRangeOutput struct {
From time.Time `json:"from"` Range *string `json:"range,omitempty"`
To time.Time `json:"to"` From time.Time `json:"from"`
To time.Time `json:"to"`
}
type TimeWeights struct {
NodeHours []schema.Float `json:"nodeHours"`
AccHours []schema.Float `json:"accHours"`
CoreHours []schema.Float `json:"coreHours"`
} }
type User struct { type User struct {
@@ -184,6 +232,59 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String())) fmt.Fprint(w, strconv.Quote(e.String()))
} }
type SortByAggregate string
const (
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
SortByAggregateTotalcorehours SortByAggregate = "TOTALCOREHOURS"
SortByAggregateTotalaccs SortByAggregate = "TOTALACCS"
SortByAggregateTotalacchours SortByAggregate = "TOTALACCHOURS"
)
var AllSortByAggregate = []SortByAggregate{
SortByAggregateTotalwalltime,
SortByAggregateTotaljobs,
SortByAggregateTotalnodes,
SortByAggregateTotalnodehours,
SortByAggregateTotalcores,
SortByAggregateTotalcorehours,
SortByAggregateTotalaccs,
SortByAggregateTotalacchours,
}
func (e SortByAggregate) IsValid() bool {
switch e {
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
return true
}
return false
}
func (e SortByAggregate) String() string {
return string(e)
}
func (e *SortByAggregate) UnmarshalGQL(v interface{}) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
}
*e = SortByAggregate(str)
if !e.IsValid() {
return fmt.Errorf("%s is not a valid SortByAggregate", str)
}
return nil
}
func (e SortByAggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}
type SortDirectionEnum string type SortDirectionEnum string
const ( const (
@@ -224,44 +325,3 @@ func (e *SortDirectionEnum) UnmarshalGQL(v interface{}) error {
func (e SortDirectionEnum) MarshalGQL(w io.Writer) { func (e SortDirectionEnum) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String())) fmt.Fprint(w, strconv.Quote(e.String()))
} }
type Weights string
const (
WeightsNodeCount Weights = "NODE_COUNT"
WeightsNodeHours Weights = "NODE_HOURS"
)
var AllWeights = []Weights{
WeightsNodeCount,
WeightsNodeHours,
}
func (e Weights) IsValid() bool {
switch e {
case WeightsNodeCount, WeightsNodeHours:
return true
}
return false
}
func (e Weights) String() string {
return string(e)
}
func (e *Weights) UnmarshalGQL(v interface{}) error {
str, ok := v.(string)
if !ok {
return fmt.Errorf("enums must be strings")
}
*e = Weights(str)
if !e.IsValid() {
return fmt.Errorf("%s is not a valid Weights", str)
}
return nil
}
func (e Weights) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}

View File

@@ -1,15 +1,39 @@
package graph package graph
import ( import (
"sync"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/jmoiron/sqlx" "github.com/jmoiron/sqlx"
) )
// This file will not be regenerated automatically. // This file will not be regenerated automatically.
// //
// It serves as dependency injection for your app, add any dependencies you require here. // It serves as dependency injection for your app, add any dependencies you require here.
var (
initOnce sync.Once
resolverInstance *Resolver
)
type Resolver struct { type Resolver struct {
DB *sqlx.DB DB *sqlx.DB
Repo *repository.JobRepository Repo *repository.JobRepository
} }
func Init() {
initOnce.Do(func() {
db := repository.GetConnection()
resolverInstance = &Resolver{
DB: db.DB, Repo: repository.GetJobRepository(),
}
})
}
func GetResolverInstance() *Resolver {
if resolverInstance == nil {
log.Fatal("Authentication module not initialized!")
}
return resolverInstance
}

View File

@@ -2,19 +2,22 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver implementations // This file will be automatically regenerated based on the schema, any resolver implementations
// will be copied through when generating and any unknown code will be moved to the end. // will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.24 // Code generated by github.com/99designs/gqlgen version v0.17.49
import ( import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"regexp"
"slices"
"strconv" "strconv"
"strings"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/auth" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated" "github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
@@ -28,60 +31,110 @@ func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) (
// Tags is the resolver for the tags field. // Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) { func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(&obj.ID) return r.Repo.GetTags(repository.GetUserFromContext(ctx), &obj.ID)
} }
// ConcurrentJobs is the resolver for the concurrentJobs field. // ConcurrentJobs is the resolver for the concurrentJobs field.
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) { func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
exc := int(obj.Exclusive) if obj.State == schema.JobStateRunning {
if exc != 1 { obj.Duration = int32(time.Now().Unix() - obj.StartTimeUnix)
filter := []*model.JobFilter{} }
jid := fmt.Sprint(obj.JobID)
jdu := int(obj.Duration)
filter = append(filter, &model.JobFilter{Exclusive: &exc})
filter = append(filter, &model.JobFilter{SharedNode: &model.StringInput{Contains: &obj.Resources[0].Hostname}})
filter = append(filter, &model.JobFilter{SelfJobID: &model.StringInput{Neq: &jid}})
filter = append(filter, &model.JobFilter{SelfStartTime: &obj.StartTime, SelfDuration: &jdu})
jobLinks, err := r.Repo.QueryJobLinks(ctx, filter) if obj.Exclusive != 1 && obj.Duration > 600 {
if err != nil { return r.Repo.FindConcurrentJobs(ctx, obj)
log.Warn("Error while querying jobLinks")
return nil, err
}
count, err := r.Repo.CountJobs(ctx, filter)
if err != nil {
log.Warn("Error while counting jobLinks")
return nil, err
}
result := &model.JobLinkResultList{Items: jobLinks, Count: &count}
return result, nil
} }
return nil, nil return nil, nil
} }
// Footprint is the resolver for the footprint field.
func (r *jobResolver) Footprint(ctx context.Context, obj *schema.Job) ([]*model.FootprintValue, error) {
rawFootprint, err := r.Repo.FetchFootprint(obj)
if err != nil {
log.Warn("Error while fetching job footprint data")
return nil, err
}
res := []*model.FootprintValue{}
for name, value := range rawFootprint {
parts := strings.Split(name, "_")
statPart := parts[len(parts)-1]
nameParts := parts[:len(parts)-1]
res = append(res, &model.FootprintValue{
Name: strings.Join(nameParts, "_"),
Stat: statPart,
Value: value,
})
}
return res, err
}
// EnergyFootprint is the resolver for the energyFootprint field.
func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*model.EnergyFootprintValue, error) {
rawEnergyFootprint, err := r.Repo.FetchEnergyFootprint(obj)
if err != nil {
log.Warn("Error while fetching job energy footprint data")
return nil, err
}
res := []*model.EnergyFootprintValue{}
for name, value := range rawEnergyFootprint {
// Suboptimal: Nearly hardcoded metric name expectations
matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
matchMem := regexp.MustCompile(`mem|Mem|MEM`)
matchCore := regexp.MustCompile(`core|Core|CORE`)
hwType := ""
switch test := name; { // NOtice ';' for var declaration
case matchCpu.MatchString(test):
hwType = "CPU"
case matchAcc.MatchString(test):
hwType = "Accelerator"
case matchMem.MatchString(test):
hwType = "Memory"
case matchCore.MatchString(test):
hwType = "Core"
default:
hwType = "Other"
}
res = append(res, &model.EnergyFootprintValue{
Hardware: hwType,
Metric: name,
Value: value,
})
}
return res, err
}
// MetaData is the resolver for the metaData field. // MetaData is the resolver for the metaData field.
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) { func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (any, error) {
return r.Repo.FetchMetadata(obj) return r.Repo.FetchMetadata(obj)
} }
// UserData is the resolver for the userData field. // UserData is the resolver for the userData field.
func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.User, error) { func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.User, error) {
return auth.FetchUser(ctx, r.DB, obj.User) return repository.GetUserRepository().FetchUserInCtx(ctx, obj.User)
}
// Name is the resolver for the name field.
func (r *metricValueResolver) Name(ctx context.Context, obj *schema.MetricValue) (*string, error) {
panic(fmt.Errorf("not implemented: Name - name"))
} }
// CreateTag is the resolver for the createTag field. // CreateTag is the resolver for the createTag field.
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) { func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string, scope string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name) id, err := r.Repo.CreateTag(typeArg, name, scope)
if err != nil { if err != nil {
log.Warn("Error while creating tag") log.Warn("Error while creating tag")
return nil, err return nil, err
} }
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil return &schema.Tag{ID: id, Type: typeArg, Name: name, Scope: scope}, nil
} }
// DeleteTag is the resolver for the deleteTag field. // DeleteTag is the resolver for the deleteTag field.
@@ -91,6 +144,7 @@ func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, er
// AddTagsToJob is the resolver for the addTagsToJob field. // AddTagsToJob is the resolver for the addTagsToJob field.
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) { func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
// Selectable Tags Pre-Filtered by Scope in Frontend: No backend check required
jid, err := strconv.ParseInt(job, 10, 64) jid, err := strconv.ParseInt(job, 10, 64)
if err != nil { if err != nil {
log.Warn("Error while adding tag to job") log.Warn("Error while adding tag to job")
@@ -105,7 +159,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
return nil, err return nil, err
} }
if tags, err = r.Repo.AddTag(jid, tid); err != nil { if tags, err = r.Repo.AddTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
log.Warn("Error while adding tag") log.Warn("Error while adding tag")
return nil, err return nil, err
} }
@@ -116,6 +170,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field. // RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) { func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
// Removable Tags Pre-Filtered by Scope in Frontend: No backend check required
jid, err := strconv.ParseInt(job, 10, 64) jid, err := strconv.ParseInt(job, 10, 64)
if err != nil { if err != nil {
log.Warn("Error while parsing job id") log.Warn("Error while parsing job id")
@@ -130,7 +185,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
return nil, err return nil, err
} }
if tags, err = r.Repo.RemoveTag(jid, tid); err != nil { if tags, err = r.Repo.RemoveTag(repository.GetUserFromContext(ctx), jid, tid); err != nil {
log.Warn("Error while removing tag") log.Warn("Error while removing tag")
return nil, err return nil, err
} }
@@ -141,7 +196,7 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
// UpdateConfiguration is the resolver for the updateConfiguration field. // UpdateConfiguration is the resolver for the updateConfiguration field.
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) { func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, auth.GetUser(ctx)); err != nil { if err := repository.GetUserCfgRepo().UpdateConfig(name, value, repository.GetUserFromContext(ctx)); err != nil {
log.Warn("Error while updating user config") log.Warn("Error while updating user config")
return nil, err return nil, err
} }
@@ -156,12 +211,17 @@ func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error)
// Tags is the resolver for the tags field. // Tags is the resolver for the tags field.
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) { func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
return r.Repo.GetTags(nil) return r.Repo.GetTags(repository.GetUserFromContext(ctx), nil)
}
// GlobalMetrics is the resolver for the globalMetrics field.
func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
return archive.GlobalMetricList, nil
} }
// User is the resolver for the user field. // User is the resolver for the user field.
func (r *queryResolver) User(ctx context.Context, username string) (*model.User, error) { func (r *queryResolver) User(ctx context.Context, username string) (*model.User, error) {
return auth.FetchUser(ctx, r.DB, username) return repository.GetUserRepository().FetchUserInCtx(ctx, username)
} }
// AllocatedNodes is the resolver for the allocatedNodes field. // AllocatedNodes is the resolver for the allocatedNodes field.
@@ -191,13 +251,15 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
return nil, err return nil, err
} }
job, err := r.Repo.FindById(numericId) job, err := r.Repo.FindById(ctx, numericId)
if err != nil { if err != nil {
log.Warn("Error while finding job by id") log.Warn("Error while finding job by id")
return nil, err return nil, err
} }
if user := auth.GetUser(ctx); user != nil && job.User != user.Username && user.HasNotRoles([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) { if user := repository.GetUserFromContext(ctx); user != nil &&
job.User != user.Username &&
user.HasNotRoles([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
return nil, errors.New("you are not allowed to see this job") return nil, errors.New("you are not allowed to see this job")
} }
@@ -205,14 +267,24 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
} }
// JobMetrics is the resolver for the jobMetrics field. // JobMetrics is the resolver for the jobMetrics field.
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) { func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
if resolution == nil { // Load from Config
if config.Keys.EnableResampling != nil {
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
resolution = &defaultRes
} else { // Set 0 (Loads configured metric timestep)
defaultRes := 0
resolution = &defaultRes
}
}
job, err := r.Query().Job(ctx, id) job, err := r.Query().Job(ctx, id)
if err != nil { if err != nil {
log.Warn("Error while querying job for metrics") log.Warn("Error while querying job for metrics")
return nil, err return nil, err
} }
data, err := metricdata.LoadData(job, metrics, scopes, ctx) data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
if err != nil { if err != nil {
log.Warn("Error while loading job data") log.Warn("Error while loading job data")
return nil, err return nil, err
@@ -258,38 +330,61 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
return nil, err return nil, err
} }
return &model.JobResultList{Items: jobs, Count: &count}, nil if !config.Keys.UiDefaults["job_list_usePaging"].(bool) {
hasNextPage := false
// page.Page += 1 : Simple, but expensive
// Example Page 4 @ 10 IpP : Does item 41 exist?
// Minimal Page 41 @ 1 IpP : If len(result) is 1, Page 5 @ 10 IpP exists.
nextPage := &model.PageRequest{
ItemsPerPage: 1,
Page: ((page.Page * page.ItemsPerPage) + 1),
}
nextJobs, err := r.Repo.QueryJobs(ctx, filter, nextPage, order)
if err != nil {
log.Warn("Error while querying next jobs")
return nil, err
}
if len(nextJobs) == 1 {
hasNextPage = true
}
return &model.JobResultList{Items: jobs, Count: &count, HasNextPage: &hasNextPage}, nil
} else {
return &model.JobResultList{Items: jobs, Count: &count}, nil
}
} }
// JobsStatistics is the resolver for the jobsStatistics field. // JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) { func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
var err error var err error
var stats []*model.JobsStatistics var stats []*model.JobsStatistics
if requireField(ctx, "totalJobs") { if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
if groupBy == nil { if groupBy == nil {
stats, err = r.Repo.JobsStats(ctx, filter) stats, err = r.Repo.JobsStats(ctx, filter)
} else { } else {
stats, err = r.Repo.JobsStatsGrouped(ctx, filter, groupBy) stats, err = r.Repo.JobsStatsGrouped(ctx, filter, page, sortBy, groupBy)
} }
} else { } else {
stats = make([]*model.JobsStatistics, 0, 1) stats = make([]*model.JobsStatistics, 0, 1)
stats = append(stats, stats = append(stats, &model.JobsStatistics{})
&model.JobsStatistics{})
} }
if groupBy != nil { if groupBy != nil {
if requireField(ctx, "shortJobs") { if requireField(ctx, "shortJobs") {
stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "short") stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "short")
} }
if requireField(ctx, "RunningJobs") { if requireField(ctx, "runningJobs") {
stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "running") stats, err = r.Repo.AddJobCountGrouped(ctx, filter, groupBy, stats, "running")
} }
} else { } else {
if requireField(ctx, "shortJobs") { if requireField(ctx, "shortJobs") {
stats, err = r.Repo.AddJobCount(ctx, filter, stats, "short") stats, err = r.Repo.AddJobCount(ctx, filter, stats, "short")
} }
if requireField(ctx, "RunningJobs") { if requireField(ctx, "runningJobs") {
stats, err = r.Repo.AddJobCount(ctx, filter, stats, "running") stats, err = r.Repo.AddJobCount(ctx, filter, stats, "running")
} }
} }
@@ -298,7 +393,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
return nil, err return nil, err
} }
if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") { if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
if groupBy == nil { if groupBy == nil {
stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0]) stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
if err != nil { if err != nil {
@@ -309,27 +404,20 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
} }
} }
if requireField(ctx, "histMetrics") {
if groupBy == nil {
stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0])
if err != nil {
return nil, err
}
} else {
return nil, errors.New("metric histograms only implemented without groupBy argument")
}
}
return stats, nil return stats, nil
} }
// JobsCount is the resolver for the jobsCount field.
func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) {
counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit)
if err != nil {
log.Warn("Error while counting grouped jobs")
return nil, err
}
res := make([]*model.Count, 0, len(counts))
for name, count := range counts {
res = append(res, &model.Count{
Name: name,
Count: count,
})
}
return res, nil
}
// RooflineHeatmap is the resolver for the rooflineHeatmap field. // RooflineHeatmap is the resolver for the rooflineHeatmap field.
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) { func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY) return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
@@ -337,8 +425,8 @@ func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.Job
// NodeMetrics is the resolver for the nodeMetrics field. // NodeMetrics is the resolver for the nodeMetrics field.
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) { func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
user := auth.GetUser(ctx) user := repository.GetUserFromContext(ctx)
if user != nil && !user.HasRole(auth.RoleAdmin) { if user != nil && !user.HasRole(schema.RoleAdmin) {
return nil, errors.New("you need to be an administrator for this query") return nil, errors.New("you need to be an administrator for this query")
} }
@@ -348,7 +436,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
} }
} }
data, err := metricdata.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx) data, err := metricDataDispatcher.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil { if err != nil {
log.Warn("Error while loading node data") log.Warn("Error while loading node data")
return nil, err return nil, err
@@ -393,6 +481,9 @@ func (r *Resolver) Cluster() generated.ClusterResolver { return &clusterResolver
// Job returns generated.JobResolver implementation. // Job returns generated.JobResolver implementation.
func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} } func (r *Resolver) Job() generated.JobResolver { return &jobResolver{r} }
// MetricValue returns generated.MetricValueResolver implementation.
func (r *Resolver) MetricValue() generated.MetricValueResolver { return &metricValueResolver{r} }
// Mutation returns generated.MutationResolver implementation. // Mutation returns generated.MutationResolver implementation.
func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} } func (r *Resolver) Mutation() generated.MutationResolver { return &mutationResolver{r} }
@@ -402,8 +493,11 @@ func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
// SubCluster returns generated.SubClusterResolver implementation. // SubCluster returns generated.SubClusterResolver implementation.
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} } func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
type clusterResolver struct{ *Resolver } type (
type jobResolver struct{ *Resolver } clusterResolver struct{ *Resolver }
type mutationResolver struct{ *Resolver } jobResolver struct{ *Resolver }
type queryResolver struct{ *Resolver } metricValueResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver } mutationResolver struct{ *Resolver }
queryResolver struct{ *Resolver }
subClusterResolver struct{ *Resolver }
)

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -6,15 +6,15 @@ package graph
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"math" "math"
"github.com/99designs/gqlgen/graphql" "github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
// "github.com/ClusterCockpit/cc-backend/pkg/archive"
) )
const MAX_JOBS_FOR_ANALYSIS = 500 const MAX_JOBS_FOR_ANALYSIS = 500
@@ -24,15 +24,15 @@ func (r *queryResolver) rooflineHeatmap(
ctx context.Context, ctx context.Context,
filter []*model.JobFilter, filter []*model.JobFilter,
rows int, cols int, rows int, cols int,
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) { minX float64, minY float64, maxX float64, maxY float64,
) ([][]float64, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil) jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil { if err != nil {
log.Error("Error while querying jobs for roofline") log.Error("Error while querying jobs for roofline")
return nil, err return nil, err
} }
if len(jobs) > MAX_JOBS_FOR_ANALYSIS { if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) return nil, fmt.Errorf("GRAPH/UTIL > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
} }
fcols, frows := float64(cols), float64(rows) fcols, frows := float64(cols), float64(rows)
@@ -47,22 +47,33 @@ func (r *queryResolver) rooflineHeatmap(
continue continue
} }
jobdata, err := metricdata.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx) // metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
// resolution := 0
// for _, mc := range metricConfigs {
// resolution = max(resolution, mc.Timestep)
// }
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
if err != nil { if err != nil {
log.Error("Error while loading metrics for roofline") log.Errorf("Error while loading roofline metrics for job %d", job.ID)
return nil, err return nil, err
} }
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"] flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
if flops_ == nil && membw_ == nil { if flops_ == nil && membw_ == nil {
return nil, fmt.Errorf("GRAPH/STATS > 'flops_any' or 'mem_bw' missing for job %d", job.ID) log.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
continue
// return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
} }
flops, ok1 := flops_["node"] flops, ok1 := flops_["node"]
membw, ok2 := membw_["node"] membw, ok2 := membw_["node"]
if !ok1 || !ok2 { if !ok1 || !ok2 {
log.Info("rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
continue
// TODO/FIXME: // TODO/FIXME:
return nil, errors.New("GRAPH/STATS > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level") // return nil, errors.New("GRAPH/UTIL > todo: rooflineHeatmap() query not implemented for where flops_any or mem_bw not available at 'node' level")
} }
for n := 0; n < len(flops.Series); n++ { for n := 0; n < len(flops.Series); n++ {
@@ -98,7 +109,7 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
return nil, err return nil, err
} }
if len(jobs) > MAX_JOBS_FOR_ANALYSIS { if len(jobs) > MAX_JOBS_FOR_ANALYSIS {
return nil, fmt.Errorf("GRAPH/STATS > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS) return nil, fmt.Errorf("GRAPH/UTIL > too many jobs matched (max: %d)", MAX_JOBS_FOR_ANALYSIS)
} }
avgs := make([][]schema.Float, len(metrics)) avgs := make([][]schema.Float, len(metrics))
@@ -106,18 +117,33 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
avgs[i] = make([]schema.Float, 0, len(jobs)) avgs[i] = make([]schema.Float, 0, len(jobs))
} }
nodehours := make([]schema.Float, 0, len(jobs)) timeweights := new(model.TimeWeights)
timeweights.NodeHours = make([]schema.Float, 0, len(jobs))
timeweights.AccHours = make([]schema.Float, 0, len(jobs))
timeweights.CoreHours = make([]schema.Float, 0, len(jobs))
for _, job := range jobs { for _, job := range jobs {
if job.MonitoringStatus == schema.MonitoringStatusDisabled || job.MonitoringStatus == schema.MonitoringStatusArchivingFailed { if job.MonitoringStatus == schema.MonitoringStatusDisabled || job.MonitoringStatus == schema.MonitoringStatusArchivingFailed {
continue continue
} }
if err := metricdata.LoadAverages(job, metrics, avgs, ctx); err != nil { if err := metricDataDispatcher.LoadAverages(job, metrics, avgs, ctx); err != nil {
log.Error("Error while loading averages for footprint") log.Error("Error while loading averages for footprint")
return nil, err return nil, err
} }
nodehours = append(nodehours, schema.Float(float64(job.Duration)/60.0*float64(job.NumNodes))) // #166 collect arrays: Null values or no null values?
timeweights.NodeHours = append(timeweights.NodeHours, schema.Float(float64(job.Duration)/60.0*float64(job.NumNodes)))
if job.NumAcc > 0 {
timeweights.AccHours = append(timeweights.AccHours, schema.Float(float64(job.Duration)/60.0*float64(job.NumAcc)))
} else {
timeweights.AccHours = append(timeweights.AccHours, schema.Float(1.0))
}
if job.NumHWThreads > 0 {
timeweights.CoreHours = append(timeweights.CoreHours, schema.Float(float64(job.Duration)/60.0*float64(job.NumHWThreads))) // SQLite HWThreads == Cores; numCoresForJob(job)
} else {
timeweights.CoreHours = append(timeweights.CoreHours, schema.Float(1.0))
}
} }
res := make([]*model.MetricFootprints, len(avgs)) res := make([]*model.MetricFootprints, len(avgs))
@@ -129,11 +155,34 @@ func (r *queryResolver) jobsFootprints(ctx context.Context, filter []*model.JobF
} }
return &model.Footprints{ return &model.Footprints{
Nodehours: nodehours, TimeWeights: timeweights,
Metrics: res, Metrics: res,
}, nil }, nil
} }
// func numCoresForJob(job *schema.Job) (numCores int) {
// subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)
// if scerr != nil {
// return 1
// }
// totalJobCores := 0
// topology := subcluster.Topology
// for _, host := range job.Resources {
// hwthreads := host.HWThreads
// if hwthreads == nil {
// hwthreads = topology.Node
// }
// hostCores, _ := topology.GetCoresFromHWThreads(hwthreads)
// totalJobCores += len(hostCores)
// }
// return totalJobCores
// }
func requireField(ctx context.Context, name string) bool { func requireField(ctx context.Context, name string) bool {
fields := graphql.CollectAllFields(ctx) fields := graphql.CollectAllFields(ctx)

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -8,9 +8,9 @@ import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"os" "os"
"strings" "strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
@@ -42,8 +42,8 @@ func HandleImportFlag(flag string) error {
} }
dec := json.NewDecoder(bytes.NewReader(raw)) dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields() dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults} job := schema.JobMeta{BaseJob: schema.JobDefaults}
if err = dec.Decode(&jobMeta); err != nil { if err = dec.Decode(&job); err != nil {
log.Warn("Error while decoding raw json metadata for import") log.Warn("Error while decoding raw json metadata for import")
return err return err
} }
@@ -67,32 +67,60 @@ func HandleImportFlag(flag string) error {
return err return err
} }
// checkJobData(&jobData) job.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
if err != nil {
// if _, err = r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows { log.Errorf("cannot get subcluster: %s", err.Error())
// if err != nil { return err
// log.Warn("Error while finding job in jobRepository")
// return err
// }
//
// return fmt.Errorf("REPOSITORY/INIT > a job with that jobId, cluster and startTime does already exist")
// }
//
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
} }
// TODO: Other metrics... job.Footprint = make(map[string]float64)
job.LoadAvg = loadJobStat(&jobMeta, "cpu_load")
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any") for _, fp := range sc.Footprint {
job.MemUsedMax = loadJobStat(&jobMeta, "mem_used") statType := "avg"
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw") if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw") statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[name] = repository.LoadJobStat(&job, fp, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err
}
job.RawResources, err = json.Marshal(job.Resources) job.RawResources, err = json.Marshal(job.Resources)
if err != nil { if err != nil {
@@ -110,7 +138,7 @@ func HandleImportFlag(flag string) error {
return err return err
} }
if err = archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil { if err = archive.GetHandle().ImportJob(&job, &jobData); err != nil {
log.Error("Error while importing job") log.Error("Error while importing job")
return err return err
} }
@@ -122,8 +150,8 @@ func HandleImportFlag(flag string) error {
} }
for _, tag := range job.Tags { for _, tag := range job.Tags {
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil { if err := r.ImportTag(id, tag.Type, tag.Name, tag.Scope); err != nil {
log.Error("Error while adding or creating tag") log.Error("Error while adding or creating tag on import")
return err return err
} }
} }

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -42,6 +42,9 @@ func setup(t *testing.T) *repository.JobRepository {
"kind": "file", "kind": "file",
"path": "./var/job-archive" "path": "./var/job-archive"
}, },
"jwts": {
"max-age": "2m"
},
"clusters": [ "clusters": [
{ {
"name": "testcluster", "name": "testcluster",
@@ -79,7 +82,7 @@ func setup(t *testing.T) *repository.JobRepository {
if err := os.Mkdir(jobarchive, 0777); err != nil { if err := os.Mkdir(jobarchive, 0777); err != nil {
t.Fatal(err) t.Fatal(err)
} }
if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 1)), 0666); err != nil { if err := os.WriteFile(filepath.Join(jobarchive, "version.txt"), []byte(fmt.Sprintf("%d", 2)), 0666); err != nil {
t.Fatal(err) t.Fatal(err)
} }
fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz") fritzArchive := filepath.Join(tmpdir, "job-archive", "fritz")

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -7,6 +7,7 @@ package importer
import ( import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"strings" "strings"
"time" "time"
@@ -16,6 +17,11 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
) )
const (
addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)"
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
)
// Delete the tables "job", "tag" and "jobtag" from the database and // Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`. // repopulate them using the jobs found in `archive`.
func InitDB() error { func InitDB() error {
@@ -60,13 +66,58 @@ func InitDB() error {
StartTimeUnix: jobMeta.StartTime, StartTimeUnix: jobMeta.StartTime,
} }
// TODO: Other metrics... sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
job.LoadAvg = loadJobStat(jobMeta, "cpu_load") if err != nil {
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any") log.Errorf("cannot get subcluster: %s", err.Error())
job.MemUsedMax = loadJobStat(jobMeta, "mem_used") return err
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw") }
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw") job.Footprint = make(map[string]float64)
for _, fp := range sc.Footprint {
statType := "avg"
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
job.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
}
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
log.Warn("Error while marshaling job footprint")
return err
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err
}
job.RawResources, err = json.Marshal(job.Resources) job.RawResources, err = json.Marshal(job.Resources)
if err != nil { if err != nil {
@@ -88,7 +139,8 @@ func InitDB() error {
continue continue
} }
id, err := r.TransactionAdd(t, job) id, err := r.TransactionAddNamed(t,
repository.NamedJobInsert, job)
if err != nil { if err != nil {
log.Errorf("repository initDB(): %v", err) log.Errorf("repository initDB(): %v", err)
errorOccured++ errorOccured++
@@ -99,7 +151,9 @@ func InitDB() error {
tagstr := tag.Name + ":" + tag.Type tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr] tagId, ok := tags[tagstr]
if !ok { if !ok {
tagId, err = r.TransactionAddTag(t, tag) tagId, err = r.TransactionAdd(t,
addTagQuery,
tag.Name, tag.Type)
if err != nil { if err != nil {
log.Errorf("Error adding tag: %v", err) log.Errorf("Error adding tag: %v", err)
errorOccured++ errorOccured++
@@ -108,7 +162,9 @@ func InitDB() error {
tags[tagstr] = tagId tags[tagstr] = tagId
} }
r.TransactionSetTag(t, id, tagId) r.TransactionAdd(t,
setTagQuery,
id, tagId)
} }
if err == nil { if err == nil {
@@ -150,18 +206,6 @@ func SanityChecks(job *schema.BaseJob) error {
return nil return nil
} }
func loadJobStat(job *schema.JobMeta, metric string) float64 {
if stats, ok := job.Statistics[metric]; ok {
if metric == "mem_used" {
return stats.Max
} else {
return stats.Avg
}
}
return 0.0
}
func checkJobData(d *schema.JobData) error { func checkJobData(d *schema.JobData) error {
for _, scopes := range *d { for _, scopes := range *d {
// var newUnit schema.Unit // var newUnit schema.Unit

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,256 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricDataDispatcher
import (
"context"
"fmt"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/resampler"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
func cacheKey(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
resolution int,
) string {
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
job.ID, job.State, metrics, scopes, resolution)
}
// Fetches the metric data for a job.
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
config.Keys.DisableArchive {
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
if metrics == nil {
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
if err != nil {
if len(jd) != 0 {
log.Warnf("partial error: %s", err.Error())
// return err, 0, 0 // Reactivating will block archiving on one partial error
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
size = jd.Size()
} else {
var jd_temp schema.JobData
jd_temp, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
//Deep copy the cached archive hashmap
jd = metricdata.DeepCopy(jd_temp)
//Resampling for archived data.
//Pass the resolution from frontend here.
for _, v := range jd {
for _, v_ := range v {
timestep := 0
for i := 0; i < len(v_.Series); i += 1 {
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
if err != nil {
return err, 0, 0
}
}
v_.Timestep = timestep
}
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
if metrics == nil {
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
if len(subset) > 0 {
perscope = subset
}
}
res[metric] = perscope
}
}
jd = res
}
size = jd.Size()
}
ttl = 5 * time.Hour
if job.State == schema.JobStateRunning {
ttl = 2 * time.Minute
}
// FIXME: Review: Is this really necessary or correct.
// Note: Lines 147-170 formerly known as prepareJobData(jobData, scopes)
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/median/max Graph can be
// used instead of a lot of single lines.
// NOTE: New StatsSeries will always be calculated as 'min/median/max'
// Existing (archived) StatsSeries can be 'min/mean/max'!
const maxSeriesSize int = 15
for _, scopes := range jd {
for _, jm := range scopes {
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
continue
}
jm.AddStatisticsSeries()
}
}
nodeScopeRequested := false
for _, scope := range scopes {
if scope == schema.MetricScopeNode {
nodeScopeRequested = true
}
}
if nodeScopeRequested {
jd.AddNodeScope("flops_any")
jd.AddNodeScope("mem_bw")
}
return jd, ttl, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
return data.(schema.JobData), nil
}
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(
job *schema.Job,
metrics []string,
data [][]schema.Float,
ctx context.Context,
) error {
if job.State != schema.JobStateRunning && !config.Keys.DisableArchive {
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
}
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}
for i, m := range metrics {
nodes, ok := stats[m]
if !ok {
data[i] = append(data[i], schema.NaN)
continue
}
sum := 0.0
for _, node := range nodes {
sum += node.Avg
}
data[i] = append(data[i], schema.Float(sum))
}
return nil
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
repo, err := metricdata.GetMetricDataRepo(cluster)
if err != nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -32,32 +32,33 @@ type CCMetricStoreConfig struct {
} }
type CCMetricStore struct { type CCMetricStore struct {
here2there map[string]string
there2here map[string]string
client http.Client
jwt string jwt string
url string url string
queryEndpoint string queryEndpoint string
client http.Client
here2there map[string]string
there2here map[string]string
} }
type ApiQueryRequest struct { type ApiQueryRequest struct {
Cluster string `json:"cluster"` Cluster string `json:"cluster"`
Queries []ApiQuery `json:"queries"`
ForAllNodes []string `json:"for-all-nodes"`
From int64 `json:"from"` From int64 `json:"from"`
To int64 `json:"to"` To int64 `json:"to"`
WithStats bool `json:"with-stats"` WithStats bool `json:"with-stats"`
WithData bool `json:"with-data"` WithData bool `json:"with-data"`
Queries []ApiQuery `json:"queries"`
ForAllNodes []string `json:"for-all-nodes"`
} }
type ApiQuery struct { type ApiQuery struct {
Type *string `json:"type,omitempty"`
SubType *string `json:"subtype,omitempty"`
Metric string `json:"metric"` Metric string `json:"metric"`
Hostname string `json:"host"` Hostname string `json:"host"`
Aggregate bool `json:"aggreg"` Resolution int `json:"resolution"`
Type *string `json:"type,omitempty"`
TypeIds []string `json:"type-ids,omitempty"` TypeIds []string `json:"type-ids,omitempty"`
SubType *string `json:"subtype,omitempty"`
SubTypeIds []string `json:"subtype-ids,omitempty"` SubTypeIds []string `json:"subtype-ids,omitempty"`
Aggregate bool `json:"aggreg"`
} }
type ApiQueryResponse struct { type ApiQueryResponse struct {
@@ -66,17 +67,17 @@ type ApiQueryResponse struct {
} }
type ApiMetricData struct { type ApiMetricData struct {
Error *string `json:"error"` Error *string `json:"error"`
From int64 `json:"from"` Data []schema.Float `json:"data"`
To int64 `json:"to"` From int64 `json:"from"`
Data []schema.Float `json:"data"` To int64 `json:"to"`
Avg schema.Float `json:"avg"` Resolution int `json:"resolution"`
Min schema.Float `json:"min"` Avg schema.Float `json:"avg"`
Max schema.Float `json:"max"` Min schema.Float `json:"min"`
Max schema.Float `json:"max"`
} }
func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error { func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
var config CCMetricStoreConfig var config CCMetricStoreConfig
if err := json.Unmarshal(rawConfig, &config); err != nil { if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Warn("Error while unmarshaling raw json config") log.Warn("Error while unmarshaling raw json config")
@@ -122,15 +123,15 @@ func (ccms *CCMetricStore) toLocalName(metric string) string {
func (ccms *CCMetricStore) doRequest( func (ccms *CCMetricStore) doRequest(
ctx context.Context, ctx context.Context,
body *ApiQueryRequest) (*ApiQueryResponse, error) { body *ApiQueryRequest,
) (*ApiQueryResponse, error) {
buf := &bytes.Buffer{} buf := &bytes.Buffer{}
if err := json.NewEncoder(buf).Encode(body); err != nil { if err := json.NewEncoder(buf).Encode(body); err != nil {
log.Warn("Error while encoding request body") log.Warn("Error while encoding request body")
return nil, err return nil, err
} }
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf) req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
if err != nil { if err != nil {
log.Warn("Error while building request body") log.Warn("Error while building request body")
return nil, err return nil, err
@@ -139,6 +140,13 @@ func (ccms *CCMetricStore) doRequest(
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt)) req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
} }
// versioning the cc-metric-store query API.
// v2 = data with resampling
// v1 = data without resampling
q := req.URL.Query()
q.Add("version", "v2")
req.URL.RawQuery = q.Encode()
res, err := ccms.client.Do(req) res, err := ccms.client.Do(req)
if err != nil { if err != nil {
log.Error("Error while performing request") log.Error("Error while performing request")
@@ -162,9 +170,10 @@ func (ccms *CCMetricStore) LoadData(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) { ctx context.Context,
resolution int,
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes) ) (schema.JobData, error) {
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
if err != nil { if err != nil {
log.Warn("Error while building queries") log.Warn("Error while building queries")
return nil, err return nil, err
@@ -186,7 +195,7 @@ func (ccms *CCMetricStore) LoadData(
} }
var errors []string var errors []string
var jobData schema.JobData = make(schema.JobData) jobData := make(schema.JobData)
for i, row := range resBody.Results { for i, row := range resBody.Results {
query := req.Queries[i] query := req.Queries[i]
metric := ccms.toLocalName(query.Metric) metric := ccms.toLocalName(query.Metric)
@@ -196,17 +205,23 @@ func (ccms *CCMetricStore) LoadData(
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric) jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
} }
res := row[0].Resolution
if res == 0 {
res = mc.Timestep
}
jobMetric, ok := jobData[metric][scope] jobMetric, ok := jobData[metric][scope]
if !ok { if !ok {
jobMetric = &schema.JobMetric{ jobMetric = &schema.JobMetric{
Unit: mc.Unit, Unit: mc.Unit,
Timestep: mc.Timestep, Timestep: res,
Series: make([]schema.Series, 0), Series: make([]schema.Series, 0),
} }
jobData[metric][scope] = jobMetric jobData[metric][scope] = jobMetric
} }
for _, res := range row { for ndx, res := range row {
if res.Error != nil { if res.Error != nil {
/* Build list for "partial errors", if any */ /* Build list for "partial errors", if any */
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error)) errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
@@ -216,7 +231,7 @@ func (ccms *CCMetricStore) LoadData(
id := (*string)(nil) id := (*string)(nil)
if query.Type != nil { if query.Type != nil {
id = new(string) id = new(string)
*id = query.TypeIds[0] *id = query.TypeIds[ndx]
} }
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
@@ -252,7 +267,6 @@ func (ccms *CCMetricStore) LoadData(
/* Returns list for "partial errors" */ /* Returns list for "partial errors" */
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", ")) return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
} }
return jobData, nil return jobData, nil
} }
@@ -267,8 +281,9 @@ var (
func (ccms *CCMetricStore) buildQueries( func (ccms *CCMetricStore) buildQueries(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) { scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
assignedScope := []schema.MetricScope{} assignedScope := []schema.MetricScope{}
@@ -313,12 +328,18 @@ func (ccms *CCMetricStore) buildQueries(
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node) // Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) { if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
if scope != schema.MetricScopeAccelerator {
// Skip all other catched cases
continue
}
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &acceleratorString, Type: &acceleratorString,
TypeIds: host.Accelerators, TypeIds: host.Accelerators,
Resolution: resolution,
}) })
assignedScope = append(assignedScope, schema.MetricScopeAccelerator) assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
continue continue
@@ -331,11 +352,12 @@ func (ccms *CCMetricStore) buildQueries(
} }
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &acceleratorString, Type: &acceleratorString,
TypeIds: host.Accelerators, TypeIds: host.Accelerators,
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -344,11 +366,12 @@ func (ccms *CCMetricStore) buildQueries(
// HWThread -> HWThead // HWThread -> HWThead
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &hwthreadString, Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads), TypeIds: intToStringSlice(hwthreads),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -359,11 +382,12 @@ func (ccms *CCMetricStore) buildQueries(
cores, _ := topology.GetCoresFromHWThreads(hwthreads) cores, _ := topology.GetCoresFromHWThreads(hwthreads)
for _, core := range cores { for _, core := range cores {
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Core[core]), TypeIds: intToStringSlice(topology.Core[core]),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
} }
@@ -375,11 +399,12 @@ func (ccms *CCMetricStore) buildQueries(
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
for _, socket := range sockets { for _, socket := range sockets {
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Socket[socket]), TypeIds: intToStringSlice(topology.Socket[socket]),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
} }
@@ -389,11 +414,12 @@ func (ccms *CCMetricStore) buildQueries(
// HWThread -> Node // HWThread -> Node
if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &hwthreadString, Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads), TypeIds: intToStringSlice(hwthreads),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -403,11 +429,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {
cores, _ := topology.GetCoresFromHWThreads(hwthreads) cores, _ := topology.GetCoresFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &coreString, Type: &coreString,
TypeIds: intToStringSlice(cores), TypeIds: intToStringSlice(cores),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -417,11 +444,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {
cores, _ := topology.GetCoresFromHWThreads(hwthreads) cores, _ := topology.GetCoresFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &coreString, Type: &coreString,
TypeIds: intToStringSlice(cores), TypeIds: intToStringSlice(cores),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -431,11 +459,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain { if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &memoryDomainString, Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets), TypeIds: intToStringSlice(sockets),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -445,11 +474,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {
sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) sockets, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &memoryDomainString, Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets), TypeIds: intToStringSlice(sockets),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -459,11 +489,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: false, Aggregate: false,
Type: &socketString, Type: &socketString,
TypeIds: intToStringSlice(sockets), TypeIds: intToStringSlice(sockets),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -473,11 +504,12 @@ func (ccms *CCMetricStore) buildQueries(
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Aggregate: true, Aggregate: true,
Type: &socketString, Type: &socketString,
TypeIds: intToStringSlice(sockets), TypeIds: intToStringSlice(sockets),
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -486,8 +518,9 @@ func (ccms *CCMetricStore) buildQueries(
// Node -> Node // Node -> Node
if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode { if nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {
queries = append(queries, ApiQuery{ queries = append(queries, ApiQuery{
Metric: remoteName, Metric: remoteName,
Hostname: host.Hostname, Hostname: host.Hostname,
Resolution: resolution,
}) })
assignedScope = append(assignedScope, scope) assignedScope = append(assignedScope, scope)
continue continue
@@ -504,9 +537,17 @@ func (ccms *CCMetricStore) buildQueries(
func (ccms *CCMetricStore) LoadStats( func (ccms *CCMetricStore) LoadStats(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) { ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) // metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
// resolution := 9000
// for _, mc := range metricConfigs {
// resolution = min(resolution, mc.Timestep)
// }
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
if err != nil { if err != nil {
log.Warn("Error while building query") log.Warn("Error while building query")
return nil, err return nil, err
@@ -533,7 +574,9 @@ func (ccms *CCMetricStore) LoadStats(
metric := ccms.toLocalName(query.Metric) metric := ccms.toLocalName(query.Metric)
data := res[0] data := res[0]
if data.Error != nil { if data.Error != nil {
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) log.Infof("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
continue
// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
} }
metricdata, ok := stats[metric] metricdata, ok := stats[metric]
@@ -543,7 +586,9 @@ func (ccms *CCMetricStore) LoadStats(
} }
if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() { if data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {
return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN") log.Infof("fetching %s for node %s failed: one of avg/min/max is NaN", metric, query.Hostname)
continue
// return nil, fmt.Errorf("METRICDATA/CCMS > fetching %s for node %s failed: %s", metric, query.Hostname, "avg/min/max is NaN")
} }
metricdata[query.Hostname] = schema.MetricStatistics{ metricdata[query.Hostname] = schema.MetricStatistics{
@@ -562,8 +607,8 @@ func (ccms *CCMetricStore) LoadNodeData(
metrics, nodes []string, metrics, nodes []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
from, to time.Time, from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) { ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
req := ApiQueryRequest{ req := ApiQueryRequest{
Cluster: cluster, Cluster: cluster,
From: from.Unix(), From: from.Unix(),
@@ -580,8 +625,9 @@ func (ccms *CCMetricStore) LoadNodeData(
for _, node := range nodes { for _, node := range nodes {
for _, metric := range metrics { for _, metric := range metrics {
req.Queries = append(req.Queries, ApiQuery{ req.Queries = append(req.Queries, ApiQuery{
Hostname: node, Hostname: node,
Metric: ccms.toRemoteName(metric), Metric: ccms.toRemoteName(metric),
Resolution: 60, // Default for Node Queries
}) })
} }
} }
@@ -589,7 +635,7 @@ func (ccms *CCMetricStore) LoadNodeData(
resBody, err := ccms.doRequest(ctx, &req) resBody, err := ccms.doRequest(ctx, &req)
if err != nil { if err != nil {
log.Error("Error while performing request") log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
return nil, err return nil, err
} }
@@ -648,7 +694,6 @@ func (ccms *CCMetricStore) LoadNodeData(
} }
func intToStringSlice(is []int) []string { func intToStringSlice(is []int) []string {
ss := make([]string, len(is)) ss := make([]string, len(is))
for i, x := range is { for i, x := range is {
ss[i] = strconv.Itoa(x) ss[i] = strconv.Itoa(x)

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -60,7 +60,8 @@ func (idb *InfluxDBv2DataRepository) LoadData(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) { ctx context.Context,
resolution int) (schema.JobData, error) {
measurementsConds := make([]string, 0, len(metrics)) measurementsConds := make([]string, 0, len(metrics))
for _, m := range metrics { for _, m := range metrics {

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -8,13 +8,10 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"math"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/config" "github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
) )
@@ -24,7 +21,7 @@ type MetricDataRepository interface {
Init(rawConfig json.RawMessage) error Init(rawConfig json.RawMessage) error
// Return the JobData for the given job, only with the requested metrics. // Return the JobData for the given job, only with the requested metrics.
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now. // Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
@@ -35,11 +32,7 @@ type MetricDataRepository interface {
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{} var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
var useArchive bool func Init() error {
func Init(disableArchive bool) error {
useArchive = !disableArchive
for _, cluster := range config.Keys.Clusters { for _, cluster := range config.Keys.Clusters {
if cluster.MetricDataRepository != nil { if cluster.MetricDataRepository != nil {
var kind struct { var kind struct {
@@ -74,283 +67,13 @@ func Init(disableArchive bool) error {
return nil return nil
} }
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024) func GetMetricDataRepo(cluster string) (MetricDataRepository, error) {
var err error
// Fetches the metric data for a job.
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
!useArchive {
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
}
if scopes == nil {
scopes = append(scopes, schema.MetricScopeNode)
}
if metrics == nil {
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx)
if err != nil {
if len(jd) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading job data from metric repository")
return err, 0, 0
}
}
size = jd.Size()
} else {
jd, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
if metrics == nil {
metrics = make([]string, 0, len(jd))
for k := range jd {
metrics = append(metrics, k)
}
}
res := schema.JobData{}
for _, metric := range metrics {
if perscope, ok := jd[metric]; ok {
if len(perscope) > 1 {
subset := make(map[schema.MetricScope]*schema.JobMetric)
for _, scope := range scopes {
if jm, ok := perscope[scope]; ok {
subset[scope] = jm
}
}
if len(subset) > 0 {
perscope = subset
}
}
res[metric] = perscope
}
}
jd = res
}
size = jd.Size()
}
ttl = 5 * time.Hour
if job.State == schema.JobStateRunning {
ttl = 2 * time.Minute
}
prepareJobData(job, jd, scopes)
return jd, ttl, size
})
if err, ok := data.(error); ok {
log.Error("Error in returned dataset")
return nil, err
}
return data.(schema.JobData), nil
}
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(
job *schema.Job,
metrics []string,
data [][]schema.Float,
ctx context.Context) error {
if job.State != schema.JobStateRunning && useArchive {
return archive.LoadAveragesFromArchive(job, metrics, data)
}
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx)
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}
for i, m := range metrics {
nodes, ok := stats[m]
if !ok {
data[i] = append(data[i], schema.NaN)
continue
}
sum := 0.0
for _, node := range nodes {
sum += node.Avg
}
data[i] = append(data[i], schema.Float(sum))
}
return nil
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
repo, ok := metricDataRepos[cluster] repo, ok := metricDataRepos[cluster]
if !ok { if !ok {
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster) err = fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
} }
if metrics == nil { return repo, err
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil
}
func cacheKey(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope) string {
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
return fmt.Sprintf("%d(%s):[%v],[%v]",
job.ID, job.State, metrics, scopes)
}
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/mean/max Graph can be
// used instead of a lot of single lines.
func prepareJobData(
job *schema.Job,
jobData schema.JobData,
scopes []schema.MetricScope) {
const maxSeriesSize int = 15
for _, scopes := range jobData {
for _, jm := range scopes {
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
continue
}
jm.AddStatisticsSeries()
}
}
nodeScopeRequested := false
for _, scope := range scopes {
if scope == schema.MetricScopeNode {
nodeScopeRequested = true
}
}
if nodeScopeRequested {
jobData.AddNodeScope("flops_any")
jobData.AddNodeScope("mem_bw")
}
}
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
// TODO: Talk about this! What resolutions to store data at...
scopes := []schema.MetricScope{schema.MetricScopeNode}
if job.NumNodes <= 8 {
scopes = append(scopes, schema.MetricScopeCore)
}
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// TODO/FIXME: Calc average for non-node metrics as well!
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: schema.Unit{
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
},
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if !useArchive {
return jobMeta, nil
}
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
} }

View File

@@ -166,10 +166,10 @@ func (pdb *PrometheusDataRepository) Init(rawConfig json.RawMessage) error {
var rt http.RoundTripper = nil var rt http.RoundTripper = nil
if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" { if prom_pw := os.Getenv("PROMETHEUS_PASSWORD"); prom_pw != "" && config.Username != "" {
prom_pw := promcfg.Secret(prom_pw) prom_pw := promcfg.Secret(prom_pw)
rt = promcfg.NewBasicAuthRoundTripper(config.Username, prom_pw, "", promapi.DefaultRoundTripper) rt = promcfg.NewBasicAuthRoundTripper(promcfg.NewInlineSecret(config.Username), promcfg.NewInlineSecret(string(prom_pw)), promapi.DefaultRoundTripper)
} else { } else {
if config.Username != "" { if config.Username != "" {
return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set.") return errors.New("METRICDATA/PROMETHEUS > Prometheus username provided, but PROMETHEUS_PASSWORD not set")
} }
} }
// init client // init client
@@ -204,8 +204,8 @@ func (pdb *PrometheusDataRepository) FormatQuery(
metric string, metric string,
scope schema.MetricScope, scope schema.MetricScope,
nodes []string, nodes []string,
cluster string) (string, error) { cluster string,
) (string, error) {
args := PromQLArgs{} args := PromQLArgs{}
if len(nodes) > 0 { if len(nodes) > 0 {
args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix) args.Nodes = fmt.Sprintf("(%s)%s", nodeRegex(nodes), pdb.suffix)
@@ -233,12 +233,13 @@ func (pdb *PrometheusDataRepository) RowToSeries(
from time.Time, from time.Time,
step int64, step int64,
steps int64, steps int64,
row *promm.SampleStream) schema.Series { row *promm.SampleStream,
) schema.Series {
ts := from.Unix() ts := from.Unix()
hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix) hostname := strings.TrimSuffix(string(row.Metric["exported_instance"]), pdb.suffix)
// init array of expected length with NaN // init array of expected length with NaN
values := make([]schema.Float, steps+1) values := make([]schema.Float, steps+1)
for i, _ := range values { for i := range values {
values[i] = schema.NaN values[i] = schema.NaN
} }
// copy recorded values from prom sample pair // copy recorded values from prom sample pair
@@ -263,8 +264,9 @@ func (pdb *PrometheusDataRepository) LoadData(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) { ctx context.Context,
resolution int,
) (schema.JobData, error) {
// TODO respect requested scope // TODO respect requested scope
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) { if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
scopes = append(scopes, schema.MetricScopeNode) scopes = append(scopes, schema.MetricScopeNode)
@@ -306,7 +308,6 @@ func (pdb *PrometheusDataRepository) LoadData(
Step: time.Duration(metricConfig.Timestep * 1e9), Step: time.Duration(metricConfig.Timestep * 1e9),
} }
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r) result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil { if err != nil {
log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query) log.Errorf("Prometheus query error in LoadData: %v\nQuery: %s", err, query)
return nil, errors.New("Prometheus query error") return nil, errors.New("Prometheus query error")
@@ -326,7 +327,6 @@ func (pdb *PrometheusDataRepository) LoadData(
Timestep: metricConfig.Timestep, Timestep: metricConfig.Timestep,
Series: make([]schema.Series, 0), Series: make([]schema.Series, 0),
} }
jobData[metric][scope] = jobMetric
} }
step := int64(metricConfig.Timestep) step := int64(metricConfig.Timestep)
steps := int64(to.Sub(from).Seconds()) / step steps := int64(to.Sub(from).Seconds()) / step
@@ -335,6 +335,10 @@ func (pdb *PrometheusDataRepository) LoadData(
jobMetric.Series = append(jobMetric.Series, jobMetric.Series = append(jobMetric.Series,
pdb.RowToSeries(from, step, steps, row)) pdb.RowToSeries(from, step, steps, row))
} }
// only add metric if at least one host returned data
if !ok && len(jobMetric.Series) > 0 {
jobData[metric][scope] = jobMetric
}
// sort by hostname to get uniform coloring // sort by hostname to get uniform coloring
sort.Slice(jobMetric.Series, func(i, j int) bool { sort.Slice(jobMetric.Series, func(i, j int) bool {
return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname) return (jobMetric.Series[i].Hostname < jobMetric.Series[j].Hostname)
@@ -348,12 +352,12 @@ func (pdb *PrometheusDataRepository) LoadData(
func (pdb *PrometheusDataRepository) LoadStats( func (pdb *PrometheusDataRepository) LoadStats(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) { ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
// map of metrics of nodes of stats // map of metrics of nodes of stats
stats := map[string]map[string]schema.MetricStatistics{} stats := map[string]map[string]schema.MetricStatistics{}
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx) data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
if err != nil { if err != nil {
log.Warn("Error while loading job for stats") log.Warn("Error while loading job for stats")
return nil, err return nil, err
@@ -373,7 +377,8 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
metrics, nodes []string, metrics, nodes []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
from, to time.Time, from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) { ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
t0 := time.Now() t0 := time.Now()
// Map of hosts of metrics of value slices // Map of hosts of metrics of value slices
data := make(map[string]map[string][]*schema.JobMetric) data := make(map[string]map[string][]*schema.JobMetric)
@@ -408,7 +413,6 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
Step: time.Duration(metricConfig.Timestep * 1e9), Step: time.Duration(metricConfig.Timestep * 1e9),
} }
result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r) result, warnings, err := pdb.queryClient.QueryRange(ctx, query, r)
if err != nil { if err != nil {
log.Errorf("Prometheus query error in LoadNodeData: %v\n", err) log.Errorf("Prometheus query error in LoadNodeData: %v\n", err)
return nil, errors.New("Prometheus query error") return nil, errors.New("Prometheus query error")

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -12,7 +12,7 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
) )
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) { var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
panic("TODO") panic("TODO")
} }
@@ -27,9 +27,10 @@ func (tmdr *TestMetricDataRepository) LoadData(
job *schema.Job, job *schema.Job,
metrics []string, metrics []string,
scopes []schema.MetricScope, scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) { ctx context.Context,
resolution int) (schema.JobData, error) {
return TestLoadDataCallback(job, metrics, scopes, ctx) return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
} }
func (tmdr *TestMetricDataRepository) LoadStats( func (tmdr *TestMetricDataRepository) LoadStats(
@@ -48,3 +49,49 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
panic("TODO") panic("TODO")
} }
func DeepCopy(jd_temp schema.JobData) schema.JobData {
var jd schema.JobData
jd = make(schema.JobData, len(jd_temp))
for k, v := range jd_temp {
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
for k_, v_ := range v {
jd[k][k_] = new(schema.JobMetric)
jd[k][k_].Series = make([]schema.Series, len(v_.Series))
for i := 0; i < len(v_.Series); i += 1 {
jd[k][k_].Series[i].Data = make([]schema.Float, len(v_.Series[i].Data))
copy(jd[k][k_].Series[i].Data, v_.Series[i].Data)
jd[k][k_].Series[i].Hostname = v_.Series[i].Hostname
jd[k][k_].Series[i].Id = v_.Series[i].Id
jd[k][k_].Series[i].Statistics.Avg = v_.Series[i].Statistics.Avg
jd[k][k_].Series[i].Statistics.Min = v_.Series[i].Statistics.Min
jd[k][k_].Series[i].Statistics.Max = v_.Series[i].Statistics.Max
}
jd[k][k_].Timestep = v_.Timestep
jd[k][k_].Unit.Base = v_.Unit.Base
jd[k][k_].Unit.Prefix = v_.Unit.Prefix
if v_.StatisticsSeries != nil {
// Init Slices
jd[k][k_].StatisticsSeries = new(schema.StatsSeries)
jd[k][k_].StatisticsSeries.Max = make([]schema.Float, len(v_.StatisticsSeries.Max))
jd[k][k_].StatisticsSeries.Min = make([]schema.Float, len(v_.StatisticsSeries.Min))
jd[k][k_].StatisticsSeries.Median = make([]schema.Float, len(v_.StatisticsSeries.Median))
jd[k][k_].StatisticsSeries.Mean = make([]schema.Float, len(v_.StatisticsSeries.Mean))
// Copy Data
copy(jd[k][k_].StatisticsSeries.Max, v_.StatisticsSeries.Max)
copy(jd[k][k_].StatisticsSeries.Min, v_.StatisticsSeries.Min)
copy(jd[k][k_].StatisticsSeries.Median, v_.StatisticsSeries.Median)
copy(jd[k][k_].StatisticsSeries.Mean, v_.StatisticsSeries.Mean)
// Handle Percentiles
for k__, v__ := range v_.StatisticsSeries.Percentiles {
jd[k][k_].StatisticsSeries.Percentiles[k__] = make([]schema.Float, len(v__))
copy(jd[k][k_].StatisticsSeries.Percentiles[k__], v__)
}
} else {
jd[k][k_].StatisticsSeries = v_.StatisticsSeries
}
}
}
return jd
}

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -82,6 +82,8 @@ func Connect(driver string, db string) {
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
} }
startJobStartWorker()
}) })
} }

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -16,13 +16,13 @@ type Hooks struct{}
// Before hook will print the query with it's args and return the context with the timestamp // Before hook will print the query with it's args and return the context with the timestamp
func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) { func (h *Hooks) Before(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
log.Infof("SQL query %s %q", query, args) log.Debugf("SQL query %s %q", query, args)
return context.WithValue(ctx, "begin", time.Now()), nil return context.WithValue(ctx, "begin", time.Now()), nil
} }
// After hook will get the timestamp registered on the Before hook and print the elapsed time // After hook will get the timestamp registered on the Before hook and print the elapsed time
func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) { func (h *Hooks) After(ctx context.Context, query string, args ...interface{}) (context.Context, error) {
begin := ctx.Value("begin").(time.Time) begin := ctx.Value("begin").(time.Time)
log.Infof("Took: %s\n", time.Since(begin)) log.Debugf("Took: %s\n", time.Since(begin))
return ctx, nil return ctx, nil
} }

View File

@@ -1,22 +1,21 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package repository package repository
import ( import (
"context"
"database/sql" "database/sql"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"math"
"strconv" "strconv"
"sync" "sync"
"time" "time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph/model" "github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache" "github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema" "github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -30,14 +29,10 @@ var (
) )
type JobRepository struct { type JobRepository struct {
DB *sqlx.DB DB *sqlx.DB
driver string
stmtCache *sq.StmtCache stmtCache *sq.StmtCache
cache *lrucache.Cache cache *lrucache.Cache
driver string
archiveChannel chan *schema.Job
archivePending sync.WaitGroup
} }
func GetJobRepository() *JobRepository { func GetJobRepository() *JobRepository {
@@ -48,37 +43,41 @@ func GetJobRepository() *JobRepository {
DB: db.DB, DB: db.DB,
driver: db.Driver, driver: db.Driver,
stmtCache: sq.NewStmtCache(db.DB), stmtCache: sq.NewStmtCache(db.DB),
cache: lrucache.New(1024 * 1024), cache: lrucache.New(1024 * 1024),
archiveChannel: make(chan *schema.Job, 128),
} }
// start archiving worker
go jobRepoInstance.archivingWorker()
}) })
return jobRepoInstance return jobRepoInstance
} }
var jobColumns []string = []string{ var jobColumns []string = []string{
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id", "job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.cluster_partition", "job.array_job_id",
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state", "job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
"job.duration", "job.walltime", "job.resources", // "job.meta_data", "job.duration", "job.walltime", "job.resources", "job.footprint", "job.energy",
} }
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) { func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
job := &schema.Job{} job := &schema.Job{}
if err := row.Scan( if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId, &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State, &job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil { &job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
log.Warnf("Error while scanning rows (Job): %v", err) log.Warnf("Error while scanning rows (Job): %v", err)
return nil, err return nil, err
} }
if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil { if err := json.Unmarshal(job.RawResources, &job.Resources); err != nil {
log.Warn("Error while unmarhsaling raw resources json") log.Warn("Error while unmarshaling raw resources json")
return nil, err return nil, err
} }
job.RawResources = nil
if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warnf("Error while unmarshaling raw footprint json: %v", err)
return nil, err
}
job.RawFootprint = nil
// if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil { // if err := json.Unmarshal(job.RawMetaData, &job.MetaData); err != nil {
// return nil, err // return nil, err
@@ -89,7 +88,6 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
job.Duration = int32(time.Since(job.StartTime).Seconds()) job.Duration = int32(time.Since(job.StartTime).Seconds())
} }
job.RawResources = nil
return job, nil return job, nil
} }
@@ -178,7 +176,7 @@ func (r *JobRepository) FetchMetadata(job *schema.Job) (map[string]string, error
} }
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour) r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
log.Infof("Timer FetchMetadata %s", time.Since(start)) log.Debugf("Timer FetchMetadata %s", time.Since(start))
return job.MetaData, nil return job.MetaData, nil
} }
@@ -208,366 +206,142 @@ func (r *JobRepository) UpdateMetadata(job *schema.Job, key, val string) (err er
return err return err
} }
if _, err = sq.Update("job").Set("meta_data", job.RawMetaData).Where("job.id = ?", job.ID).RunWith(r.stmtCache).Exec(); err != nil { if _, err = sq.Update("job").
Set("meta_data", job.RawMetaData).
Where("job.id = ?", job.ID).
RunWith(r.stmtCache).Exec(); err != nil {
log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID) log.Warnf("Error while updating metadata for job, DB ID '%v'", job.ID)
return err return err
} }
r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour) r.cache.Put(cachekey, job.MetaData, len(job.RawMetaData), 24*time.Hour)
return nil return archive.UpdateMetadata(job, job.MetaData)
} }
// Find executes a SQL query to find a specific batch job. func (r *JobRepository) FetchFootprint(job *schema.Job) (map[string]float64, error) {
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
jobId *int64,
cluster *string,
startTime *int64) (*schema.Job, error) {
start := time.Now() start := time.Now()
q := sq.Select(jobColumns...).From("job"). cachekey := fmt.Sprintf("footprint:%d", job.ID)
Where("job.job_id = ?", *jobId) if cached := r.cache.Get(cachekey, nil); cached != nil {
job.Footprint = cached.(map[string]float64)
if cluster != nil { return job.Footprint, nil
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
} }
log.Infof("Timer Find %s", time.Since(start)) if err := sq.Select("job.footprint").From("job").Where("job.id = ?", job.ID).
return scanJob(q.RunWith(r.stmtCache).QueryRow()) RunWith(r.stmtCache).QueryRow().Scan(&job.RawFootprint); err != nil {
} log.Warn("Error while scanning for job footprint")
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindAll(
jobId *int64,
cluster *string,
startTime *int64) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err return nil, err
} }
jobs := make([]*schema.Job, 0, 10) if len(job.RawFootprint) == 0 {
for rows.Next() { return nil, nil
job, err := scanJob(rows)
if err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
} }
log.Infof("Timer FindAll %s", time.Since(start))
return jobs, nil if err := json.Unmarshal(job.RawFootprint, &job.Footprint); err != nil {
log.Warn("Error while unmarshaling raw footprint json")
return nil, err
}
r.cache.Put(cachekey, job.Footprint, len(job.Footprint), 24*time.Hour)
log.Debugf("Timer FetchFootprint %s", time.Since(start))
return job.Footprint, nil
} }
// FindById executes a SQL query to find a specific batch job. func (r *JobRepository) FetchEnergyFootprint(job *schema.Job) (map[string]float64, error) {
// The job is queried using the database id. start := time.Now()
// It returns a pointer to a schema.Job data structure and an error variable. cachekey := fmt.Sprintf("energyFootprint:%d", job.ID)
// To check if no job was found test err == sql.ErrNoRows if cached := r.cache.Get(cachekey, nil); cached != nil {
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) { job.EnergyFootprint = cached.(map[string]float64)
q := sq.Select(jobColumns...). return job.EnergyFootprint, nil
From("job").Where("job.id = ?", jobId)
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transfered!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
} }
job.RawMetaData, err = json.Marshal(job.MetaData) if err := sq.Select("job.energy_footprint").From("job").Where("job.id = ?", job.ID).
if err != nil { RunWith(r.stmtCache).QueryRow().Scan(&job.RawEnergyFootprint); err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err) log.Warn("Error while scanning for job energy_footprint")
return nil, err
} }
res, err := r.DB.NamedExec(`INSERT INTO job ( if len(job.RawEnergyFootprint) == 0 {
job_id, user, project, cluster, subcluster, `+"`partition`"+`, array_job_id, num_nodes, num_hwthreads, num_acc, return nil, nil
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data
);`, job)
if err != nil {
return -1, err
} }
return res.LastInsertId() if err := json.Unmarshal(job.RawEnergyFootprint, &job.EnergyFootprint); err != nil {
} log.Warn("Error while unmarshaling raw energy footprint json")
return nil, err
}
// Stop updates the job with the database id jobId using the provided arguments. r.cache.Put(cachekey, job.EnergyFootprint, len(job.EnergyFootprint), 24*time.Hour)
func (r *JobRepository) Stop( log.Debugf("Timer FetchEnergyFootprint %s", time.Since(start))
jobId int64, return job.EnergyFootprint, nil
duration int32,
state schema.JobState,
monitoringStatus int32) (err error) {
stmt := sq.Update("job").
Set("job_state", state).
Set("duration", duration).
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
} }
func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) { func (r *JobRepository) DeleteJobsBefore(startTime int64) (int, error) {
var cnt int var cnt int
qs := fmt.Sprintf("SELECT count(*) FROM job WHERE job.start_time < %d", startTime) q := sq.Select("count(*)").From("job").Where("job.start_time < ?", startTime)
err := r.DB.Get(&cnt, qs) //ignore error as it will also occur in delete statement q.RunWith(r.DB).QueryRow().Scan(cnt)
_, err = r.DB.Exec(`DELETE FROM job WHERE job.start_time < ?`, startTime) qd := sq.Delete("job").Where("job.start_time < ?", startTime)
_, err := qd.RunWith(r.DB).Exec()
if err != nil { if err != nil {
log.Errorf(" DeleteJobsBefore(%d): error %#v", startTime, err) s, _, _ := qd.ToSql()
log.Errorf(" DeleteJobsBefore(%d) with %s: error %#v", startTime, s, err)
} else { } else {
log.Infof("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt) log.Debugf("DeleteJobsBefore(%d): Deleted %d jobs", startTime, cnt)
} }
return cnt, err return cnt, err
} }
func (r *JobRepository) DeleteJobById(id int64) error { func (r *JobRepository) DeleteJobById(id int64) error {
_, err := r.DB.Exec(`DELETE FROM job WHERE job.id = ?`, id) qd := sq.Delete("job").Where("job.id = ?", id)
_, err := qd.RunWith(r.DB).Exec()
if err != nil { if err != nil {
log.Errorf("DeleteJobById(%d): error %#v", id, err) s, _, _ := qd.ToSql()
log.Errorf("DeleteJobById(%d) with %s : error %#v", id, s, err)
} else { } else {
log.Infof("DeleteJobById(%d): Success", id) log.Debugf("DeleteJobById(%d): Success", id)
} }
return err return err
} }
// TODO: Use node hours instead: SELECT job.user, sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN CAST(strftime('%s', 'now') AS INTEGER) - job.start_time ELSE job.duration END)) as x FROM job GROUP BY user ORDER BY x DESC; func (r *JobRepository) FindUserOrProjectOrJobname(user *schema.User, searchterm string) (jobid string, username string, project string, jobname string) {
func (r *JobRepository) CountGroupedJobs(
ctx context.Context,
aggreg model.Aggregate,
filters []*model.JobFilter,
weight *model.Weights,
limit *int) (map[string]int, error) {
start := time.Now()
if !aggreg.IsValid() {
return nil, errors.New("invalid aggregate")
}
runner := (sq.BaseRunner)(r.stmtCache)
count := "count(*) as count"
if weight != nil {
switch *weight {
case model.WeightsNodeCount:
count = "sum(job.num_nodes) as count"
case model.WeightsNodeHours:
now := time.Now().Unix()
count = fmt.Sprintf(`sum(job.num_nodes * (CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) as count`, now)
runner = r.DB
default:
log.Infof("CountGroupedJobs() Weight %v unknown.", *weight)
}
}
q, qerr := SecurityCheck(ctx, sq.Select("job."+string(aggreg), count).From("job").GroupBy("job."+string(aggreg)).OrderBy("count DESC"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
q = BuildWhereClause(f, q)
}
if limit != nil {
q = q.Limit(uint64(*limit))
}
counts := map[string]int{}
rows, err := q.RunWith(runner).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
for rows.Next() {
var group string
var count int
if err := rows.Scan(&group, &count); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
counts[group] = count
}
log.Infof("Timer CountGroupedJobs %s", time.Since(start))
return counts, nil
}
func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", job)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
}
// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) MarkArchived(
jobId int64,
monitoringStatus int32,
metricStats map[string]schema.JobStatistics) error {
stmt := sq.Update("job").
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
for metric, stats := range metricStats {
switch metric {
case "flops_any":
stmt = stmt.Set("flops_any_avg", stats.Avg)
case "mem_used":
stmt = stmt.Set("mem_used_max", stats.Max)
case "mem_bw":
stmt = stmt.Set("mem_bw_avg", stats.Avg)
case "load":
case "cpu_load":
stmt = stmt.Set("load_avg", stats.Avg)
case "net_bw":
stmt = stmt.Set("net_bw_avg", stats.Avg)
case "file_bw":
stmt = stmt.Set("file_bw_avg", stats.Avg)
default:
log.Infof("MarkArchived() Metric '%v' unknown", metric)
}
}
if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
log.Warn("Error while marking job as archived")
return err
}
return nil
}
// Archiving worker thread
func (r *JobRepository) archivingWorker() {
for {
select {
case job, ok := <-r.archiveChannel:
if !ok {
break
}
// not using meta data, called to load JobMeta into Cache?
// will fail if job meta not in repository
if _, err := r.FetchMetadata(job); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// metricdata.ArchiveJob will fetch all the data from a MetricDataRepository and push into configured archive backend
// TODO: Maybe use context with cancel/timeout here
jobMeta, err := metricdata.ArchiveJob(job, context.Background())
if err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
r.UpdateMonitoringStatus(job.ID, schema.MonitoringStatusArchivingFailed)
continue
}
// Update the jobs database entry one last time:
if err := r.MarkArchived(job.ID, schema.MonitoringStatusArchivingSuccessful, jobMeta.Statistics); err != nil {
log.Errorf("archiving job (dbid: %d) failed: %s", job.ID, err.Error())
continue
}
log.Printf("archiving job (dbid: %d) successful", job.ID)
r.archivePending.Done()
}
}
}
// Trigger async archiving
func (r *JobRepository) TriggerArchiving(job *schema.Job) {
r.archivePending.Add(1)
r.archiveChannel <- job
}
// Wait for background thread to finish pending archiving operations
func (r *JobRepository) WaitForArchiving() {
// close channel and wait for worker to process remaining jobs
r.archivePending.Wait()
}
var ErrNotFound = errors.New("no such jobname, project or user")
var ErrForbidden = errors.New("not authorized")
// FindJobnameOrUserOrProject returns a jobName or a username or a projectId if a jobName or user or project matches the search term.
// If query is found to be an integer (= conversion to INT datatype succeeds), skip back to parent call
// If nothing matches the search, `ErrNotFound` is returned.
func (r *JobRepository) FindUserOrProjectOrJobname(ctx context.Context, searchterm string) (username string, project string, metasnip string, err error) {
if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId if _, err := strconv.Atoi(searchterm); err == nil { // Return empty on successful conversion: parent method will redirect for integer jobId
return "", "", "", nil return searchterm, "", "", ""
} else { // Has to have letters and logged-in user for other guesses } else { // Has to have letters and logged-in user for other guesses
user := auth.GetUser(ctx)
if user != nil { if user != nil {
// Find username in jobs (match) // Find username by username in job table (match)
uresult, _ := r.FindColumnValue(user, searchterm, "job", "user", "user", false) uresult, _ := r.FindColumnValue(user, searchterm, "job", "hpc_user", "hpc_user", false)
if uresult != "" { if uresult != "" {
return uresult, "", "", nil return "", uresult, "", ""
} }
// Find username by name (like) // Find username by real name in hpc_user table (like)
nresult, _ := r.FindColumnValue(user, searchterm, "user", "username", "name", true) nresult, _ := r.FindColumnValue(user, searchterm, "hpc_user", "username", "name", true)
if nresult != "" { if nresult != "" {
return nresult, "", "", nil return "", nresult, "", ""
} }
// Find projectId in jobs (match) // Find projectId by projectId in job table (match)
presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false) presult, _ := r.FindColumnValue(user, searchterm, "job", "project", "project", false)
if presult != "" { if presult != "" {
return "", presult, "", nil return "", "", presult, ""
}
// Still no return (or not authorized for above): Try JobName
// Match Metadata, on hit, parent method redirects to jobName GQL query
err := sq.Select("job.cluster").Distinct().From("job").
Where("job.meta_data LIKE ?", "%"+searchterm+"%").
RunWith(r.stmtCache).QueryRow().Scan(&metasnip)
if err != nil && err != sql.ErrNoRows {
return "", "", "", err
} else if err == nil {
return "", "", metasnip[0:1], nil
} }
} }
return "", "", "", ErrNotFound // Return searchterm if no match before: Forward as jobname query to GQL in handleSearchbar function
return "", "", "", searchterm
} }
} }
func (r *JobRepository) FindColumnValue(user *auth.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) { var (
ErrNotFound = errors.New("no such jobname, project or user")
ErrForbidden = errors.New("not authorized")
)
func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, table string, selectColumn string, whereColumn string, isLike bool) (result string, err error) {
compareStr := " = ?" compareStr := " = ?"
query := searchterm query := searchterm
if isLike { if isLike {
compareStr = " LIKE ?" compareStr = " LIKE ?"
query = "%" + searchterm + "%" query = "%" + searchterm + "%"
} }
if user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) { if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
theQuery := sq.Select(table+"."+selectColumn).Distinct().From(table). theQuery := sq.Select(table+"."+selectColumn).Distinct().From(table).
Where(table+"."+whereColumn+compareStr, query) Where(table+"."+whereColumn+compareStr, query)
@@ -592,9 +366,9 @@ func (r *JobRepository) FindColumnValue(user *auth.User, searchterm string, tabl
} }
} }
func (r *JobRepository) FindColumnValues(user *auth.User, query string, table string, selectColumn string, whereColumn string) (results []string, err error) { func (r *JobRepository) FindColumnValues(user *schema.User, query string, table string, selectColumn string, whereColumn string) (results []string, err error) {
emptyResult := make([]string, 0) emptyResult := make([]string, 0)
if user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport, auth.RoleManager}) { if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
rows, err := sq.Select(table+"."+selectColumn).Distinct().From(table). rows, err := sq.Select(table+"."+selectColumn).Distinct().From(table).
Where(table+"."+whereColumn+" LIKE ?", fmt.Sprint("%", query, "%")). Where(table+"."+whereColumn+" LIKE ?", fmt.Sprint("%", query, "%")).
RunWith(r.stmtCache).Query() RunWith(r.stmtCache).Query()
@@ -626,7 +400,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now() start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) { partitions := r.cache.Get("partitions:"+cluster, func() (interface{}, time.Duration, int) {
parts := []string{} parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.partition FROM job WHERE job.cluster = ?;`, cluster); err != nil { if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
return nil, 0, 1000 return nil, 0, 1000
} }
@@ -635,14 +409,13 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
if err != nil { if err != nil {
return nil, err return nil, err
} }
log.Infof("Timer Partitions %s", time.Since(start)) log.Debugf("Timer Partitions %s", time.Since(start))
return partitions.([]string), nil return partitions.([]string), nil
} }
// AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host. // AllocatedNodes returns a map of all subclusters to a map of hostnames to the amount of jobs running on that host.
// Hosts with zero jobs running on them will not show up! // Hosts with zero jobs running on them will not show up!
func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) { func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]int, error) {
start := time.Now() start := time.Now()
subclusters := make(map[string]map[string]int) subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job"). rows, err := sq.Select("resources", "subcluster").From("job").
@@ -680,12 +453,11 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
} }
} }
log.Infof("Timer AllocatedNodes %s", time.Since(start)) log.Debugf("Timer AllocatedNodes %s", time.Since(start))
return subclusters, nil return subclusters, nil
} }
func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error { func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
start := time.Now() start := time.Now()
res, err := sq.Update("job"). res, err := sq.Update("job").
Set("monitoring_status", schema.MonitoringStatusArchivingFailed). Set("monitoring_status", schema.MonitoringStatusArchivingFailed).
@@ -709,12 +481,51 @@ func (r *JobRepository) StopJobsExceedingWalltimeBy(seconds int) error {
if rowsAffected > 0 { if rowsAffected > 0 {
log.Infof("%d jobs have been marked as failed due to running too long", rowsAffected) log.Infof("%d jobs have been marked as failed due to running too long", rowsAffected)
} }
log.Infof("Timer StopJobsExceedingWalltimeBy %s", time.Since(start)) log.Debugf("Timer StopJobsExceedingWalltimeBy %s", time.Since(start))
return nil
}
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where("job.job_state = 'running'").
Where("job.duration > 600")
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Infof("Return job count %d", len(jobs))
return jobs, nil
}
func (r *JobRepository) UpdateDuration() error {
stmnt := sq.Update("job").
Set("duration", sq.Expr("? - job.start_time", time.Now().Unix())).
Where("job_state = 'running'")
_, err := stmnt.RunWith(r.stmtCache).Exec()
if err != nil {
return err
}
return nil return nil
} }
func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) { func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64) ([]*schema.Job, error) {
var query sq.SelectBuilder var query sq.SelectBuilder
if startTimeBegin == startTimeEnd || startTimeBegin > startTimeEnd { if startTimeBegin == startTimeEnd || startTimeBegin > startTimeEnd {
@@ -722,9 +533,11 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
} }
if startTimeBegin == 0 { if startTimeBegin == 0 {
log.Infof("Find jobs before %d", startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf( query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
"job.start_time < %d", startTimeEnd)) "job.start_time < %d", startTimeEnd))
} else { } else {
log.Infof("Find jobs between %d and %d", startTimeBegin, startTimeEnd)
query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf( query = sq.Select(jobColumns...).From("job").Where(fmt.Sprintf(
"job.start_time BETWEEN %d AND %d", startTimeBegin, startTimeEnd)) "job.start_time BETWEEN %d AND %d", startTimeBegin, startTimeEnd))
} }
@@ -746,30 +559,113 @@ func (r *JobRepository) FindJobsBetween(startTimeBegin int64, startTimeEnd int64
jobs = append(jobs, job) jobs = append(jobs, job)
} }
log.Infof("Return job count %d", len(jobs))
return jobs, nil return jobs, nil
} }
const NamedJobInsert string = `INSERT INTO job ( func (r *JobRepository) UpdateMonitoringStatus(job int64, monitoringStatus int32) (err error) {
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc, stmt := sq.Update("job").
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data, Set("monitoring_status", monitoringStatus).
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total Where("job.id = ?", job)
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) { _, err = stmt.RunWith(r.stmtCache).Exec()
res, err := r.DB.NamedExec(NamedJobInsert, job) return
if err != nil { }
log.Warn("Error while NamedJobInsert")
return 0, err func (r *JobRepository) Execute(stmt sq.UpdateBuilder) error {
} if _, err := stmt.RunWith(r.stmtCache).Exec(); err != nil {
id, err := res.LastInsertId() return err
if err != nil { }
log.Warn("Error while getting last insert ID")
return 0, err return nil
} }
return id, nil func (r *JobRepository) MarkArchived(
stmt sq.UpdateBuilder,
monitoringStatus int32,
) sq.UpdateBuilder {
return stmt.Set("monitoring_status", monitoringStatus)
}
func (r *JobRepository) UpdateEnergy(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
}
energyFootprint := make(map[string]float64)
var totalEnergy float64
var energy float64
for _, fp := range sc.EnergyFootprint {
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
energyFootprint[fp] = energy
totalEnergy += energy
}
var rawFootprint []byte
if rawFootprint, err = json.Marshal(energyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
}
func (r *JobRepository) UpdateFootprint(
stmt sq.UpdateBuilder,
jobMeta *schema.JobMeta,
) (sq.UpdateBuilder, error) {
/* Note: Only Called for Running Jobs during Intermediate Update or on Archiving */
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
if err != nil {
log.Errorf("cannot get subcluster: %s", err.Error())
return stmt, err
}
footprint := make(map[string]float64)
for _, fp := range sc.Footprint {
var statType string
for _, gm := range archive.GlobalMetricList {
if gm.Name == fp {
statType = gm.Footprint
}
}
if statType != "avg" && statType != "min" && statType != "max" {
log.Warnf("unknown statType for footprint update: %s", statType)
return stmt, fmt.Errorf("unknown statType for footprint update: %s", statType)
}
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
statType = sc.MetricConfig[i].Footprint
}
name := fmt.Sprintf("%s_%s", fp, statType)
footprint[name] = LoadJobStat(jobMeta, fp, statType)
}
var rawFootprint []byte
if rawFootprint, err = json.Marshal(footprint); err != nil {
log.Warnf("Error while marshaling footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return stmt, err
}
return stmt.Set("footprint", string(rawFootprint)), nil
} }

View File

@@ -0,0 +1,75 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"encoding/json"
"fmt"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
const NamedJobInsert string = `INSERT INTO job (
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
func (r *JobRepository) InsertJob(job *schema.JobMeta) (int64, error) {
res, err := r.DB.NamedExec(NamedJobInsert, job)
if err != nil {
log.Warn("Error while NamedJobInsert")
return 0, err
}
id, err := res.LastInsertId()
if err != nil {
log.Warn("Error while getting last insert ID")
return 0, err
}
return id, nil
}
// Start inserts a new job in the table, returning the unique job ID.
// Statistics are not transfered!
func (r *JobRepository) Start(job *schema.JobMeta) (id int64, err error) {
job.RawFootprint, err = json.Marshal(job.Footprint)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding footprint field failed: %w", err)
}
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding resources field failed: %w", err)
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return -1, fmt.Errorf("REPOSITORY/JOB > encoding metaData field failed: %w", err)
}
return r.InsertJob(job)
}
// Stop updates the job with the database id jobId using the provided arguments.
func (r *JobRepository) Stop(
jobId int64,
duration int32,
state schema.JobState,
monitoringStatus int32,
) (err error) {
stmt := sq.Update("job").
Set("job_state", state).
Set("duration", duration).
Set("monitoring_status", monitoringStatus).
Where("job.id = ?", jobId)
_, err = stmt.RunWith(r.stmtCache).Exec()
return
}

View File

@@ -0,0 +1,261 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"database/sql"
"fmt"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) Find(
jobId *int64,
cluster *string,
startTime *int64,
) (*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
q = q.OrderBy("job.id DESC") // always use newest matching job by db id if more than one match
log.Debugf("Timer Find %s", time.Since(start))
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// Find executes a SQL query to find a specific batch job.
// The job is queried using the batch job id, the cluster name,
// and the start time of the job in UNIX epoch time seconds.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindAll(
jobId *int64,
cluster *string,
startTime *int64,
) ([]*schema.Job, error) {
start := time.Now()
q := sq.Select(jobColumns...).From("job").
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
}
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
jobs := make([]*schema.Job, 0, 10)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
jobs = append(jobs, job)
}
log.Debugf("Timer FindAll %s", time.Since(start))
return jobs, nil
}
// FindById executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(ctx context.Context, jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
q, qerr := SecurityCheck(ctx, q)
if qerr != nil {
return nil, qerr
}
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByIdWithUser executes a SQL query to find a specific batch job.
// The job is queried using the database id. The user is passed directly,
// instead as part of the context.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdWithUser(user *schema.User, jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
q, qerr := SecurityCheckWithUser(user, q)
if qerr != nil {
return nil, qerr
}
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByIdDirect executes a SQL query to find a specific batch job.
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByIdDirect(jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// FindByJobId executes a SQL query to find a specific batch job.
// The job is queried using the slurm id and the clustername.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime int64, cluster string) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").
Where("job.job_id = ?", jobId).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
q, qerr := SecurityCheck(ctx, q)
if qerr != nil {
return nil, qerr
}
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
// IsJobOwner executes a SQL query to find a specific batch job.
// The job is queried using the slurm id,a username and the cluster.
// It returns a bool.
// If job was found, user is owner: test err != sql.ErrNoRows
func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cluster string) bool {
q := sq.Select("id").
From("job").
Where("job.job_id = ?", jobId).
Where("job.hpc_user = ?", user).
Where("job.cluster = ?", cluster).
Where("job.start_time = ?", startTime)
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())
return err != sql.ErrNoRows
}
func (r *JobRepository) FindConcurrentJobs(
ctx context.Context,
job *schema.Job,
) (*model.JobLinkResultList, error) {
if job == nil {
return nil, nil
}
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id", "job.start_time").From("job"))
if qerr != nil {
return nil, qerr
}
query = query.Where("cluster = ?", job.Cluster)
var startTime int64
var stopTime int64
startTime = job.StartTimeUnix
hostname := job.Resources[0].Hostname
if job.State == schema.JobStateRunning {
stopTime = time.Now().Unix()
} else {
stopTime = startTime + int64(job.Duration)
}
// Add 200s overlap for jobs start time at the end
startTimeTail := startTime + 10
stopTimeTail := stopTime - 200
startTimeFront := startTime + 200
queryRunning := query.Where("job.job_state = ?").Where("(job.start_time BETWEEN ? AND ? OR job.start_time < ?)",
"running", startTimeTail, stopTimeTail, startTime)
queryRunning = queryRunning.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
query = query.Where("job.job_state != ?").Where("((job.start_time BETWEEN ? AND ?) OR (job.start_time + job.duration) BETWEEN ? AND ? OR (job.start_time < ?) AND (job.start_time + job.duration) > ?)",
"running", startTimeTail, stopTimeTail, startTimeFront, stopTimeTail, startTime, stopTime)
query = query.Where("job.resources LIKE ?", fmt.Sprint("%", hostname, "%"))
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
return nil, err
}
items := make([]*model.JobLink, 0, 10)
queryString := fmt.Sprintf("cluster=%s", job.Cluster)
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err = rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
}
rows, err = queryRunning.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
return nil, err
}
for rows.Next() {
var id, jobId, startTime sql.NullInt64
if err := rows.Scan(&id, &jobId, &startTime); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
queryString += fmt.Sprintf("&jobId=%d", int(jobId.Int64))
items = append(items,
&model.JobLink{
ID: fmt.Sprint(id.Int64),
JobID: int(jobId.Int64),
})
}
}
cnt := len(items)
return &model.JobLinkResultList{
ListQuery: &queryString,
Items: items,
Count: &cnt,
}, nil
}

View File

@@ -0,0 +1,317 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
func (r *JobRepository) QueryJobs(
ctx context.Context,
filters []*model.JobFilter,
page *model.PageRequest,
order *model.OrderByInput,
) ([]*schema.Job, error) {
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
if qerr != nil {
return nil, qerr
}
if order != nil {
field := toSnakeCase(order.Field)
if order.Type == "col" {
// "col": Fixed column name query
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for column")
}
} else {
// "foot": Order by footprint JSON field values
// Verify and Search Only in Valid Jsons
query = query.Where("JSON_VALID(meta_data)")
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("JSON_EXTRACT(footprint, \"$.%s\") DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order for footprint")
}
}
}
if page != nil && page.ItemsPerPage != -1 {
limit := uint64(page.ItemsPerPage)
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while running query: %v", err)
return nil, err
}
jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows (Jobs)")
return nil, err
}
jobs = append(jobs, job)
}
return jobs, nil
}
func (r *JobRepository) CountJobs(
ctx context.Context,
filters []*model.JobFilter,
) (int, error) {
// DISTICT count for tags filters, does not affect other queries
query, qerr := SecurityCheck(ctx, sq.Select("count(DISTINCT job.id)").From("job"))
if qerr != nil {
return 0, qerr
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
var count int
if err := query.RunWith(r.DB).Scan(&count); err != nil {
return 0, err
}
return count, nil
}
func SecurityCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
if user == nil {
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user context is nil")
}
switch {
case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : All jobs
return query, nil
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : All jobs
return query, nil
case user.HasRole(schema.RoleManager): // Manager : Add filter for managed projects' jobs only + personal jobs
if len(user.Projects) != 0 {
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.hpc_user": user.Username}}), nil
} else {
log.Debugf("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
return query.Where("job.hpc_user = ?", user.Username), nil
}
case user.HasRole(schema.RoleUser): // User : Only personal jobs
return query.Where("job.hpc_user = ?", user.Username), nil
default: // No known Role, return error
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user has no or unknown roles")
}
}
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
user := GetUserFromContext(ctx)
return SecurityCheckWithUser(user, query)
}
// Build a sq.SelectBuilder out of a schema.JobFilter.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
if filter.Tags != nil {
// This is an OR-Logic query: Returns all distinct jobs with at least one of the requested tags; TODO: AND-Logic query?
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags}).Distinct()
}
if filter.JobID != nil {
query = buildStringCondition("job.job_id", filter.JobID, query)
}
if filter.ArrayJobID != nil {
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
}
if filter.User != nil {
query = buildStringCondition("job.hpc_user", filter.User, query)
}
if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query)
}
if filter.JobName != nil {
query = buildMetaJsonCondition("jobName", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
}
if filter.StartTime != nil {
query = buildTimeCondition("job.start_time", filter.StartTime, query)
}
if filter.Duration != nil {
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
}
if filter.MinRunningFor != nil {
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
}
if filter.State != nil {
states := make([]string, len(filter.State))
for i, val := range filter.State {
states[i] = string(val)
}
query = query.Where(sq.Eq{"job.job_state": states})
}
if filter.NumNodes != nil {
query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
}
if filter.NumAccelerators != nil {
query = buildIntCondition("job.num_acc", filter.NumAccelerators, query)
}
if filter.NumHWThreads != nil {
query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
}
if filter.Node != nil {
query = buildStringCondition("job.resources", filter.Node, query)
}
if filter.Energy != nil {
query = buildFloatCondition("job.energy", filter.Energy, query)
}
if filter.MetricStats != nil {
for _, ms := range filter.MetricStats {
query = buildFloatJsonCondition(ms.MetricName, ms.Range, query)
}
}
return query
}
func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
if cond.From != nil && cond.To != nil {
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
} else if cond.From != nil {
return query.Where("? <= "+field, cond.From.Unix())
} else if cond.To != nil {
return query.Where(field+" <= ?", cond.To.Unix())
} else if cond.Range != "" {
now := time.Now().Unix()
var then int64
switch cond.Range {
case "last6h":
then = now - (60 * 60 * 6)
case "last24h":
then = now - (60 * 60 * 24)
case "last7d":
then = now - (60 * 60 * 24 * 7)
case "last30d":
then = now - (60 * 60 * 24 * 30)
default:
log.Debugf("No known named timeRange: startTime.range = %s", cond.Range)
return query
}
return query.Where(field+" BETWEEN ? AND ?", then, now)
} else {
return query
}
}
func buildFloatJsonCondition(condName string, condRange *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
// Verify and Search Only in Valid Jsons
query = query.Where("JSON_VALID(footprint)")
return query.Where("JSON_EXTRACT(footprint, \"$."+condName+"\") BETWEEN ? AND ?", condRange.From, condRange.To)
}
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
if cond.Eq != nil {
return query.Where(field+" = ?", *cond.Eq)
}
if cond.Neq != nil {
return query.Where(field+" != ?", *cond.Neq)
}
if cond.StartsWith != nil {
return query.Where(field+" LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
}
if cond.EndsWith != nil {
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.EndsWith))
}
if cond.Contains != nil {
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
}
if cond.In != nil {
queryElements := make([]string, len(cond.In))
copy(queryElements, cond.In)
return query.Where(sq.Or{sq.Eq{field: queryElements}})
}
return query
}
func buildMetaJsonCondition(jsonField string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
// Verify and Search Only in Valid Jsons
query = query.Where("JSON_VALID(meta_data)")
// add "AND" Sql query Block for field match
if cond.Eq != nil {
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") = ?", *cond.Eq)
}
if cond.Neq != nil {
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") != ?", *cond.Neq)
}
if cond.StartsWith != nil {
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
}
if cond.EndsWith != nil {
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint("%", *cond.EndsWith))
}
if cond.Contains != nil {
return query.Where("JSON_EXTRACT(meta_data, \"$."+jsonField+"\") LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
}
return query
}
var (
matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
)
func toSnakeCase(str string) string {
for _, c := range str {
if c == '\'' || c == '\\' {
log.Panic("toSnakeCase() attack vector!")
}
}
str = strings.ReplaceAll(str, "'", "")
str = strings.ReplaceAll(str, "\\", "")
snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
return strings.ToLower(snake)
}

View File

@@ -0,0 +1,83 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
type JobWithUser struct {
Job *schema.JobMeta
User *schema.User
}
var (
jobStartPending sync.WaitGroup
jobStartChannel chan JobWithUser
)
func startJobStartWorker() {
jobStartChannel = make(chan JobWithUser, 128)
go jobStartWorker()
}
// Archiving worker thread
func jobStartWorker() {
for {
select {
case req, ok := <-jobStartChannel:
if !ok {
break
}
jobRepo := GetJobRepository()
var id int64
for i := 0; i < 5; i++ {
var err error
id, err = jobRepo.Start(req.Job)
if err != nil {
log.Errorf("Attempt %d: insert into database failed: %v", i, err)
} else {
break
}
time.Sleep(1 * time.Second)
}
for _, tag := range req.Job.Tags {
if _, err := jobRepo.AddTagOrCreate(req.User, id,
tag.Type, tag.Name, tag.Scope); err != nil {
log.Errorf("adding tag to new job %d failed: %v", id, err)
}
}
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d",
id, req.Job.Cluster, req.Job.JobID, req.Job.User, req.Job.StartTime)
jobStartPending.Done()
}
}
}
// Trigger async archiving
func TriggerJobStart(req JobWithUser) {
if jobStartChannel == nil {
log.Fatal("Cannot start Job without jobStart channel. Did you Start the worker?")
}
jobStartPending.Add(1)
jobStartChannel <- req
}
// Wait for background thread to finish pending archiving operations
func WaitForJobStart() {
// close channel and wait for worker to process remaining jobs
jobStartPending.Wait()
}

View File

@@ -1,13 +1,15 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
package repository package repository
import ( import (
"context"
"fmt" "fmt"
"testing" "testing"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
_ "github.com/mattn/go-sqlite3" _ "github.com/mattn/go-sqlite3"
) )
@@ -30,7 +32,7 @@ func TestFind(t *testing.T) {
func TestFindById(t *testing.T) { func TestFindById(t *testing.T) {
r := setup(t) r := setup(t)
job, err := r.FindById(5) job, err := r.FindById(getContext(t), 5)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@@ -45,7 +47,19 @@ func TestFindById(t *testing.T) {
func TestGetTags(t *testing.T) { func TestGetTags(t *testing.T) {
r := setup(t) r := setup(t)
tags, counts, err := r.CountTags(nil) const contextUserKey ContextKey = "user"
contextUserValue := &schema.User{
Username: "testuser",
Projects: make([]string, 0),
Roles: []string{"user"},
AuthType: 0,
AuthSource: 2,
}
ctx := context.WithValue(getContext(t), contextUserKey, contextUserValue)
// Test Tag has Scope "global"
tags, counts, err := r.CountTags(GetUserFromContext(ctx))
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View File

@@ -1,4 +1,4 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg. // Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. // All rights reserved.
// Use of this source code is governed by a MIT-style // Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
@@ -16,7 +16,7 @@ import (
"github.com/golang-migrate/migrate/v4/source/iofs" "github.com/golang-migrate/migrate/v4/source/iofs"
) )
const Version uint = 4 const Version uint = 8
//go:embed migrations/* //go:embed migrations/*
var migrationFiles embed.FS var migrationFiles embed.FS
@@ -53,9 +53,11 @@ func checkDBVersion(backend string, db *sql.DB) error {
if err != nil { if err != nil {
return err return err
} }
default:
log.Fatalf("unsupported database backend: %s", backend)
} }
v, _, err := m.Version() v, dirty, err := m.Version()
if err != nil { if err != nil {
if err == migrate.ErrNilVersion { if err == migrate.ErrNilVersion {
log.Warn("Legacy database without version or missing database file!") log.Warn("Legacy database without version or missing database file!")
@@ -65,19 +67,19 @@ func checkDBVersion(backend string, db *sql.DB) error {
} }
if v < Version { if v < Version {
return fmt.Errorf("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend --migrate-db", v, Version) return fmt.Errorf("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
} else if v > Version {
return fmt.Errorf("unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool", v, Version)
} }
if v > Version { if dirty {
return fmt.Errorf("unsupported database version %d, need %d.\nPlease refer to documentation how to downgrade db with external migrate tool", v, Version) return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
} }
return nil return nil
} }
func MigrateDB(backend string, db string) error { func getMigrateInstance(backend string, db string) (m *migrate.Migrate, err error) {
var m *migrate.Migrate
switch backend { switch backend {
case "sqlite3": case "sqlite3":
d, err := iofs.New(migrationFiles, "migrations/sqlite3") d, err := iofs.New(migrationFiles, "migrations/sqlite3")
@@ -87,18 +89,37 @@ func MigrateDB(backend string, db string) error {
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db)) m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("sqlite3://%s?_foreign_keys=on", db))
if err != nil { if err != nil {
return err return m, err
} }
case "mysql": case "mysql":
d, err := iofs.New(migrationFiles, "migrations/mysql") d, err := iofs.New(migrationFiles, "migrations/mysql")
if err != nil { if err != nil {
return err return m, err
} }
m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db)) m, err = migrate.NewWithSourceInstance("iofs", d, fmt.Sprintf("mysql://%s?multiStatements=true", db))
if err != nil { if err != nil {
return err return m, err
} }
default:
log.Fatalf("unsupported database backend: %s", backend)
}
return m, nil
}
func MigrateDB(backend string, db string) error {
m, err := getMigrateInstance(backend, db)
if err != nil {
return err
}
v, dirty, err := m.Version()
log.Infof("unsupported database version %d, need %d.\nPlease backup your database file and run cc-backend -migrate-db", v, Version)
if dirty {
return fmt.Errorf("last migration to version %d has failed, please fix the db manually and force version with -force-db flag", Version)
} }
if err := m.Up(); err != nil { if err := m.Up(); err != nil {
@@ -112,3 +133,35 @@ func MigrateDB(backend string, db string) error {
m.Close() m.Close()
return nil return nil
} }
func RevertDB(backend string, db string) error {
m, err := getMigrateInstance(backend, db)
if err != nil {
return err
}
if err := m.Migrate(Version - 1); err != nil {
if err == migrate.ErrNoChange {
log.Info("DB already up to date!")
} else {
return err
}
}
m.Close()
return nil
}
func ForceDB(backend string, db string) error {
m, err := getMigrateInstance(backend, db)
if err != nil {
return err
}
if err := m.Force(int(Version)); err != nil {
return err
}
m.Close()
return nil
}

View File

@@ -0,0 +1,2 @@
ALTER TABLE tag DROP COLUMN insert_time;
ALTER TABLE jobtag DROP COLUMN insert_time;

View File

@@ -0,0 +1,2 @@
ALTER TABLE tag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;
ALTER TABLE jobtag ADD COLUMN insert_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP;

View File

@@ -0,0 +1 @@
ALTER TABLE configuration MODIFY value VARCHAR(255);

View File

@@ -0,0 +1 @@
ALTER TABLE configuration MODIFY value TEXT;

View File

@@ -0,0 +1,3 @@
SET FOREIGN_KEY_CHECKS = 0;
ALTER TABLE tag MODIFY id INTEGER;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -0,0 +1,3 @@
SET FOREIGN_KEY_CHECKS = 0;
ALTER TABLE tag MODIFY id INTEGER AUTO_INCREMENT;
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -0,0 +1,83 @@
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
ALTER TABLE job DROP footprint;
-- Do not use reserved keywords anymore
RENAME TABLE hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN hpc_user TO `user`;
ALTER TABLE job RENAME COLUMN cluster_partition TO `partition`;
DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;
DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;
DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;
DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;

View File

@@ -0,0 +1,123 @@
DROP INDEX IF EXISTS job_stats ON job;
DROP INDEX IF EXISTS job_by_user ON job;
DROP INDEX IF EXISTS job_by_starttime ON job;
DROP INDEX IF EXISTS job_by_job_id ON job;
DROP INDEX IF EXISTS job_list ON job;
DROP INDEX IF EXISTS job_list_user ON job;
DROP INDEX IF EXISTS job_list_users ON job;
DROP INDEX IF EXISTS job_list_users_start ON job;
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint JSON;
ALTER TABLE job ADD COLUMN footprint JSON;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-- Do not use reserved keywords anymore
RENAME TABLE `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `user` TO hpc_user;
ALTER TABLE job RENAME COLUMN `partition` TO cluster_partition;
ALTER TABLE job MODIFY COLUMN cluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN hpc_user VARCHAR(50);
ALTER TABLE job MODIFY COLUMN subcluster VARCHAR(50);
ALTER TABLE job MODIFY COLUMN project VARCHAR(50);
ALTER TABLE job MODIFY COLUMN cluster_partition VARCHAR(50);
ALTER TABLE job MODIFY COLUMN job_state VARCHAR(25);
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Optimize DB index usage

View File

@@ -30,6 +30,8 @@ file_bw_avg REAL NOT NULL DEFAULT 0.0,
file_data_vol_total REAL NOT NULL DEFAULT 0.0, file_data_vol_total REAL NOT NULL DEFAULT 0.0,
UNIQUE (job_id, cluster, start_time)); UNIQUE (job_id, cluster, start_time));
UPDATE job SET job_state='cancelled' WHERE job_state='canceled';
INSERT INTO job_new SELECT * FROM job; INSERT INTO job_new SELECT * FROM job;
DROP TABLE job; DROP TABLE job;
ALTER TABLE job_new RENAME TO job; ALTER TABLE job_new RENAME TO job;

View File

@@ -0,0 +1,2 @@
ALTER TABLE tag DROP COLUMN insert_time;
ALTER TABLE jobtag DROP COLUMN insert_time;

View File

@@ -0,0 +1,18 @@
ALTER TABLE tag ADD COLUMN insert_ts TEXT DEFAULT NULL /* replace me */;
ALTER TABLE jobtag ADD COLUMN insert_ts TEXT DEFAULT NULL /* replace me */;
UPDATE tag SET insert_ts = CURRENT_TIMESTAMP;
UPDATE jobtag SET insert_ts = CURRENT_TIMESTAMP;
PRAGMA writable_schema = on;
UPDATE sqlite_master
SET sql = replace(sql, 'DEFAULT NULL /* replace me */',
'DEFAULT CURRENT_TIMESTAMP')
WHERE type = 'table'
AND name = 'tag';
UPDATE sqlite_master
SET sql = replace(sql, 'DEFAULT NULL /* replace me */',
'DEFAULT CURRENT_TIMESTAMP')
WHERE type = 'table'
AND name = 'jobtag';
PRAGMA writable_schema = off;

View File

@@ -0,0 +1,10 @@
CREATE TABLE IF NOT EXISTS configuration_new (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
INSERT INTO configuration_new SELECT * FROM configuration;
DROP TABLE configuration;
ALTER TABLE configuration_new RENAME TO configuration;

View File

@@ -0,0 +1,10 @@
CREATE TABLE IF NOT EXISTS configuration_new (
username varchar(255),
confkey varchar(255),
value text,
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);
INSERT INTO configuration_new SELECT * FROM configuration;
DROP TABLE configuration;
ALTER TABLE configuration_new RENAME TO configuration;

View File

@@ -0,0 +1,103 @@
ALTER TABLE job DROP energy;
ALTER TABLE job DROP energy_footprint;
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;
UPDATE job SET flops_any_avg = json_extract(footprint, '$.flops_any_avg');
UPDATE job SET mem_bw_avg = json_extract(footprint, '$.mem_bw_avg');
UPDATE job SET mem_used_max = json_extract(footprint, '$.mem_used_max');
UPDATE job SET load_avg = json_extract(footprint, '$.cpu_load_avg');
UPDATE job SET net_bw_avg = json_extract(footprint, '$.net_bw_avg');
UPDATE job SET net_data_vol_total = json_extract(footprint, '$.net_data_vol_total');
UPDATE job SET file_bw_avg = json_extract(footprint, '$.file_bw_avg');
UPDATE job SET file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');
ALTER TABLE job DROP footprint;
DROP INDEX IF EXISTS jobs_cluster;
DROP INDEX IF EXISTS jobs_cluster_user;
DROP INDEX IF EXISTS jobs_cluster_project;
DROP INDEX IF EXISTS jobs_cluster_subcluster;
DROP INDEX IF EXISTS jobs_cluster_starttime;
DROP INDEX IF EXISTS jobs_cluster_duration;
DROP INDEX IF EXISTS jobs_cluster_numnodes;
DROP INDEX IF EXISTS jobs_cluster_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_numacc;
DROP INDEX IF EXISTS jobs_cluster_energy;
DROP INDEX IF EXISTS jobs_cluster_partition;
DROP INDEX IF EXISTS jobs_cluster_partition_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_energy;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_partition_jobstate_energy;
DROP INDEX IF EXISTS jobs_cluster_jobstate;
DROP INDEX IF EXISTS jobs_cluster_jobstate_user;
DROP INDEX IF EXISTS jobs_cluster_jobstate_project;
DROP INDEX IF EXISTS jobs_cluster_jobstate_starttime;
DROP INDEX IF EXISTS jobs_cluster_jobstate_duration;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_cluster_jobstate_numacc;
DROP INDEX IF EXISTS jobs_cluster_jobstate_energy;
DROP INDEX IF EXISTS jobs_user;
DROP INDEX IF EXISTS jobs_user_starttime;
DROP INDEX IF EXISTS jobs_user_duration;
DROP INDEX IF EXISTS jobs_user_numnodes;
DROP INDEX IF EXISTS jobs_user_numhwthreads;
DROP INDEX IF EXISTS jobs_user_numacc;
DROP INDEX IF EXISTS jobs_user_energy;
DROP INDEX IF EXISTS jobs_project;
DROP INDEX IF EXISTS jobs_project_user;
DROP INDEX IF EXISTS jobs_project_starttime;
DROP INDEX IF EXISTS jobs_project_duration;
DROP INDEX IF EXISTS jobs_project_numnodes;
DROP INDEX IF EXISTS jobs_project_numhwthreads;
DROP INDEX IF EXISTS jobs_project_numacc;
DROP INDEX IF EXISTS jobs_project_energy;
DROP INDEX IF EXISTS jobs_jobstate;
DROP INDEX IF EXISTS jobs_jobstate_user;
DROP INDEX IF EXISTS jobs_jobstate_project;
DROP INDEX IF EXISTS jobs_jobstate_starttime;
DROP INDEX IF EXISTS jobs_jobstate_duration;
DROP INDEX IF EXISTS jobs_jobstate_numnodes;
DROP INDEX IF EXISTS jobs_jobstate_numhwthreads;
DROP INDEX IF EXISTS jobs_jobstate_numacc;
DROP INDEX IF EXISTS jobs_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_cluster_arrayjobid_starttime;
DROP INDEX IF EXISTS jobs_starttime;
DROP INDEX IF EXISTS jobs_duration;
DROP INDEX IF EXISTS jobs_numnodes;
DROP INDEX IF EXISTS jobs_numhwthreads;
DROP INDEX IF EXISTS jobs_numacc;
DROP INDEX IF EXISTS jobs_energy;
DROP INDEX IF EXISTS jobs_duration_starttime;
DROP INDEX IF EXISTS jobs_numnodes_starttime;
DROP INDEX IF EXISTS jobs_numhwthreads_starttime;
DROP INDEX IF EXISTS jobs_numacc_starttime;
DROP INDEX IF EXISTS jobs_energy_starttime;

View File

@@ -0,0 +1,142 @@
DROP INDEX IF EXISTS job_stats;
DROP INDEX IF EXISTS job_by_user;
DROP INDEX IF EXISTS job_by_starttime;
DROP INDEX IF EXISTS job_by_job_id;
DROP INDEX IF EXISTS job_list;
DROP INDEX IF EXISTS job_list_user;
DROP INDEX IF EXISTS job_list_users;
DROP INDEX IF EXISTS job_list_users_start;
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
-- Do not use reserved keywords anymore
ALTER TABLE "user" RENAME TO hpc_user;
ALTER TABLE job RENAME COLUMN "user" TO hpc_user;
ALTER TABLE job RENAME COLUMN "partition" TO cluster_partition;
UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;
-- Indices for: Single filters, combined filters, sorting, sorting with filters
-- Cluster Filter
CREATE INDEX IF NOT EXISTS jobs_cluster ON job (cluster);
CREATE INDEX IF NOT EXISTS jobs_cluster_user ON job (cluster, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_project ON job (cluster, project);
CREATE INDEX IF NOT EXISTS jobs_cluster_subcluster ON job (cluster, subcluster);
-- Cluster Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_starttime ON job (cluster, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_duration ON job (cluster, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_numnodes ON job (cluster, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_numhwthreads ON job (cluster, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_numacc ON job (cluster, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_energy ON job (cluster, energy);
-- Cluster+Partition Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition ON job (cluster, cluster_partition);
-- Cluster+Partition Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_starttime ON job (cluster, cluster_partition, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_duration ON job (cluster, cluster_partition, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numnodes ON job (cluster, cluster_partition, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numhwthreads ON job (cluster, cluster_partition, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_numacc ON job (cluster, cluster_partition, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_energy ON job (cluster, cluster_partition, energy);
-- Cluster+Partition+Jobstate Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate ON job (cluster, cluster_partition, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_user ON job (cluster, cluster_partition, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_project ON job (cluster, cluster_partition, job_state, project);
-- Cluster+Partition+Jobstate Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_starttime ON job (cluster, cluster_partition, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_duration ON job (cluster, cluster_partition, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numnodes ON job (cluster, cluster_partition, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numhwthreads ON job (cluster, cluster_partition, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_numacc ON job (cluster, cluster_partition, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_partition_jobstate_energy ON job (cluster, cluster_partition, job_state, energy);
-- Cluster+JobState Filter
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate ON job (cluster, job_state);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_user ON job (cluster, job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_project ON job (cluster, job_state, project);
-- Cluster+JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_starttime ON job (cluster, job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_duration ON job (cluster, job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numnodes ON job (cluster, job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numhwthreads ON job (cluster, job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_numacc ON job (cluster, job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_cluster_jobstate_energy ON job (cluster, job_state, energy);
-- User Filter
CREATE INDEX IF NOT EXISTS jobs_user ON job (hpc_user);
-- User Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_user_starttime ON job (hpc_user, start_time);
CREATE INDEX IF NOT EXISTS jobs_user_duration ON job (hpc_user, duration);
CREATE INDEX IF NOT EXISTS jobs_user_numnodes ON job (hpc_user, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_user_numhwthreads ON job (hpc_user, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_user_numacc ON job (hpc_user, num_acc);
CREATE INDEX IF NOT EXISTS jobs_user_energy ON job (hpc_user, energy);
-- Project Filter
CREATE INDEX IF NOT EXISTS jobs_project ON job (project);
CREATE INDEX IF NOT EXISTS jobs_project_user ON job (project, hpc_user);
-- Project Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_project_starttime ON job (project, start_time);
CREATE INDEX IF NOT EXISTS jobs_project_duration ON job (project, duration);
CREATE INDEX IF NOT EXISTS jobs_project_numnodes ON job (project, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_project_numhwthreads ON job (project, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_project_numacc ON job (project, num_acc);
CREATE INDEX IF NOT EXISTS jobs_project_energy ON job (project, energy);
-- JobState Filter
CREATE INDEX IF NOT EXISTS jobs_jobstate ON job (job_state);
CREATE INDEX IF NOT EXISTS jobs_jobstate_user ON job (job_state, hpc_user);
CREATE INDEX IF NOT EXISTS jobs_jobstate_project ON job (job_state, project);
CREATE INDEX IF NOT EXISTS jobs_jobstate_cluster ON job (job_state, cluster);
-- JobState Filter Sorting
CREATE INDEX IF NOT EXISTS jobs_jobstate_starttime ON job (job_state, start_time);
CREATE INDEX IF NOT EXISTS jobs_jobstate_duration ON job (job_state, duration);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numnodes ON job (job_state, num_nodes);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numhwthreads ON job (job_state, num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_jobstate_numacc ON job (job_state, num_acc);
CREATE INDEX IF NOT EXISTS jobs_jobstate_energy ON job (job_state, energy);
-- ArrayJob Filter
CREATE INDEX IF NOT EXISTS jobs_arrayjobid_starttime ON job (array_job_id, start_time);
CREATE INDEX IF NOT EXISTS jobs_cluster_arrayjobid_starttime ON job (cluster, array_job_id, start_time);
-- Sorting without active filters
CREATE INDEX IF NOT EXISTS jobs_starttime ON job (start_time);
CREATE INDEX IF NOT EXISTS jobs_duration ON job (duration);
CREATE INDEX IF NOT EXISTS jobs_numnodes ON job (num_nodes);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads ON job (num_hwthreads);
CREATE INDEX IF NOT EXISTS jobs_numacc ON job (num_acc);
CREATE INDEX IF NOT EXISTS jobs_energy ON job (energy);
-- Single filters with default starttime sorting
CREATE INDEX IF NOT EXISTS jobs_duration_starttime ON job (duration, start_time);
CREATE INDEX IF NOT EXISTS jobs_numnodes_starttime ON job (num_nodes, start_time);
CREATE INDEX IF NOT EXISTS jobs_numhwthreads_starttime ON job (num_hwthreads, start_time);
CREATE INDEX IF NOT EXISTS jobs_numacc_starttime ON job (num_acc, start_time);
CREATE INDEX IF NOT EXISTS jobs_energy_starttime ON job (energy, start_time);
-- Optimize DB index usage
PRAGMA optimize;

View File

@@ -1,375 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
// SecurityCheck-less, private: Returns a list of jobs matching the provided filters. page and order are optional-
func (r *JobRepository) queryJobs(
query sq.SelectBuilder,
filters []*model.JobFilter,
page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) {
if order != nil {
field := toSnakeCase(order.Field)
switch order.Order {
case model.SortDirectionEnumAsc:
query = query.OrderBy(fmt.Sprintf("job.%s ASC", field))
case model.SortDirectionEnumDesc:
query = query.OrderBy(fmt.Sprintf("job.%s DESC", field))
default:
return nil, errors.New("REPOSITORY/QUERY > invalid sorting order")
}
}
if page != nil && page.ItemsPerPage != -1 {
limit := uint64(page.ItemsPerPage)
query = query.Offset((uint64(page.Page) - 1) * limit).Limit(limit)
}
for _, f := range filters {
query = BuildWhereClause(f, query)
}
sql, args, err := query.ToSql()
if err != nil {
log.Warn("Error while converting query to sql")
return nil, err
}
log.Debugf("SQL query: `%s`, args: %#v", sql, args)
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
jobs := make([]*schema.Job, 0, 50)
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows (Jobs)")
return nil, err
}
jobs = append(jobs, job)
}
return jobs, nil
}
// testFunction for queryJobs
func (r *JobRepository) testQueryJobs(
filters []*model.JobFilter,
page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) {
return r.queryJobs(sq.Select(jobColumns...).From("job"),
filters, page, order)
}
// Public function with added securityCheck, calls private queryJobs function above
func (r *JobRepository) QueryJobs(
ctx context.Context,
filters []*model.JobFilter,
page *model.PageRequest,
order *model.OrderByInput) ([]*schema.Job, error) {
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("job"))
if qerr != nil {
return nil, qerr
}
return r.queryJobs(query,
filters, page, order)
}
// SecurityCheck-less, private: returns a list of minimal job information (DB-ID and jobId) of shared jobs for link-building based the provided filters.
func (r *JobRepository) queryJobLinks(
query sq.SelectBuilder,
filters []*model.JobFilter) ([]*model.JobLink, error) {
for _, f := range filters {
query = BuildWhereClause(f, query)
}
sql, args, err := query.ToSql()
if err != nil {
log.Warn("Error while converting query to sql")
return nil, err
}
log.Debugf("SQL query: `%s`, args: %#v", sql, args)
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
log.Error("Error while running query")
return nil, err
}
jobLinks := make([]*model.JobLink, 0, 50)
for rows.Next() {
jobLink, err := scanJobLink(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows (JobLinks)")
return nil, err
}
jobLinks = append(jobLinks, jobLink)
}
return jobLinks, nil
}
// testFunction for queryJobLinks
func (r *JobRepository) testQueryJobLinks(
filters []*model.JobFilter) ([]*model.JobLink, error) {
return r.queryJobLinks(sq.Select(jobColumns...).From("job"), filters)
}
func (r *JobRepository) QueryJobLinks(
ctx context.Context,
filters []*model.JobFilter) ([]*model.JobLink, error) {
query, qerr := SecurityCheck(ctx, sq.Select("job.id", "job.job_id").From("job"))
if qerr != nil {
return nil, qerr
}
return r.queryJobLinks(query, filters)
}
// SecurityCheck-less, private: Returns the number of jobs matching the filters
func (r *JobRepository) countJobs(query sq.SelectBuilder,
filters []*model.JobFilter) (int, error) {
for _, f := range filters {
query = BuildWhereClause(f, query)
}
sql, args, err := query.ToSql()
if err != nil {
log.Warn("Error while converting query to sql")
return 0, nil
}
log.Debugf("SQL query: `%s`, args: %#v", sql, args)
var count int
if err := query.RunWith(r.DB).Scan(&count); err != nil {
return 0, err
}
return count, nil
}
// testFunction for countJobs
func (r *JobRepository) testCountJobs(
filters []*model.JobFilter) (int, error) {
return r.countJobs(sq.Select("count(*)").From("job"), filters)
}
// Public function with added securityCheck, calls private countJobs function above
func (r *JobRepository) CountJobs(
ctx context.Context,
filters []*model.JobFilter) (int, error) {
query, qerr := SecurityCheck(ctx, sq.Select("count(*)").From("job"))
if qerr != nil {
return 0, qerr
}
return r.countJobs(query, filters)
}
func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (queryOut sq.SelectBuilder, err error) {
user := auth.GetUser(ctx)
if user == nil {
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user context is nil!")
} else if user.HasAnyRole([]auth.Role{auth.RoleAdmin, auth.RoleSupport}) { // Admin & Co. : All jobs
return query, nil
} else if user.HasRole(auth.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs
if len(user.Projects) != 0 {
return query.Where(sq.Or{sq.Eq{"job.project": user.Projects}, sq.Eq{"job.user": user.Username}}), nil
} else {
log.Infof("Manager-User '%s' has no defined projects to lookup! Query only personal jobs ...", user.Username)
return query.Where("job.user = ?", user.Username), nil
}
} else if user.HasRole(auth.RoleUser) { // User : Only personal jobs
return query.Where("job.user = ?", user.Username), nil
} else {
// Shortterm compatibility: Return User-Query if no roles:
return query.Where("job.user = ?", user.Username), nil
// // On the longterm: Return Error instead of fallback:
// var qnil sq.SelectBuilder
// return qnil, fmt.Errorf("user '%s' with unknown roles [%#v]", user.Username, user.Roles)
}
}
// Build a sq.SelectBuilder out of a schema.JobFilter.
func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.SelectBuilder {
if filter.Tags != nil {
query = query.Join("jobtag ON jobtag.job_id = job.id").Where(sq.Eq{"jobtag.tag_id": filter.Tags})
}
if filter.JobID != nil {
query = buildStringCondition("job.job_id", filter.JobID, query)
}
if filter.ArrayJobID != nil {
query = query.Where("job.array_job_id = ?", *filter.ArrayJobID)
}
if filter.User != nil {
query = buildStringCondition("job.user", filter.User, query)
}
if filter.Project != nil {
query = buildStringCondition("job.project", filter.Project, query)
}
if filter.JobName != nil {
query = buildStringCondition("job.meta_data", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.partition", filter.Partition, query)
}
if filter.StartTime != nil {
query = buildTimeCondition("job.start_time", filter.StartTime, query)
}
if filter.Duration != nil {
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
query = query.Where("(CASE WHEN job.job_state = 'running' THEN (? - job.start_time) ELSE job.duration END) BETWEEN ? AND ?", now, filter.Duration.From, filter.Duration.To)
}
if filter.MinRunningFor != nil {
now := time.Now().Unix() // There does not seam to be a portable way to get the current unix timestamp accross different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
}
if filter.State != nil {
states := make([]string, len(filter.State))
for i, val := range filter.State {
states[i] = string(val)
}
query = query.Where(sq.Eq{"job.job_state": states})
}
if filter.NumNodes != nil {
query = buildIntCondition("job.num_nodes", filter.NumNodes, query)
}
if filter.NumAccelerators != nil {
query = buildIntCondition("job.num_acc", filter.NumAccelerators, query)
}
if filter.NumHWThreads != nil {
query = buildIntCondition("job.num_hwthreads", filter.NumHWThreads, query)
}
if filter.FlopsAnyAvg != nil {
query = buildFloatCondition("job.flops_any_avg", filter.FlopsAnyAvg, query)
}
if filter.MemBwAvg != nil {
query = buildFloatCondition("job.mem_bw_avg", filter.MemBwAvg, query)
}
if filter.LoadAvg != nil {
query = buildFloatCondition("job.load_avg", filter.LoadAvg, query)
}
if filter.MemUsedMax != nil {
query = buildFloatCondition("job.mem_used_max", filter.MemUsedMax, query)
}
// Shared Jobs Query
if filter.Exclusive != nil {
query = query.Where("job.exclusive = ?", *filter.Exclusive)
}
if filter.SharedNode != nil {
query = buildStringCondition("job.resources", filter.SharedNode, query)
}
if filter.SelfJobID != nil {
query = buildStringCondition("job.job_id", filter.SelfJobID, query)
}
if filter.SelfStartTime != nil && filter.SelfDuration != nil {
start := filter.SelfStartTime.Unix() + 10 // There does not seem to be a portable way to get the current unix timestamp accross different DBs.
end := start + int64(*filter.SelfDuration) - 20
query = query.Where("((job.start_time BETWEEN ? AND ?) OR ((job.start_time + job.duration) BETWEEN ? AND ?))", start, end, start, end)
}
return query
}
func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
if cond.From != nil && cond.To != nil {
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
} else if cond.From != nil {
return query.Where("? <= "+field, cond.From.Unix())
} else if cond.To != nil {
return query.Where(field+" <= ?", cond.To.Unix())
} else {
return query
}
}
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildStringCondition(field string, cond *model.StringInput, query sq.SelectBuilder) sq.SelectBuilder {
if cond.Eq != nil {
return query.Where(field+" = ?", *cond.Eq)
}
if cond.Neq != nil {
return query.Where(field+" != ?", *cond.Neq)
}
if cond.StartsWith != nil {
return query.Where(field+" LIKE ?", fmt.Sprint(*cond.StartsWith, "%"))
}
if cond.EndsWith != nil {
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.EndsWith))
}
if cond.Contains != nil {
return query.Where(field+" LIKE ?", fmt.Sprint("%", *cond.Contains, "%"))
}
if cond.In != nil {
queryUsers := make([]string, len(cond.In))
for i, val := range cond.In {
queryUsers[i] = val
}
return query.Where(sq.Or{sq.Eq{"job.user": queryUsers}})
}
return query
}
var matchFirstCap = regexp.MustCompile("(.)([A-Z][a-z]+)")
var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])")
func toSnakeCase(str string) string {
for _, c := range str {
if c == '\'' || c == '\\' {
log.Panic("toSnakeCase() attack vector!")
}
}
str = strings.ReplaceAll(str, "'", "")
str = strings.ReplaceAll(str, "\\", "")
snake := matchFirstCap.ReplaceAllString(str, "${1}_${2}")
snake = matchAllCap.ReplaceAllString(snake, "${1}_${2}")
return strings.ToLower(snake)
}

Some files were not shown because too many files have changed in this diff Show More