25 Commits

Author SHA1 Message Date
Jan Eitzinger
ab08600486 Merge pull request #213 from ClusterCockpit/hotfix
Hotfix
2023-09-07 16:39:01 +02:00
7a5ccff6da fix: Remove port before IP check 2023-09-07 16:36:47 +02:00
a407a5cf01 Add note on apiAllowedIPs to Release Notes 2023-09-07 15:27:46 +02:00
2b3e2f25ec fix: Add correct duration string for max-age option 2023-09-07 15:25:22 +02:00
ed5ecbd914 fix: Restructure swagger docs 2023-09-07 15:14:09 +02:00
2d4759114e Add Release Notes link to release page 2023-09-07 14:33:22 +02:00
c68b9fec42 fix: Add documentation for apiAllowedIPs option 2023-09-07 14:03:41 +02:00
Jan Eitzinger
0f34c8cac6 Merge pull request #212 from ClusterCockpit/moebiusband73-patch-1
Update README.md
2023-09-06 14:02:16 +02:00
Jan Eitzinger
d388a45630 Update README.md 2023-09-06 13:56:55 +02:00
Jan Eitzinger
84b63af080 Merge pull request #211 from ClusterCockpit/uplot_roofline_scatter
Uplot roofline scatter
2023-09-05 15:23:06 +02:00
20902f842d Merge branch 'uplot_roofline_scatter' of github.com:ClusterCockpit/cc-backend into uplot_roofline_scatter 2023-09-05 15:18:38 +02:00
df7217f79c Change margins 2023-09-05 15:17:39 +02:00
Christoph Kluge
bd6f38b4f3 job view layout fixes, fix polar plot axis scales 2023-09-05 15:15:09 +02:00
Christoph Kluge
827a85412e Fix polar and roofline sizes in jobView
- add option for adaptable roofline sizing
2023-09-05 14:55:36 +02:00
Christoph Kluge
c4a9fcc1ca feat: Implemented rooflineplot with uPlot 2023-09-05 12:00:58 +02:00
Christoph Kluge
0993549cac Remove in-dev uplot scatter from this branch 2023-09-05 11:51:50 +02:00
Christoph Kluge
1b8c4e293c Change to prod data, allow and handle null data
- fix errors regarding render timing
- always collect time info in transFormData function
- remove size from polar plot
2023-09-05 11:46:34 +02:00
Christoph Kluge
b449b77b95 Rename dev component, separate rooflineHeatmap
- moved roofline helper functions to utils
2023-09-05 10:01:34 +02:00
Christoph Kluge
f235b1a99c Allow render of time information as color gradient 2023-09-05 09:19:43 +02:00
Christoph Kluge
b2b4beaeaa Finish direct data render roofplot demo 2023-09-04 16:31:47 +02:00
Christoph Kluge
8d7f942de4 Render loglog scatter, fix data format, start draw 2023-09-04 12:53:38 +02:00
Christoph Kluge
c1b5134627 Reduce uplot example code to common
denominator
2023-09-04 10:37:20 +02:00
Christoph Kluge
f5c43d60d3 initial commit for rooflineuplot 2023-09-01 13:12:55 +02:00
Christoph Kluge
69ee19bed0 fix: include running jobs case in statsQueries 2023-09-01 10:23:14 +02:00
Christoph Kluge
4d7819802d fix typo 2023-09-01 10:13:26 +02:00
31 changed files with 659 additions and 1924 deletions

View File

@@ -101,5 +101,6 @@ release:
draft: false
footer: |
Supports job archive version 1 and database version 6.
Please check out the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for further details on breaking changes.
# vim: set ts=2 sw=2 tw=0 fo=cnqoj

View File

@@ -2,7 +2,7 @@ TARGET = ./cc-backend
VAR = ./var
CFG = config.json .env
FRONTEND = ./web/frontend
VERSION = 1.2.0
VERSION = 1.2.1
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}'

View File

@@ -1,3 +1,6 @@
# NOTE
Please have a look at the [Release Notes](https://github.com/ClusterCockpit/cc-backend/blob/master/ReleaseNotes.md) for breaking changes!
# ClusterCockpit REST and GraphQL API backend
[![Build](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml/badge.svg)](https://github.com/ClusterCockpit/cc-backend/actions/workflows/test.yml)

View File

@@ -1,4 +1,4 @@
# `cc-backend` version 1.2.0
# `cc-backend` version 1.2.1
Supports job archive version 1 and database version 6.
@@ -25,6 +25,10 @@ is not the number of cores the core hours will be too high by a factor!
validity. Some key names have changed, please refer to
[config documentation](./configs/README.md) for details.
* The following API endpoints are only accessible from IPs registered using the apiAllowedIPs configuration option:
- `/users/` [GET, POST, DELETE]
- `/user/{id}` [POST]
** NOTE **
If you are using the sqlite3 backend the `PRAGMA` option `foreign_keys` must be
explicitly set to ON. If using the sqlite3 console it is per default set to

View File

@@ -29,7 +29,7 @@
"application/json"
],
"tags": [
"query"
"Job query"
],
"summary": "Lists all jobs",
"parameters": [
@@ -127,7 +127,7 @@
"application/json"
],
"tags": [
"remove"
"Job remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
@@ -199,7 +199,7 @@
"application/json"
],
"tags": [
"remove"
"Job remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
@@ -269,7 +269,7 @@
"application/json"
],
"tags": [
"remove"
"Job remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
@@ -342,7 +342,7 @@
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Adds a new job as \"running\"",
"parameters": [
@@ -408,7 +408,7 @@
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
@@ -483,7 +483,7 @@
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
@@ -565,7 +565,7 @@
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Adds one or more tags to a job",
"parameters": [
@@ -638,7 +638,7 @@
"application/json"
],
"tags": [
"query"
"Job query"
],
"summary": "Get complete job meta and metric data",
"parameters": [
@@ -715,7 +715,7 @@
"ApiKeyAuth": []
}
],
"description": "Modifies user defined by username (id) in one of four possible ways.\nIf more than one formValue is set then only the highest priority field is used.",
"description": "Modifies user defined by username (id) in one of four possible ways.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
@@ -723,7 +723,7 @@
"text/plain"
],
"tags": [
"add and modify"
"User"
],
"summary": "Updates an existing user",
"parameters": [
@@ -820,12 +820,12 @@
"ApiKeyAuth": []
}
],
"description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.",
"description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"produces": [
"application/json"
],
"tags": [
"query"
"User"
],
"summary": "Returns a list of users",
"parameters": [
@@ -879,7 +879,7 @@
"ApiKeyAuth": []
}
],
"description": "User specified in form data will be saved to database.",
"description": "User specified in form data will be saved to database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
@@ -887,7 +887,7 @@
"text/plain"
],
"tags": [
"add and modify"
"User"
],
"summary": "Adds a new user",
"parameters": [
@@ -983,7 +983,7 @@
"ApiKeyAuth": []
}
],
"description": "User defined by username in form data will be deleted from database.",
"description": "User defined by username in form data will be deleted from database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
@@ -991,7 +991,7 @@
"text/plain"
],
"tags": [
"remove"
"User"
],
"summary": "Deletes a user",
"parameters": [
@@ -1757,10 +1757,5 @@
"name": "X-Auth-Token",
"in": "header"
}
},
"tags": [
{
"name": "Job API"
}
]
}
}

View File

@@ -607,7 +607,7 @@ paths:
- ApiKeyAuth: []
summary: Lists all jobs
tags:
- query
- Job query
/jobs/{id}:
post:
consumes:
@@ -665,7 +665,7 @@ paths:
- ApiKeyAuth: []
summary: Get complete job meta and metric data
tags:
- query
- Job query
/jobs/delete_job/:
delete:
consumes:
@@ -715,7 +715,7 @@ paths:
- ApiKeyAuth: []
summary: Remove a job from the sql database
tags:
- remove
- Job remove
/jobs/delete_job/{id}:
delete:
description: Job to remove is specified by database ID. This will not remove
@@ -762,7 +762,7 @@ paths:
- ApiKeyAuth: []
summary: Remove a job from the sql database
tags:
- remove
- Job remove
/jobs/delete_job_before/{ts}:
delete:
description: Remove all jobs with start time before timestamp. The jobs will
@@ -809,7 +809,7 @@ paths:
- ApiKeyAuth: []
summary: Remove a job from the sql database
tags:
- remove
- Job remove
/jobs/start_job/:
post:
consumes:
@@ -856,7 +856,7 @@ paths:
- ApiKeyAuth: []
summary: Adds a new job as "running"
tags:
- add and modify
- Job add and modify
/jobs/stop_job/:
post:
description: |-
@@ -905,7 +905,7 @@ paths:
- ApiKeyAuth: []
summary: Marks job as completed and triggers archiving
tags:
- add and modify
- Job add and modify
/jobs/stop_job/{id}:
post:
consumes:
@@ -961,7 +961,7 @@ paths:
- ApiKeyAuth: []
summary: Marks job as completed and triggers archiving
tags:
- add and modify
- Job add and modify
/jobs/tag_job/{id}:
post:
consumes:
@@ -1010,7 +1010,7 @@ paths:
- ApiKeyAuth: []
summary: Adds one or more tags to a job
tags:
- add and modify
- Job add and modify
/user/{id}:
post:
consumes:
@@ -1018,6 +1018,7 @@ paths:
description: |-
Modifies user defined by username (id) in one of four possible ways.
If more than one formValue is set then only the highest priority field is used.
Only accessible from IPs registered with apiAllowedIPs configuration option.
parameters:
- description: Database ID of User
in: path
@@ -1083,12 +1084,14 @@ paths:
- ApiKeyAuth: []
summary: Updates an existing user
tags:
- add and modify
- User
/users/:
delete:
consumes:
- multipart/form-data
description: User defined by username in form data will be deleted from database.
description: |-
User defined by username in form data will be deleted from database.
Only accessible from IPs registered with apiAllowedIPs configuration option.
parameters:
- description: User ID to delete
in: formData
@@ -1124,11 +1127,12 @@ paths:
- ApiKeyAuth: []
summary: Deletes a user
tags:
- remove
- User
get:
description: |-
Returns a JSON-encoded list of users.
Required query-parameter defines if all users or only users with additional special roles are returned.
Only accessible from IPs registered with apiAllowedIPs configuration option.
parameters:
- description: If returned list should contain all users or only users with
additional special roles
@@ -1165,11 +1169,13 @@ paths:
- ApiKeyAuth: []
summary: Returns a list of users
tags:
- query
- User
post:
consumes:
- multipart/form-data
description: User specified in form data will be saved to database.
description: |-
User specified in form data will be saved to database.
Only accessible from IPs registered with apiAllowedIPs configuration option.
parameters:
- description: Unique user ID
in: formData
@@ -1235,12 +1241,10 @@ paths:
- ApiKeyAuth: []
summary: Adds a new user
tags:
- add and modify
- User
securityDefinitions:
ApiKeyAuth:
in: header
name: X-Auth-Token
type: apiKey
swagger: "2.0"
tags:
- name: Job API

View File

@@ -9,6 +9,7 @@ It is supported to set these by means of a `.env` file in the project root.
## Configuration Options
* `addr`: Type string. Address where the http (or https) server will listen on (for example: 'localhost:80'). Default `:8080`.
* `apiAllowedIPs`: Type string array. Addresses from which the secured API endpoints (/users and other auth related endpoints) can be reached
* `user`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
* `group`: Type string. Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.
* `disable-authentication`: Type bool. Disable authentication (for everything: API, Web-UI, ...). Default `false`.

View File

@@ -5,7 +5,7 @@
"path": "./var/job-archive"
},
"jwts": {
"max-age": "2m"
"max-age": "2000h"
},
"clusters": [
{

View File

@@ -43,7 +43,7 @@
"jwts": {
"cookieName": "",
"validateUser": false,
"max-age": "2m",
"max-age": "2000h",
"trustedIssuer": ""
},
"short-running-jobs-duration": 300

18
go.mod
View File

@@ -18,7 +18,6 @@ require (
github.com/influxdata/influxdb-client-go/v2 v2.12.2
github.com/jmoiron/sqlx v1.3.5
github.com/mattn/go-sqlite3 v1.14.16
github.com/nats-io/nats.go v1.28.0
github.com/prometheus/client_golang v1.14.0
github.com/prometheus/common v0.40.0
github.com/qustavo/sqlhooks/v2 v2.1.0
@@ -26,14 +25,12 @@ require (
github.com/swaggo/http-swagger v1.3.3
github.com/swaggo/swag v1.16.1
github.com/vektah/gqlparser/v2 v2.5.8
golang.org/x/crypto v0.13.0
golang.org/x/crypto v0.12.0
golang.org/x/exp v0.0.0-20230510235704-dd950f8aeaea
)
require (
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect
github.com/ClusterCockpit/slurm-rest-client-0_0_37 v0.0.0-20230901125459-dc653ac37420 // indirect
github.com/ClusterCockpit/slurm-rest-client-0_0_38 v0.0.0-20231010134848-707e8b20bde7 // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/agnivade/levenshtein v1.1.1 // indirect
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
@@ -41,7 +38,6 @@ require (
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/containerd v1.6.18 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/deepmap/oapi-codegen v1.12.4 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
@@ -60,7 +56,6 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/jpillora/backoff v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.16.7 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
@@ -69,26 +64,21 @@ require (
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect
github.com/nats-io/nats-server/v2 v2.9.21 // indirect
github.com/nats-io/nkeys v0.4.4 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/opencontainers/image-spec v1.0.3-0.20211202183452-c5a74bcca799 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/procfs v0.9.0 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/stretchr/testify v1.8.4 // indirect
github.com/swaggo/files v1.0.0 // indirect
github.com/urfave/cli/v2 v2.25.7 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
go.uber.org/atomic v1.10.0 // indirect
golang.org/x/mod v0.12.0 // indirect
golang.org/x/net v0.15.0 // indirect
golang.org/x/net v0.14.0 // indirect
golang.org/x/oauth2 v0.5.0 // indirect
golang.org/x/sys v0.12.0 // indirect
golang.org/x/text v0.13.0 // indirect
golang.org/x/sys v0.11.0 // indirect
golang.org/x/text v0.12.0 // indirect
golang.org/x/tools v0.12.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.30.0 // indirect

31
go.sum
View File

@@ -82,12 +82,6 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym
github.com/ClickHouse/clickhouse-go v1.4.3/go.mod h1:EaI/sW7Azgz9UATzd5ZdZHRUhHgv5+JMS9NSr2smCJI=
github.com/ClusterCockpit/cc-units v0.4.0 h1:zP5DOu99GmErW0tCDf0gcLrlWt42RQ9dpoONEOh4cI0=
github.com/ClusterCockpit/cc-units v0.4.0/go.mod h1:3S3PAhAayS3pbgcT4q9Vn9VJw22Op51X0YimtG77zBw=
github.com/ClusterCockpit/slurm-rest-client-0_0_37 v0.0.0-20230901125459-dc653ac37420 h1:AeQY40KrLQmSSyyHdbNdAqgln+0+p1dLag5yspE5M8A=
github.com/ClusterCockpit/slurm-rest-client-0_0_37 v0.0.0-20230901125459-dc653ac37420/go.mod h1:oNgVG2puNj9cNw/KgqLbgE1pPOn8jXORX3ErP58LcAA=
github.com/ClusterCockpit/slurm-rest-client-0_0_38 v0.0.0-20230906120742-0f15562ea666 h1:8PofHcOwEMmeAFqJjvAEgnu7rbRHAwJhd2XJ9u/YxiU=
github.com/ClusterCockpit/slurm-rest-client-0_0_38 v0.0.0-20230906120742-0f15562ea666/go.mod h1:vxaj1my0GNoCXx4bYyOTA/IZP/IOZImtdOIn4T7pCa4=
github.com/ClusterCockpit/slurm-rest-client-0_0_38 v0.0.0-20231010134848-707e8b20bde7 h1:YY/qDtFsp1DOJw/jyobiIBiIh1/yD2IVOdcK7EVEIKs=
github.com/ClusterCockpit/slurm-rest-client-0_0_38 v0.0.0-20231010134848-707e8b20bde7/go.mod h1:vxaj1my0GNoCXx4bYyOTA/IZP/IOZImtdOIn4T7pCa4=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/Masterminds/squirrel v1.5.3 h1:YPpoceAcxuzIljlr5iWpNKaql7hLeG1KLSrhvdHpkZc=
@@ -467,7 +461,6 @@ github.com/go-ldap/ldap/v3 v3.4.4/go.mod h1:fe1MsuN5eJJ1FeLT/LEBVdWfNWKh459R7aXg
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=
github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs=
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
@@ -600,7 +593,6 @@ github.com/golang/snappy v0.0.0-20170215233205-553a64147049/go.mod h1:/XxbfmMg8l
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.1/go.mod h1:xXMiIv4Fb/0kKde4SpL7qlzvu5cMJDRkFDxJfI9uaxA=
@@ -824,8 +816,6 @@ github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdY
github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I=
github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@@ -897,7 +887,6 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr
github.com/maxbrunsfeld/counterfeiter/v6 v6.2.2/go.mod h1:eD9eIE7cdwcMi9rYluz88Jz2VyhSmden33/aXg4oVIY=
github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
github.com/minio/highwayhash v1.0.2 h1:Aak5U0nElisjDCfPSG79Tgzkn2gl66NxOMspRrKnA/g=
github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
@@ -942,15 +931,6 @@ github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/nakagami/firebirdsql v0.0.0-20190310045651-3c02a58cfed8/go.mod h1:86wM1zFnC6/uDBfZGNwB65O+pR2OFi5q/YQaEUid1qA=
github.com/nats-io/jwt/v2 v2.4.1 h1:Y35W1dgbbz2SQUYDPCaclXcuqleVmpbRa7646Jf2EX4=
github.com/nats-io/nats-server/v2 v2.9.21 h1:2TBTh0UDE74eNXQmV4HofsmRSCiVN0TH2Wgrp6BD6fk=
github.com/nats-io/nats-server/v2 v2.9.21/go.mod h1:ozqMZc2vTHcNcblOiXMWIXkf8+0lDGAi5wQcG+O1mHU=
github.com/nats-io/nats.go v1.28.0 h1:Th4G6zdsz2d0OqXdfzKLClo6bOfoI/b1kInhRtFIy5c=
github.com/nats-io/nats.go v1.28.0/go.mod h1:XpbWUlOElGwTYbMR7imivs7jJj9GtK7ypv321Wp6pjc=
github.com/nats-io/nkeys v0.4.4 h1:xvBJ8d69TznjcQl9t6//Q5xXuVhyYiSos6RPtvQNTwA=
github.com/nats-io/nkeys v0.4.4/go.mod h1:XUkxdLPTufzlihbamfzQ7mw/VGx6ObUs+0bN5sNvt64=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/ncw/swift v1.0.47/go.mod h1:23YIA4yWVnGwv2dQlN4bB7egfYX6YLn0Yo/S6zZO/ZM=
github.com/neo4j/neo4j-go-driver v1.8.1-0.20200803113522-b626aa943eba/go.mod h1:ncO5VaFWh0Nrt+4KT4mOZboaczBZcLuHrG+/sUeP8gI=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
@@ -1164,7 +1144,6 @@ github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1F
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/swaggo/files v1.0.0 h1:1gGXVIeUFCS/dta17rnP0iOpr6CXFwKD7EO5ID233e4=
github.com/swaggo/files v1.0.0/go.mod h1:N59U6URJLyU1PQgFqPM7wXLMhJx7QAolnvfQkqO13kc=
@@ -1188,7 +1167,6 @@ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/urfave/cli/v2 v2.25.7 h1:VAzn5oq403l5pHjc4OhD54+XGO9cdKVL/7lDjF+iKUs=
github.com/urfave/cli/v2 v2.25.7/go.mod h1:8qnjx1vcq5s2/wpsqoZFndg2CE5tNFyrTvS6SinrnYQ=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/vektah/gqlparser/v2 v2.5.8 h1:pm6WOnGdzFOCfcQo9L3+xzW51mKrlwTEg4Wr7AH1JW4=
github.com/vektah/gqlparser/v2 v2.5.8/go.mod h1:z8xXUff237NntSuH8mLFijZ+1tjV1swDbpDqjJmk6ME=
github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk=
@@ -1308,8 +1286,6 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk=
golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw=
golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck=
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -1433,8 +1409,6 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14=
golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI=
golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/oauth2 v0.0.0-20180227000427-d7d64896b5ff/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
@@ -1594,8 +1568,6 @@ golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
@@ -1614,8 +1586,6 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc=
golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -1625,7 +1595,6 @@ golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxb
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20220224211638-0e9765cccd65/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View File

@@ -1,4 +1,5 @@
// Code generated by swaggo/swag. DO NOT EDIT
// Code generated by swaggo/swag. DO NOT EDIT.
package api
import "github.com/swaggo/swag"
@@ -35,7 +36,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"query"
"Job query"
],
"summary": "Lists all jobs",
"parameters": [
@@ -133,7 +134,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"remove"
"Job remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
@@ -205,7 +206,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"remove"
"Job remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
@@ -275,7 +276,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"remove"
"Job remove"
],
"summary": "Remove a job from the sql database",
"parameters": [
@@ -348,7 +349,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Adds a new job as \"running\"",
"parameters": [
@@ -414,7 +415,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
@@ -489,7 +490,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
@@ -571,7 +572,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"add and modify"
"Job add and modify"
],
"summary": "Adds one or more tags to a job",
"parameters": [
@@ -644,7 +645,7 @@ const docTemplate = `{
"application/json"
],
"tags": [
"query"
"Job query"
],
"summary": "Get complete job meta and metric data",
"parameters": [
@@ -721,7 +722,7 @@ const docTemplate = `{
"ApiKeyAuth": []
}
],
"description": "Modifies user defined by username (id) in one of four possible ways.\nIf more than one formValue is set then only the highest priority field is used.",
"description": "Modifies user defined by username (id) in one of four possible ways.\nIf more than one formValue is set then only the highest priority field is used.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
@@ -729,7 +730,7 @@ const docTemplate = `{
"text/plain"
],
"tags": [
"add and modify"
"User"
],
"summary": "Updates an existing user",
"parameters": [
@@ -826,12 +827,12 @@ const docTemplate = `{
"ApiKeyAuth": []
}
],
"description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.",
"description": "Returns a JSON-encoded list of users.\nRequired query-parameter defines if all users or only users with additional special roles are returned.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"produces": [
"application/json"
],
"tags": [
"query"
"User"
],
"summary": "Returns a list of users",
"parameters": [
@@ -885,7 +886,7 @@ const docTemplate = `{
"ApiKeyAuth": []
}
],
"description": "User specified in form data will be saved to database.",
"description": "User specified in form data will be saved to database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
@@ -893,7 +894,7 @@ const docTemplate = `{
"text/plain"
],
"tags": [
"add and modify"
"User"
],
"summary": "Adds a new user",
"parameters": [
@@ -989,7 +990,7 @@ const docTemplate = `{
"ApiKeyAuth": []
}
],
"description": "User defined by username in form data will be deleted from database.",
"description": "User defined by username in form data will be deleted from database.\nOnly accessible from IPs registered with apiAllowedIPs configuration option.",
"consumes": [
"multipart/form-data"
],
@@ -997,7 +998,7 @@ const docTemplate = `{
"text/plain"
],
"tags": [
"remove"
"User"
],
"summary": "Deletes a user",
"parameters": [
@@ -1763,12 +1764,7 @@ const docTemplate = `{
"name": "X-Auth-Token",
"in": "header"
}
},
"tags": [
{
"name": "Job API"
}
]
}
}`
// SwaggerInfo holds exported Swagger Info so clients can modify it
@@ -1781,6 +1777,8 @@ var SwaggerInfo = &swag.Spec{
Description: "API for batch job control.",
InfoInstanceName: "swagger",
SwaggerTemplate: docTemplate,
LeftDelim: "{{",
RightDelim: "}}",
}
func init() {

View File

@@ -37,8 +37,6 @@ import (
// @version 1.0.0
// @description API for batch job control.
// @tag.name Job API
// @contact.name ClusterCockpit Project
// @contact.url https://github.com/ClusterCockpit
// @contact.email support@clustercockpit.org
@@ -212,6 +210,10 @@ func securedCheck(r *http.Request) error {
IPAddress = r.RemoteAddr
}
if strings.Contains(IPAddress, ":") {
IPAddress = strings.Split(IPAddress, ":")[0]
}
// check if IP is allowed
if !util.Contains(config.Keys.ApiAllowedIPs, IPAddress) {
return fmt.Errorf("unknown ip: %v", IPAddress)
@@ -223,7 +225,7 @@ func securedCheck(r *http.Request) error {
// getJobs godoc
// @summary Lists all jobs
// @tags query
// @tags Job query
// @description Get a list of all jobs. Filters can be applied using query parameters.
// @description Number of results can be limited by page. Results are sorted by descending startTime.
// @produce json
@@ -369,7 +371,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
// getJobById godoc
// @summary Get complete job meta and metric data
// @tags query
// @tags Job query
// @description Job to get is specified by database ID
// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
// @accept json
@@ -464,7 +466,7 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
// tagJob godoc
// @summary Adds one or more tags to a job
// @tags add and modify
// @tags Job add and modify
// @description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
// @description If tagged job is already finished: Tag will be written directly to respective archive files.
// @accept json
@@ -531,7 +533,7 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
// startJob godoc
// @summary Adds a new job as "running"
// @tags add and modify
// @tags Job add and modify
// @description Job specified in request body will be saved to database as "running" with new DB ID.
// @description Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
// @accept json
@@ -612,7 +614,7 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
// stopJobById godoc
// @summary Marks job as completed and triggers archiving
// @tags add and modify
// @tags Job add and modify
// @description Job to stop is specified by database ID. Only stopTime and final state are required in request body.
// @description Returns full job resource information according to 'JobMeta' scheme.
// @accept json
@@ -669,7 +671,7 @@ func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
// stopJobByRequest godoc
// @summary Marks job as completed and triggers archiving
// @tags add and modify
// @tags Job add and modify
// @description Job to stop is specified by request body. All fields are required in this case.
// @description Returns full job resource information according to 'JobMeta' scheme.
// @produce json
@@ -718,7 +720,7 @@ func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
// deleteJobById godoc
// @summary Remove a job from the sql database
// @tags remove
// @tags Job remove
// @description Job to remove is specified by database ID. This will not remove the job from the job archive.
// @produce json
// @param id path int true "Database ID of Job"
@@ -765,7 +767,7 @@ func (api *RestApi) deleteJobById(rw http.ResponseWriter, r *http.Request) {
// deleteJobByRequest godoc
// @summary Remove a job from the sql database
// @tags remove
// @tags Job remove
// @description Job to delete is specified by request body. All fields are required in this case.
// @accept json
// @produce json
@@ -823,7 +825,7 @@ func (api *RestApi) deleteJobByRequest(rw http.ResponseWriter, r *http.Request)
// deleteJobBefore godoc
// @summary Remove a job from the sql database
// @tags remove
// @tags Job remove
// @description Remove all jobs with start time before timestamp. The jobs will not be removed from the job archive.
// @produce json
// @param ts path int true "Unix epoch timestamp"
@@ -955,8 +957,9 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
// createUser godoc
// @summary Adds a new user
// @tags add and modify
// @tags User
// @description User specified in form data will be saved to database.
// @description Only accessible from IPs registered with apiAllowedIPs configuration option.
// @accept mpfd
// @produce plain
// @param username formData string true "Unique user ID"
@@ -1022,8 +1025,9 @@ func (api *RestApi) createUser(rw http.ResponseWriter, r *http.Request) {
// deleteUser godoc
// @summary Deletes a user
// @tags remove
// @tags User
// @description User defined by username in form data will be deleted from database.
// @description Only accessible from IPs registered with apiAllowedIPs configuration option.
// @accept mpfd
// @produce plain
// @param username formData string true "User ID to delete"
@@ -1058,9 +1062,10 @@ func (api *RestApi) deleteUser(rw http.ResponseWriter, r *http.Request) {
// getUsers godoc
// @summary Returns a list of users
// @tags query
// @tags User
// @description Returns a JSON-encoded list of users.
// @description Required query-parameter defines if all users or only users with additional special roles are returned.
// @description Only accessible from IPs registered with apiAllowedIPs configuration option.
// @produce json
// @param not-just-user query bool true "If returned list should contain all users or only users with additional special roles"
// @success 200 {array} api.ApiReturnedUser "List of users returned successfully"
@@ -1093,9 +1098,10 @@ func (api *RestApi) getUsers(rw http.ResponseWriter, r *http.Request) {
// updateUser godoc
// @summary Updates an existing user
// @tags add and modify
// @tags User
// @description Modifies user defined by username (id) in one of four possible ways.
// @description If more than one formValue is set then only the highest priority field is used.
// @description Only accessible from IPs registered with apiAllowedIPs configuration option.
// @accept mpfd
// @produce plain
// @param id path string true "Database ID of User"

View File

@@ -290,30 +290,6 @@ func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
return scanJob(q.RunWith(r.stmtCache).QueryRow())
}
func (r *JobRepository) FindRunningJobs(cluster string) (map[int64]*schema.Job, error) {
jobs := map[int64]*schema.Job{}
q := sq.Select(jobColumns...).From("job").Where("job.cluster = ?", cluster)
q = q.Where("job.job_state = ?", "running")
rows, err := q.RunWith(r.stmtCache).Query()
if err != nil {
log.Errorf("Error while querying running jobs: %v", err)
return nil, err
}
for rows.Next() {
job, err := scanJob(rows)
if err != nil {
rows.Close()
log.Warn("Error while scanning rows (Jobs)")
return nil, err
}
jobs[job.JobID] = job
}
return jobs, nil
}
func (r *JobRepository) FindConcurrentJobs(
ctx context.Context,
job *schema.Job) (*model.JobLinkResultList, error) {

View File

@@ -70,28 +70,30 @@ func (r *JobRepository) buildStatsQuery(
var query sq.SelectBuilder
castType := r.getCastType()
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
if col != "" {
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s) as totalWalltime", castType),
fmt.Sprintf("CAST(SUM(job.num_nodes) as %s) as totalNodes", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes) / 3600) as %s) as totalNodeHours", castType),
fmt.Sprintf("CAST(SUM(job.num_hwthreads) as %s) as totalCores", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_hwthreads) / 3600) as %s) as totalCoreHours", castType),
fmt.Sprintf("CAST(SUM(job.num_acc) as %s) as totalAccs", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_acc) / 3600) as %s) as totalAccHours", castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s) as totalCores`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
).From("job").GroupBy(col)
} else {
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)",
fmt.Sprintf("CAST(ROUND(SUM(job.duration) / 3600) as %s)", castType),
fmt.Sprintf("CAST(SUM(job.num_nodes) as %s)", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes) / 3600) as %s)", castType),
fmt.Sprintf("CAST(SUM(job.num_hwthreads) as %s)", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_hwthreads) / 3600) as %s)", castType),
fmt.Sprintf("CAST(SUM(job.num_acc) as %s)", castType),
fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_acc) / 3600) as %s)", castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_hwthreads) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s)`, time.Now().Unix(), castType),
).From("job")
}

View File

@@ -1,479 +0,0 @@
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package scheduler
import (
"encoding/json"
"fmt"
"os"
"github.com/ClusterCockpit/cc-backend/pkg/log"
)
type MetricConfig struct {
Name string `json:"name"`
Unit struct {
Base string `json:"base"`
} `json:"unit"`
Scope string `json:"scope"`
Aggregation string `json:"aggregation"`
Timestep int `json:"timestep"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
}
type SubCluster struct {
Name string `json:"name"`
Nodes string `json:"nodes"`
ProcessorType string `json:"processorType"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`
ThreadsPerCore int `json:"threadsPerCore"`
FlopRateScalar struct {
Unit struct {
Base string `json:"base"`
Prefix string `json:"prefix"`
} `json:"unit"`
Value float64 `json:"value"`
} `json:"flopRateScalar"`
FlopRateSimd struct {
Unit struct {
Base string `json:"base"`
Prefix string `json:"prefix"`
} `json:"unit"`
Value float64 `json:"value"`
} `json:"flopRateSimd"`
MemoryBandwidth struct {
Unit struct {
Base string `json:"base"`
Prefix string `json:"prefix"`
} `json:"unit"`
Value float64 `json:"value"`
} `json:"memoryBandwidth"`
Topology struct {
Node []int `json:"node"`
Socket [][]int `json:"socket"`
MemoryDomain [][]int `json:"memoryDomain"`
Core [][]int `json:"core"`
Accelerators []struct {
ID string `json:"id"`
Type string `json:"type"`
Model string `json:"model"`
} `json:"accelerators"`
} `json:"topology"`
}
type ClusterConfig struct {
Name string `json:"name"`
MetricConfig []MetricConfig `json:"metricConfig"`
SubClusters []SubCluster `json:"subClusters"`
}
type Metadata struct {
Plugin struct {
Type string `json:"type"`
Name string `json:"name"`
} `json:"plugin"`
Slurm struct {
Version struct {
Major int `json:"major"`
Micro int `json:"micro"`
Minor int `json:"minor"`
} `json:"version"`
Release string `json:"release"`
} `json:"Slurm"`
}
type JobResource struct {
Nodes string `json:"nodes"`
AllocatedCores int `json:"allocated_cores"`
AllocatedHosts int `json:"allocated_hosts"`
AllocatedNodes []AllocatedNode `json:"allocated_nodes"`
}
type AllocatedNode struct {
Sockets map[string]Socket `json:"sockets"`
Nodename string `json:"nodename"`
CPUsUsed *int `json:"cpus_used"`
MemoryUsed *int `json:"memory_used"`
MemoryAllocated *int `json:"memory_allocated"`
}
type Socket struct {
Cores map[string]string `json:"cores"`
}
type Job struct {
Account string `json:"account"`
AccrueTime int `json:"accrue_time"`
AdminComment string `json:"admin_comment"`
ArrayJobID int64 `json:"array_job_id"`
ArrayTaskID interface{} `json:"array_task_id"`
ArrayMaxTasks int `json:"array_max_tasks"`
ArrayTaskString string `json:"array_task_string"`
AssociationID int `json:"association_id"`
BatchFeatures string `json:"batch_features"`
BatchFlag bool `json:"batch_flag"`
BatchHost string `json:"batch_host"`
Flags []string `json:"flags"`
BurstBuffer string `json:"burst_buffer"`
BurstBufferState string `json:"burst_buffer_state"`
Cluster string `json:"cluster"`
ClusterFeatures string `json:"cluster_features"`
Command string `json:"command"`
Comment string `json:"comment"`
Container string `json:"container"`
Contiguous bool `json:"contiguous"`
CoreSpec interface{} `json:"core_spec"`
ThreadSpec interface{} `json:"thread_spec"`
CoresPerSocket interface{} `json:"cores_per_socket"`
BillableTres interface{} `json:"billable_tres"`
CPUPerTask interface{} `json:"cpus_per_task"`
CPUFrequencyMinimum interface{} `json:"cpu_frequency_minimum"`
CPUFrequencyMaximum interface{} `json:"cpu_frequency_maximum"`
CPUFrequencyGovernor interface{} `json:"cpu_frequency_governor"`
CPUPerTres string `json:"cpus_per_tres"`
Deadline int `json:"deadline"`
DelayBoot int `json:"delay_boot"`
Dependency string `json:"dependency"`
DerivedExitCode int `json:"derived_exit_code"`
EligibleTime int `json:"eligible_time"`
EndTime int64 `json:"end_time"`
ExcludedNodes string `json:"excluded_nodes"`
ExitCode int `json:"exit_code"`
Features string `json:"features"`
FederationOrigin string `json:"federation_origin"`
FederationSiblingsActive string `json:"federation_siblings_active"`
FederationSiblingsViable string `json:"federation_siblings_viable"`
GresDetail []string `json:"gres_detail"`
GroupID int `json:"group_id"`
GroupName string `json:"group_name"`
JobID int64 `json:"job_id"`
JobResources JobResource `json:"job_resources"`
JobState string `json:"job_state"`
LastSchedEvaluation int `json:"last_sched_evaluation"`
Licenses string `json:"licenses"`
MaxCPUs int `json:"max_cpus"`
MaxNodes int `json:"max_nodes"`
MCSLabel string `json:"mcs_label"`
MemoryPerTres string `json:"memory_per_tres"`
Name string `json:"name"`
Nodes string `json:"nodes"`
Nice interface{} `json:"nice"`
TasksPerCore interface{} `json:"tasks_per_core"`
TasksPerNode int `json:"tasks_per_node"`
TasksPerSocket interface{} `json:"tasks_per_socket"`
TasksPerBoard int `json:"tasks_per_board"`
CPUs int32 `json:"cpus"`
NodeCount int32 `json:"node_count"`
Tasks int `json:"tasks"`
HETJobID int `json:"het_job_id"`
HETJobIDSet string `json:"het_job_id_set"`
HETJobOffset int `json:"het_job_offset"`
Partition string `json:"partition"`
MemoryPerNode interface{} `json:"memory_per_node"`
MemoryPerCPU int `json:"memory_per_cpu"`
MinimumCPUsPerNode int `json:"minimum_cpus_per_node"`
MinimumTmpDiskPerNode int `json:"minimum_tmp_disk_per_node"`
PreemptTime int `json:"preempt_time"`
PreSUSTime int `json:"pre_sus_time"`
Priority int `json:"priority"`
Profile interface{} `json:"profile"`
QoS string `json:"qos"`
Reboot bool `json:"reboot"`
RequiredNodes string `json:"required_nodes"`
Requeue bool `json:"requeue"`
ResizeTime int `json:"resize_time"`
RestartCnt int `json:"restart_cnt"`
ResvName string `json:"resv_name"`
Shared *string `json:"shared"`
ShowFlags []string `json:"show_flags"`
SocketsPerBoard int `json:"sockets_per_board"`
SocketsPerNode interface{} `json:"sockets_per_node"`
StartTime int64 `json:"start_time"`
StateDescription string `json:"state_description"`
StateReason string `json:"state_reason"`
StandardError string `json:"standard_error"`
StandardInput string `json:"standard_input"`
StandardOutput string `json:"standard_output"`
SubmitTime int `json:"submit_time"`
SuspendTime int `json:"suspend_time"`
SystemComment string `json:"system_comment"`
TimeLimit int `json:"time_limit"`
TimeMinimum int `json:"time_minimum"`
ThreadsPerCore interface{} `json:"threads_per_core"`
TresBind string `json:"tres_bind"`
TresFreq string `json:"tres_freq"`
TresPerJob string `json:"tres_per_job"`
TresPerNode string `json:"tres_per_node"`
TresPerSocket string `json:"tres_per_socket"`
TresPerTask string `json:"tres_per_task"`
TresReqStr string `json:"tres_req_str"`
TresAllocStr string `json:"tres_alloc_str"`
UserID int `json:"user_id"`
UserName string `json:"user_name"`
Wckey string `json:"wckey"`
CurrentWorkingDirectory string `json:"current_working_directory"`
}
type SlurmPayload struct {
Meta Metadata `json:"meta"`
Errors []interface{} `json:"errors"`
Jobs []Job `json:"jobs"`
}
type DumpedComment struct {
Administrator interface{} `json:"administrator"`
Job interface{} `json:"job"`
System interface{} `json:"system"`
}
type MaxLimits struct {
Running struct {
Tasks int `json:"tasks"`
} `json:"max"`
}
type ArrayInfo struct {
JobID int `json:"job_id"`
Limits MaxLimits `json:"limits"`
Task interface{} `json:"task"`
TaskID interface{} `json:"task_id"`
}
type Association struct {
Account string `json:"account"`
Cluster string `json:"cluster"`
Partition interface{} `json:"partition"`
User string `json:"user"`
}
type TimeInfo struct {
Elapsed int64 `json:"elapsed"`
Eligible int64 `json:"eligible"`
End int64 `json:"end"`
Start int64 `json:"start"`
Submission int64 `json:"submission"`
Suspended int64 `json:"suspended"`
System struct {
Seconds int `json:"seconds"`
Microseconds int `json:"microseconds"`
} `json:"system"`
Limit int `json:"limit"`
Total struct {
Seconds int `json:"seconds"`
Microseconds int `json:"microseconds"`
} `json:"total"`
User struct {
Seconds int `json:"seconds"`
Microseconds int `json:"microseconds"`
} `json:"user"`
}
type ExitCode struct {
Status string `json:"status"`
ReturnCode int `json:"return_code"`
}
type DumpedJob struct {
Account string `json:"account"`
Comment DumpedComment `json:"comment"`
AllocationNodes int `json:"allocation_nodes"`
Array ArrayInfo `json:"array"`
Association Association `json:"association"`
Cluster string `json:"cluster"`
Constraints string `json:"constraints"`
Container interface{} `json:"container"`
DerivedExitCode ExitCode `json:"derived_exit_code"`
Time TimeInfo `json:"time"`
ExitCode ExitCode `json:"exit_code"`
Flags []string `json:"flags"`
Group string `json:"group"`
Het struct {
JobID int `json:"job_id"`
JobOffset interface{} `json:"job_offset"`
} `json:"het"`
JobID int64 `json:"job_id"`
Name string `json:"name"`
MCS struct {
Label string `json:"label"`
} `json:"mcs"`
Nodes string `json:"nodes"`
Partition string `json:"partition"`
Priority int `json:"priority"`
QoS string `json:"qos"`
Required struct {
CPUs int `json:"CPUs"`
Memory int `json:"memory"`
} `json:"required"`
KillRequestUser interface{} `json:"kill_request_user"`
Reservation struct {
ID int `json:"id"`
Name int `json:"name"`
} `json:"reservation"`
State struct {
Current string `json:"current"`
Reason string `json:"reason"`
} `json:"state"`
Steps []struct {
Nodes struct {
List []string `json:"list"`
Count int `json:"count"`
Range string `json:"range"`
} `json:"nodes"`
Tres struct {
Requested struct {
Max []interface{} `json:"max"`
Min []interface{} `json:"min"`
Average []interface{} `json:"average"`
Total []interface{} `json:"total"`
} `json:"requested"`
Consumed struct {
Max []interface{} `json:"max"`
Min []interface{} `json:"min"`
Average []interface{} `json:"average"`
Total []interface{} `json:"total"`
} `json:"consumed"`
Allocated []struct {
Type string `json:"type"`
Name interface{} `json:"name"`
ID int `json:"id"`
Count int `json:"count"`
} `json:"allocated"`
} `json:"tres"`
Time TimeInfo `json:"time"`
ExitCode ExitCode `json:"exit_code"`
Tasks struct {
Count int `json:"count"`
} `json:"tasks"`
PID interface{} `json:"pid"`
CPU struct {
RequestedFrequency struct {
Min int `json:"min"`
Max int `json:"max"`
} `json:"requested_frequency"`
Governor []interface{} `json:"governor"`
} `json:"CPU"`
KillRequestUser interface{} `json:"kill_request_user"`
State string `json:"state"`
Statistics struct {
CPU struct {
ActualFrequency int `json:"actual_frequency"`
} `json:"CPU"`
Energy struct {
Consumed int `json:"consumed"`
} `json:"energy"`
} `json:"statistics"`
Step struct {
JobID int `json:"job_id"`
Het struct {
Component interface{} `json:"component"`
} `json:"het"`
ID string `json:"id"`
Name string `json:"name"`
} `json:"step"`
Task struct {
Distribution string `json:"distribution"`
} `json:"task"`
} `json:"steps"`
Tres struct {
Allocated []struct {
Type string `json:"type"`
Name interface{} `json:"name"`
ID int `json:"id"`
Count int `json:"count"`
} `json:"allocated"`
Requested []struct {
Type string `json:"type"`
Name interface{} `json:"name"`
ID int `json:"id"`
Count int `json:"count"`
} `json:"requested"`
} `json:"tres"`
User string `json:"user"`
Wckey struct {
Wckey string `json:"wckey"`
Flags []string `json:"flags"`
} `json:"wckey"`
WorkingDirectory string `json:"working_directory"`
}
type SlurmDBPayload struct {
Meta Metadata `json:"meta"`
Errors []string `json:"errors"`
Jobs []DumpedJob `json:"jobs"`
}
func DecodeClusterConfig(filename string) (ClusterConfig, error) {
var clusterConfig ClusterConfig
file, err := os.Open(filename)
if err != nil {
log.Errorf("Cluster config file not found. No cores/GPU ids available.")
return clusterConfig, err
}
defer file.Close()
decoder := json.NewDecoder(file)
err = decoder.Decode(&clusterConfig)
if err != nil {
log.Errorf("Error decoding cluster config file: %v", err)
}
log.Printf("Name: %s\n", clusterConfig.Name)
log.Printf("MetricConfig:\n")
for _, metric := range clusterConfig.MetricConfig {
log.Printf(" Name: %s\n", metric.Name)
log.Printf(" Unit Base: %s\n", metric.Unit.Base)
log.Printf(" Scope: %s\n", metric.Scope)
log.Printf(" Aggregation: %s\n", metric.Aggregation)
log.Printf(" Timestep: %d\n", metric.Timestep)
log.Printf(" Peak: %f\n", metric.Peak)
log.Printf(" Normal: %f\n", metric.Normal)
log.Printf(" Caution: %f\n", metric.Caution)
log.Printf(" Alert: %f\n", metric.Alert)
}
log.Printf("SubClusters:\n")
for _, subCluster := range clusterConfig.SubClusters {
log.Printf(" Name: %s\n", subCluster.Name)
log.Printf(" Nodes: %s\n", subCluster.Nodes)
log.Printf(" Processor Type: %s\n", subCluster.ProcessorType)
log.Printf(" Sockets Per Node: %d\n", subCluster.SocketsPerNode)
log.Printf(" Cores Per Socket: %d\n", subCluster.CoresPerSocket)
log.Printf(" Threads Per Core: %d\n", subCluster.ThreadsPerCore)
log.Printf(" Flop Rate Scalar Unit Base: %s\n", subCluster.FlopRateScalar.Unit.Base)
log.Printf(" Flop Rate Scalar Unit Prefix: %s\n", subCluster.FlopRateScalar.Unit.Prefix)
log.Printf(" Flop Rate Scalar Value: %f\n", subCluster.FlopRateScalar.Value)
log.Printf(" Flop Rate Simd Unit Base: %s\n", subCluster.FlopRateSimd.Unit.Base)
log.Printf(" Flop Rate Simd Unit Prefix: %s\n", subCluster.FlopRateSimd.Unit.Prefix)
log.Printf(" Flop Rate Simd Value: %f\n", subCluster.FlopRateSimd.Value)
log.Printf(" Memory Bandwidth Unit Base: %s\n", subCluster.MemoryBandwidth.Unit.Base)
log.Printf(" Memory Bandwidth Unit Prefix: %s\n", subCluster.MemoryBandwidth.Unit.Prefix)
log.Printf(" Memory Bandwidth Value: %f\n", subCluster.MemoryBandwidth.Value)
log.Printf(" Topology Node: %v\n", subCluster.Topology.Node)
log.Printf(" Topology Socket: %v\n", subCluster.Topology.Socket)
log.Printf(" Topology Memory Domain: %v\n", subCluster.Topology.MemoryDomain)
log.Printf(" Topology Core: %v\n", subCluster.Topology.Core)
log.Printf(" Topology Accelerators:\n")
for _, accelerator := range subCluster.Topology.Accelerators {
log.Printf(" ID: %s\n", accelerator.ID)
log.Printf(" Type: %s\n", accelerator.Type)
log.Printf(" Model: %s\n", accelerator.Model)
}
}
return clusterConfig, nil
}
func UnmarshalSlurmPayload(jsonPayload string) (SlurmPayload, error) {
var slurmData SlurmPayload
err := json.Unmarshal([]byte(jsonPayload), &slurmData)
if err != nil {
return slurmData, fmt.Errorf("failed to unmarshal JSON data: %v", err)
}
return slurmData, nil
}

View File

@@ -1,27 +0,0 @@
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package scheduler
import "encoding/json"
type BatchScheduler interface {
Init(rawConfig json.RawMessage) error
Sync()
}
var sd BatchScheduler
func Init(rawConfig json.RawMessage) error {
sd = &SlurmNatsScheduler{}
sd.Init(rawConfig)
return nil
}
func GetHandle() BatchScheduler {
return sd
}

View File

@@ -1,214 +0,0 @@
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package scheduler
import (
"database/sql"
"encoding/json"
"fmt"
"strings"
"sync"
"github.com/ClusterCockpit/cc-backend/internal/importer"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/nats-io/nats.go"
)
type SlurmNatsConfig struct {
URL string `json:"url"`
}
type SlurmNatsScheduler struct {
url string
RepositoryMutex sync.Mutex
JobRepository *repository.JobRepository
}
type StopJobRequest struct {
// Stop Time of job as epoch
StopTime int64 `json:"stopTime" validate:"required" example:"1649763839"`
State schema.JobState `json:"jobState" validate:"required" example:"completed"` // Final job state
JobId *int64 `json:"jobId" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
}
func (sd *SlurmNatsScheduler) startJob(req *schema.JobMeta) {
fmt.Printf("DEBUG: %+v\n", *req)
log.Printf("Server Name: %s - Job ID: %v", req.BaseJob.Cluster, req.BaseJob.JobID)
log.Printf("User: %s - Project: %s", req.BaseJob.User, req.BaseJob.Project)
if req.State == "" {
req.State = schema.JobStateRunning
}
if err := importer.SanityChecks(&req.BaseJob); err != nil {
log.Errorf("Sanity checks failed: %s", err.Error())
return
}
// aquire lock to avoid race condition between API calls
var unlockOnce sync.Once
sd.RepositoryMutex.Lock()
defer unlockOnce.Do(sd.RepositoryMutex.Unlock)
// Check if combination of (job_id, cluster_id, start_time) already exists:
jobs, err := sd.JobRepository.FindAll(&req.JobID, &req.Cluster, nil)
if err != nil && err != sql.ErrNoRows {
log.Errorf("checking for duplicate failed: %s", err.Error())
return
} else if err == nil {
for _, job := range jobs {
if (req.StartTime - job.StartTimeUnix) < 86400 {
log.Errorf("a job with that jobId, cluster and startTime already exists: dbid: %d", job.ID)
return
}
}
}
id, err := sd.JobRepository.Start(req)
if err != nil {
log.Errorf("insert into database failed: %s", err.Error())
return
}
// unlock here, adding Tags can be async
unlockOnce.Do(sd.RepositoryMutex.Unlock)
for _, tag := range req.Tags {
if _, err := sd.JobRepository.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
log.Errorf("adding tag to new job %d failed: %s", id, err.Error())
return
}
}
log.Printf("new job (id: %d): cluster=%s, jobId=%d, user=%s, startTime=%d", id, req.Cluster, req.JobID, req.User, req.StartTime)
}
func (sd *SlurmNatsScheduler) checkAndHandleStopJob(job *schema.Job, req *StopJobRequest) {
// Sanity checks
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
log.Errorf("stopTime must be larger than startTime and only running jobs can be stopped")
return
}
if req.State != "" && !req.State.Valid() {
log.Errorf("invalid job state: %#v", req.State)
return
} else if req.State == "" {
req.State = schema.JobStateCompleted
}
// Mark job as stopped in the database (update state and duration)
job.Duration = int32(req.StopTime - job.StartTime.Unix())
job.State = req.State
if err := sd.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
log.Errorf("marking job as stopped failed: %s", err.Error())
return
}
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime)
// Monitoring is disabled...
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
return
}
// Trigger async archiving
sd.JobRepository.TriggerArchiving(job)
}
func (sd *SlurmNatsScheduler) stopJob(req *StopJobRequest) {
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
// log.Errorf("missing role: %v", auth.GetRoleString(auth.RoleApi))
// return
// }
log.Printf("Server Name: %s - Job ID: %v", *req.Cluster, req.JobId)
// Fetch job (that will be stopped) from db
var job *schema.Job
var err error
if req.JobId == nil {
log.Errorf("the field 'jobId' is required")
return
}
job, err = sd.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
if err != nil {
log.Errorf("finding job failed: %s", err.Error())
return
}
sd.checkAndHandleStopJob(job, req)
}
func (sd *SlurmNatsScheduler) Init(rawConfig json.RawMessage) error {
servers := []string{"nats://127.0.0.1:4222", "nats://127.0.0.1:1223"}
nc, err := nats.Connect(strings.Join(servers, ","))
if err != nil {
log.Fatal(err.Error())
}
defer nc.Close()
getStatusTxt := func(nc *nats.Conn) string {
switch nc.Status() {
case nats.CONNECTED:
return "Connected"
case nats.CLOSED:
return "Closed"
default:
return "Other"
}
}
log.Printf("The connection status is %v\n", getStatusTxt(nc))
ec, err := nats.NewEncodedConn(nc, nats.JSON_ENCODER)
if err != nil {
log.Fatal(err.Error())
}
defer ec.Close()
// Define the object
type encodedMessage struct {
ServerName string
ResponseCode int
}
wg := sync.WaitGroup{}
wg.Add(1)
// Subscribe
if _, err := ec.Subscribe("test", func(s *encodedMessage) {
log.Printf("Server Name: %s - Response Code: %v", s.ServerName, s.ResponseCode)
if s.ResponseCode == 500 {
wg.Done()
}
}); err != nil {
log.Fatal(err.Error())
}
if _, err := ec.Subscribe("startJob", sd.startJob); err != nil {
log.Fatal(err.Error())
}
if _, err := ec.Subscribe("stopJob", sd.stopJob); err != nil {
log.Fatal(err.Error())
}
// Wait for a message to come in
wg.Wait()
return nil
}
func (sd *SlurmNatsScheduler) Sync() {
}

View File

@@ -1,565 +0,0 @@
// Copyright (C) 2023 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package scheduler
import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"os"
"os/exec"
"regexp"
"strconv"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
type SlurmRestSchedulerConfig struct {
URL string `json:"url"`
JobRepository *repository.JobRepository
clusterConfig ClusterConfig
client *http.Client
nodeClusterMap map[string]string
clusterPCIEAddressMap map[string][]string
}
func (cfg *SlurmRestSchedulerConfig) queryDB(qtime int64, clusterName string) ([]interface{}, error) {
apiEndpoint := "/slurmdb/v0.0.38/jobs"
// Construct the query parameters
queryParams := url.Values{}
queryParams.Set("users", "user1,user2")
queryParams.Set("submit_time", "2023-01-01T00:00:00")
// Add the query parameters to the API endpoint
apiEndpoint += "?" + queryParams.Encode()
// Create a new HTTP GET request
req, err := http.NewRequest("GET", apiEndpoint, nil)
if err != nil {
log.Errorf("Error creating request: %v", err)
}
// Send the request
resp, err := cfg.client.Do(req)
if err != nil {
log.Errorf("Error sending request: %v", err)
}
defer resp.Body.Close()
// Check the response status code
if resp.StatusCode != http.StatusOK {
log.Errorf("API request failed with status: %v", resp.Status)
}
// Read the response body
// Here you can parse the response body as needed
// For simplicity, let's just print the response body
var dbOutput []byte
_, err = resp.Body.Read(dbOutput)
if err != nil {
log.Errorf("Error reading response body: %v", err)
}
log.Errorf("API response: %v", string(dbOutput))
dataJobs := make(map[string]interface{})
err = json.Unmarshal(dbOutput, &dataJobs)
if err != nil {
log.Errorf("Error parsing JSON response: %v", err)
os.Exit(1)
}
if _, ok := dataJobs["jobs"]; !ok {
log.Errorf("ERROR: jobs not found - response incomplete")
os.Exit(1)
}
jobs, _ := dataJobs["jobs"].([]interface{})
return jobs, nil
}
func (cfg *SlurmRestSchedulerConfig) fetchJobs() (SlurmPayload, error) {
var ctlOutput []byte
apiEndpoint := "http://:8080/slurm/v0.0.38/jobs"
// Create a new HTTP GET request with query parameters
req, err := http.NewRequest("GET", apiEndpoint, nil)
if err != nil {
log.Errorf("Error creating request: %v", err)
}
// Send the request
resp, err := cfg.client.Do(req)
if err != nil {
log.Errorf("Error sending request: %v", err)
}
defer resp.Body.Close()
// Check the response status code
if resp.StatusCode != http.StatusOK {
log.Errorf("API request failed with status: %v", resp.Status)
}
_, err = resp.Body.Read(ctlOutput)
log.Printf("Received JSON Data: %v", ctlOutput)
if err != nil {
log.Errorf("Error reading response body: %v", err)
}
var jobsResponse SlurmPayload
err = json.Unmarshal(ctlOutput, &jobsResponse)
if err != nil {
log.Errorf("Error parsing JSON response: %v", err)
return jobsResponse, err
}
return jobsResponse, nil
}
func fetchJobsLocal() (SlurmPayload, error) {
// Read the Slurm Payload JSON file
jobsData, err := os.ReadFile("slurm_0038.json")
if err != nil {
fmt.Println("Error reading Slurm Payload JSON file:", err)
}
var jobsResponse SlurmPayload
err = json.Unmarshal(jobsData, &jobsResponse)
if err != nil {
log.Errorf("Error parsing Slurm Payload JSON response: %v", err)
return jobsResponse, err
}
return jobsResponse, nil
}
func fetchDumpedJobsLocal() (SlurmDBPayload, error) {
// Read the SlurmDB Payload JSON file
jobsData, err := os.ReadFile("slurmdb_0038-large.json")
if err != nil {
fmt.Println("Error reading SlurmDB Payload JSON file:", err)
}
var jobsResponse SlurmDBPayload
err = json.Unmarshal(jobsData, &jobsResponse)
if err != nil {
log.Errorf("Error parsing SlurmDB Payload JSON response: %v", err)
return jobsResponse, err
}
return jobsResponse, nil
}
func printSlurmInfo(job Job) string {
text := fmt.Sprintf(`
JobId=%v JobName=%v
UserId=%v(%v) GroupId=%v
Account=%v QOS=%v
Requeue=%v Restarts=%v BatchFlag=%v
TimeLimit=%v
SubmitTime=%v
Partition=%v
NodeList=%v
NumNodes=%v NumCPUs=%v NumTasks=%v CPUs/Task=%v
NTasksPerNode:Socket:Core=%v:%v:%v
TRES_req=%v
TRES_alloc=%v
Command=%v
WorkDir=%v
StdErr=%v
StdOut=%v`,
job.JobID, job.Name,
job.UserName, job.UserID, job.GroupID,
job.Account, job.QoS,
job.Requeue, job.RestartCnt, job.BatchFlag,
job.TimeLimit, job.SubmitTime,
job.Partition,
job.Nodes,
job.NodeCount, job.CPUs, job.Tasks, job.CPUPerTask,
job.TasksPerBoard, job.TasksPerSocket, job.TasksPerCore,
job.TresAllocStr,
job.TresAllocStr,
job.Command,
job.CurrentWorkingDirectory,
job.StandardError,
job.StandardOutput,
)
return text
}
func exitWithError(err error, output []byte) {
if exitError, ok := err.(*exec.ExitError); ok {
if exitError.ExitCode() == 28 {
fmt.Fprintf(os.Stderr, "ERROR: API call failed with timeout; check slurmrestd.\nOutput:\n%s\n", output)
} else {
fmt.Fprintf(os.Stderr, "ERROR: API call failed with code %d;\nOutput:\n%s\n", exitError.ExitCode(), output)
}
} else {
log.Errorf("ERROR: %v", err)
}
os.Exit(1)
}
func (cfg *SlurmRestSchedulerConfig) Init() error {
var err error
cfg.clusterConfig, err = DecodeClusterConfig("cluster-alex.json")
cfg.nodeClusterMap = make(map[string]string)
cfg.clusterPCIEAddressMap = make(map[string][]string)
for _, subCluster := range cfg.clusterConfig.SubClusters {
cfg.ConstructNodeClusterMap(subCluster.Nodes, subCluster.Name)
pcieAddresses := make([]string, 0, 32)
for idx, accelerator := range subCluster.Topology.Accelerators {
pcieAddresses[idx] = accelerator.ID
}
cfg.clusterPCIEAddressMap[subCluster.Name] = pcieAddresses
}
// Create an HTTP client
cfg.client = &http.Client{}
return err
}
func (cfg *SlurmRestSchedulerConfig) checkAndHandleStopJob(job *schema.Job, req *StopJobRequest) {
// Sanity checks
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
log.Errorf("stopTime must be larger than startTime and only running jobs can be stopped")
return
}
if req.State != "" && !req.State.Valid() {
log.Errorf("invalid job state: %#v", req.State)
return
} else if req.State == "" {
req.State = schema.JobStateCompleted
}
// Mark job as stopped in the database (update state and duration)
job.Duration = int32(req.StopTime - job.StartTime.Unix())
job.State = req.State
if err := cfg.JobRepository.Stop(job.ID, job.Duration, job.State, job.MonitoringStatus); err != nil {
log.Errorf("marking job as stopped failed: %s", err.Error())
return
}
log.Printf("archiving job... (dbid: %d): cluster=%s, jobId=%d, user=%s, startTime=%s", job.ID, job.Cluster, job.JobID, job.User, job.StartTime)
// Monitoring is disabled...
if job.MonitoringStatus == schema.MonitoringStatusDisabled {
return
}
// Trigger async archiving
cfg.JobRepository.TriggerArchiving(job)
}
func (cfg *SlurmRestSchedulerConfig) ConstructNodeClusterMap(nodes string, cluster string) {
if cfg.nodeClusterMap == nil {
cfg.nodeClusterMap = make(map[string]string)
}
// Split the input by commas
groups := strings.Split(nodes, ",")
for _, group := range groups {
// Use regular expressions to match numbers and ranges
numberRangeRegex := regexp.MustCompile(`a\[(\d+)-(\d+)\]`)
numberRegex := regexp.MustCompile(`a(\d+)`)
if numberRangeRegex.MatchString(group) {
// Extract nodes from ranges
matches := numberRangeRegex.FindStringSubmatch(group)
if len(matches) == 3 {
start, _ := strconv.Atoi(matches[1])
end, _ := strconv.Atoi(matches[2])
for i := start; i <= end; i++ {
cfg.nodeClusterMap[matches[0]+fmt.Sprintf("%04d", i)] = cluster
}
}
} else if numberRegex.MatchString(group) {
// Extract individual node
matches := numberRegex.FindStringSubmatch(group)
if len(matches) == 2 {
cfg.nodeClusterMap[group] = cluster
}
}
}
}
func extractElements(indicesStr string, addresses []string) ([]string, error) {
// Split the input string by commas to get individual index ranges
indexRanges := strings.Split(indicesStr, ",")
var selected []string
for _, indexRange := range indexRanges {
// Split each index range by hyphen to separate start and end indices
rangeParts := strings.Split(indexRange, "-")
if len(rangeParts) == 1 {
// If there's only one part, it's a single index
index, err := strconv.Atoi(rangeParts[0])
if err != nil {
return nil, err
}
selected = append(selected, addresses[index])
} else if len(rangeParts) == 2 {
// If there are two parts, it's a range
start, err := strconv.Atoi(rangeParts[0])
if err != nil {
return nil, err
}
end, err := strconv.Atoi(rangeParts[1])
if err != nil {
return nil, err
}
// Add all indices in the range to the result
for i := start; i <= end; i++ {
selected = append(selected, addresses[i])
}
} else {
// Invalid format
return nil, fmt.Errorf("invalid index range: %s", indexRange)
}
}
return selected, nil
}
func (cfg *SlurmRestSchedulerConfig) CreateJobMeta(job Job) (*schema.JobMeta, error) {
var exclusive int32
if job.Shared == nil {
exclusive = 1
} else {
exclusive = 0
}
totalGPUs := 0
var resources []*schema.Resource
for nodeIndex, node := range job.JobResources.AllocatedNodes {
var res schema.Resource
res.Hostname = node.Nodename
log.Debugf("Node %s Cores map size: %d\n", node.Nodename, len(node.Sockets))
if node.CPUsUsed == nil || node.MemoryAllocated == nil {
log.Fatalf("Either node.Cpus or node.Memory is nil\n")
}
for k, v := range node.Sockets {
fmt.Printf("core id[%s] value[%s]\n", k, v)
threadID, _ := strconv.Atoi(k)
res.HWThreads = append(res.HWThreads, threadID)
}
re := regexp.MustCompile(`\(([^)]*)\)`)
matches := re.FindStringSubmatch(job.GresDetail[nodeIndex])
if len(matches) < 2 {
return nil, fmt.Errorf("no substring found in brackets")
}
nodePCIEAddresses := cfg.clusterPCIEAddressMap[cfg.nodeClusterMap[node.Nodename]]
selectedPCIEAddresses, err := extractElements(matches[1], nodePCIEAddresses)
totalGPUs += len(selectedPCIEAddresses)
if err != nil {
return nil, err
}
// For core/GPU id mapping, need to query from cluster config file
res.Accelerators = selectedPCIEAddresses
resources = append(resources, &res)
}
metaData := make(map[string]string)
metaData["jobName"] = job.Name
metaData["slurmInfo"] = printSlurmInfo(job)
metaDataInBytes, err := json.Marshal(metaData)
if err != nil {
log.Fatalf("metaData JSON marshaling failed: %s", err)
}
var defaultJob schema.BaseJob = schema.BaseJob{
JobID: job.JobID,
User: job.UserName,
Project: job.Account,
Cluster: job.Cluster,
Partition: job.Partition,
// check nil
ArrayJobId: job.ArrayJobID,
NumNodes: job.NodeCount,
NumHWThreads: job.CPUs,
NumAcc: int32(totalGPUs),
Exclusive: exclusive,
// MonitoringStatus: job.MonitoringStatus,
// SMT: job.TasksPerCore,
State: schema.JobState(job.JobState),
// ignore this for start job
// Duration: int32(time.Now().Unix() - job.StartTime), // or SubmitTime?
Walltime: time.Now().Unix(), // max duration requested by the job
// Tags: job.Tags,
// ignore this!
// RawResources: jobResourcesInBytes,
// "job_resources": "allocated_nodes" "sockets":
// very important; has to be right
Resources: resources,
RawMetaData: metaDataInBytes,
// optional metadata with'jobScript 'jobName': 'slurmInfo':
MetaData: metaData,
// ConcurrentJobs: job.ConcurrentJobs,
}
log.Debugf("Generated BaseJob with Resources=%v", defaultJob.Resources[0])
meta := &schema.JobMeta{
BaseJob: defaultJob,
StartTime: job.StartTime,
Statistics: make(map[string]schema.JobStatistics),
}
// log.Debugf("Generated JobMeta %v", req.BaseJob.JobID)
return meta, nil
}
func (cfg *SlurmRestSchedulerConfig) HandleJobs(jobs []Job) error {
// runningJobsInCC, err := cfg.JobRepository.FindRunningJobs("alex")
// Iterate over the Jobs slice
for _, job := range jobs {
// Process each job from Slurm
fmt.Printf("Job ID: %d\n", job.JobID)
fmt.Printf("Job Name: %s\n", job.Name)
fmt.Printf("Job State: %s\n", job.JobState)
fmt.Println("Job StartTime:", job.StartTime)
fmt.Println("Job Cluster:", job.Cluster)
if job.JobState == "RUNNING" {
meta, _ := cfg.CreateJobMeta(job)
// For all running jobs from Slurm
_, notFoundError := cfg.JobRepository.Find(&job.JobID, &job.Cluster, &job.StartTime)
if notFoundError != nil {
// if it does not exist in CC, create a new entry
log.Print("Job does not exist in CC, will create a new entry:", job.JobID)
id, startJobError := cfg.JobRepository.Start(meta)
if startJobError != nil {
return startJobError
}
log.Debug("Added job", id)
}
// Running in both sides: nothing needs to be done
} else if job.JobState == "COMPLETED" {
// Check if completed job with combination of (job_id, cluster_id, start_time) already exists
log.Debugf("Processing completed job ID: %v Cluster: %v StartTime: %v", job.JobID, job.Cluster, job.StartTime)
existingJob, err := cfg.JobRepository.Find(&job.JobID, &job.Cluster, &job.StartTime)
if err == nil && existingJob.State != schema.JobStateCompleted {
// for jobs completed in Slurm (either in Slurm or maybe SlurmDB)
// update job in CC with new info (job final status, duration, end timestamp)
existingJob.BaseJob.Duration = int32(job.EndTime - job.StartTime)
existingJob.BaseJob.State = schema.JobState(job.JobState)
existingJob.BaseJob.Walltime = job.EndTime
req := &StopJobRequest{
Cluster: &job.Cluster,
JobId: &job.JobID,
State: schema.JobState(job.JobState),
StartTime: &job.StartTime,
StopTime: job.EndTime,
}
cfg.checkAndHandleStopJob(existingJob, req)
}
}
}
return nil
}
func (cfg *SlurmRestSchedulerConfig) HandleDumpedJobs(jobs []DumpedJob) error {
// Iterate over the Jobs slice
for _, job := range jobs {
// Process each job from Slurm
fmt.Printf("Job ID: %d\n", job.JobID)
fmt.Printf("Job Name: %s\n", job.Name)
fmt.Printf("Job State: %s\n", job.State.Current)
fmt.Println("Job EndTime:", job.Time.End)
fmt.Println("Job Cluster:", job.Cluster)
// Check if completed job with combination of (job_id, cluster_id, start_time) already exists
log.Debugf("Processing completed dumped job ID: %v Cluster: %v StartTime: %v", job.JobID, job.Cluster, job.Time.Start)
existingJob, err := cfg.JobRepository.Find(&job.JobID, &job.Cluster, &job.Time.Start)
if err == nil && existingJob.State != schema.JobStateCompleted {
// for jobs completed in Slurm (either in Slurm or maybe SlurmDB)
// update job in CC with new info (job final status, duration, end timestamp)
existingJob.BaseJob.Duration = int32(job.Time.End - job.Time.Start)
existingJob.BaseJob.State = schema.JobState(job.State.Current)
existingJob.BaseJob.Walltime = job.Time.End
req := &StopJobRequest{
Cluster: &job.Cluster,
JobId: &job.JobID,
State: schema.JobState(job.State.Current),
StartTime: &job.Time.Start,
StopTime: job.Time.End,
}
cfg.checkAndHandleStopJob(existingJob, req)
}
}
return nil
}
func (cfg *SlurmRestSchedulerConfig) Sync() {
// Fetch an instance of Slurm JobsResponse
jobsResponse, err := fetchJobsLocal()
if err != nil {
log.Fatal(err.Error())
}
cfg.HandleJobs(jobsResponse.Jobs)
// Fetch an instance of Slurm DB JobsResponse
dumpedJobsResponse, err := fetchDumpedJobsLocal()
if err != nil {
log.Fatal(err.Error())
}
cfg.HandleDumpedJobs(dumpedJobsResponse.Jobs)
}

View File

@@ -15,7 +15,7 @@ type LdapConfig struct {
SearchDN string `json:"search_dn"`
UserBind string `json:"user_bind"`
UserFilter string `json:"user_filter"`
UserAttr string `json:"username_attr"`
UserAttr string `json:"username_attr"`
SyncInterval string `json:"sync_interval"` // Parsed using time.ParseDuration.
SyncDelOldUsers bool `json:"sync_del_old_users"`
@@ -76,7 +76,7 @@ type ProgramConfig struct {
// Address where the http (or https) server will listen on (for example: 'localhost:80').
Addr string `json:"addr"`
// Addresses from which the /api/secured/* API endpoints can be reached
// Addresses from which secured API endpoints can be reached
ApiAllowedIPs []string `json:"apiAllowedIPs"`
// Drop root permissions once .env was read and the port was taken.

View File

@@ -1,137 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
import (
"flag"
"fmt"
"log"
"net/http"
"os"
"github.com/ClusterCockpit/cc-backend/internal/scheduler"
"github.com/nats-io/nats.go"
)
func usage() {
log.Printf("Usage: nats-pub [-s server] [-creds file] <subject> <msg>\n")
flag.PrintDefaults()
}
func showUsageAndExit(exitcode int) {
usage()
os.Exit(exitcode)
}
func setupPublisher() {
var urls = flag.String("s", nats.DefaultURL, "The nats server URLs (separated by comma)")
var userCreds = flag.String("creds", "", "User Credentials File")
var showHelp = flag.Bool("h", false, "Show help message")
log.SetFlags(0)
flag.Usage = usage
flag.Parse()
if *showHelp {
showUsageAndExit(0)
}
args := flag.Args()
if len(args) != 2 {
showUsageAndExit(1)
}
fmt.Printf("Hello Nats\n")
// Connect Options.
opts := []nats.Option{nats.Name("NATS Sample Publisher")}
// Use UserCredentials
if *userCreds != "" {
opts = append(opts, nats.UserCredentials(*userCreds))
}
// Connect to NATS
nc, err := nats.Connect(*urls, opts...)
if err != nil {
log.Fatal(err)
}
defer nc.Close()
subj, msg := args[0], []byte(args[1])
nc.Publish(subj, msg)
nc.Flush()
if err := nc.LastError(); err != nil {
log.Fatal(err)
} else {
log.Printf("Published [%s] : '%s'\n", subj, msg)
}
os.Exit(0)
}
func injectPayload() {
// Read the JSON file
jobsData, err := os.ReadFile("slurm_0038.json")
dbData, err := os.ReadFile("slurmdb_0038-large.json")
if err != nil {
fmt.Println("Error reading JSON file:", err)
return
}
// Create an HTTP handler function
http.HandleFunc("/slurm/v0.0.38/jobs", func(w http.ResponseWriter, r *http.Request) {
// Set the response content type to JSON
w.Header().Set("Content-Type", "application/json")
// Write the raw JSON data to the response writer
_, err := w.Write(jobsData)
if err != nil {
http.Error(w, "Error writing jobsData payload", http.StatusInternalServerError)
return
}
})
http.HandleFunc("/slurmdb/v0.0.38/jobs", func(w http.ResponseWriter, r *http.Request) {
// Set the response content type to JSON
w.Header().Set("Content-Type", "application/json")
// Write the raw JSON data to the response writer
_, err := w.Write(dbData)
if err != nil {
http.Error(w, "Error writing dbData payload", http.StatusInternalServerError)
return
}
})
// Start the HTTP server on port 8080
fmt.Println("Listening on :8080...")
go http.ListenAndServe(":8080", nil)
}
func loadSlurmNatsScheduler() {
cfgData := []byte(`{"target": "localhost"}`)
var sch scheduler.SlurmNatsScheduler
// sch.URL = "nats://127.0.0.1:1223"
sch.Init(cfgData)
// go injectPayload()
}
func main() {
var sch scheduler.SlurmRestSchedulerConfig
sch.Init()
// injectPayload()
sch.Sync()
os.Exit(0)
}

View File

@@ -22,6 +22,7 @@
"@rollup/plugin-commonjs": "^24.1.0",
"@rollup/plugin-node-resolve": "^15.0.2",
"@rollup/plugin-terser": "^0.4.1",
"@timohausmann/quadtree-js": "^1.2.5",
"rollup": "^3.21.0",
"rollup-plugin-css-only": "^4.3.0",
"rollup-plugin-svelte": "^7.1.4",
@@ -225,6 +226,12 @@
}
}
},
"node_modules/@timohausmann/quadtree-js": {
"version": "1.2.5",
"resolved": "https://registry.npmjs.org/@timohausmann/quadtree-js/-/quadtree-js-1.2.5.tgz",
"integrity": "sha512-WcH3pouYtpyLjTCRvNP0WuSV4m7mRyYhLzW44egveFryT7pJhpDsdIJASEe37iCFNA0vmEpqTYGoG0siyXEthA==",
"dev": true
},
"node_modules/@types/estree": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.1.tgz",

View File

@@ -10,6 +10,7 @@
"@rollup/plugin-commonjs": "^24.1.0",
"@rollup/plugin-node-resolve": "^15.0.2",
"@rollup/plugin-terser": "^0.4.1",
"@timohausmann/quadtree-js": "^1.2.5",
"rollup": "^3.21.0",
"rollup-plugin-css-only": "^4.3.0",
"rollup-plugin-svelte": "^7.1.4",

View File

@@ -10,7 +10,7 @@
import { binsFromFootprint } from './utils.js'
import ScatterPlot from './plots/Scatter.svelte'
import PlotTable from './PlotTable.svelte'
import Roofline from './plots/Roofline.svelte'
import RooflineHeatmap from './plots/RooflineHeatmap.svelte'
const { query: initq } = init()
@@ -293,7 +293,7 @@
{#each $topQuery.data.topList as te, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};"/></td>
{#if groupSelection.key == 'User'}
{#if groupSelection.key == 'user'}
<th scope="col"><a href="/monitoring/user/{te.id}?cluster={cluster.name}">{te.id}</a></th>
{:else}
<th scope="col"><a href="/monitoring/jobs/?cluster={cluster.name}&project={te.id}&projectMatch=eq">{te.id}</a></th>
@@ -315,7 +315,7 @@
{:else if $rooflineQuery.data && cluster}
<div bind:clientWidth={colWidth2}>
{#key $rooflineQuery.data}
<Roofline
<RooflineHeatmap
width={colWidth2} height={300}
tiles={$rooflineQuery.data.rooflineHeatmap}
cluster={cluster.subClusters.length == 1 ? cluster.subClusters[0] : null}

View File

@@ -4,6 +4,7 @@
groupByScope,
fetchMetricsStore,
checkMetricDisabled,
transformDataForRoofline
} from "./utils.js";
import {
Row,
@@ -131,7 +132,6 @@
let plots = {},
jobTags,
fullWidth,
statsTable;
$: document.title = $initq.fetching
? "Loading..."
@@ -190,7 +190,6 @@
}));
</script>
<div class="row" bind:clientWidth={fullWidth} />
<Row>
<Col>
{#if $initq.error}
@@ -245,7 +244,6 @@
{/if}
<Col>
<Polar
size={fullWidth / 4.1}
metrics={ccconfig[
`job_view_polarPlotMetrics:${$initq.data.job.cluster}`
] || ccconfig[`job_view_polarPlotMetrics`]}
@@ -255,19 +253,18 @@
</Col>
<Col>
<Roofline
width={fullWidth / 3 - 10}
height={fullWidth / 5}
renderTime={true}
cluster={clusters
.find((c) => c.name == $initq.data.job.cluster)
.subClusters.find(
(sc) => sc.name == $initq.data.job.subCluster
)}
flopsAny={$jobMetrics.data.jobMetrics.find(
(m) => m.name == "flops_any" && m.scope == "node"
)}
memBw={$jobMetrics.data.jobMetrics.find(
(m) => m.name == "mem_bw" && m.scope == "node"
)}
data={
transformDataForRoofline (
$jobMetrics.data.jobMetrics.find((m) => m.name == "flops_any" && m.scope == "node").metric,
$jobMetrics.data.jobMetrics.find((m) => m.name == "mem_bw" && m.scope == "node").metric
)
}
/>
</Col>
{:else}
@@ -275,8 +272,7 @@
<Col />
{/if}
</Row>
<br />
<Row>
<Row class="mb-3">
<Col xs="auto">
{#if $initq.data}
<TagManagement job={$initq.data.job} bind:jobTags />
@@ -293,7 +289,6 @@
<Zoom timeseriesPlots={plots} />
</Col> -->
</Row>
<br />
<Row>
<Col>
{#if $jobMetrics.error}
@@ -340,8 +335,7 @@
{/if}
</Col>
</Row>
<br />
<Row>
<Row class="mt-2">
<Col>
{#if $initq.data}
<TabContent>

View File

@@ -1,7 +1,7 @@
<script>
import { getContext } from "svelte";
import Refresher from "./joblist/Refresher.svelte";
import Roofline, { transformPerNodeData } from "./plots/Roofline.svelte";
import Roofline from "./plots/Roofline.svelte";
import Pie, { colors } from "./plots/Pie.svelte";
import Histogram from "./plots/Histogram.svelte";
import {
@@ -16,7 +16,7 @@
Progress,
Icon,
} from "sveltestrap";
import { init, convert2uplot } from "./utils.js";
import { init, convert2uplot, transformPerNodeDataForRoofline } from "./utils.js";
import { scaleNumbers } from "./units.js";
import {
queryStore,
@@ -31,8 +31,8 @@
export let cluster;
let plotWidths = [],
colWidth1 = 0,
colWidth2;
colWidth1,
colWidth2
let from = new Date(Date.now() - 5 * 60 * 1000),
to = new Date(Date.now());
const topOptions = [
@@ -427,16 +427,17 @@
<div bind:clientWidth={plotWidths[i]}>
{#key $mainQuery.data.nodeMetrics}
<Roofline
allowSizeChange={true}
width={plotWidths[i] - 10}
height={300}
colorDots={true}
showTime={false}
cluster={subCluster}
data={transformPerNodeData(
$mainQuery.data.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name
data={
transformPerNodeDataForRoofline(
$mainQuery.data.nodeMetrics.filter(
(data) => data.subCluster == subCluster.name
)
)
)}
}
/>
{/key}
</div>
@@ -444,7 +445,7 @@
</Row>
{/each}
<hr style="margin-top: -1em;" />
<hr/>
<!-- Usage Stats as Histograms -->

View File

@@ -22,10 +22,10 @@
LineElement
);
export let size
export let metrics
export let cluster
export let jobMetrics
export let height = 365
const metricConfig = getContext('metrics')
@@ -89,13 +89,19 @@
// No custom defined options but keep for clarity
const options = {
maintainAspectRatio: false,
animation: false
animation: false,
scales: { // fix scale
r: {
suggestedMin: 0.0,
suggestedMax: 1.0
}
}
}
</script>
<div class="chart-container">
<Radar {data} {options} width={size} height={size}/>
<Radar {data} {options} {height}/>
</div>
<style>

View File

@@ -1,42 +1,52 @@
<div class="cc-plot">
<canvas bind:this={canvasElement} width="{prevWidth}" height="{prevHeight}"></canvas>
</div>
<script>
import uPlot from 'uplot'
import { formatNumber } from '../units.js'
import { onMount, onDestroy } from 'svelte'
import { Card } from 'sveltestrap'
<script context="module">
const axesColor = '#aaaaaa'
const tickFontSize = 10
const labelFontSize = 12
const fontFamily = 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"'
const paddingLeft = 40,
paddingRight = 10,
paddingTop = 10,
paddingBottom = 50
export let data = null
export let renderTime = false
export let allowSizeChange = false
export let cluster = null
export let width = 600
export let height = 350
let plotWrapper = null
let uplot = null
let timeoutId = null
/* Data Format
* data = [null, [], []] // 0: null-axis required for scatter, 1: Array of XY-Array for Scatter, 2: Optional Time Info
* data[1][0] = [100, 200, 500, ...] // X Axis -> Intensity (Vals up to clusters' flopRateScalar value)
* data[1][1] = [1000, 2000, 1500, ...] // Y Axis -> Performance (Vals up to clusters' flopRateSimd value)
* data[2] = [0.1, 0.15, 0.2, ...] // Color Code -> Time Information (Floats from 0 to 1) (Optional)
*/
// Helpers
function getGradientR(x) {
if (x < 0.5) return 0
if (x > 0.75) return 255
x = (x - 0.5) * 4.0
return Math.floor(x * 255.0)
}
function getGradientG(x) {
if (x > 0.25 && x < 0.75) return 255
if (x < 0.25) x = x * 4.0
else x = 1.0 - (x - 0.75) * 4.0
return Math.floor(x * 255.0)
}
function getGradientB(x) {
if (x < 0.25) return 255
if (x > 0.5) return 0
x = 1.0 - (x - 0.25) * 4.0
return Math.floor(x * 255.0)
}
function getRGB(c) {
return `rgb(${getGradientR(c)}, ${getGradientG(c)}, ${getGradientB(c)})`
}
function nearestThousand (num) {
return Math.ceil(num/1000) * 1000
}
function lineIntersect(x1, y1, x2, y2, x3, y3, x4, y4) {
let l = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1)
let a = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / l
@@ -45,314 +55,194 @@
y: y1 + a * (y2 - y1)
}
}
// End Helpers
function axisStepFactor(i, size) {
if (size && size < 500)
return 10
// Dot Renderers
const drawColorPoints = (u, seriesIdx, idx0, idx1) => {
const size = 5 * devicePixelRatio;
uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => {
let d = u.data[seriesIdx];
let deg360 = 2 * Math.PI;
for (let i = 0; i < d[0].length; i++) {
let p = new Path2D();
let xVal = d[0][i];
let yVal = d[1][i];
u.ctx.strokeStyle = getRGB(u.data[2][i])
u.ctx.fillStyle = getRGB(u.data[2][i])
if (xVal >= scaleX.min && xVal <= scaleX.max && yVal >= scaleY.min && yVal <= scaleY.max) {
let cx = valToPosX(xVal, scaleX, xDim, xOff);
let cy = valToPosY(yVal, scaleY, yDim, yOff);
if (i % 3 == 0)
return 2
else if (i % 3 == 1)
return 2.5
else
return 2
}
function render(ctx, data, cluster, width, height, colorDots, showTime, defaultMaxY) {
if (width <= 0)
return
const [minX, maxX, minY, maxY] = [0.01, 1000, 1., cluster?.flopRateSimd?.value || defaultMaxY]
const w = width - paddingLeft - paddingRight
const h = height - paddingTop - paddingBottom
// Helpers:
const [log10minX, log10maxX, log10minY, log10maxY] =
[Math.log10(minX), Math.log10(maxX), Math.log10(minY), Math.log10(maxY)]
/* Value -> Pixel-Coordinate */
const getCanvasX = (x) => {
x = Math.log10(x)
x -= log10minX; x /= (log10maxX - log10minX)
return Math.round((x * w) + paddingLeft)
}
const getCanvasY = (y) => {
y = Math.log10(y)
y -= log10minY
y /= (log10maxY - log10minY)
return Math.round((h - y * h) + paddingTop)
}
// Axes
ctx.fillStyle = 'black'
ctx.strokeStyle = axesColor
ctx.font = `${tickFontSize}px ${fontFamily}`
ctx.beginPath()
for (let x = minX, i = 0; x <= maxX; i++) {
let px = getCanvasX(x)
let text = formatNumber(x)
let textWidth = ctx.measureText(text).width
ctx.fillText(text,
Math.floor(px - (textWidth / 2)),
height - paddingBottom + tickFontSize + 5)
ctx.moveTo(px, paddingTop - 5)
ctx.lineTo(px, height - paddingBottom + 5)
x *= axisStepFactor(i, w)
}
if (data.xLabel) {
ctx.font = `${labelFontSize}px ${fontFamily}`
let textWidth = ctx.measureText(data.xLabel).width
ctx.fillText(data.xLabel, Math.floor((width / 2) - (textWidth / 2)), height - 20)
}
ctx.textAlign = 'center'
ctx.font = `${tickFontSize}px ${fontFamily}`
for (let y = minY, i = 0; y <= maxY; i++) {
let py = getCanvasY(y)
ctx.moveTo(paddingLeft - 5, py)
ctx.lineTo(width - paddingRight + 5, py)
ctx.save()
ctx.translate(paddingLeft - 10, py)
ctx.rotate(-Math.PI / 2)
ctx.fillText(formatNumber(y), 0, 0)
ctx.restore()
y *= axisStepFactor(i)
}
if (data.yLabel) {
ctx.font = `${labelFontSize}px ${fontFamily}`
ctx.save()
ctx.translate(15, Math.floor(height / 2))
ctx.rotate(-Math.PI / 2)
ctx.fillText(data.yLabel, 0, 0)
ctx.restore()
}
ctx.stroke()
// Draw Data
if (data.x && data.y) {
for (let i = 0; i < data.x.length; i++) {
let x = data.x[i], y = data.y[i], c = data.c[i]
if (x == null || y == null || Number.isNaN(x) || Number.isNaN(y))
continue
const s = 3
const px = getCanvasX(x)
const py = getCanvasY(y)
ctx.fillStyle = getRGB(c)
ctx.beginPath()
ctx.arc(px, py, s, 0, Math.PI * 2, false)
ctx.fill()
p.moveTo(cx + size/2, cy);
arc(p, cx, cy, size/2, 0, deg360);
}
u.ctx.fill(p);
}
} else if (data.tiles) {
const rows = data.tiles.length
const cols = data.tiles[0].length
});
return null;
};
const tileWidth = Math.ceil(w / cols)
const tileHeight = Math.ceil(h / rows)
let max = data.tiles.reduce((max, row) =>
Math.max(max, row.reduce((max, val) =>
Math.max(max, val)), 0), 0)
if (max == 0)
max = 1
const tileColor = val => `rgba(255, 0, 0, ${(val / max)})`
for (let i = 0; i < rows; i++) {
for (let j = 0; j < cols; j++) {
let px = paddingLeft + (j / cols) * w
let py = paddingTop + (h - (i / rows) * h) - tileHeight
ctx.fillStyle = tileColor(data.tiles[i][j])
ctx.fillRect(px, py, tileWidth, tileHeight)
const drawPoints = (u, seriesIdx, idx0, idx1) => {
const size = 5 * devicePixelRatio;
uPlot.orient(u, seriesIdx, (series, dataX, dataY, scaleX, scaleY, valToPosX, valToPosY, xOff, yOff, xDim, yDim, moveTo, lineTo, rect, arc) => {
let d = u.data[seriesIdx];
u.ctx.strokeStyle = getRGB(0);
u.ctx.fillStyle = getRGB(0);
let deg360 = 2 * Math.PI;
let p = new Path2D();
for (let i = 0; i < d[0].length; i++) {
let xVal = d[0][i];
let yVal = d[1][i];
if (xVal >= scaleX.min && xVal <= scaleX.max && yVal >= scaleY.min && yVal <= scaleY.max) {
let cx = valToPosX(xVal, scaleX, xDim, xOff);
let cy = valToPosY(yVal, scaleY, yDim, yOff);
p.moveTo(cx + size/2, cy);
arc(p, cx, cy, size/2, 0, deg360);
}
}
}
u.ctx.fill(p);
});
return null;
};
// Draw roofs
ctx.strokeStyle = 'black'
ctx.lineWidth = 2
ctx.beginPath()
if (cluster != null) {
const ycut = 0.01 * cluster.memoryBandwidth.value
const scalarKnee = (cluster.flopRateScalar.value - ycut) / cluster.memoryBandwidth.value
const simdKnee = (cluster.flopRateSimd.value - ycut) / cluster.memoryBandwidth.value
const scalarKneeX = getCanvasX(scalarKnee),
simdKneeX = getCanvasX(simdKnee),
flopRateScalarY = getCanvasY(cluster.flopRateScalar.value),
flopRateSimdY = getCanvasY(cluster.flopRateSimd.value)
// Main Function
function render(plotData) {
if (plotData) {
const opts = {
title: "",
mode: 2,
width: width,
height: height,
legend: {
show: false
},
cursor: { drag: { x: false, y: false } },
axes: [
{
label: 'Intensity [FLOPS/Byte]',
values: (u, vals) => vals.map(v => formatNumber(v))
},
{
label: 'Performace [GFLOPS]',
values: (u, vals) => vals.map(v => formatNumber(v))
}
],
scales: {
x: {
time: false,
range: [0.01, 1000],
distr: 3, // Render as log
log: 10, // log exp
},
y: {
range: [1.0, cluster?.flopRateSimd?.value ? nearestThousand(cluster.flopRateSimd.value) : 10000],
distr: 3, // Render as log
log: 10, // log exp
},
},
series: [
{},
{ paths: renderTime ? drawColorPoints : drawPoints }
],
hooks: {
drawClear: [
u => {
u.series.forEach((s, i) => {
if (i > 0)
s._paths = null;
});
},
],
draw: [
u => { // draw roofs when cluster set
// console.log(u)
if (cluster != null) {
const padding = u._padding // [top, right, bottom, left]
if (scalarKneeX < width - paddingRight) {
ctx.moveTo(scalarKneeX, flopRateScalarY)
ctx.lineTo(width - paddingRight, flopRateScalarY)
}
u.ctx.strokeStyle = 'black'
u.ctx.lineWidth = 2
u.ctx.beginPath()
if (simdKneeX < width - paddingRight) {
ctx.moveTo(simdKneeX, flopRateSimdY)
ctx.lineTo(width - paddingRight, flopRateSimdY)
}
const ycut = 0.01 * cluster.memoryBandwidth.value
const scalarKnee = (cluster.flopRateScalar.value - ycut) / cluster.memoryBandwidth.value
const simdKnee = (cluster.flopRateSimd.value - ycut) / cluster.memoryBandwidth.value
const scalarKneeX = u.valToPos(scalarKnee, 'x', true), // Value, axis, toCanvasPixels
simdKneeX = u.valToPos(simdKnee, 'x', true),
flopRateScalarY = u.valToPos(cluster.flopRateScalar.value, 'y', true),
flopRateSimdY = u.valToPos(cluster.flopRateSimd.value, 'y', true)
let x1 = getCanvasX(0.01),
y1 = getCanvasY(ycut),
x2 = getCanvasX(simdKnee),
y2 = flopRateSimdY
if (scalarKneeX < width - padding[1]) { // Top horizontal roofline
u.ctx.moveTo(scalarKneeX, flopRateScalarY)
u.ctx.lineTo(width - padding[1], flopRateScalarY)
}
let xAxisIntersect = lineIntersect(
x1, y1, x2, y2,
0, height - paddingBottom, width, height - paddingBottom)
if (simdKneeX < width - padding[1]) { // Lower horitontal roofline
u.ctx.moveTo(simdKneeX, flopRateSimdY)
u.ctx.lineTo(width - padding[1], flopRateSimdY)
}
if (xAxisIntersect.x > x1) {
x1 = xAxisIntersect.x
y1 = xAxisIntersect.y
}
let x1 = u.valToPos(0.01, 'x', true),
y1 = u.valToPos(ycut, 'y', true)
let x2 = u.valToPos(simdKnee, 'x', true),
y2 = flopRateSimdY
ctx.moveTo(x1, y1)
ctx.lineTo(x2, y2)
}
ctx.stroke()
let xAxisIntersect = lineIntersect(
x1, y1, x2, y2,
u.valToPos(0.01, 'x', true), u.valToPos(1.0, 'y', true), // X-Axis Start Coords
u.valToPos(1000, 'x', true), u.valToPos(1.0, 'y', true) // X-Axis End Coords
)
if (colorDots && showTime && data.x && data.y) {
// The Color Scale For Time Information
ctx.fillStyle = 'black'
ctx.fillText('Time:', 17, height - 5)
const start = paddingLeft + 5
for (let x = start; x < width - paddingRight; x += 15) {
let c = (x - start) / (width - start - paddingRight)
ctx.fillStyle = getRGB(c)
ctx.beginPath()
ctx.arc(x, height - 10, 5, 0, Math.PI * 2, false)
ctx.fill()
}
}
}
if (xAxisIntersect.x > x1) {
x1 = xAxisIntersect.x
y1 = xAxisIntersect.y
}
function transformData(flopsAny, memBw, colorDots) { // Uses Metric Object
const nodes = flopsAny.series.length
const timesteps = flopsAny.series[0].data.length
// Diagonal
u.ctx.moveTo(x1, y1)
u.ctx.lineTo(x2, y2)
/* c will contain values from 0 to 1 representing the time */
const x = [], y = [], c = []
if (flopsAny && memBw) {
for (let i = 0; i < nodes; i++) {
const flopsData = flopsAny.series[i].data
const memBwData = memBw.series[i].data
for (let j = 0; j < timesteps; j++) {
const f = flopsData[j], m = memBwData[j]
const intensity = f / m
if (Number.isNaN(intensity) || !Number.isFinite(intensity))
continue
x.push(intensity)
y.push(f)
c.push(colorDots ? j / timesteps : 0)
}
}
u.ctx.stroke()
// Reset grid lineWidth
u.ctx.lineWidth = 0.15
}
}
]
},
};
uplot = new uPlot(opts, plotData, plotWrapper);
} else {
console.warn("transformData: metrics for 'mem_bw' and/or 'flops_any' missing!")
}
return {
x, y, c,
xLabel: 'Intensity [FLOPS/byte]',
yLabel: 'Performance [GFLOPS]'
console.log('No data for roofline!')
}
}
// Return something to be plotted. The argument shall be the result of the
// `nodeMetrics` GraphQL query.
export function transformPerNodeData(nodes) {
const x = [], y = [], c = []
for (let node of nodes) {
let flopsAny = node.metrics.find(m => m.name == 'flops_any' && m.scope == 'node')?.metric
let memBw = node.metrics.find(m => m.name == 'mem_bw' && m.scope == 'node')?.metric
if (!flopsAny || !memBw) {
console.warn("transformPerNodeData: metrics for 'mem_bw' and/or 'flops_any' missing!")
continue
}
let flopsData = flopsAny.series[0].data, memBwData = memBw.series[0].data
const f = flopsData[flopsData.length - 1], m = memBwData[flopsData.length - 1]
const intensity = f / m
if (Number.isNaN(intensity) || !Number.isFinite(intensity))
continue
x.push(intensity)
y.push(f)
c.push(0)
}
return {
x, y, c,
xLabel: 'Intensity [FLOPS/byte]',
yLabel: 'Performance [GFLOPS]'
}
}
</script>
<script>
import { onMount, tick } from 'svelte'
import { formatNumber } from '../units.js'
export let flopsAny = null
export let memBw = null
export let cluster = null
export let maxY = null
export let width = 500
export let height = 300
export let tiles = null
export let colorDots = true
export let showTime = true
export let data = null
console.assert(data || tiles || (flopsAny && memBw), "you must provide flopsAny and memBw or tiles!")
let ctx, canvasElement, prevWidth = width, prevHeight = height
data = data != null ? data : (flopsAny && memBw
? transformData(flopsAny.metric, memBw.metric, colorDots) // Use Metric Object from Parent
: {
tiles: tiles,
xLabel: 'Intensity [FLOPS/byte]',
yLabel: 'Performance [GFLOPS]'
})
// Svelte and Sizechange
onMount(() => {
ctx = canvasElement.getContext('2d')
if (prevWidth != width || prevHeight != height) {
sizeChanged()
return
}
canvasElement.width = width
canvasElement.height = height
render(ctx, data, cluster, width, height, colorDots, showTime, maxY)
render(data)
})
let timeoutId = null
function sizeChanged() {
if (!ctx)
return
onDestroy(() => {
if (uplot)
uplot.destroy()
if (timeoutId != null)
clearTimeout(timeoutId)
})
function sizeChanged() {
if (timeoutId != null)
clearTimeout(timeoutId)
prevWidth = width
prevHeight = height
timeoutId = setTimeout(() => {
if (!canvasElement)
return
timeoutId = null
canvasElement.width = width
canvasElement.height = height
render(ctx, data, cluster, width, height, colorDots, showTime, maxY)
}, 250)
if (uplot)
uplot.destroy()
render(data)
}, 200)
}
$: sizeChanged(width, height)
$: if (allowSizeChange) sizeChanged(width, height)
</script>
{#if data != null}
<div bind:this={plotWrapper}/>
{:else}
<Card class="mx-4" body color="warning">Cannot render roofline: No data!</Card>
{/if}

View File

@@ -0,0 +1,234 @@
<div class="cc-plot">
<canvas bind:this={canvasElement} width="{prevWidth}" height="{prevHeight}"></canvas>
</div>
<script context="module">
const axesColor = '#aaaaaa'
const tickFontSize = 10
const labelFontSize = 12
const fontFamily = 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"'
const paddingLeft = 40,
paddingRight = 10,
paddingTop = 10,
paddingBottom = 50
function lineIntersect(x1, y1, x2, y2, x3, y3, x4, y4) {
let l = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1)
let a = ((x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)) / l
return {
x: x1 + a * (x2 - x1),
y: y1 + a * (y2 - y1)
}
}
function axisStepFactor(i, size) {
if (size && size < 500)
return 10
if (i % 3 == 0)
return 2
else if (i % 3 == 1)
return 2.5
else
return 2
}
function render(ctx, data, cluster, width, height, defaultMaxY) {
if (width <= 0)
return
const [minX, maxX, minY, maxY] = [0.01, 1000, 1., cluster?.flopRateSimd?.value || defaultMaxY]
const w = width - paddingLeft - paddingRight
const h = height - paddingTop - paddingBottom
// Helpers:
const [log10minX, log10maxX, log10minY, log10maxY] =
[Math.log10(minX), Math.log10(maxX), Math.log10(minY), Math.log10(maxY)]
/* Value -> Pixel-Coordinate */
const getCanvasX = (x) => {
x = Math.log10(x)
x -= log10minX; x /= (log10maxX - log10minX)
return Math.round((x * w) + paddingLeft)
}
const getCanvasY = (y) => {
y = Math.log10(y)
y -= log10minY
y /= (log10maxY - log10minY)
return Math.round((h - y * h) + paddingTop)
}
// Axes
ctx.fillStyle = 'black'
ctx.strokeStyle = axesColor
ctx.font = `${tickFontSize}px ${fontFamily}`
ctx.beginPath()
for (let x = minX, i = 0; x <= maxX; i++) {
let px = getCanvasX(x)
let text = formatNumber(x)
let textWidth = ctx.measureText(text).width
ctx.fillText(text,
Math.floor(px - (textWidth / 2)),
height - paddingBottom + tickFontSize + 5)
ctx.moveTo(px, paddingTop - 5)
ctx.lineTo(px, height - paddingBottom + 5)
x *= axisStepFactor(i, w)
}
if (data.xLabel) {
ctx.font = `${labelFontSize}px ${fontFamily}`
let textWidth = ctx.measureText(data.xLabel).width
ctx.fillText(data.xLabel, Math.floor((width / 2) - (textWidth / 2)), height - 20)
}
ctx.textAlign = 'center'
ctx.font = `${tickFontSize}px ${fontFamily}`
for (let y = minY, i = 0; y <= maxY; i++) {
let py = getCanvasY(y)
ctx.moveTo(paddingLeft - 5, py)
ctx.lineTo(width - paddingRight + 5, py)
ctx.save()
ctx.translate(paddingLeft - 10, py)
ctx.rotate(-Math.PI / 2)
ctx.fillText(formatNumber(y), 0, 0)
ctx.restore()
y *= axisStepFactor(i)
}
if (data.yLabel) {
ctx.font = `${labelFontSize}px ${fontFamily}`
ctx.save()
ctx.translate(15, Math.floor(height / 2))
ctx.rotate(-Math.PI / 2)
ctx.fillText(data.yLabel, 0, 0)
ctx.restore()
}
ctx.stroke()
// Draw Data
if (data.tiles) {
const rows = data.tiles.length
const cols = data.tiles[0].length
const tileWidth = Math.ceil(w / cols)
const tileHeight = Math.ceil(h / rows)
let max = data.tiles.reduce((max, row) =>
Math.max(max, row.reduce((max, val) =>
Math.max(max, val)), 0), 0)
if (max == 0)
max = 1
const tileColor = val => `rgba(255, 0, 0, ${(val / max)})`
for (let i = 0; i < rows; i++) {
for (let j = 0; j < cols; j++) {
let px = paddingLeft + (j / cols) * w
let py = paddingTop + (h - (i / rows) * h) - tileHeight
ctx.fillStyle = tileColor(data.tiles[i][j])
ctx.fillRect(px, py, tileWidth, tileHeight)
}
}
}
// Draw roofs
ctx.strokeStyle = 'black'
ctx.lineWidth = 2
ctx.beginPath()
if (cluster != null) {
const ycut = 0.01 * cluster.memoryBandwidth.value
const scalarKnee = (cluster.flopRateScalar.value - ycut) / cluster.memoryBandwidth.value
const simdKnee = (cluster.flopRateSimd.value - ycut) / cluster.memoryBandwidth.value
const scalarKneeX = getCanvasX(scalarKnee),
simdKneeX = getCanvasX(simdKnee),
flopRateScalarY = getCanvasY(cluster.flopRateScalar.value),
flopRateSimdY = getCanvasY(cluster.flopRateSimd.value)
if (scalarKneeX < width - paddingRight) {
ctx.moveTo(scalarKneeX, flopRateScalarY)
ctx.lineTo(width - paddingRight, flopRateScalarY)
}
if (simdKneeX < width - paddingRight) {
ctx.moveTo(simdKneeX, flopRateSimdY)
ctx.lineTo(width - paddingRight, flopRateSimdY)
}
let x1 = getCanvasX(0.01),
y1 = getCanvasY(ycut),
x2 = getCanvasX(simdKnee),
y2 = flopRateSimdY
let xAxisIntersect = lineIntersect(
x1, y1, x2, y2,
0, height - paddingBottom, width, height - paddingBottom)
if (xAxisIntersect.x > x1) {
x1 = xAxisIntersect.x
y1 = xAxisIntersect.y
}
ctx.moveTo(x1, y1)
ctx.lineTo(x2, y2)
}
ctx.stroke()
}
</script>
<script>
import { onMount } from 'svelte'
import { formatNumber } from '../units.js'
export let cluster = null
export let tiles = null
export let maxY = null
export let width = 500
export let height = 300
console.assert(tiles, "you must provide tiles!")
let ctx, canvasElement, prevWidth = width, prevHeight = height
const data = {
tiles: tiles,
xLabel: 'Intensity [FLOPS/byte]',
yLabel: 'Performance [GFLOPS]'
}
onMount(() => {
ctx = canvasElement.getContext('2d')
if (prevWidth != width || prevHeight != height) {
sizeChanged()
return
}
canvasElement.width = width
canvasElement.height = height
render(ctx, data, cluster, width, height, maxY)
})
let timeoutId = null
function sizeChanged() {
if (!ctx)
return
if (timeoutId != null)
clearTimeout(timeoutId)
prevWidth = width
prevHeight = height
timeoutId = setTimeout(() => {
if (!canvasElement)
return
timeoutId = null
canvasElement.width = width
canvasElement.height = height
render(ctx, data, cluster, width, height, maxY)
}, 250)
}
$: sizeChanged(width, height)
</script>

View File

@@ -6,8 +6,8 @@ const power = [1, 1e3, 1e6, 1e9, 1e12, 1e15, 1e18, 1e21]
const prefix = ['', 'K', 'M', 'G', 'T', 'P', 'E']
export function formatNumber(x) {
if ( isNaN(x) ) {
return x // Return if String , used in Histograms
if ( isNaN(x) || x == null) {
return x // Return if String or Null
} else {
for (let i = 0; i < prefix.length; i++)
if (power[i] <= x && x < power[i+1])

View File

@@ -6,7 +6,7 @@ import {
} from "@urql/svelte";
import { setContext, getContext, hasContext, onDestroy, tick } from "svelte";
import { readable } from "svelte/store";
import { formatNumber } from './units.js'
// import { formatNumber } from './units.js'
/*
* Call this function only at component initialization time!
@@ -326,8 +326,11 @@ export function convert2uplot(canvasData) {
}
export function binsFromFootprint(weights, scope, values, numBins) {
let min = 0, max = 0
let min = 0, max = 0 //, median = 0
if (values.length != 0) {
// Extreme, wrong peak vlaues: Filter here or backend?
// median = median(values)
for (let x of values) {
min = Math.min(min, x)
max = Math.max(max, x)
@@ -363,3 +366,75 @@ export function binsFromFootprint(weights, scope, values, numBins) {
bins: bins
}
}
export function transformDataForRoofline(flopsAny, memBw) { // Uses Metric Objects: {series:[{},{},...], timestep:60, name:$NAME}
const nodes = flopsAny.series.length
const timesteps = flopsAny.series[0].data.length
/* c will contain values from 0 to 1 representing the time */
let data = null
const x = [], y = [], c = []
if (flopsAny && memBw) {
for (let i = 0; i < nodes; i++) {
const flopsData = flopsAny.series[i].data
const memBwData = memBw.series[i].data
for (let j = 0; j < timesteps; j++) {
const f = flopsData[j], m = memBwData[j]
const intensity = f / m
if (Number.isNaN(intensity) || !Number.isFinite(intensity))
continue
x.push(intensity)
y.push(f)
c.push(j / timesteps)
}
}
} else {
console.warn("transformData: metrics for 'mem_bw' and/or 'flops_any' missing!")
}
if (x.length > 0 && y.length > 0 && c.length > 0) {
data = [null, [x, y], c] // for dataformat see roofline.svelte
}
return data
}
// Return something to be plotted. The argument shall be the result of the
// `nodeMetrics` GraphQL query.
export function transformPerNodeDataForRoofline(nodes) {
let data = null
const x = [], y = []
for (let node of nodes) {
let flopsAny = node.metrics.find(m => m.name == 'flops_any' && m.scope == 'node')?.metric
let memBw = node.metrics.find(m => m.name == 'mem_bw' && m.scope == 'node')?.metric
if (!flopsAny || !memBw) {
console.warn("transformPerNodeData: metrics for 'mem_bw' and/or 'flops_any' missing!")
continue
}
let flopsData = flopsAny.series[0].data, memBwData = memBw.series[0].data
const f = flopsData[flopsData.length - 1], m = memBwData[flopsData.length - 1]
const intensity = f / m
if (Number.isNaN(intensity) || !Number.isFinite(intensity))
continue
x.push(intensity)
y.push(f)
}
if (x.length > 0 && y.length > 0) {
data = [null, [x, y], []] // for dataformat see roofline.svelte
}
return data
}
// https://stackoverflow.com/questions/45309447/calculating-median-javascript
// function median(numbers) {
// const sorted = Array.from(numbers).sort((a, b) => a - b);
// const middle = Math.floor(sorted.length / 2);
// if (sorted.length % 2 === 0) {
// return (sorted[middle - 1] + sorted[middle]) / 2;
// }
// return sorted[middle];
// }