Merge pull request #49 from ClusterCockpit/dev-job-archive-module

Refactoring and Cleanup
* Add packages for archive
* Extract all data structures into schema package
* Re-Generate GraphQL Code
* Add Swagger UI Frontend and REST Api documentation generated from code comments
* Integrate JSON Schema and add schema validation
* Add more tests
* Add Makefile for build automation
* Integrate binary version and git hash
This commit is contained in:
Jan Eitzinger 2022-09-26 15:18:41 +02:00 committed by GitHub
commit 411da118db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
91 changed files with 14113 additions and 4613 deletions

1
.gitignore vendored
View File

@ -9,3 +9,4 @@
/web/frontend/public/build
/web/frontend/node_modules
.vscode/settings.json

52
Makefile Normal file
View File

@ -0,0 +1,52 @@
TARGET = ./cc-backend
VAR = ./var
FRONTEND = ./web/frontend
VERSION = 0.1
GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development')
CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S")
LD_FLAGS = '-s -X main.buildTime=${CURRENT_TIME} -X main.version=${VERSION} -X main.hash=${GIT_HASH}'
SVELTE_COMPONENTS = status \
analysis \
node \
systems \
job \
list \
user \
jobs \
header
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/*.js) \
$(wildcard $(FRONTEND)/src/filters/*.svelte) \
$(wildcard $(FRONTEND)/src/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/joblist/*.svelte)
.PHONY: clean test $(TARGET)
.NOTPARALLEL:
$(TARGET): $(VAR) $(SVELTE_TARGETS)
$(info ===> BUILD cc-backend)
@go build -ldflags=${LD_FLAGS} ./cmd/cc-backend
clean:
$(info ===> CLEAN)
@go clean
@rm $(TARGET)
test:
$(info ===> TESTING)
@go build ./...
@go vet ./...
@go test ./...
$(SVELTE_TARGETS): $(SVELTE_SRC)
$(info ===> BUILD frontend)
cd web/frontend && yarn build
$(VAR):
@mkdir $(VAR)
@touch ./var/job.db
cd web/frontend && yarn install

View File

@ -44,31 +44,26 @@ Please note that some views do not work without a metric backend (e.g., the Syst
```sh
git clone git@github.com:ClusterCockpit/cc-backend.git
# Prepare frontend
cd ./cc-backend/web/frontend
yarn install
yarn build
cd ..
go build ./cmd/cc-backend
ln -s <your-existing-job-archive> ./var/job-archive
# Create empty job.db (Will be initialized as SQLite3 database)
touch ./var/job.db
# Build binary
cd ./cc-backend/
make
# EDIT THE .env FILE BEFORE YOU DEPLOY (Change the secrets)!
# If authentication is disabled, it can be empty.
cp configs/env-template.txt .env
vim ./.env
#Optional: Link an existing job archive:
ln -s <your-existing-job-archive> ./var/job-archive
# This will first initialize the job.db database by traversing all
# `meta.json` files in the job-archive and add a new user. `--no-server` will cause the
# executable to stop once it has done that instead of starting a server.
./cc-backend --init-db --add-user <your-username>:admin:<your-password> --no-server
./cc-backend --init-db --add-user <your-username>:admin:<your-password>
# Start a HTTP server (HTTPS can be enabled, the default port is 8080):
./cc-backend
# Start a HTTP server (HTTPS can be enabled, the default port is 8080).
# The --dev flag enables GraphQL Playground (http://localhost:8080/playground) and Swagger UI (http://localhost:8080/swagger).
./cc-backend --server --dev
# Show other options:
./cc-backend --help
@ -87,7 +82,7 @@ Having no jobs in the job-archive at all is fine.
### Configuration
A config file in the JSON format can be provided using `--config` to override the defaults.
A config file in the JSON format has to be provided using `--config` to override the defaults.
By default, if there is a `config.json` file in the current directory of the `cc-backend` process, it will be loaded even without the `--config` flag.
You find documentation of all supported configuration and command line options [here](./configs.README.md).
@ -97,17 +92,27 @@ This project uses [gqlgen](https://github.com/99designs/gqlgen) for the GraphQL
The schema can be found in `./api/schema.graphqls`.
After changing it, you need to run `go run github.com/99designs/gqlgen` which will update `./internal/graph/model`.
In case new resolvers are needed, they will be inserted into `./internal/graph/schema.resolvers.go`, where you will need to implement them.
If you start cc-backend with flag `--dev` the GraphQL Playground UI is available at http://localhost:8080/playground .
### Update Swagger UI
This project integrates [swagger ui](https://swagger.io/tools/swagger-ui/) to document and test its REST API.
The swagger doc files can be found in `./api/`.
You can generate the configuration of swagger-ui by running `go run github.com/swaggo/swag/cmd/swag init -d ./internal/api,./pkg/schema -g rest.go -o ./api `.
You need to move the generated `./api/doc.go` to `./internal/api/doc.go`.
If you start cc-backend with flag `--dev` the Swagger UI is available at http://localhost:8080/swagger .
## Project Structure
- `api/` contains the API schema files for the REST and GraphQL APIs. The REST API is documented in the OpenAPI 3.0 format in [./api/openapi.yaml](./api/openapi.yaml).
- `cmd/cc-backend` contains `main.go` for the main application.
- `cmd/gen-keypair` contains is a small application to generate a compatible JWT keypair includin a README about JWT setup in ClusterCockpit.
- `configs/` contains documentation about configuration and command line options and required environment variables. An example configuration file is provided.
- `init/` contains an example systemd setup for production use.
- `internal/` contains library source code that is not intended to be used by others.
- `pkg/` contains go packages that can also be used by other projects.
- `test/` Test apps and test data.
- `tools/` contains supporting tools for cc-backend. At the moment this is a small application to generate a compatible JWT keypair includin a README about JWT setup in ClusterCockpit.
- `web/` Server side templates and frontend related files:
- `templates` Serverside go templates
- `frontend` Svelte components and static assets for frontend UI

View File

@ -1,221 +0,0 @@
#
# ClusterCockpit's API spec can be exported via:
# docker exec -it cc-php php bin/console api:openapi:export --yaml
#
# This spec is written by hand and hopefully up to date with the API.
#
openapi: 3.0.3
info:
title: 'ClusterCockpit REST API'
description: 'API for batch job control'
version: 0.0.2
servers:
- url: /
description: ''
paths:
'/api/jobs/':
get:
operationId: 'getJobs'
summary: 'List all jobs'
description: 'Get a list of all jobs. Filters can be applied using query parameters.'
parameters:
- name: state
in: query
schema:
type: string
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
- name: cluster
in: query
schema: { type: string }
- name: start-time
description: 'Syntax: "<from>-<to>", where <from> and <to> are unix timestamps in seconds'
in: query
schema: { type: string }
- name: page
in: query
schema: { type: integer }
- name: items-per-page
in: query
schema: { type: integer }
- name: with-metadata
in: query
schema: { type: boolean }
responses:
200:
description: 'Array of jobs'
content:
'application/json':
schema:
type: object
properties:
jobs:
type: array
items:
$ref: '#/components/schemas/Job'
400:
description: 'Bad Request'
'/api/jobs/tag_job/{id}':
post:
operationId: 'tagJob'
summary: 'Add a tag to a job'
parameters:
- name: id
in: path
required: true
schema: { type: integer }
description: 'Job ID'
requestBody:
description: 'Array of tags to add'
required: true
content:
'application/json':
schema:
type: array
items:
$ref: '#/components/schemas/Tag'
responses:
200:
description: 'Job resource'
content:
'application/json':
schema:
$ref: '#/components/schemas/Job'
404:
description: 'Job or tag does not exist'
400:
description: 'Bad request'
'/api/jobs/start_job/':
post:
operationId: 'startJob'
summary: 'Add a newly started job'
requestBody:
required: true
content:
'application/json':
schema:
$ref: '#/components/schemas/Job'
responses:
201:
description: 'Job successfully'
content:
'application/json':
schema:
type: object
properties:
id:
type: integer
description: 'The database ID assigned to this job'
400:
description: 'Bad request'
422:
description: 'The combination of jobId, clusterId and startTime does already exist'
'/api/jobs/stop_job/':
post:
operationId: stopJobViaJobID
summary: 'Mark a job as stopped. Which job to stop is specified by the request body.'
requestBody:
required: true
content:
'application/json':
schema:
type: object
required: [jobId, cluster, stopTime, jobState]
properties:
jobId: { type: integer }
cluster: { type: string }
startTime: { type: integer }
stopTime: { type: integer }
jobState:
type: string
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
responses:
200:
description: 'Job resource'
content:
'application/json':
schema:
$ref: '#/components/schemas/Job'
400:
description: 'Bad request'
404:
description: 'Resource not found'
'/api/jobs/stop_job/{id}':
post:
operationId: 'stopJobViaDBID'
summary: 'Mark a job as stopped.'
parameters:
- name: id
in: path
required: true
schema: { type: integer }
description: 'Database ID (Resource Identifier)'
requestBody:
required: true
content:
'application/json':
schema:
type: object
required: [stopTime, jobState]
properties:
stopTime: { type: integer }
jobState:
type: string
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
responses:
200:
description: 'Job resource'
content:
'application/json':
schema:
$ref: '#/components/schemas/Job'
400:
description: 'Bad request'
404:
description: 'Resource not found'
'/api/jobs/import/':
post:
operationId: 'importJob'
summary: 'Imports a job and its metric data'
requestBody:
required: true
content:
'application/json':
schema:
type: object
properties:
meta:
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
data:
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-data.schema.json
responses:
200:
description: 'Import successful'
400:
description: 'Bad request'
422:
description: 'Unprocessable Entity'
components:
schemas:
Tag:
description: 'A job tag'
type: object
properties:
id:
type: string
description: 'Database ID'
type:
type: string
description: 'Tag type'
name:
type: string
description: 'Tag name'
Job:
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
securitySchemes:
bearerAuth:
type: http
scheme: bearer
bearerFormat: JWT
security:
- bearerAuth: [] # Applies `bearerAuth` globally

View File

@ -35,7 +35,6 @@ type Cluster {
name: String!
partitions: [String!]! # Slurm partitions
metricConfig: [MetricConfig!]!
filterRanges: FilterRanges!
subClusters: [SubCluster!]! # Hardware partitions/subclusters
}
@ -196,12 +195,6 @@ type Mutation {
type IntRangeOutput { from: Int!, to: Int! }
type TimeRangeOutput { from: Time!, to: Time! }
type FilterRanges {
duration: IntRangeOutput!
numNodes: IntRangeOutput!
startTime: TimeRangeOutput!
}
input JobFilter {
tags: [ID!]
jobId: StringInput

840
api/swagger.json Normal file
View File

@ -0,0 +1,840 @@
{
"swagger": "2.0",
"info": {
"description": "Defines a tag using name and type.",
"title": "ClusterCockpit REST API",
"termsOfService": "https://monitoring.nhr.fau.de/imprint",
"contact": {
"name": "ClusterCockpit Project",
"url": "https://github.com/ClusterCockpit",
"email": "support@clustercockpit.org"
},
"license": {
"name": "MIT License",
"url": "https://opensource.org/licenses/MIT"
},
"version": "0.1.0"
},
"host": "clustercockpit.localhost:8082",
"basePath": "/api",
"paths": {
"/jobs/": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Get a list of all jobs. Filters can be applied using query parameters.\nNumber of results can be limited by page. Results are sorted by descending startTime.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Lists all jobs",
"parameters": [
{
"enum": [
"running",
"completed",
"failed",
"cancelled",
"stopped",
"timeout"
],
"type": "string",
"description": "Job State",
"name": "state",
"in": "query"
},
{
"type": "string",
"description": "Job Cluster",
"name": "cluster",
"in": "query"
},
{
"type": "string",
"description": "Syntax: '$from-$to', as unix epoch timestamps in seconds",
"name": "start-time",
"in": "query"
},
{
"type": "integer",
"description": "Items per page (If empty: No Limit)",
"name": "items-per-page",
"in": "query"
},
{
"type": "integer",
"description": "Page Number (If empty: No Paging)",
"name": "page",
"in": "query"
},
{
"type": "boolean",
"description": "Include metadata (e.g. jobScript) in response",
"name": "with-metadata",
"in": "query"
}
],
"responses": {
"200": {
"description": "Array of matching jobs",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Job"
}
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/start_job/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Adds a new job as \"running\"",
"parameters": [
{
"description": "Job to add",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
}
],
"responses": {
"201": {
"description": "Job added successfully",
"schema": {
"$ref": "#/definitions/api.StartJobApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: The combination of jobId, clusterId and startTime does already exist",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/stop_job/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'JobMeta' scheme.",
"produces": [
"application/json"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
"description": "All fields required",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.StopJobApiRequest"
}
}
],
"responses": {
"200": {
"description": "Job resource",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/stop_job/{id}": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to stop is specified by database ID. Only stopTime and final state are required in request body.\nReturns full job resource information according to 'JobMeta' scheme.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
"type": "integer",
"description": "Database ID of Job",
"name": "id",
"in": "path",
"required": true
},
{
"description": "stopTime and final state in request body",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.StopJobApiRequest"
}
}
],
"responses": {
"200": {
"description": "Job resource",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/tag_job/{id}": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Adds one or more tags to a job",
"parameters": [
{
"type": "integer",
"description": "Job Database ID",
"name": "id",
"in": "path",
"required": true
},
{
"description": "Array of tag-objects to add",
"name": "request",
"in": "body",
"required": true,
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/api.Tag"
}
}
}
],
"responses": {
"200": {
"description": "Updated job resource",
"schema": {
"$ref": "#/definitions/schema.Job"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Job or tag does not exist",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
}
},
"definitions": {
"api.ErrorResponse": {
"description": "Error message as returned from backend.",
"type": "object",
"properties": {
"error": {
"description": "Error Message",
"type": "string"
},
"status": {
"description": "Statustext of Errorcode",
"type": "string"
}
}
},
"api.StartJobApiResponse": {
"description": "Successful job start response with database id of new job.",
"type": "object",
"properties": {
"id": {
"description": "Database ID of new job",
"type": "integer"
}
}
},
"api.StopJobApiRequest": {
"description": "Request to stop running job using stoptime and final state. They are only required if no database id was provided with endpoint.",
"type": "object",
"required": [
"jobState",
"stopTime"
],
"properties": {
"cluster": {
"description": "Cluster of job",
"type": "string",
"example": "fritz"
},
"jobId": {
"description": "Cluster Job ID of job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout"
],
"example": "completed"
},
"startTime": {
"description": "Start Time of job as epoch",
"type": "integer",
"example": 1649723812
},
"stopTime": {
"description": "Stop Time of job as epoch",
"type": "integer",
"example": 1649763839
}
}
},
"api.Tag": {
"description": "Defines a tag using name and type.",
"type": "object",
"properties": {
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}
}
},
"schema.Job": {
"description": "Information of a HPC job.",
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"example": "completed"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start time as 'time.Time' data type",
"type": "string"
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
}
}
},
"schema.JobMeta": {
"description": "Meta data information of a HPC job.",
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"example": "completed"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start epoch time stamp in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 1649723812
},
"statistics": {
"description": "Metric statistics of job",
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/schema.JobStatistics"
}
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
}
}
},
"schema.JobStatistics": {
"description": "Specification for job metric statistics.",
"type": "object",
"properties": {
"avg": {
"description": "Job metric average",
"type": "number",
"minimum": 0,
"example": 2500
},
"max": {
"description": "Job metric maximum",
"type": "number",
"minimum": 0,
"example": 3000
},
"min": {
"description": "Job metric minimum",
"type": "number",
"minimum": 0,
"example": 2000
},
"unit": {
"description": "Metric unit (see schema/unit.schema.json)",
"type": "string",
"example": "GHz"
}
}
},
"schema.Resource": {
"description": "A resource used by a job",
"type": "object",
"properties": {
"accelerators": {
"description": "List of of accelerator device ids",
"type": "array",
"items": {
"type": "string"
}
},
"configuration": {
"description": "The configuration options of the node",
"type": "string"
},
"hostname": {
"description": "Name of the host (= node)",
"type": "string"
},
"hwthreads": {
"description": "List of OS processor ids",
"type": "array",
"items": {
"type": "integer"
}
}
}
},
"schema.Tag": {
"description": "Defines a tag using name and type.",
"type": "object",
"properties": {
"id": {
"description": "The unique DB identifier of a tag",
"type": "integer"
},
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}
}
}
},
"securityDefinitions": {
"ApiKeyAuth": {
"description": "JWT based authentification for general API endpoint use.",
"type": "apiKey",
"name": "X-Auth-Token",
"in": "header"
}
}
}

623
api/swagger.yaml Normal file
View File

@ -0,0 +1,623 @@
basePath: /api
definitions:
api.ErrorResponse:
description: Error message as returned from backend.
properties:
error:
description: Error Message
type: string
status:
description: Statustext of Errorcode
type: string
type: object
api.StartJobApiResponse:
description: Successful job start response with database id of new job.
properties:
id:
description: Database ID of new job
type: integer
type: object
api.StopJobApiRequest:
description: Request to stop running job using stoptime and final state. They
are only required if no database id was provided with endpoint.
properties:
cluster:
description: Cluster of job
example: fritz
type: string
jobId:
description: Cluster Job ID of job
example: 123000
type: integer
jobState:
description: Final state of job
enum:
- completed
- failed
- cancelled
- stopped
- timeout
example: completed
type: string
startTime:
description: Start Time of job as epoch
example: 1649723812
type: integer
stopTime:
description: Stop Time of job as epoch
example: 1649763839
type: integer
required:
- jobState
- stopTime
type: object
api.Tag:
description: Defines a tag using name and type.
properties:
name:
description: Tag Name
example: Testjob
type: string
type:
description: Tag Type
example: Debug
type: string
type: object
schema.Job:
description: Information of a HPC job.
properties:
arrayJobId:
description: The unique identifier of an array job
example: 123000
type: integer
cluster:
description: The unique identifier of a cluster
example: fritz
type: string
duration:
description: Duration of job in seconds (Min > 0)
example: 43200
minimum: 1
type: integer
exclusive:
description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
jobs of same user'
example: 1
maximum: 2
minimum: 0
type: integer
id:
description: The unique identifier of a job in the database
type: integer
jobId:
description: The unique identifier of a job
example: 123000
type: integer
jobState:
description: Final state of job
enum:
- completed
- failed
- cancelled
- stopped
- timeout
- out_of_memory
example: completed
type: string
metaData:
additionalProperties:
type: string
description: Additional information about the job
type: object
monitoringStatus:
description: 'State of monitoring system during job run: 0 - Disabled, 1 -
Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
example: 1
maximum: 3
minimum: 0
type: integer
numAcc:
description: Number of accelerators used (Min > 0)
example: 2
minimum: 1
type: integer
numHwthreads:
description: Number of HWThreads used (Min > 0)
example: 20
minimum: 1
type: integer
numNodes:
description: Number of nodes used (Min > 0)
example: 2
minimum: 1
type: integer
partition:
description: The Slurm partition to which the job was submitted
example: main
type: string
project:
description: The unique identifier of a project
example: abcd200
type: string
resources:
description: Resources used by job
items:
$ref: '#/definitions/schema.Resource'
type: array
smt:
description: SMT threads used by job
example: 4
type: integer
startTime:
description: Start time as 'time.Time' data type
type: string
subCluster:
description: The unique identifier of a sub cluster
example: main
type: string
tags:
description: List of tags
items:
$ref: '#/definitions/schema.Tag'
type: array
user:
description: The unique identifier of a user
example: abcd100h
type: string
walltime:
description: Requested walltime of job in seconds (Min > 0)
example: 86400
minimum: 1
type: integer
type: object
schema.JobMeta:
description: Meta data information of a HPC job.
properties:
arrayJobId:
description: The unique identifier of an array job
example: 123000
type: integer
cluster:
description: The unique identifier of a cluster
example: fritz
type: string
duration:
description: Duration of job in seconds (Min > 0)
example: 43200
minimum: 1
type: integer
exclusive:
description: 'Specifies how nodes are shared: 0 - Shared among multiple jobs
of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple
jobs of same user'
example: 1
maximum: 2
minimum: 0
type: integer
id:
description: The unique identifier of a job in the database
type: integer
jobId:
description: The unique identifier of a job
example: 123000
type: integer
jobState:
description: Final state of job
enum:
- completed
- failed
- cancelled
- stopped
- timeout
- out_of_memory
example: completed
type: string
metaData:
additionalProperties:
type: string
description: Additional information about the job
type: object
monitoringStatus:
description: 'State of monitoring system during job run: 0 - Disabled, 1 -
Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull'
example: 1
maximum: 3
minimum: 0
type: integer
numAcc:
description: Number of accelerators used (Min > 0)
example: 2
minimum: 1
type: integer
numHwthreads:
description: Number of HWThreads used (Min > 0)
example: 20
minimum: 1
type: integer
numNodes:
description: Number of nodes used (Min > 0)
example: 2
minimum: 1
type: integer
partition:
description: The Slurm partition to which the job was submitted
example: main
type: string
project:
description: The unique identifier of a project
example: abcd200
type: string
resources:
description: Resources used by job
items:
$ref: '#/definitions/schema.Resource'
type: array
smt:
description: SMT threads used by job
example: 4
type: integer
startTime:
description: Start epoch time stamp in seconds (Min > 0)
example: 1649723812
minimum: 1
type: integer
statistics:
additionalProperties:
$ref: '#/definitions/schema.JobStatistics'
description: Metric statistics of job
type: object
subCluster:
description: The unique identifier of a sub cluster
example: main
type: string
tags:
description: List of tags
items:
$ref: '#/definitions/schema.Tag'
type: array
user:
description: The unique identifier of a user
example: abcd100h
type: string
walltime:
description: Requested walltime of job in seconds (Min > 0)
example: 86400
minimum: 1
type: integer
type: object
schema.JobStatistics:
description: Specification for job metric statistics.
properties:
avg:
description: Job metric average
example: 2500
minimum: 0
type: number
max:
description: Job metric maximum
example: 3000
minimum: 0
type: number
min:
description: Job metric minimum
example: 2000
minimum: 0
type: number
unit:
description: Metric unit (see schema/unit.schema.json)
example: GHz
type: string
type: object
schema.Resource:
description: A resource used by a job
properties:
accelerators:
description: List of of accelerator device ids
items:
type: string
type: array
configuration:
description: The configuration options of the node
type: string
hostname:
description: Name of the host (= node)
type: string
hwthreads:
description: List of OS processor ids
items:
type: integer
type: array
type: object
schema.Tag:
description: Defines a tag using name and type.
properties:
id:
description: The unique DB identifier of a tag
type: integer
name:
description: Tag Name
example: Testjob
type: string
type:
description: Tag Type
example: Debug
type: string
type: object
host: clustercockpit.localhost:8082
info:
contact:
email: support@clustercockpit.org
name: ClusterCockpit Project
url: https://github.com/ClusterCockpit
description: Defines a tag using name and type.
license:
name: MIT License
url: https://opensource.org/licenses/MIT
termsOfService: https://monitoring.nhr.fau.de/imprint
title: ClusterCockpit REST API
version: 0.1.0
paths:
/jobs/:
get:
consumes:
- application/json
description: |-
Get a list of all jobs. Filters can be applied using query parameters.
Number of results can be limited by page. Results are sorted by descending startTime.
parameters:
- description: Job State
enum:
- running
- completed
- failed
- cancelled
- stopped
- timeout
in: query
name: state
type: string
- description: Job Cluster
in: query
name: cluster
type: string
- description: 'Syntax: ''$from-$to'', as unix epoch timestamps in seconds'
in: query
name: start-time
type: string
- description: 'Items per page (If empty: No Limit)'
in: query
name: items-per-page
type: integer
- description: 'Page Number (If empty: No Paging)'
in: query
name: page
type: integer
- description: Include metadata (e.g. jobScript) in response
in: query
name: with-metadata
type: boolean
produces:
- application/json
responses:
"200":
description: Array of matching jobs
schema:
items:
$ref: '#/definitions/schema.Job'
type: array
"400":
description: Bad Request
schema:
$ref: '#/definitions/api.ErrorResponse'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/api.ErrorResponse'
security:
- ApiKeyAuth: []
summary: Lists all jobs
/jobs/start_job/:
post:
consumes:
- application/json
description: |-
Job specified in request body will be saved to database as "running" with new DB ID.
Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
parameters:
- description: Job to add
in: body
name: request
required: true
schema:
$ref: '#/definitions/schema.JobMeta'
produces:
- application/json
responses:
"201":
description: Job added successfully
schema:
$ref: '#/definitions/api.StartJobApiResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/api.ErrorResponse'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"403":
description: Forbidden
schema:
$ref: '#/definitions/api.ErrorResponse'
"422":
description: 'Unprocessable Entity: The combination of jobId, clusterId
and startTime does already exist'
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/api.ErrorResponse'
security:
- ApiKeyAuth: []
summary: Adds a new job as "running"
/jobs/stop_job/:
post:
description: |-
Job to stop is specified by request body. All fields are required in this case.
Returns full job resource information according to 'JobMeta' scheme.
parameters:
- description: All fields required
in: body
name: request
required: true
schema:
$ref: '#/definitions/api.StopJobApiRequest'
produces:
- application/json
responses:
"200":
description: Job resource
schema:
$ref: '#/definitions/schema.JobMeta'
"400":
description: Bad Request
schema:
$ref: '#/definitions/api.ErrorResponse'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"403":
description: Forbidden
schema:
$ref: '#/definitions/api.ErrorResponse'
"404":
description: Resource not found
schema:
$ref: '#/definitions/api.ErrorResponse'
"422":
description: 'Unprocessable Entity: finding job failed: sql: no rows in
result set'
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/api.ErrorResponse'
security:
- ApiKeyAuth: []
summary: Marks job as completed and triggers archiving
/jobs/stop_job/{id}:
post:
consumes:
- application/json
description: |-
Job to stop is specified by database ID. Only stopTime and final state are required in request body.
Returns full job resource information according to 'JobMeta' scheme.
parameters:
- description: Database ID of Job
in: path
name: id
required: true
type: integer
- description: stopTime and final state in request body
in: body
name: request
required: true
schema:
$ref: '#/definitions/api.StopJobApiRequest'
produces:
- application/json
responses:
"200":
description: Job resource
schema:
$ref: '#/definitions/schema.JobMeta'
"400":
description: Bad Request
schema:
$ref: '#/definitions/api.ErrorResponse'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"403":
description: Forbidden
schema:
$ref: '#/definitions/api.ErrorResponse'
"404":
description: Resource not found
schema:
$ref: '#/definitions/api.ErrorResponse'
"422":
description: 'Unprocessable Entity: finding job failed: sql: no rows in
result set'
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/api.ErrorResponse'
security:
- ApiKeyAuth: []
summary: Marks job as completed and triggers archiving
/jobs/tag_job/{id}:
post:
consumes:
- application/json
description: |-
Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
If tagged job is already finished: Tag will be written directly to respective archive files.
parameters:
- description: Job Database ID
in: path
name: id
required: true
type: integer
- description: Array of tag-objects to add
in: body
name: request
required: true
schema:
items:
$ref: '#/definitions/api.Tag'
type: array
produces:
- application/json
responses:
"200":
description: Updated job resource
schema:
$ref: '#/definitions/schema.Job'
"400":
description: Bad Request
schema:
$ref: '#/definitions/api.ErrorResponse'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"404":
description: Job or tag does not exist
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/api.ErrorResponse'
security:
- ApiKeyAuth: []
summary: Adds one or more tags to a job
securityDefinitions:
ApiKeyAuth:
description: JWT based authentification for general API endpoint use.
in: header
name: X-Auth-Token
type: apiKey
swagger: "2.0"

View File

@ -7,7 +7,6 @@ package main
import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"flag"
"fmt"
@ -35,119 +34,57 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/routerConfig"
"github.com/ClusterCockpit/cc-backend/internal/runtimeEnv"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/web"
"github.com/google/gops/agent"
"github.com/gorilla/handlers"
"github.com/gorilla/mux"
httpSwagger "github.com/swaggo/http-swagger"
_ "github.com/go-sql-driver/mysql"
_ "github.com/mattn/go-sqlite3"
)
// Format of the configurartion (file). See below for the defaults.
type ProgramConfig struct {
// Address where the http (or https) server will listen on (for example: 'localhost:80').
Addr string `json:"addr"`
const logoString = `
____ _ _ ____ _ _ _
/ ___| |_ _ ___| |_ ___ _ __ / ___|___ ___| | ___ __ (_) |_
| | | | | | / __| __/ _ \ '__| | / _ \ / __| |/ / '_ \| | __|
| |___| | |_| \__ \ || __/ | | |__| (_) | (__| <| |_) | | |_
\____|_|\__,_|___/\__\___|_| \____\___/ \___|_|\_\ .__/|_|\__|
|_|
`
// Drop root permissions once .env was read and the port was taken.
User string `json:"user"`
Group string `json:"group"`
// Disable authentication (for everything: API, Web-UI, ...)
DisableAuthentication bool `json:"disable-authentication"`
// If `embed-static-files` is true (default), the frontend files are directly
// embeded into the go binary and expected to be in web/frontend. Only if
// it is false the files in `static-files` are served instead.
EmbedStaticFiles bool `json:"embed-static-files"`
StaticFiles string `json:"static-files"`
// 'sqlite3' or 'mysql' (mysql will work for mariadb as well)
DBDriver string `json:"db-driver"`
// For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).
DB string `json:"db"`
// Path to the job-archive
JobArchive string `json:"job-archive"`
// Keep all metric data in the metric data repositories,
// do not write to the job-archive.
DisableArchive bool `json:"disable-archive"`
// For LDAP Authentication and user synchronisation.
LdapConfig *auth.LdapConfig `json:"ldap"`
JwtConfig *auth.JWTAuthConfig `json:"jwts"`
// If 0 or empty, the session/token does not expire!
SessionMaxAge string `json:"session-max-age"`
// If both those options are not empty, use HTTPS using those certificates.
HttpsCertFile string `json:"https-cert-file"`
HttpsKeyFile string `json:"https-key-file"`
// If not the empty string and `addr` does not end in ":80",
// redirect every request incoming at port 80 to that url.
RedirectHttpTo string `json:"redirect-http-to"`
// If overwriten, at least all the options in the defaults below must
// be provided! Most options here can be overwritten by the user.
UiDefaults map[string]interface{} `json:"ui-defaults"`
// Where to store MachineState files
MachineStateDir string `json:"machine-state-dir"`
// If not zero, automatically mark jobs as stopped running X seconds longer than their walltime.
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`
}
var programConfig ProgramConfig = ProgramConfig{
Addr: ":8080",
DisableAuthentication: false,
EmbedStaticFiles: true,
DBDriver: "sqlite3",
DB: "./var/job.db",
JobArchive: "./var/job-archive",
DisableArchive: false,
LdapConfig: nil,
SessionMaxAge: "168h",
UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
"plot_list_hideShortRunningJobs": 5 * 60,
"plot_list_jobsPerPage": 50,
"plot_list_selectedMetrics": []string{"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"},
"plot_view_plotsPerRow": 3,
"plot_view_showPolarplot": true,
"plot_view_showRoofline": true,
"plot_view_showStatTable": true,
"system_view_selectedMetric": "cpu_load",
},
StopJobsExceedingWalltime: 0,
}
var (
buildTime string
hash string
version string
)
func main() {
var flagReinitDB, flagStopImmediately, flagSyncLDAP, flagGops bool
var flagConfigFile, flagImportJob string
var flagNewUser, flagDelUser, flagGenJWT string
var flagReinitDB, flagServer, flagSyncLDAP, flagGops, flagDev, flagVersion bool
var flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob string
flag.BoolVar(&flagReinitDB, "init-db", false, "Go through job-archive and re-initialize the 'job', 'tag', and 'jobtag' tables (all running jobs will be lost!)")
flag.BoolVar(&flagSyncLDAP, "sync-ldap", false, "Sync the 'user' table with ldap")
flag.BoolVar(&flagStopImmediately, "no-server", false, "Do not start a server, stop right after initialization and argument handling")
flag.BoolVar(&flagServer, "server", false, "Start a server, continues listening on port after initialization and argument handling")
flag.BoolVar(&flagGops, "gops", false, "Listen via github.com/google/gops/agent (for debugging)")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Overwrite the global config options by those specified in `config.json`")
flag.BoolVar(&flagDev, "dev", false, "Enable development components: GraphQL Playground and Swagger UI")
flag.BoolVar(&flagVersion, "version", false, "Show version information and exit")
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: `<username>:[admin,support,api,user]:<password>`")
flag.StringVar(&flagDelUser, "del-user", "", "Remove user by `username`")
flag.StringVar(&flagGenJWT, "jwt", "", "Generate and print a JWT for the user specified by its `username`")
flag.StringVar(&flagImportJob, "import-job", "", "Import a job. Argument format: `<path-to-meta.json>:<path-to-data.json>,...`")
flag.Parse()
if flagVersion {
fmt.Print(logoString)
fmt.Printf("Version:\t%s\n", version)
fmt.Printf("Git hash:\t%s\n", hash)
fmt.Printf("Build time:\t%s\n", buildTime)
os.Exit(0)
}
// See https://github.com/google/gops (Runtime overhead is almost zero)
if flagGops {
if err := agent.Listen(agent.Options{}); err != nil {
@ -159,46 +96,32 @@ func main() {
log.Fatalf("parsing './.env' file failed: %s", err.Error())
}
// Load JSON config:
f, err := os.Open(flagConfigFile)
if err != nil {
if !os.IsNotExist(err) || flagConfigFile != "./config.json" {
log.Fatal(err)
}
} else {
dec := json.NewDecoder(f)
dec.DisallowUnknownFields()
if err := dec.Decode(&programConfig); err != nil {
log.Fatal(err)
}
f.Close()
}
// Initialize sub-modules and handle command line flags.
// The order here is important!
config.Init(flagConfigFile)
// As a special case for `db`, allow using an environment variable instead of the value
// stored in the config. This can be done for people having security concerns about storing
// the password for their mysql database in the config.json.
if strings.HasPrefix(programConfig.DB, "env:") {
envvar := strings.TrimPrefix(programConfig.DB, "env:")
programConfig.DB = os.Getenv(envvar)
// the password for their mysql database in config.json.
if strings.HasPrefix(config.Keys.DB, "env:") {
envvar := strings.TrimPrefix(config.Keys.DB, "env:")
config.Keys.DB = os.Getenv(envvar)
}
repository.Connect(programConfig.DBDriver, programConfig.DB)
repository.Connect(config.Keys.DBDriver, config.Keys.DB)
db := repository.GetConnection()
// Initialize sub-modules and handle all command line flags.
// The order here is important! For example, the metricdata package
// depends on the config package.
var authentication *auth.Authentication
if !programConfig.DisableAuthentication {
if !config.Keys.DisableAuthentication {
var err error
if authentication, err = auth.Init(db.DB, map[string]interface{}{
"ldap": programConfig.LdapConfig,
"jwt": programConfig.JwtConfig,
"ldap": config.Keys.LdapConfig,
"jwt": config.Keys.JwtConfig,
}); err != nil {
log.Fatal(err)
}
if d, err := time.ParseDuration(programConfig.SessionMaxAge); err != nil {
if d, err := time.ParseDuration(config.Keys.SessionMaxAge); err != nil {
authentication.SessionMaxAge = d
}
@ -252,34 +175,32 @@ func main() {
log.Fatal("arguments --add-user and --del-user can only be used if authentication is enabled")
}
if err := config.Init(db.DB, !programConfig.DisableAuthentication, programConfig.UiDefaults, programConfig.JobArchive); err != nil {
if err := archive.Init(config.Keys.Archive); err != nil {
log.Fatal(err)
}
if err := metricdata.Init(programConfig.JobArchive, programConfig.DisableArchive); err != nil {
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
log.Fatal(err)
}
if flagReinitDB {
if err := repository.InitDB(db.DB, programConfig.JobArchive); err != nil {
if err := repository.InitDB(); err != nil {
log.Fatal(err)
}
}
jobRepo := repository.GetRepository()
if flagImportJob != "" {
if err := jobRepo.HandleImportFlag(flagImportJob); err != nil {
if err := repository.HandleImportFlag(flagImportJob); err != nil {
log.Fatalf("import failed: %s", err.Error())
}
}
if flagStopImmediately {
if !flagServer {
return
}
// Setup the http.Handler/Router used by the server
jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
graphQLEndpoint := handler.NewDefaultServer(generated.NewExecutableSchema(generated.Config{Resolvers: resolver}))
if os.Getenv("DEBUG") != "1" {
@ -300,7 +221,7 @@ func main() {
api := &api.RestApi{
JobRepository: jobRepo,
Resolver: resolver,
MachineStateDir: programConfig.MachineStateDir,
MachineStateDir: config.Keys.MachineStateDir,
Authentication: authentication,
}
@ -323,7 +244,7 @@ func main() {
// Those should be mounted to this subrouter. If authentication is enabled, a middleware will prevent
// any unauthenticated accesses.
secured := r.PathPrefix("/").Subrouter()
if !programConfig.DisableAuthentication {
if !config.Keys.DisableAuthentication {
r.Handle("/login", authentication.Login(
// On success:
http.RedirectHandler("/", http.StatusTemporaryRedirect),
@ -363,7 +284,11 @@ func main() {
})
}
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
if flagDev {
r.Handle("/playground", playground.Handler("GraphQL playground", "/query"))
r.PathPrefix("/swagger/").Handler(httpSwagger.Handler(
httpSwagger.URL("http://localhost:8080/swagger/doc.json"))).Methods(http.MethodGet)
}
secured.Handle("/query", graphQLEndpoint)
// Send a searchId and then reply with a redirect to a user or job.
@ -394,10 +319,10 @@ func main() {
routerConfig.SetupRoutes(secured)
api.MountRoutes(secured)
if programConfig.EmbedStaticFiles {
if config.Keys.EmbedStaticFiles {
r.PathPrefix("/").Handler(web.ServeFiles())
} else {
r.PathPrefix("/").Handler(http.FileServer(http.Dir(programConfig.StaticFiles)))
r.PathPrefix("/").Handler(http.FileServer(http.Dir(config.Keys.StaticFiles)))
}
r.Use(handlers.CompressHandler)
@ -426,24 +351,23 @@ func main() {
ReadTimeout: 10 * time.Second,
WriteTimeout: 10 * time.Second,
Handler: handler,
Addr: programConfig.Addr,
Addr: config.Keys.Addr,
}
// Start http or https server
listener, err := net.Listen("tcp", programConfig.Addr)
listener, err := net.Listen("tcp", config.Keys.Addr)
if err != nil {
log.Fatal(err)
}
if !strings.HasSuffix(programConfig.Addr, ":80") && programConfig.RedirectHttpTo != "" {
if !strings.HasSuffix(config.Keys.Addr, ":80") && config.Keys.RedirectHttpTo != "" {
go func() {
http.ListenAndServe(":80", http.RedirectHandler(programConfig.RedirectHttpTo, http.StatusMovedPermanently))
http.ListenAndServe(":80", http.RedirectHandler(config.Keys.RedirectHttpTo, http.StatusMovedPermanently))
}()
}
if programConfig.HttpsCertFile != "" && programConfig.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(programConfig.HttpsCertFile, programConfig.HttpsKeyFile)
if config.Keys.HttpsCertFile != "" && config.Keys.HttpsKeyFile != "" {
cert, err := tls.LoadX509KeyPair(config.Keys.HttpsCertFile, config.Keys.HttpsKeyFile)
if err != nil {
log.Fatal(err)
}
@ -456,15 +380,15 @@ func main() {
MinVersion: tls.VersionTLS12,
PreferServerCipherSuites: true,
})
log.Printf("HTTPS server listening at %s...", programConfig.Addr)
log.Printf("HTTPS server listening at %s...", config.Keys.Addr)
} else {
log.Printf("HTTP server listening at %s...", programConfig.Addr)
log.Printf("HTTP server listening at %s...", config.Keys.Addr)
}
// Because this program will want to bind to a privileged port (like 80), the listener must
// be established first, then the user can be changed, and after that,
// the actuall http server can be started.
if err := runtimeEnv.DropPrivileges(programConfig.Group, programConfig.User); err != nil {
if err := runtimeEnv.DropPrivileges(config.Keys.Group, config.Keys.User); err != nil {
log.Fatalf("error while changing user: %s", err.Error())
}
@ -491,10 +415,10 @@ func main() {
api.OngoingArchivings.Wait()
}()
if programConfig.StopJobsExceedingWalltime > 0 {
if config.Keys.StopJobsExceedingWalltime > 0 {
go func() {
for range time.Tick(30 * time.Minute) {
err := jobRepo.StopJobsExceedingWalltimeBy(programConfig.StopJobsExceedingWalltime)
err := jobRepo.StopJobsExceedingWalltimeBy(config.Keys.StopJobsExceedingWalltime)
if err != nil {
log.Errorf("error while looking for jobs exceeding theire walltime: %s", err.Error())
}

View File

@ -1,9 +1,10 @@
## Intro
cc-backend can be used without a configuration file. In this case the default
options documented below are used. To overwrite the defaults specify a json
config file location using the command line option `--config <filepath>`.
All security relevant configuration. e.g., keys and passwords, are set using environment variables. It is supported to specify these by means of an `.env` file located in the project root.
cc-backend requires a configuration file speciyfing the cluster systems to be used. Still many default
options documented below are used. cc-backend tries to load a config.json from the working directory per default.
To overwrite the default specify a json config file location using the command line option `--config <filepath>`.
All security relevant configuration. e.g., keys and passwords, are set using environment variables.
It is supported to specify these by means of an `.env` file located in the project root.
## Configuration Options
@ -17,6 +18,7 @@ All security relevant configuration. e.g., keys and passwords, are set using env
* `db`: Type string. For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!). Default: `./var/job.db`.
* `job-archive`: Type string. Path to the job-archive. Default: `./var/job-archive`.
* `disable-archive`: Type bool. Keep all metric data in the metric data repositories, do not write to the job-archive. Default `false`.
* `validate`: Type bool. Validate all input json documents against json schema.
* `"session-max-age`: Type string. Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `168h`.
* `"jwt-max-age`: Type string. Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire! Default `0`.
* `https-cert-file` and `https-key-file`: Type string. If both those options are not empty, use HTTPS using those certificates.
@ -26,17 +28,28 @@ All security relevant configuration. e.g., keys and passwords, are set using env
* `ldap`: Type object. For LDAP Authentication and user synchronisation. Default `nil`.
- `url`: Type string. URL of LDAP directory server.
- `user_base`: Type string. Base DN of user tree root.
- `search_dn`: Type string. DN for authenticating LDAP admin account with fgeneral read rights.
- `search_dn`: Type string. DN for authenticating LDAP admin account with general read rights.
- `user_bind`: Type string. Expression used to authenticate users via LDAP bind. Must contain `uid={username}`.
- `user_filter`: Type string. Filter to extract users for syncing.
- `sync_interval`: Type string. Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.
- `sync_del_old_users`: Type bool. Delete obsolete users in database.
* `clusters`: Type array of objects
- `name`: Type string. The name of the cluster.
- `metricDataRepository`: Type object with properties: `kind` (Type string, can be one of `cc-metric-store`, `influxdb` ), `url` (Type string), `token` (Type string)
- `filterRanges` Type object. This option controls the slider ranges for the UI controls of numNodes, duration, and startTime. Example:
```
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
```
* `ui-defaults`: Type object. Default configuration for ui views. If overwriten, all options must be provided! Most options can be overwritten by the user via the web interface.
- `analysis_view_histogramMetrics`: Type string array. Metrics to show as job count histograms in analysis view. Default `["flops_any", "mem_bw", "mem_used"]`.
- `analysis_view_scatterPlotMetrics`: Type array of string array. Initial scatter plto configuration in analysis view. Default `[["flops_any", "mem_bw"], ["flops_any", "cpu_load"], ["cpu_load", "mem_bw"]]`.
- `job_view_nodestats_selectedMetrics`: Type string array. Initial metrics shown in node statistics table of single job view. Default `["flops_any", "mem_bw", "mem_used"]`.
- `job_view_polarPlotMetrics`: Type string array. Metrics shown in polar plot of single job view. Default `["flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"]`.
- `job_view_selectedMetrics`: Type string array. ??. Default `["flops_any", "mem_bw", "mem_used"]`.
- `job_view_selectedMetrics`: Type string array. Default `["flops_any", "mem_bw", "mem_used"]`.
- `plot_general_colorBackground`: Type bool. Color plot background according to job average threshold limits. Default `true`.
- `plot_general_colorscheme`: Type string array. Initial color scheme. Default `"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"`.
- `plot_general_lineWidth`: Type int. Initial linewidth. Default `3`.

237
configs/cluster.json Normal file
View File

@ -0,0 +1,237 @@
{
"subClusters": [
{
"name": "a40",
"numberOfNodes": 38,
"processorType": "AMD Milan",
"socketsPerNode": 2,
"coresPerSocket": 64,
"threadsPerCore": 1,
"flopRateScalar": 432,
"flopRateSimd": 9216,
"memoryBandwidth": 400,
"topology": {
"node": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127],
"socket": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63],
[64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"memoryDomain": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"core": [
[0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],[13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23],[24],[25],[26],[27],[28],[29],[30],[31],[32],[33],[34],[35],[36],[37],[38],[39],[40],[41],[42],[43],[44],[45],[46],[47],[48],[49],[50],[51],[52],[53],[54],[55],[56],[57],[58],[59],[60],[61],[62],[63],[64],[65],[66],[67],[68],[69],[70],[71],[73],[74],[75],[76],[77],[78],[79],[80],[81],[82],[83],[84],[85],[86],[87],[88],[89],[90],[91],[92],[93],[94],[95],[96],[97],[98],[99],[100],[101],[102],[103],[104],[105],[106],[107],[108],[109],[110],[111],[112],[113],[114],[115],[116],[117],[118],[119],[120],[121],[122],[123],[124],[125],[126],[127]
],
"accelerators": [
{
"id": "00000000:01:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:25:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:41:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:61:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:81:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:A1:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:C1:00.0",
"type": "Nvidia GPU",
"model": "A40"
},
{
"id": "00000000:E1:00.0",
"type": "Nvidia GPU",
"model": "A40"
}
]
}
},
{
"name": "a100",
"numberOfNodes": 20,
"processorType": "AMD Milan",
"socketsPerNode": 2,
"coresPerSocket": 64,
"threadsPerCore": 1,
"flopRateScalar": 432,
"flopRateSimd": 9216,
"memoryBandwidth": 400,
"topology": {
"node": [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127],
"socket": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63],
[64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"memoryDomain": [
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127]
],
"core": [
[0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12],[13],[14],[15],[16],[17],[18],[19],[20],[21],[22],[23],[24],[25],[26],[27],[28],[29],[30],[31],[32],[33],[34],[35],[36],[37],[38],[39],[40],[41],[42],[43],[44],[45],[46],[47],[48],[49],[50],[51],[52],[53],[54],[55],[56],[57],[58],[59],[60],[61],[62],[63],[64],[65],[66],[67],[68],[69],[70],[71],[73],[74],[75],[76],[77],[78],[79],[80],[81],[82],[83],[84],[85],[86],[87],[88],[89],[90],[91],[92],[93],[94],[95],[96],[97],[98],[99],[100],[101],[102],[103],[104],[105],[106],[107],[108],[109],[110],[111],[112],[113],[114],[115],[116],[117],[118],[119],[120],[121],[122],[123],[124],[125],[126],[127]
],
"accelerators": [
{
"id": "00000000:0E:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:13:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:49:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:4F:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:90:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:96:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:CC:00.0",
"type": "Nvidia GPU",
"model": "A100"
},
{
"id": "00000000:D1:00.0",
"type": "Nvidia GPU",
"model": "A100"
}
]
}
}
],
"metricConfig": [
{
"name": "cpu_load",
"scope": "node",
"unit": "load 1m",
"timestep": 60,
"aggregation": null,
"peak": 128,
"normal": 128,
"caution": 10,
"alert": 5
},
{
"name": "cpu_user",
"scope": "hwthread",
"unit": "cpu user",
"timestep": 60,
"aggregation": "avg",
"peak": 100,
"normal": 50,
"caution": 20,
"alert": 10
},
{
"name": "mem_used",
"scope": "node",
"unit": "GB",
"timestep": 60,
"aggregation": null,
"peak": 512,
"normal": 128,
"caution": 200,
"alert": 240
},
{
"name": "flops_any",
"scope": "hwthread",
"unit": "GF/s",
"timestep": 60,
"aggregation": "sum",
"peak": 9216,
"normal": 1000,
"caution": 200,
"alert": 50
},
{
"name": "mem_bw",
"scope": "socket",
"unit": "GB/s",
"timestep": 60,
"aggregation": "sum",
"peak": 350,
"normal": 100,
"caution": 50,
"alert": 10
},
{
"name": "clock",
"scope": "hwthread",
"unit": "MHz",
"timestep": 60,
"aggregation": "avg",
"peak": 3000,
"normal": 2400,
"caution": 1800,
"alert": 1200
},
{
"name": "core_power",
"scope": "hwthread",
"unit": "W",
"timestep": 60,
"aggregation": "sum",
"peak": 500,
"normal": 250,
"caution": 100,
"alert": 50
},
{
"name": "cpu_power",
"scope": "socket",
"unit": "W",
"timestep": 60,
"aggregation": "sum",
"peak": 500,
"normal": 250,
"caution": 100,
"alert": 50
},
{
"name": "ipc",
"scope": "hwthread",
"unit": "IPC",
"timestep": 60,
"aggregation": "avg",
"peak": 4,
"normal": 2,
"caution": 1,
"alert": 0.5
}
]
}

View File

@ -1,14 +1,33 @@
{
"addr": "0.0.0.0:443",
"ldap": {
"url": "ldaps://hpcldap.rrze.uni-erlangen.de",
"user_base": "ou=people,ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
"search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
"user_bind": "uid={username},ou=people,ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
"user_filter": "(&(objectclass=posixAccount)(uid=*))"
},
"url": "ldaps://hpcldap.rrze.uni-erlangen.de",
"user_base": "ou=people,ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
"search_dn": "cn=hpcmonitoring,ou=roadm,ou=profile,ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
"user_bind": "uid={username},ou=people,ou=hpc,dc=rrze,dc=uni-erlangen,dc=de",
"user_filter": "(&(objectclass=posixAccount)(uid=*))"
},
"https-cert-file": "/etc/letsencrypt/live/monitoring.nhr.fau.de/fullchain.pem",
"https-key-file": "/etc/letsencrypt/live/monitoring.nhr.fau.de/privkey.pem",
"user": "clustercockpit",
"group": "clustercockpit"
"group": "clustercockpit",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [
{
"name": "test",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8082",
"token": "eyJhbGciOiJF-E-pQBQ"
},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
}
]
}

228
configs/generate-subcluster.pl Executable file
View File

@ -0,0 +1,228 @@
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
my %INFO;
my %DOMAINS;
my $SMT;
my $numMemoryDomains;
$DOMAINS{socket} = [];
$DOMAINS{memoryDomain} = [];
$DOMAINS{core} = [];
$DOMAINS{gpu} = [];
my $gpuID=-1;
my $id;
# Step 1 : Extract system information
my $topo = `likwid-topology -O -G`;
$INFO{numGPUs} = 0;
foreach my $ln (split("\n", $topo)) {
if ( $ln =~ /^STRUCT,NUMA Topology ([0-9]+)/ ) {
$id = $1;
}
if ( $ln =~ /^Processors/ ) {
my @fields = split(",", $ln);
shift @fields;
$DOMAINS{memoryDomain}[$id] = [ @fields ];
}
if ( $ln =~ /^STRUCT,Cache Topology L1/ ) {
$id = -1;
}
if ( $ln =~ /^Cache groups/ ) {
if ( $id == -1 ) {
my @fields = split(",", $ln);
shift @fields;
my $i = 0;
foreach my $core ( @fields ) {
$DOMAINS{core}[$i++] = [ split(" ", $core) ];
}
$id = 0;
}
}
if ( $ln =~ /^ID:/ ) {
my @fields = split(",", $ln);
$gpuID = $fields[1];
}
if ( $gpuID >= 0 ) {
if ( $ln =~ /^Name:/ ) {
my @fields = split(",", $ln);
$DOMAINS{gpu}[$gpuID] = {};
$DOMAINS{gpu}[$gpuID]{model} = $fields[1];
if ( $fields[1] =~ /nvidia/i ) {
$DOMAINS{gpu}[$gpuID]{type} = "Nvidia GPU";
} elsif ( $fields[1] =~ /amd/i ) {
$DOMAINS{gpu}[$gpuID]{type} = "AMD GPU";
} elsif ( $fields[1] =~ /intel/i ) {
$DOMAINS{gpu}[$gpuID]{type} = "Intel GPU";
}
}
if ( $ln =~ /^PCI bus:/ ) {
my @fields = split(",", $ln);
$fields[1] =~ s/0x//;
$DOMAINS{gpu}[$gpuID]{bus} = $fields[1];
}
if ( $ln =~ /^PCI domain:/ ) {
my @fields = split(",", $ln);
$fields[1] =~ s/0x//;
$DOMAINS{gpu}[$gpuID]{domain} = $fields[1];
}
if ( $ln =~ /^PCI device/ ) {
my @fields = split(",", $ln);
$fields[1] =~ s/0x//;
$DOMAINS{gpu}[$gpuID]{device} = $fields[1];
$gpuID = -1;
}
}
if ( $ln =~ /^CPU name:/ ) {
my @fields = split(",", $ln);
$INFO{processor} = $fields[1];
}
if ( $ln =~ /^CPU type/ ) {
my @fields = split(",", $ln);
$INFO{family} = $fields[1];
$INFO{family} =~ s/[\(\)]//g;
}
if ( $ln =~ /^Sockets:/ ) {
my @fields = split(",", $ln);
$INFO{socketsPerNode} = $fields[1];
}
if ( $ln =~ /^Cores per socket:/ ) {
my @fields = split(",", $ln);
$INFO{coresPerSocket} = $fields[1];
}
if ( $ln =~ /^GPU count:/ ) {
my @fields = split(",", $ln);
$INFO{numGPUs} = $fields[1];
}
if ( $ln =~ /^Threads per core:/ ) {
my @fields = split(",", $ln);
$SMT = $fields[1];
$INFO{threadsPerCore} = $SMT;
}
if ( $ln =~ /^NUMA domains:/ ) {
my @fields = split(",", $ln);
$INFO{memoryDomainsPerNode} = $fields[1];
}
if ( $ln =~ /^Socket ([0-9]+)/ ) {
my @fields = split(",", $ln);
shift @fields;
$DOMAINS{socket}[$1] = [ @fields ];
}
}
my $node;
my @sockets;
foreach my $socket ( @{$DOMAINS{socket}} ) {
push @sockets, "[".join(",", @{$socket})."]";
$node .= join(",", @{$socket})
}
$INFO{sockets} = join(",\n", @sockets);
my @memDomains;
foreach my $d ( @{$DOMAINS{memoryDomain}} ) {
push @memDomains, "[".join(",", @{$d})."]";
}
$INFO{memoryDomains} = join(",\n", @memDomains);
my @cores;
foreach my $c ( @{$DOMAINS{core}} ) {
push @cores, "[".join(",", @{$c})."]";
}
$INFO{cores} = join(",", @cores);
my $numCoresPerNode = $INFO{coresPerSocket} * $INFO{socketsPerNode};
my $numCoresPerMemoryDomain = $numCoresPerNode / $INFO{memoryDomainsPerNode};
my $memBw;
my $exp = join(' ',map("-w M$_:1GB:$numCoresPerMemoryDomain:1:$SMT", 0 ... $INFO{memoryDomainsPerNode}-1));
print "Using: $exp\n";
my $out = `likwid-bench -t clload $exp`;
foreach my $ln ( split("\n", $out) ){
if ( $ln =~ /MByte\/s:\s+([0-9.]+)/ ) {
$memBw = my $rounded = int($1/1000 + 0.5);
}
}
my $flopsScalar;
$out = `likwid-bench -t peakflops -w N:24kB:$numCoresPerNode`;
foreach my $ln ( split("\n", $out) ){
if ( $ln =~ /MFlops\/s:\s+([0-9.]+)/ ) {
$flopsScalar = my $rounded = int($1/1000 + 0.5);
}
}
my $simd = "";
my $fh;
open($fh,"<","/proc/cpuinfo");
foreach my $ln ( <$fh> ) {
if ( $ln =~ /flags/ ) {
if ( $ln =~ /avx2/ ) {
$simd = '_avx_fma';
}
if ( $ln =~ /avx512ifma/ ) {
$simd = '_avx512_fma';
}
last;
}
}
close $fh;
print "Using peakflops variant $simd\n";
my $flopsSimd;
$out = `likwid-bench -t peakflops$simd -w N:500kB:$numCoresPerNode`;
foreach my $ln ( split("\n", $out) ){
if ( $ln =~ /MFlops\/s:\s+([0-9.]+)/ ) {
$flopsSimd = my $rounded = int($1/1000 + 0.5);
}
}
if ( $INFO{numGPUs} > 0 ) {
$INFO{gpus} = "\"accelerators\": [\n";
my @gpuStr;
foreach $id ( 0 ... ($INFO{numGPUs}-1) ) {
my %gpu = %{$DOMAINS{gpu}[$id]};
my $deviceAddr = sprintf("%08x:%02x:%02x\.0", hex($gpu{domain}), hex($gpu{bus}), hex($gpu{device}));
$gpuStr[$id] = <<END
{
"id": "$deviceAddr",
"type": "$gpu{type}",
"model": "$gpu{model}"
}
END
}
$INFO{gpus} .= join(",\n",@gpuStr);
$INFO{gpus} .= "]\n";
}
print <<"END";
{
"name": "<FILL IN>",
"processorType": "$INFO{processor}",
"socketsPerNode": $INFO{socketsPerNode},
"coresPerSocket": $INFO{coresPerSocket},
"threadsPerCore": $INFO{threadsPerCore},
"flopRateScalar": $flopsScalar,
"flopRateSimd": $flopsSimd,
"memoryBandwidth": $memBw,
"nodes": "<FILL IN NODE RANGES>",
"topology": {
"node": [$node],
"socket": [
$INFO{sockets}
],
"memoryDomain": [
$INFO{memoryDomains}
],
$INFO{gpus}
"core": [
$INFO{cores}
]
}
}
END

240
docs/Hands-on.md Normal file
View File

@ -0,0 +1,240 @@
# CC-HANDSON - Setup ClusterCockpit from scratch (w/o docker)
## Prerequisites
* Perl
* Yarn
* Go
* Optional: curl
* Script migrateTimestamp.pl
## Documentation
You find READMEs or api docs in
* ./cc-backend/configs
* ./cc-backend/init
* ./cc-backend/api
## ClusterCockpit configuration files
### cc-backend
* `./.env` Passwords and Tokens set in the environment
* `./config.json` Configuration options for cc-backend
### cc-metric-store
* `./config.json` Optional to overwrite configuration options
### cc-metric-collector
Not yet included in the hands-on setup.
## Setup Components
Start by creating a base folder for all of the following steps.
* `mkdir clustercockpit`
* `cd clustercockpit`
### Setup cc-backend
* Clone Repository
- `git clone https://github.com/ClusterCockpit/cc-backend.git`
- `cd cc-backend`
- `git checkout dev-job-archive-module` Will be merged soon into master
* Setup Frontend
- `cd ./web/frontend`
- `yarn install`
- `yarn build`
- `cd ../..`
* Build Go Executable
- `go build ./cmd/cc-backend/`
* Prepare Datafolder and Database file
- `mkdir var`
- `touch var/job.db`
* Activate & Config environment for cc-backend
- `cp configs/env-template.txt .env`
- Optional: Have a look via `vim ./.env`
- Copy the `config.json` file included in this tarball into the root directory of cc-backend: `cp ../../config.json ./`
* Back to toplevel `clustercockpit`
- `cd ..`
### Setup cc-metric-store
* Clone Repository
- `git clone https://github.com/ClusterCockpit/cc-metric-store.git`
- `cd cc-metric-store`
* Build Go Executable
- `go get`
- `go build`
* Prepare Datafolders
- `mkdir -p var/checkpoints`
- `mkdir -p var/archive`
* Update Config
- `vim config.json`
- Exchange existing setting in `metrics` with the following:
```
"clock": { "frequency": 60, "aggregation": null },
"cpi": { "frequency": 60, "aggregation": null },
"cpu_load": { "frequency": 60, "aggregation": null },
"flops_any": { "frequency": 60, "aggregation": null },
"flops_dp": { "frequency": 60, "aggregation": null },
"flops_sp": { "frequency": 60, "aggregation": null },
"ib_bw": { "frequency": 60, "aggregation": null },
"lustre_bw": { "frequency": 60, "aggregation": null },
"mem_bw": { "frequency": 60, "aggregation": null },
"mem_used": { "frequency": 60, "aggregation": null },
"rapl_power": { "frequency": 60, "aggregation": null }
```
* Back to toplevel `clustercockpit`
- `cd ..`
### Setup Demo Data
* `mkdir source-data`
* `cd source-data`
* Download JobArchive-Source:
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz`
- `tar xJf job-archive-dev.tar.xz`
- `mv ./job-archive ./job-archive-source`
- `rm ./job-archive-dev.tar.xz`
* Download CC-Metric-Store Checkpoints:
- `mkdir -p cc-metric-store-source/checkpoints`
- `cd cc-metric-store-source/checkpoints`
- `wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/cc-metric-store-checkpoints.tar.xz`
- `tar xf cc-metric-store-checkpoints.tar.xz`
- `rm cc-metric-store-checkpoints.tar.xz`
* Back to `source-data`
- `cd ../..`
* Run timestamp migration script. This may take tens of minutes!
- `cp ../migrateTimestamps.pl .`
- `./migrateTimestamps.pl`
- Expected output:
```
Starting to update start- and stoptimes in job-archive for emmy
Starting to update start- and stoptimes in job-archive for woody
Done for job-archive
Starting to update checkpoint filenames and data starttimes for emmy
Starting to update checkpoint filenames and data starttimes for woody
Done for checkpoints
```
* Copy `cluster.json` files from source to migrated folders
- `cp source-data/job-archive-source/emmy/cluster.json cc-backend/var/job-archive/emmy/`
- `cp source-data/job-archive-source/woody/cluster.json cc-backend/var/job-archive/woody/`
* Initialize Job-Archive in SQLite3 job.db and add demo user
- `cd cc-backend`
- `./cc-backend --init-db --add-user demo:admin:AdminDev`
- Expected output:
```
<6>[INFO] new user "demo" created (roles: ["admin"], auth-source: 0)
<6>[INFO] Building job table...
<6>[INFO] A total of 3936 jobs have been registered in 1.791 seconds.
```
* Back to toplevel `clustercockpit`
- `cd ..`
### Startup both Apps
* In cc-backend root: `$./cc-backend --server --dev`
- Starts Clustercockpit at `http:localhost:8080`
- Log: `<6>[INFO] HTTP server listening at :8080...`
- Use local internet browser to access interface
- You should see and be able to browse finished Jobs
- Metadata is read from SQLite3 database
- Metricdata is read from job-archive/JSON-Files
- Create User in settings (top-right corner)
- Name `apiuser`
- Username `apiuser`
- Role `API`
- Submit & Refresh Page
- Create JTW for `apiuser`
- In Userlist, press `Gen. JTW` for `apiuser`
- Save JWT for later use
* In cc-metric-store root: `$./cc-metric-store`
- Start the cc-metric-store on `http:localhost:8081`, Log:
```
2022/07/15 17:17:42 Loading checkpoints newer than 2022-07-13T17:17:42+02:00
2022/07/15 17:17:45 Checkpoints loaded (5621 files, 319 MB, that took 3.034652s)
2022/07/15 17:17:45 API http endpoint listening on '0.0.0.0:8081'
```
- Does *not* have a graphical interface
- Otpional: Test function by executing:
```
$ curl -H "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -D - "http://localhost:8081/api/query" -d "{ \"cluster\": \"emmy\", \"from\": $(expr $(date +%s) - 60), \"to\": $(date +%s), \"queries\": [{
\"metric\": \"flops_any\",
\"host\": \"e1111\"
}] }"
HTTP/1.1 200 OK
Content-Type: application/json
Date: Fri, 15 Jul 2022 13:57:22 GMT
Content-Length: 119
{"results":[[JSON-DATA-ARRAY]]}
```
### Development API web interfaces
The `--dev` flag enables web interfaces to document and test the apis:
* http://localhost:8080/playground - A GraphQL playground. To use it you must have a authenticated session in the same browser.
* http://localhost:8080/swagger - A Swagger UI. To use it you have to be logged out, so no user session in the same browser. Use the JWT token with role Api generate previously to authenticate via http header.
### Use cc-backend API to start job
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
* Click the `/job/start_job` endpoint and click the Try it out button.
* Enter the following json into the request body text area and fill in a recent start timestamp by executing `date +%s`.:
```
{
"jobId": 100000,
"arrayJobId": 0,
"user": "ccdemouser",
"subCluster": "main",
"cluster": "emmy",
"startTime": <date +%s>,
"project": "ccdemoproject",
"resources": [
{"hostname": "e0601"},
{"hostname": "e0823"},
{"hostname": "e0337"},
{"hostname": "e1111"}],
"numNodes": 4,
"numHwthreads": 80,
"walltime": 86400
}
```
* The response body should be the database id of the started job, for example:
```
{
"id": 3937
}
```
* Check in ClusterCockpit
- User `ccdemouser` should appear in Users-Tab with one running job
- It could take up to 5 Minutes until the Job is displayed with some current data (5 Min Short-Job Filter)
- Job then is marked with a green `running` tag
- Metricdata displayed is read from cc-metric-store!
### Use cc-backend API to stop job
* Enter the URL `http://localhost:8080/swagger/index.html` in your browser.
* Enter your JWT token you generated for the API user by clicking the green Authorize button in the upper right part of the window.
* Click the `/job/stop_job/{id}` endpoint and click the Try it out button.
* Enter the database id at id that was returned by `start_job` and copy the following into the request body. Replace the timestamp with a recent one:
```
{
"cluster": "emmy",
"jobState": "completed",
"stopTime": <RECENT TS>
}
```
* On success a json document with the job meta data is returned.
* Check in ClusterCockpit
- User `ccdemouser` should appear in Users-Tab with one completed job
- Job is no longer marked with a green `running` tag -> Completed!
- Metricdata displayed is now read from job-archive!
* Check in job-archive
- `cd ./cc-backend/var/job-archive/emmy/100/000`
- `cd $STARTTIME`
- Inspect `meta.json` and `data.json`
## Helper scripts
* In this tarball you can find the perl script `generate_subcluster.pl` that helps to generate the subcluster section for your system.
Usage:
* Log into an exclusive cluster node.
* The LIKWID tools likwid-topology and likwid-bench must be in the PATH!
* `$./generate_subcluster.pl` outputs the subcluster section on `stdout`
Please be aware that
* You have to enter the name and node list for the subCluster manually.
* GPU detection only works if LIKWID was build with Cuda avalable and you run likwid-topology also with Cuda loaded.
* Do not blindly trust the measured peakflops values.
* Because the script blindly relies on the CSV format output by likwid-topology this is a fragile undertaking!

35
docs/config.json Normal file
View File

@ -0,0 +1,35 @@
{
"addr": "0.0.0.0:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [
{
"name": "emmy",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8081",
"token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw"
},
"filterRanges": {
"numNodes": { "from": 1, "to": 32 },
"duration": { "from": 0, "to": 172800 },
"startTime": { "from": "2010-01-01T00:00:00Z", "to": null }
}
},
{
"name": "woody",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "http://localhost:8081",
"token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw"
},
"filterRanges": {
"numNodes": { "from": 1, "to": 1 },
"duration": { "from": 0, "to": 172800 },
"startTime": { "from": "2015-01-01T00:00:00Z", "to": null }
}
}
]
}

229
docs/migrateTimestamps.pl Executable file
View File

@ -0,0 +1,229 @@
#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use JSON::PP; # from Perl default install
use Time::Local qw( timelocal ); # from Perl default install
use Time::Piece; # from Perl default install
### JSON
my $json = JSON::PP->new->allow_nonref;
### TIME AND DATE
# now
my $localtime = localtime;
my $epochtime = $localtime->epoch;
# 5 days ago: Via epoch due to possible reverse month borders
my $epochlessfive = $epochtime - (86400 * 5);
my $locallessfive = localtime($epochlessfive);
# Calc like `date --date 'TZ="Europe/Berlin" 0:00 5 days ago' +%s`)
my ($day, $month, $year) = ($locallessfive->mday, $locallessfive->_mon, $locallessfive->year);
my $checkpointStart = timelocal(0, 0, 0, $day, $month, $year);
# for checkpoints
my $halfday = 43200;
### JOB-ARCHIVE
my $archiveTarget = './cc-backend/var/job-archive';
my $archiveSrc = './source-data/job-archive-source';
my @ArchiveClusters;
# Gen folder
if ( not -d $archiveTarget ){
mkdir( $archiveTarget ) or die "Couldn't create $archiveTarget directory, $!";
}
# Get clusters by job-archive/$subfolder
opendir my $dh, $archiveSrc or die "can't open directory: $!";
while ( readdir $dh ) {
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
my $cluster = $_;
push @ArchiveClusters, $cluster;
}
# start for jobarchive
foreach my $cluster ( @ArchiveClusters ) {
print "Starting to update start- and stoptimes in job-archive for $cluster\n";
my $clusterTarget = "$archiveTarget/$cluster";
if ( not -d $clusterTarget ){
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
}
opendir my $dhLevel1, "$archiveSrc/$cluster" or die "can't open directory: $!";
while ( readdir $dhLevel1 ) {
chomp; next if $_ eq '.' or $_ eq '..';
my $level1 = $_;
if ( -d "$archiveSrc/$cluster/$level1" ) {
opendir my $dhLevel2, "$archiveSrc/$cluster/$level1" or die "can't open directory: $!";
while ( readdir $dhLevel2 ) {
chomp; next if $_ eq '.' or $_ eq '..';
my $level2 = $_;
my $jobSource = "$archiveSrc/$cluster/$level1/$level2";
my $jobOrigin = "$jobSource";
my $jobTargetL1 = "$clusterTarget/$level1";
my $jobTargetL2 = "$jobTargetL1/$level2";
# check if files are directly accessible (old format) else get subfolders as file and update path
if ( ! -e "$jobSource/meta.json") {
opendir(D, "$jobSource") || die "Can't open directory $jobSource: $!\n";
my @folders = readdir(D);
closedir(D);
if (!@folders) {
next;
}
foreach my $folder ( @folders ) {
next if $folder eq '.' or $folder eq '..';
$jobSource = "$jobSource/".$folder;
}
}
# check if subfolder contains file, else skip
if ( ! -e "$jobSource/meta.json") {
print "$jobSource skipped\n";
next;
}
open my $metafh, '<', "$jobSource/meta.json" or die "Can't open file $!";
my $rawstr = do { local $/; <$metafh> };
close($metafh);
my $metadata = $json->decode($rawstr);
# NOTE Start meta.json iteration here
# my $random_number = int(rand(UPPERLIMIT)) + LOWERLIMIT;
# Set new startTime: Between 5 days and 1 day before now
# Remove id from attributes
$metadata->{startTime} = $epochtime - (int(rand(432000)) + 86400);
$metadata->{stopTime} = $metadata->{startTime} + $metadata->{duration};
# Add starttime subfolder to target path
my $jobTargetL3 = "$jobTargetL2/".$metadata->{startTime};
if ( not -d $jobTargetL1 ){
mkdir( $jobTargetL1 ) or die "Couldn't create $jobTargetL1 directory, $!";
}
if ( not -d $jobTargetL2 ){
mkdir( $jobTargetL2 ) or die "Couldn't create $jobTargetL2 directory, $!";
}
# target is not directory
if ( not -d $jobTargetL3 ){
mkdir( $jobTargetL3 ) or die "Couldn't create $jobTargetL3 directory, $!";
my $outstr = $json->encode($metadata);
open my $metaout, '>', "$jobTargetL3/meta.json" or die "Can't write to file $!";
print $metaout $outstr;
close($metaout);
open my $datafh, '<', "$jobSource/data.json" or die "Can't open file $!";
my $datastr = do { local $/; <$datafh> };
close($datafh);
open my $dataout, '>', "$jobTargetL3/data.json" or die "Can't write to file $!";
print $dataout $datastr;
close($dataout);
}
}
}
}
}
print "Done for job-archive\n";
sleep(1);
exit;
## CHECKPOINTS
my $checkpTarget = './cc-metric-store/var/checkpoints';
my $checkpSource = './source-data/cc-metric-store-source/checkpoints';
my @CheckpClusters;
# Gen folder
if ( not -d $checkpTarget ){
mkdir( $checkpTarget ) or die "Couldn't create $checkpTarget directory, $!";
}
# Get clusters by cc-metric-store/$subfolder
opendir my $dhc, $checkpSource or die "can't open directory: $!";
while ( readdir $dhc ) {
chomp; next if $_ eq '.' or $_ eq '..' or $_ eq 'job-archive';
my $cluster = $_;
push @CheckpClusters, $cluster;
}
closedir($dhc);
# start for checkpoints
foreach my $cluster ( @CheckpClusters ) {
print "Starting to update checkpoint filenames and data starttimes for $cluster\n";
my $clusterTarget = "$checkpTarget/$cluster";
if ( not -d $clusterTarget ){
mkdir( $clusterTarget ) or die "Couldn't create $clusterTarget directory, $!";
}
opendir my $dhLevel1, "$checkpSource/$cluster" or die "can't open directory: $!";
while ( readdir $dhLevel1 ) {
chomp; next if $_ eq '.' or $_ eq '..';
# Nodename as level1-folder
my $level1 = $_;
if ( -d "$checkpSource/$cluster/$level1" ) {
my $nodeSource = "$checkpSource/$cluster/$level1/";
my $nodeOrigin = "$nodeSource";
my $nodeTarget = "$clusterTarget/$level1";
my @files;
if ( -e "$nodeSource/1609459200.json") { # 1609459200 == First Checkpoint time in latest dump
opendir(D, "$nodeSource") || die "Can't open directory $nodeSource: $!\n";
while ( readdir D ) {
chomp; next if $_ eq '.' or $_ eq '..';
my $nodeFile = $_;
push @files, $nodeFile;
}
closedir(D);
my $length = @files;
if (!@files || $length != 14) { # needs 14 files == 7 days worth of data
next;
}
} else {
next;
}
# sort for integer timestamp-filename-part (moduleless): Guarantees start with index == 0 == 1609459200.json
my @sortedFiles = sort { ($a =~ /^([0-9]{10}).json$/)[0] <=> ($b =~ /^([0-9]{10}).json$/)[0] } @files;
if ( not -d $nodeTarget ){
mkdir( $nodeTarget ) or die "Couldn't create $nodeTarget directory, $!";
while (my ($index, $file) = each(@sortedFiles)) {
open my $checkfh, '<', "$nodeSource/$file" or die "Can't open file $!";
my $rawstr = do { local $/; <$checkfh> };
close($checkfh);
my $checkpdata = $json->decode($rawstr);
my $newTimestamp = $checkpointStart + ($index * $halfday);
# Get Diff from old Timestamp
my $timeDiff = $newTimestamp - $checkpdata->{from};
# Set new timestamp
$checkpdata->{from} = $newTimestamp;
foreach my $metric (keys %{$checkpdata->{metrics}}) {
$checkpdata->{metrics}->{$metric}->{start} += $timeDiff;
}
my $outstr = $json->encode($checkpdata);
open my $checkout, '>', "$nodeTarget/$newTimestamp.json" or die "Can't write to file $!";
print $checkout $outstr;
close($checkout);
}
}
}
}
closedir($dhLevel1);
}
print "Done for checkpoints\n";

67
go.mod
View File

@ -3,37 +3,60 @@ module github.com/ClusterCockpit/cc-backend
go 1.17
require (
github.com/99designs/gqlgen v0.17.2
github.com/Masterminds/squirrel v1.5.2
github.com/go-ldap/ldap/v3 v3.4.2
github.com/99designs/gqlgen v0.17.16
github.com/Masterminds/squirrel v1.5.3
github.com/go-ldap/ldap/v3 v3.4.4
github.com/go-sql-driver/mysql v1.6.0
github.com/golang-jwt/jwt/v4 v4.4.1
github.com/google/gops v0.3.22
github.com/golang-jwt/jwt/v4 v4.4.2
github.com/google/gops v0.3.25
github.com/gorilla/handlers v1.5.1
github.com/gorilla/mux v1.8.0
github.com/gorilla/sessions v1.2.1
github.com/influxdata/influxdb-client-go/v2 v2.8.1
github.com/jmoiron/sqlx v1.3.4
github.com/mattn/go-sqlite3 v1.14.12
github.com/vektah/gqlparser/v2 v2.4.1
golang.org/x/crypto v0.0.0-20220321153916-2c7772ba3064
github.com/influxdata/influxdb-client-go/v2 v2.10.0
github.com/jmoiron/sqlx v1.3.5
github.com/mattn/go-sqlite3 v1.14.15
github.com/santhosh-tekuri/jsonschema v1.2.4
github.com/vektah/gqlparser/v2 v2.5.0
golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90
)
require (
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c // indirect
github.com/agnivade/levenshtein v1.1.0 // indirect
github.com/deepmap/oapi-codegen v1.8.2 // indirect
github.com/felixge/httpsnoop v1.0.1 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.1 // indirect
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e // indirect
github.com/KyleBanks/depth v1.2.1 // indirect
github.com/agnivade/levenshtein v1.1.1 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
github.com/deepmap/oapi-codegen v1.11.0 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/ghodss/yaml v1.0.0 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/spec v0.20.7 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/securecookie v1.1.1 // indirect
github.com/gorilla/websocket v1.4.2 // indirect
github.com/hashicorp/golang-lru v0.5.0 // indirect
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/mitchellh/mapstructure v1.2.3 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 // indirect
golang.org/x/sys v0.0.0-20211019181941-9d821ace8654 // indirect
gopkg.in/yaml.v2 v2.3.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 // indirect
github.com/swaggo/files v0.0.0-20220728132757-551d4a08d97a // indirect
github.com/swaggo/http-swagger v1.3.3 // indirect
github.com/swaggo/swag v1.8.5 // indirect
github.com/urfave/cli/v2 v2.8.1 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
golang.org/x/net v0.0.0-20220909164309-bea034e7d591 // indirect
golang.org/x/sys v0.0.0-20220913175220-63ea55921009 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/tools v0.1.12 // indirect
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

273
go.sum
View File

@ -1,50 +1,89 @@
github.com/99designs/gqlgen v0.17.2 h1:yczvlwMsfcVu/JtejqfrLwXuSP0yZFhmcss3caEvHw8=
github.com/99designs/gqlgen v0.17.2/go.mod h1:K5fzLKwtph+FFgh9j7nFbRUdBKvTcGnsta51fsMTn3o=
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c h1:/IBSNwUN8+eKzUzbJPqhK839ygXJ82sde8x3ogr6R28=
github.com/Azure/go-ntlmssp v0.0.0-20200615164410-66371956d46c/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Masterminds/squirrel v1.5.2 h1:UiOEi2ZX4RCSkpiNDQN5kro/XIBpSRk9iTqdIRPzUXE=
github.com/Masterminds/squirrel v1.5.2/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8=
github.com/99designs/gqlgen v0.17.16 h1:tTIw/cQ/uvf3iXIb2I6YSkdaDkmHmH2W2eZkVe0IVLA=
github.com/99designs/gqlgen v0.17.16/go.mod h1:dnJdUkgfh8iw8CEx2hhTdgTQO/GvVWKLcm/kult5gwI=
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e h1:NeAW1fUYUEWhft7pkxDf6WoUvEZJ/uOKsvtpjLnn8MU=
github.com/Azure/go-ntlmssp v0.0.0-20220621081337-cb9428e4ac1e/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
github.com/Masterminds/squirrel v1.5.3 h1:YPpoceAcxuzIljlr5iWpNKaql7hLeG1KLSrhvdHpkZc=
github.com/Masterminds/squirrel v1.5.3/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10=
github.com/agnivade/levenshtein v1.0.1/go.mod h1:CURSv5d9Uaml+FovSIICkLbAUZ9S4RqaHDIsdSBg7lM=
github.com/agnivade/levenshtein v1.1.0 h1:n6qGwyHG61v3ABce1rPVZklEYRT8NFpCMrpZdBUbYGM=
github.com/agnivade/levenshtein v1.1.0/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8=
github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883 h1:bvNMNQO63//z+xNgfBlViaCIJKLlCJ6/fmUseuG0wVQ=
github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo29Kk6CurOXKm700vrz8f0KW0JNfpkRJY/8=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/cpuguy83/go-md2man/v2 v2.0.1 h1:r/myEWzV9lfsM1tFLgDyu0atFtJ1fXn261LKYj/3DxU=
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deepmap/oapi-codegen v1.8.2 h1:SegyeYGcdi0jLLrpbCMoJxnUUn8GBXHsvr4rbzjuhfU=
github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc=
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.0-20210816181553-5444fa50b93d/go.mod h1:tmAIfUFEirG/Y8jhZ9M+h36obRZAk/1fcSpXwAVlfqE=
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs=
github.com/deepmap/oapi-codegen v1.8.2/go.mod h1:YLgSKSDv/bZQB7N4ws6luhozi3cEdRktEqrX88CvjIw=
github.com/deepmap/oapi-codegen v1.11.0 h1:f/X2NdIkaBKsSdpeuwLnY/vDI0AtPUrmB5LMgc7YD+A=
github.com/deepmap/oapi-codegen v1.11.0/go.mod h1:k+ujhoQGxmQYBZBbxhOZNZf4j08qv5mC+OH+fFTnKxM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ=
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk=
github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/getkin/kin-openapi v0.61.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4=
github.com/getkin/kin-openapi v0.94.0/go.mod h1:LWZfzOd7PRy8GJ1dJ6mCU6tNdSfOwRac1BUPam4aw6Q=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-asn1-ber/asn1-ber v1.5.1 h1:pDbRAunXzIUXfx4CB2QJFv5IuPiuoW+sWvr/Us009o8=
github.com/go-asn1-ber/asn1-ber v1.5.1/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.7.7/go.mod h1:axIBovoeJpVj8S3BwE0uPMTeReE4+AfFtqpqaZ1qq1U=
github.com/go-asn1-ber/asn1-ber v1.5.4 h1:vXT6d/FNDiELJnLb6hGNa309LMsrCoYFvpwHDF0+Y1A=
github.com/go-asn1-ber/asn1-ber v1.5.4/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0=
github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs=
github.com/go-ldap/ldap/v3 v3.4.2 h1:zFZKcXKLqZpFMrMQGHeHWKXbDTdNCmhGY9AK41zPh+8=
github.com/go-ldap/ldap/v3 v3.4.2/go.mod h1:iYS1MdmrmceOJ1QOTnRXrIs7i3kloqtmGQjRvjKpyMg=
github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-ole/go-ole v1.2.6-0.20210915003542-8b1f7f90f6b1/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-chi/chi/v5 v5.0.7/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8=
github.com/go-ldap/ldap/v3 v3.4.4 h1:qPjipEpt+qDa6SI/h1fzuGWoRUY+qqQ9sOZq67/PYUs=
github.com/go-ldap/ldap/v3 v3.4.4/go.mod h1:fe1MsuN5eJJ1FeLT/LEBVdWfNWKh459R7aXgXtJC+aI=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY=
github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonreference v0.20.0 h1:MYlu0sBgChmCfJxxUKZ8g1cPWFOB37YSZqewK7OKeyA=
github.com/go-openapi/jsonreference v0.20.0/go.mod h1:Ag74Ico3lPc+zR+qjn4XBUmXymS4zJbYVCZmcgkasdo=
github.com/go-openapi/spec v0.20.7 h1:1Rlu/ZrOCCob0n+JKKJAWhNWMPW8bOZRg8FJaY+0SKI=
github.com/go-openapi/spec v0.20.7/go.mod h1:2OpW+JddWPrpXSCIX8eOx7lZ5iyuWj3RYR6VaaBKcWA=
github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-openapi/swag v0.21.1/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ=
github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g=
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs=
github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA=
github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
github.com/go-playground/validator/v10 v10.11.0/go.mod h1:i+3WkQ1FvaUjjxh1kSvIA4dMGDBiPU55YFDl0WbKdWU=
github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE=
github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/golang-jwt/jwt/v4 v4.4.1 h1:pC5DB52sCeK48Wlb9oPcdhnjkz1TKt1D/P7WKJ0kUcQ=
github.com/golang-jwt/jwt/v4 v4.4.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/golang-jwt/jwt/v4 v4.4.2 h1:rcc4lwaZgFMCZ5jxF9ABolDcIHdBytAFgqFPbSJQAYs=
github.com/golang-jwt/jwt/v4 v4.4.2/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y=
github.com/google/gops v0.3.22 h1:lyvhDxfPLHAOR2xIYwjPhN387qHxyU21Sk9sz/GhmhQ=
github.com/google/gops v0.3.22/go.mod h1:7diIdLsqpCihPSX3fQagksT/Ku/y4RL9LHTlKyEUDl8=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gops v0.3.25 h1:Pf6uw+cO6pDhc7HJ71NiG0x8dyQTeQcmg3HQFF39qVw=
github.com/google/gops v0.3.25/go.mod h1:8A7ebAm0id9K3H0uOggeRVGxszSvnlURun9mg3GdYDw=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/handlers v1.5.1 h1:9lRY6j8DEeeBT10CvO9hGW0gmky0BprnvDI5vfhUHH4=
github.com/gorilla/handlers v1.5.1/go.mod h1:t8XrUpc4KVXb7HGyJ4/cEnwQiaxrX/hz1Zv/4g96P1Q=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
@ -53,98 +92,164 @@ github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyC
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7FsgI=
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/influxdata/influxdb-client-go/v2 v2.8.1 h1:DahMl2iYvr9hEtGZcV4Ud8Z7AQ8cQVbPlX6gEOBBCrE=
github.com/influxdata/influxdb-client-go/v2 v2.8.1/go.mod h1:x7Jo5UHHl+w8wu8UnGiNobDDHygojXwJX4mx7rXGKMk=
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/influxdata/influxdb-client-go/v2 v2.10.0 h1:bWCwNsp0KxBioW9PTG7LPk7/uXj2auHezuUMpztbpZY=
github.com/influxdata/influxdb-client-go/v2 v2.10.0/go.mod h1:x7Jo5UHHl+w8wu8UnGiNobDDHygojXwJX4mx7rXGKMk=
github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/jmoiron/sqlx v1.3.4 h1:wv+0IJZfL5z0uZoUjlpKgHkgaFSYD+r9CfrXjEXsO7w=
github.com/jmoiron/sqlx v1.3.4/go.mod h1:2BljVx/86SuTyjE+aPYlHCTNvZrnJXghYGpNiXLBMCQ=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g=
github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kevinmbeaulieu/eq-go v1.0.0/go.mod h1:G3S8ajA56gKBZm4UB9AOyoOS37JO3roToPzKNM8dtdM=
github.com/keybase/go-ps v0.0.0-20190827175125-91aafc93ba19/go.mod h1:hY+WOq6m2FpbvyrI93sMaypsttvaIL5nhVR92dTMUcQ=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/labstack/echo/v4 v4.2.1/go.mod h1:AA49e0DZ8kk5jTOOCKNuPR6oTnBS0dYiM4FW1e6jwpg=
github.com/labstack/echo/v4 v4.7.2/go.mod h1:xkCDAdFCIf8jsFQ5NnbK7oqaF/yU1A1X20Ltm0OvSks=
github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
github.com/labstack/gommon v0.3.1/go.mod h1:uW6kP17uPlLJsD3ijUYn3/M5bAxtlZhMI6m3MFxTMTM=
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0/go.mod h1:dXGbAdH5GtBTC4WfIxhKZfyBF/HBFgRZSWwZ9g/He9o=
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 h1:P6pPBnrTSX3DEVR4fDembhRWSsG5rVo6hYhAB/ADZrk=
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6FmdpVm2joNMFikkuWg0EoCKLGUMNw=
github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
github.com/lestrrat-go/backoff/v2 v2.0.8/go.mod h1:rHP/q/r9aT27n24JQLa7JhSQZCKBBOiM/uP402WwN8Y=
github.com/lestrrat-go/blackmagic v1.0.0/go.mod h1:TNgH//0vYSs8VXDCfkZLgIrVTTXQELZffUV0tz3MtdQ=
github.com/lestrrat-go/blackmagic v1.0.1/go.mod h1:UrEqBzIR2U6CnzVyUtfM6oZNMt/7O7Vohk2J0OGSAtU=
github.com/lestrrat-go/httpcc v1.0.1/go.mod h1:qiltp3Mt56+55GPVCbTdM9MlqhvzyuL6W/NMDA8vA5E=
github.com/lestrrat-go/iter v1.0.1/go.mod h1:zIdgO1mRKhn8l9vrZJZz9TUMMFbQbLeTsbqPDrJ/OJc=
github.com/lestrrat-go/iter v1.0.2/go.mod h1:Momfcq3AnRlRjI5b5O8/G5/BvpzrhoFTZcn06fEOPt4=
github.com/lestrrat-go/jwx v1.2.24/go.mod h1:zoNuZymNl5lgdcu6P7K6ie2QRll5HVfF4xwxBBK1NxY=
github.com/lestrrat-go/option v1.0.0/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
github.com/lib/pq v1.2.0 h1:LXpIM/LZ5xGFhOpXAQUIMM1HdyqzVYM13zNdjCEEcA0=
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/logrusorgru/aurora/v3 v3.0.0/go.mod h1:vsR12bk5grlLvLXAYrBsb5Oc/N+LxAlxggSjiwMnCUc=
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I=
github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ=
github.com/matryer/moq v0.2.3/go.mod h1:9RtPYjTnH1bSBIkpvtHkFN7nbWAnO7oRpdJkEIn6UtE=
github.com/matryer/moq v0.2.7/go.mod h1:kITsx543GOENm48TUAQyJ9+SAvFSr7iGQXPoth/VUBk=
github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ=
github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
github.com/mattn/go-sqlite3 v1.14.12 h1:TJ1bhYJPV44phC+IMu1u2K/i5RriLTPe+yc68XDJ1Z0=
github.com/mattn/go-sqlite3 v1.14.12/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU=
github.com/mitchellh/mapstructure v1.2.3 h1:f/MjBEBDLttYCGfRaKBbKSRVF5aV2O6fnBpzknuE3jU=
github.com/mitchellh/mapstructure v1.2.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI=
github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
github.com/mitchellh/mapstructure v1.3.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/santhosh-tekuri/jsonschema v1.2.4 h1:hNhW8e7t+H1vgY+1QeEQpveR6D4+OwKPXCfD2aieJis=
github.com/santhosh-tekuri/jsonschema v1.2.4/go.mod h1:TEAUOeZSmIxTTuHatJzrvARHiuO9LYd+cIxzgEHCQI4=
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 h1:TToq11gyfNlrMFZiYujSekIsPd9AmsA2Bj/iv+s4JHE=
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0=
github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shirou/gopsutil/v3 v3.21.9/go.mod h1:YWp/H8Qs5fVmf17v7JNZzA0mPJ+mS2e9JdiUF9LlKzQ=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/shirou/gopsutil/v3 v3.22.4/go.mod h1:D01hZJ4pVHPpCTZ3m3T2+wDF2YAGfd+H4ifUguaQzHM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tklauser/go-sysconf v0.3.9/go.mod h1:11DU/5sG7UexIrp/O6g35hrWzu0JxlwQ3LSFUzyeuhs=
github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8=
github.com/urfave/cli/v2 v2.3.0/go.mod h1:LJmUH05zAU44vOAcrfzZQKsZbVcdbOG8rtL3/XcUArI=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.2 h1:4jaiDzPyXQvSd7D0EjG45355tLlV3VOECpq10pLC+8s=
github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/swaggo/files v0.0.0-20220728132757-551d4a08d97a h1:kAe4YSu0O0UFn1DowNo2MY5p6xzqtJ/wQ7LZynSvGaY=
github.com/swaggo/files v0.0.0-20220728132757-551d4a08d97a/go.mod h1:lKJPbtWzJ9JhsTN1k1gZgleJWY/cqq0psdoMmaThG3w=
github.com/swaggo/http-swagger v1.3.3 h1:Hu5Z0L9ssyBLofaama21iYaF2VbWyA8jdohaaCGpHsc=
github.com/swaggo/http-swagger v1.3.3/go.mod h1:sE+4PjD89IxMPm77FnkDz0sdO+p5lbXzrVWT6OTVVGo=
github.com/swaggo/swag v1.8.5 h1:7NgtfXsXE+jrcOwRyiftGKW7Ppydj7tZiVenuRf1fE4=
github.com/swaggo/swag v1.8.5/go.mod h1:jMLeXOOmYyjk8PvHTsXBdrubsNd9gUJTTCzL5iBnseg=
github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk=
github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ=
github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw=
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
github.com/urfave/cli/v2 v2.8.1 h1:CGuYNZF9IKZY/rfBe3lJpccSoIY1ytfvmgQT90cNOl4=
github.com/urfave/cli/v2 v2.8.1/go.mod h1:Z41J9TPoffeoqP0Iza0YbAhGvymRdZAd2uPmZ5JxRdY=
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=
github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ=
github.com/vektah/gqlparser/v2 v2.4.0/go.mod h1:flJWIR04IMQPGz+BXLrORkrARBxv/rtyIAFvd/MceW0=
github.com/vektah/gqlparser/v2 v2.4.1 h1:QOyEn8DAPMUMARGMeshKDkDgNmVoEaEGiDB0uWxcSlQ=
github.com/vektah/gqlparser/v2 v2.4.1/go.mod h1:flJWIR04IMQPGz+BXLrORkrARBxv/rtyIAFvd/MceW0=
github.com/vektah/gqlparser/v2 v2.5.0 h1:GwEwy7AJsqPWrey0bHnn+3JLaHLZVT66wY/+O+Tf9SU=
github.com/vektah/gqlparser/v2 v2.5.0/go.mod h1:mPgqFBu/woKTVYWyNk8cO3kh4S/f4aRFZrvOnp3hmCs=
github.com/xlab/treeprint v1.1.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 h1:bAn7/zixMGCfxrRTfdpNzjtPYqr8smhKouy9mxVdGPU=
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673/go.mod h1:N3UwUGtsrSj3ccvlPHLoLsHnpR27oXr4ZE984MbSER8=
github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200604202706-70a84ac30bf9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I=
golang.org/x/crypto v0.0.0-20220321153916-2c7772ba3064 h1:S25/rfnfsMVgORT4/J61MJ7rdyseOZOyvLIrZEZ7s6s=
golang.org/x/crypto v0.0.0-20220321153916-2c7772ba3064/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220427172511-eb4f295cb31f/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220513210258-46612604a0f9/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90 h1:Y/gsMcFOcR+6S6f3YeMKl5g+dZMEWqcz5Czj/GWYbkM=
golang.org/x/crypto v0.0.0-20220829220503-c86fa9a7ed90/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o=
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 h1:6zppjxzCulZykYSLyVDYbneBfbaBIQPYMevg0bEwv2s=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2 h1:CIJ76btIcR3eFI5EgSo6k1qKw9KJexJuRLI9G7Hp5wE=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220513224357-95641704303c/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c h1:yKufUcDwucU5urd+50/Opbt4AYpqthk7wHpHok8f1lo=
golang.org/x/net v0.0.0-20220906165146-f3363e06e74c/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/net v0.0.0-20220909164309-bea034e7d591 h1:D0B/7al0LLrVC8aWF4+oxpv/m8bc7ViFfVS8/gXGdqI=
golang.org/x/net v0.0.0-20220909164309-bea034e7d591/go.mod h1:YDH+HFinaLZZlnHAfSS6ZXJJ9M9t4Dl22yv3iI2vPwk=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -157,41 +262,69 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210902050250-f475640dd07b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211019181941-9d821ace8654 h1:id054HUawV2/6IGm2IV8KZQjqtwAOo2CYlOToYqa0d0=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220513210249-45d2b4557a2a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220907062415-87db552b00fd h1:AZeIEzg+8RCELJYq8w+ODLVxFgLMMigSwO/ffKPEd9U=
golang.org/x/sys v0.0.0-20220907062415-87db552b00fd/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220913175220-63ea55921009 h1:PuvuRMeLWqsf/ZdT1UUZz0syhioyv1mzuFZsXs4fvhw=
golang.org/x/sys v0.0.0-20220913175220-63ea55921009/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20220411224347-583f2d630306/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200815165600-90abf76919f3/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU=
golang.org/x/tools v0.1.10 h1:QjFRCZxdOhBJ/UNgnBZLbNV13DlbnK0quyivTnXJM20=
golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E=
golang.org/x/tools v0.1.12 h1:VveCTK38A2rkS8ZqFY25HIDFscX5X9OoEhJd3quQmXU=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f h1:GGU+dLjvlC3qDwqYgL6UgRmHXhOOgns0bZu2Ty5mm6U=
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.3.0 h1:clyUAQHOM3G0M3f5vQj7LuJrETvjVot3Z5el9nffUtU=
gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
rsc.io/goversion v1.2.0/go.mod h1:Eih9y/uIBS3ulggl7KNJ09xGSLcuNaLgmvvqa07sgfo=

View File

@ -7,11 +7,6 @@ exec:
filename: internal/graph/generated/generated.go
package: generated
# Uncomment to enable federation
# federation:
# filename: graph/generated/federation.go
# package: generated
# Where should any generated models go?
model:
filename: internal/graph/model/models_gen.go
@ -20,7 +15,7 @@ model:
# Where should the resolver implementations go?
resolver:
layout: follow-schema
dir: graph
dir: internal/graph
package: graph
# Optional: turn on use `gqlgen:"fieldName"` tags in your models
@ -35,7 +30,7 @@ resolver:
# gqlgen will search for any type names in the schema in these go packages
# if they match it will use them, otherwise it will generate them.
autobind:
- "github.com/ClusterCockpit/cc-backend/graph/model"
- "github.com/ClusterCockpit/cc-backend/internal/graph/model"
# This section declares type mapping between the GraphQL and go type systems
#
@ -55,23 +50,32 @@ models:
- github.com/99designs/gqlgen/graphql.Int64
- github.com/99designs/gqlgen/graphql.Int32
Job:
model: "github.com/ClusterCockpit/cc-backend/schema.Job"
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Job"
fields:
tags:
resolver: true
metaData:
resolver: true
Cluster:
model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Cluster"
fields:
partitions:
resolver: true
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/schema.Float" }
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/schema.MetricScope" }
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/schema.JobStatistics" }
Tag: { model: "github.com/ClusterCockpit/cc-backend/schema.Tag" }
Resource: { model: "github.com/ClusterCockpit/cc-backend/schema.Resource" }
JobState: { model: "github.com/ClusterCockpit/cc-backend/schema.JobState" }
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/schema.JobMetric" }
Series: { model: "github.com/ClusterCockpit/cc-backend/schema.Series" }
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/schema.MetricStatistics" }
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/schema.StatsSeries" }
NullableFloat: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Float" }
MetricScope: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricScope" }
JobStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobStatistics" }
Tag: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Tag" }
Resource: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Resource" }
JobState: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobState" }
TimeRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.TimeRange" }
IntRange: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.IntRange" }
JobMetric: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.JobMetric" }
Series: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Series" }
MetricStatistics: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricStatistics" }
MetricConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.MetricConfig" }
SubClusterConfig: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubClusterConfig" }
Accelerator: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Accelerator" }
Topology: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.Topology" }
FilterRanges: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.FilterRanges" }
SubCluster: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.SubCluster" }
StatsSeries: { model: "github.com/ClusterCockpit/cc-backend/pkg/schema.StatsSeries" }

View File

@ -36,3 +36,36 @@ sudo systemctl start clustercockpit.service
# Check whats going on:
sudo journalctl -u clustercockpit.service
```
# Recommended deployment workflow
It is recommended to install all ClusterCockpit components in a common durectory, this can be something like `/opt/monitoring`, `var/monitoring` or `var/clustercockpit`.
In the following we are using `/opt/monitoring`.
Two systemd services are running on the central monitoring server:
clustercockpit : Binary cc-backend in `/opt/monitoring/cc-backend`
cc-metric-store: Binary cc-metric-store in `/opt/monitoring/cc-metric-store`
ClusterCockpit is deployed as a single file binary that embeds all static assets.
We recommend to keep all binaries in a folder `archive` and link the currently active from cc-backend root.
This allows to easily roll-back in case something breaks.
## Workflow to deploy new version
This example assumes the DB and job archive did not change.
* Backup the sqlite DB file and Job archive directory tree!
* Clone cc-backend source tree (e.g. in your home directory)
* Copy the adapted legal text files into the git source tree (./web/templates).
* Build cc-backend:
```
$ cd web/frontend
$ yarn && yarn build
$ cd ../../
$ go build ./cmd/cc-backend
```
* Copy `cc-backend` binary to `/opt/monitoring/cc-backend/archive`
* Link from cc-backend root to recent version
* Restart systemd service: `$ sudo systemctl restart clustercockpit.service`
* Check log for issues: `$ sudo journalctl -u clustercockpit.service`
* Check the ClusterCockpit web frontend and your Slurm adapters if anything is broken!

864
internal/api/docs.go Normal file
View File

@ -0,0 +1,864 @@
// Package api GENERATED BY SWAG; DO NOT EDIT
// This file was generated by swaggo/swag at
// 2022-09-22 13:31:53.353204065 +0200 CEST m=+0.139444562
package api
import "github.com/swaggo/swag"
const docTemplate = `{
"schemes": {{ marshal .Schemes }},
"swagger": "2.0",
"info": {
"description": "{{escape .Description}}",
"title": "{{.Title}}",
"termsOfService": "https://monitoring.nhr.fau.de/imprint",
"contact": {
"name": "ClusterCockpit Project",
"url": "https://github.com/ClusterCockpit",
"email": "support@clustercockpit.org"
},
"license": {
"name": "MIT License",
"url": "https://opensource.org/licenses/MIT"
},
"version": "{{.Version}}"
},
"host": "{{.Host}}",
"basePath": "{{.BasePath}}",
"paths": {
"/jobs/": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Get a list of all jobs. Filters can be applied using query parameters.\nNumber of results can be limited by page. Results are sorted by descending startTime.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Lists all jobs",
"parameters": [
{
"enum": [
"running",
"completed",
"failed",
"cancelled",
"stopped",
"timeout"
],
"type": "string",
"description": "Job State",
"name": "state",
"in": "query"
},
{
"type": "string",
"description": "Job Cluster",
"name": "cluster",
"in": "query"
},
{
"type": "string",
"description": "Syntax: '$from-$to', as unix epoch timestamps in seconds",
"name": "start-time",
"in": "query"
},
{
"type": "integer",
"description": "Items per page (If empty: No Limit)",
"name": "items-per-page",
"in": "query"
},
{
"type": "integer",
"description": "Page Number (If empty: No Paging)",
"name": "page",
"in": "query"
},
{
"type": "boolean",
"description": "Include metadata (e.g. jobScript) in response",
"name": "with-metadata",
"in": "query"
}
],
"responses": {
"200": {
"description": "Array of matching jobs",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Job"
}
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/start_job/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job specified in request body will be saved to database as \"running\" with new DB ID.\nJob specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Adds a new job as \"running\"",
"parameters": [
{
"description": "Job to add",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
}
],
"responses": {
"201": {
"description": "Job added successfully",
"schema": {
"$ref": "#/definitions/api.StartJobApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: The combination of jobId, clusterId and startTime does already exist",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/stop_job/": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to stop is specified by request body. All fields are required in this case.\nReturns full job resource information according to 'JobMeta' scheme.",
"produces": [
"application/json"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
"description": "All fields required",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.StopJobApiRequest"
}
}
],
"responses": {
"200": {
"description": "Job resource",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/stop_job/{id}": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Job to stop is specified by database ID. Only stopTime and final state are required in request body.\nReturns full job resource information according to 'JobMeta' scheme.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Marks job as completed and triggers archiving",
"parameters": [
{
"type": "integer",
"description": "Database ID of Job",
"name": "id",
"in": "path",
"required": true
},
{
"description": "stopTime and final state in request body",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/api.StopJobApiRequest"
}
}
],
"responses": {
"200": {
"description": "Job resource",
"schema": {
"$ref": "#/definitions/schema.JobMeta"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Resource not found",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"422": {
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/tag_job/{id}": {
"post": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.\nIf tagged job is already finished: Tag will be written directly to respective archive files.",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"summary": "Adds one or more tags to a job",
"parameters": [
{
"type": "integer",
"description": "Job Database ID",
"name": "id",
"in": "path",
"required": true
},
{
"description": "Array of tag-objects to add",
"name": "request",
"in": "body",
"required": true,
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/api.Tag"
}
}
}
],
"responses": {
"200": {
"description": "Updated job resource",
"schema": {
"$ref": "#/definitions/schema.Job"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"404": {
"description": "Job or tag does not exist",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
}
},
"definitions": {
"api.ErrorResponse": {
"description": "Error message as returned from backend.",
"type": "object",
"properties": {
"error": {
"description": "Error Message",
"type": "string"
},
"status": {
"description": "Statustext of Errorcode",
"type": "string"
}
}
},
"api.StartJobApiResponse": {
"description": "Successful job start response with database id of new job.",
"type": "object",
"properties": {
"id": {
"description": "Database ID of new job",
"type": "integer"
}
}
},
"api.StopJobApiRequest": {
"description": "Request to stop running job using stoptime and final state. They are only required if no database id was provided with endpoint.",
"type": "object",
"required": [
"jobState",
"stopTime"
],
"properties": {
"cluster": {
"description": "Cluster of job",
"type": "string",
"example": "fritz"
},
"jobId": {
"description": "Cluster Job ID of job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout"
],
"example": "completed"
},
"startTime": {
"description": "Start Time of job as epoch",
"type": "integer",
"example": 1649723812
},
"stopTime": {
"description": "Stop Time of job as epoch",
"type": "integer",
"example": 1649763839
}
}
},
"api.Tag": {
"description": "Defines a tag using name and type.",
"type": "object",
"properties": {
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}
}
},
"schema.Job": {
"description": "Information of a HPC job.",
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"example": "completed"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start time as 'time.Time' data type",
"type": "string"
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
}
}
},
"schema.JobMeta": {
"description": "Meta data information of a HPC job.",
"type": "object",
"properties": {
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer",
"example": 123000
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string",
"example": "fritz"
},
"duration": {
"description": "Duration of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 43200
},
"exclusive": {
"description": "Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user",
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"id": {
"description": "The unique identifier of a job in the database",
"type": "integer"
},
"jobId": {
"description": "The unique identifier of a job",
"type": "integer",
"example": 123000
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
],
"example": "completed"
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"monitoringStatus": {
"description": "State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull",
"type": "integer",
"maximum": 3,
"minimum": 0,
"example": 1
},
"numAcc": {
"description": "Number of accelerators used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"numHwthreads": {
"description": "Number of HWThreads used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 20
},
"numNodes": {
"description": "Number of nodes used (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 2
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string",
"example": "main"
},
"project": {
"description": "The unique identifier of a project",
"type": "string",
"example": "abcd200"
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Resource"
}
},
"smt": {
"description": "SMT threads used by job",
"type": "integer",
"example": 4
},
"startTime": {
"description": "Start epoch time stamp in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 1649723812
},
"statistics": {
"description": "Metric statistics of job",
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/schema.JobStatistics"
}
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string",
"example": "main"
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Tag"
}
},
"user": {
"description": "The unique identifier of a user",
"type": "string",
"example": "abcd100h"
},
"walltime": {
"description": "Requested walltime of job in seconds (Min \u003e 0)",
"type": "integer",
"minimum": 1,
"example": 86400
}
}
},
"schema.JobStatistics": {
"description": "Specification for job metric statistics.",
"type": "object",
"properties": {
"avg": {
"description": "Job metric average",
"type": "number",
"minimum": 0,
"example": 2500
},
"max": {
"description": "Job metric maximum",
"type": "number",
"minimum": 0,
"example": 3000
},
"min": {
"description": "Job metric minimum",
"type": "number",
"minimum": 0,
"example": 2000
},
"unit": {
"description": "Metric unit (see schema/unit.schema.json)",
"type": "string",
"example": "GHz"
}
}
},
"schema.Resource": {
"description": "A resource used by a job",
"type": "object",
"properties": {
"accelerators": {
"description": "List of of accelerator device ids",
"type": "array",
"items": {
"type": "string"
}
},
"configuration": {
"description": "The configuration options of the node",
"type": "string"
},
"hostname": {
"description": "Name of the host (= node)",
"type": "string"
},
"hwthreads": {
"description": "List of OS processor ids",
"type": "array",
"items": {
"type": "integer"
}
}
}
},
"schema.Tag": {
"description": "Defines a tag using name and type.",
"type": "object",
"properties": {
"id": {
"description": "The unique DB identifier of a tag",
"type": "integer"
},
"name": {
"description": "Tag Name",
"type": "string",
"example": "Testjob"
},
"type": {
"description": "Tag Type",
"type": "string",
"example": "Debug"
}
}
}
},
"securityDefinitions": {
"ApiKeyAuth": {
"description": "JWT based authentification for general API endpoint use.",
"type": "apiKey",
"name": "X-Auth-Token",
"in": "header"
}
}
}`
// SwaggerInfo holds exported Swagger Info so clients can modify it
var SwaggerInfo = &swag.Spec{
Version: "0.1.0",
Host: "clustercockpit.localhost:8082",
BasePath: "/api",
Schemes: []string{},
Title: "ClusterCockpit REST API",
Description: "Defines a tag using name and type.",
InfoInstanceName: "swagger",
SwaggerTemplate: docTemplate,
}
func init() {
swag.Register(SwaggerInfo.InstanceName(), SwaggerInfo)
}

View File

@ -21,16 +21,36 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/gorilla/mux"
)
// @title ClusterCockpit REST API
// @version 0.1.0
// @description API for batch job control.
// @termsOfService https://monitoring.nhr.fau.de/imprint
// @contact.name ClusterCockpit Project
// @contact.url https://github.com/ClusterCockpit
// @contact.email support@clustercockpit.org
// @license.name MIT License
// @license.url https://opensource.org/licenses/MIT
// @host clustercockpit.localhost:8082
// @BasePath /api
// @securityDefinitions.apikey ApiKeyAuth
// @in header
// @name X-Auth-Token
// @description JWT based authentification for general API endpoint use.
type RestApi struct {
JobRepository *repository.JobRepository
Resolver *graph.Resolver
@ -44,14 +64,13 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
r.StrictSlash(true)
r.HandleFunc("/jobs/start_job/", api.startJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/", api.stopJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/{id}", api.stopJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/", api.stopJobByRequest).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/stop_job/{id}", api.stopJobById).Methods(http.MethodPost, http.MethodPut)
// r.HandleFunc("/jobs/import/", api.importJob).Methods(http.MethodPost, http.MethodPut)
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
// r.HandleFunc("/jobs/{id}", api.getJob).Methods(http.MethodGet)
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
if api.Authentication != nil {
@ -69,27 +88,43 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
}
}
// StartJobApiResponse model
// @Description Successful job start response with database id of new job.
type StartJobApiResponse struct {
// Database ID of new job
DBID int64 `json:"id"`
}
// StopJobApiRequest model
// @Description Request to stop running job using stoptime and final state.
// @Description They are only required if no database id was provided with endpoint.
type StopJobApiRequest struct {
// JobId, ClusterId and StartTime are optional.
// They are only used if no database id was provided.
JobId *int64 `json:"jobId"`
Cluster *string `json:"cluster"`
StartTime *int64 `json:"startTime"`
// Payload
StopTime int64 `json:"stopTime"`
State schema.JobState `json:"jobState"`
// Stop Time of job as epoch
StopTime int64 `json:"stopTime" validate:"required" example:"1649763839"`
State schema.JobState `json:"jobState" validate:"required" example:"completed" enums:"completed,failed,cancelled,stopped,timeout"` // Final job state
JobId *int64 `json:"jobId" example:"123000"` // Cluster Job ID of job
Cluster *string `json:"cluster" example:"fritz"` // Cluster of job
StartTime *int64 `json:"startTime" example:"1649723812"` // Start Time of job as epoch
}
// ErrorResponse model
// @Description Error message as returned from backend.
type ErrorResponse struct {
// Statustext of Errorcode
Status string `json:"status"`
Error string `json:"error"`
Error string `json:"error"` // Error Message
}
// Tag model
// @Description Defines a tag using name and type.
type Tag struct {
// Tag Type
Type string `json:"type" example:"Debug"`
Name string `json:"name" example:"Testjob"` // Tag Name
}
type TagJobApiRequest []*Tag
func handleError(err error, statusCode int, rw http.ResponseWriter) {
log.Warnf("REST API: %s", err.Error())
rw.Header().Add("Content-Type", "application/json")
@ -106,12 +141,24 @@ func decode(r io.Reader, val interface{}) error {
return dec.Decode(val)
}
type TagJobApiRequest []*struct {
Name string `json:"name"`
Type string `json:"type"`
}
// Return a list of jobs
// getJobs godoc
// @Summary Lists all jobs
// @Description Get a list of all jobs. Filters can be applied using query parameters.
// @Description Number of results can be limited by page. Results are sorted by descending startTime.
// @Accept json
// @Produce json
// @Param state query string false "Job State" Enums(running, completed, failed, cancelled, stopped, timeout)
// @Param cluster query string false "Job Cluster"
// @Param start-time query string false "Syntax: '$from-$to', as unix epoch timestamps in seconds"
// @Param items-per-page query int false "Items per page (If empty: No Limit)"
// @Param page query int false "Page Number (If empty: No Paging)"
// @Param with-metadata query bool false "Include metadata (e.g. jobScript) in response"
// @Success 200 {array} schema.Job "Array of matching jobs"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/ [get]
func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
withMetadata := false
filter := &model.JobFilter{}
@ -147,7 +194,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
return
}
ufrom, uto := time.Unix(from, 0), time.Unix(to, 0)
filter.StartTime = &model.TimeRange{From: &ufrom, To: &uto}
filter.StartTime = &schema.TimeRange{From: &ufrom, To: &uto}
case "page":
x, err := strconv.Atoi(vals[0])
if err != nil {
@ -198,7 +245,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
}
if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
res.Statistics, err = metricdata.GetStatistics(job)
res.Statistics, err = archive.GetStatistics(job)
if err != nil {
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
@ -221,7 +268,21 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
}
}
// Add a tag to a job
// tagJob godoc
// @Summary Adds one or more tags to a job
// @Description Adds tag(s) to a job specified by DB ID. Name and Type of Tag(s) can be chosen freely.
// @Description If tagged job is already finished: Tag will be written directly to respective archive files.
// @Accept json
// @Produce json
// @Param id path int true "Job Database ID"
// @Param request body api.TagJobApiRequest true "Array of tag-objects to add"
// @Success 200 {object} schema.Job "Updated job resource"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 404 {object} api.ErrorResponse "Job or tag does not exist"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/tag_job/{id} [post]
func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
iid, err := strconv.ParseInt(mux.Vars(r)["id"], 10, 64)
if err != nil {
@ -266,8 +327,21 @@ func (api *RestApi) tagJob(rw http.ResponseWriter, r *http.Request) {
json.NewEncoder(rw).Encode(job)
}
// A new job started. The body should be in the `meta.json` format, but some fields required
// there are optional here (e.g. `jobState` defaults to "running").
// startJob godoc
// @Summary Adds a new job as "running"
// @Description Job specified in request body will be saved to database as "running" with new DB ID.
// @Description Job specifications follow the 'JobMeta' scheme, API will fail to execute if requirements are not met.
// @Accept json
// @Produce json
// @Param request body schema.JobMeta true "Job to add"
// @Success 201 {object} api.StartJobApiResponse "Job added successfully"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 403 {object} api.ErrorResponse "Forbidden"
// @Failure 422 {object} api.ErrorResponse "Unprocessable Entity: The combination of jobId, clusterId and startTime does already exist"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/start_job/ [post]
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
@ -322,14 +396,30 @@ func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
})
}
// A job has stopped and should be archived.
func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
// stopJobById godoc
// @Summary Marks job as completed and triggers archiving
// @Description Job to stop is specified by database ID. Only stopTime and final state are required in request body.
// @Description Returns full job resource information according to 'JobMeta' scheme.
// @Accept json
// @Produce json
// @Param id path int true "Database ID of Job"
// @Param request body api.StopJobApiRequest true "stopTime and final state in request body"
// @Success 200 {object} schema.JobMeta "Job resource"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 403 {object} api.ErrorResponse "Forbidden"
// @Failure 404 {object} api.ErrorResponse "Resource not found"
// @Failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/stop_job/{id} [post]
func (api *RestApi) stopJobById(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
return
}
// Parse request body
// Parse request body: Only StopTime and State
req := StopJobApiRequest{}
if err := decode(r.Body, &req); err != nil {
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
@ -349,23 +439,71 @@ func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
job, err = api.JobRepository.FindById(id)
} else {
if req.JobId == nil {
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
return
}
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
return
}
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
api.checkAndHandleStopJob(rw, job, req)
}
// stopJobByRequest godoc
// @Summary Marks job as completed and triggers archiving
// @Description Job to stop is specified by request body. All fields are required in this case.
// @Description Returns full job resource information according to 'JobMeta' scheme.
// @Produce json
// @Param request body api.StopJobApiRequest true "All fields required"
// @Success 200 {object} schema.JobMeta "Job resource"
// @Failure 400 {object} api.ErrorResponse "Bad Request"
// @Failure 401 {object} api.ErrorResponse "Unauthorized"
// @Failure 403 {object} api.ErrorResponse "Forbidden"
// @Failure 404 {object} api.ErrorResponse "Resource not found"
// @Failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
// @Failure 500 {object} api.ErrorResponse "Internal Server Error"
// @Security ApiKeyAuth
// @Router /jobs/stop_job/ [post]
func (api *RestApi) stopJobByRequest(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
return
}
// Parse request body
req := StopJobApiRequest{}
if err := decode(r.Body, &req); err != nil {
handleError(fmt.Errorf("parsing request body failed: %w", err), http.StatusBadRequest, rw)
return
}
// Fetch job (that will be stopped) from db
var job *schema.Job
var err error
if req.JobId == nil {
handleError(errors.New("the field 'jobId' is required"), http.StatusBadRequest, rw)
return
}
job, err = api.JobRepository.Find(req.JobId, req.Cluster, req.StartTime)
if err != nil {
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
return
}
api.checkAndHandleStopJob(rw, job, req)
}
func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Job, req StopJobApiRequest) {
// Sanity checks
if job == nil || job.StartTime.Unix() >= req.StopTime || job.State != schema.JobStateRunning {
handleError(errors.New("stopTime must be larger than startTime and only running jobs can be stopped"), http.StatusBadRequest, rw)
return
}
if req.State != "" && !req.State.Valid() {
handleError(fmt.Errorf("invalid job state: %#v", req.State), http.StatusBadRequest, rw)
return
@ -425,28 +563,28 @@ func (api *RestApi) stopJob(rw http.ResponseWriter, r *http.Request) {
}()
}
func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
return
}
// func (api *RestApi) importJob(rw http.ResponseWriter, r *http.Request) {
// if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleApi) {
// handleError(fmt.Errorf("missing role: %#v", auth.RoleApi), http.StatusForbidden, rw)
// return
// }
var body struct {
Meta *schema.JobMeta `json:"meta"`
Data *schema.JobData `json:"data"`
}
if err := decode(r.Body, &body); err != nil {
handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusBadRequest, rw)
return
}
// var body struct {
// Meta *schema.JobMeta `json:"meta"`
// Data *schema.JobData `json:"data"`
// }
// if err := decode(r.Body, &body); err != nil {
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusBadRequest, rw)
// return
// }
if err := api.JobRepository.ImportJob(body.Meta, body.Data); err != nil {
handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusUnprocessableEntity, rw)
return
}
// if err := api.JobRepository.ImportJob(body.Meta, body.Data); err != nil {
// handleError(fmt.Errorf("import failed: %s", err.Error()), http.StatusUnprocessableEntity, rw)
// return
// }
rw.Write([]byte(`{ "status": "OK" }`))
}
// rw.Write([]byte(`{ "status": "OK" }`))
// }
func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
id := mux.Vars(r)["id"]
@ -608,7 +746,7 @@ func (api *RestApi) updateConfiguration(rw http.ResponseWriter, r *http.Request)
fmt.Printf("KEY: %#v\nVALUE: %#v\n", key, value)
if err := config.UpdateConfig(key, value, r.Context()); err != nil {
if err := repository.GetUserCfgRepo().UpdateConfig(key, value, auth.GetUser(r.Context())); err != nil {
http.Error(rw, err.Error(), http.StatusUnprocessableEntity)
return
}

View File

@ -76,12 +76,13 @@ type Authentication struct {
SessionMaxAge time.Duration
authenticators []Authenticator
LdapAuth *LdapAutnenticator
LdapAuth *LdapAuthenticator
JwtAuth *JWTAuthenticator
LocalAuth *LocalAuthenticator
}
func Init(db *sqlx.DB, configs map[string]interface{}) (*Authentication, error) {
func Init(db *sqlx.DB,
configs map[string]interface{}) (*Authentication, error) {
auth := &Authentication{}
auth.db = db
_, err := db.Exec(`
@ -125,7 +126,7 @@ func Init(db *sqlx.DB, configs map[string]interface{}) (*Authentication, error)
auth.authenticators = append(auth.authenticators, auth.JwtAuth)
if config, ok := configs["ldap"]; ok {
auth.LdapAuth = &LdapAutnenticator{}
auth.LdapAuth = &LdapAuthenticator{}
if err := auth.LdapAuth.Init(auth, config); err != nil {
return nil, err
}
@ -135,7 +136,10 @@ func Init(db *sqlx.DB, configs map[string]interface{}) (*Authentication, error)
return auth, nil
}
func (auth *Authentication) AuthViaSession(rw http.ResponseWriter, r *http.Request) (*User, error) {
func (auth *Authentication) AuthViaSession(
rw http.ResponseWriter,
r *http.Request) (*User, error) {
session, err := auth.sessionStore.Get(r, "session")
if err != nil {
return nil, err
@ -155,7 +159,10 @@ func (auth *Authentication) AuthViaSession(rw http.ResponseWriter, r *http.Reque
}
// Handle a POST request that should log the user in, starting a new session.
func (auth *Authentication) Login(onsuccess http.Handler, onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error)) http.Handler {
func (auth *Authentication) Login(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, loginErr error)) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
var err error = errors.New("no authenticator applied")
username := r.FormValue("username")
@ -211,7 +218,10 @@ func (auth *Authentication) Login(onsuccess http.Handler, onfailure func(rw http
// Authenticate the user and put a User object in the
// context of the request. If authentication fails,
// do not continue but send client to the login screen.
func (auth *Authentication) Auth(onsuccess http.Handler, onfailure func(rw http.ResponseWriter, r *http.Request, authErr error)) http.Handler {
func (auth *Authentication) Auth(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error)) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
for _, authenticator := range auth.authenticators {
user, err := authenticator.Auth(rw, r)
@ -237,6 +247,7 @@ func (auth *Authentication) Auth(onsuccess http.Handler, onfailure func(rw http.
// Clears the session cookie
func (auth *Authentication) Logout(onsuccess http.Handler) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
session, err := auth.sessionStore.Get(r, "session")
if err != nil {

View File

@ -16,15 +16,10 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/golang-jwt/jwt/v4"
)
type JWTAuthConfig struct {
// Specifies for how long a session or JWT shall be valid
// as a string parsable by time.ParseDuration().
MaxAge int64 `json:"max-age"`
}
type JWTAuthenticator struct {
auth *Authentication
@ -33,14 +28,15 @@ type JWTAuthenticator struct {
loginTokenKey []byte // HS256 key
config *JWTAuthConfig
config *schema.JWTAuthConfig
}
var _ Authenticator = (*JWTAuthenticator)(nil)
func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
ja.auth = auth
ja.config = conf.(*JWTAuthConfig)
ja.config = conf.(*schema.JWTAuthConfig)
pubKey, privKey := os.Getenv("JWT_PUBLIC_KEY"), os.Getenv("JWT_PRIVATE_KEY")
if pubKey == "" || privKey == "" {
@ -69,11 +65,19 @@ func (ja *JWTAuthenticator) Init(auth *Authentication, conf interface{}) error {
return nil
}
func (ja *JWTAuthenticator) CanLogin(user *User, rw http.ResponseWriter, r *http.Request) bool {
func (ja *JWTAuthenticator) CanLogin(
user *User,
rw http.ResponseWriter,
r *http.Request) bool {
return (user != nil && user.AuthSource == AuthViaToken) || r.Header.Get("Authorization") != "" || r.URL.Query().Get("login-token") != ""
}
func (ja *JWTAuthenticator) Login(user *User, rw http.ResponseWriter, r *http.Request) (*User, error) {
func (ja *JWTAuthenticator) Login(
user *User,
rw http.ResponseWriter,
r *http.Request) (*User, error) {
rawtoken := r.Header.Get("X-Auth-Token")
if rawtoken == "" {
rawtoken = r.Header.Get("Authorization")
@ -135,7 +139,10 @@ func (ja *JWTAuthenticator) Login(user *User, rw http.ResponseWriter, r *http.Re
return user, nil
}
func (ja *JWTAuthenticator) Auth(rw http.ResponseWriter, r *http.Request) (*User, error) {
func (ja *JWTAuthenticator) Auth(
rw http.ResponseWriter,
r *http.Request) (*User, error) {
rawtoken := r.Header.Get("X-Auth-Token")
if rawtoken == "" {
rawtoken = r.Header.Get("Authorization")
@ -183,6 +190,7 @@ func (ja *JWTAuthenticator) Auth(rw http.ResponseWriter, r *http.Request) (*User
// Generate a new JWT that can be used for authentication
func (ja *JWTAuthenticator) ProvideJWT(user *User) (string, error) {
if ja.privateKey == nil {
return "", errors.New("environment variable 'JWT_PRIVATE_KEY' not set")
}

View File

@ -12,30 +12,24 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/go-ldap/ldap/v3"
)
type LdapConfig struct {
Url string `json:"url"`
UserBase string `json:"user_base"`
SearchDN string `json:"search_dn"`
UserBind string `json:"user_bind"`
UserFilter string `json:"user_filter"`
SyncInterval string `json:"sync_interval"` // Parsed using time.ParseDuration.
SyncDelOldUsers bool `json:"sync_del_old_users"`
}
type LdapAutnenticator struct {
type LdapAuthenticator struct {
auth *Authentication
config *LdapConfig
config *schema.LdapConfig
syncPassword string
}
var _ Authenticator = (*LdapAutnenticator)(nil)
var _ Authenticator = (*LdapAuthenticator)(nil)
func (la *LdapAuthenticator) Init(
auth *Authentication,
conf interface{}) error {
func (la *LdapAutnenticator) Init(auth *Authentication, conf interface{}) error {
la.auth = auth
la.config = conf.(*LdapConfig)
la.config = conf.(*schema.LdapConfig)
la.syncPassword = os.Getenv("LDAP_ADMIN_PASSWORD")
if la.syncPassword == "" {
@ -67,11 +61,19 @@ func (la *LdapAutnenticator) Init(auth *Authentication, conf interface{}) error
return nil
}
func (la *LdapAutnenticator) CanLogin(user *User, rw http.ResponseWriter, r *http.Request) bool {
func (la *LdapAuthenticator) CanLogin(
user *User,
rw http.ResponseWriter,
r *http.Request) bool {
return user != nil && user.AuthSource == AuthViaLDAP
}
func (la *LdapAutnenticator) Login(user *User, rw http.ResponseWriter, r *http.Request) (*User, error) {
func (la *LdapAuthenticator) Login(
user *User,
rw http.ResponseWriter,
r *http.Request) (*User, error) {
l, err := la.getLdapConnection(false)
if err != nil {
return nil, err
@ -86,11 +88,15 @@ func (la *LdapAutnenticator) Login(user *User, rw http.ResponseWriter, r *http.R
return user, nil
}
func (la *LdapAutnenticator) Auth(rw http.ResponseWriter, r *http.Request) (*User, error) {
func (la *LdapAuthenticator) Auth(
rw http.ResponseWriter,
r *http.Request) (*User, error) {
return la.auth.AuthViaSession(rw, r)
}
func (la *LdapAutnenticator) Sync() error {
func (la *LdapAuthenticator) Sync() error {
const IN_DB int = 1
const IN_LDAP int = 2
const IN_BOTH int = 3
@ -160,7 +166,8 @@ func (la *LdapAutnenticator) Sync() error {
// TODO: Add a connection pool or something like
// that so that connections can be reused/cached.
func (la *LdapAutnenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
func (la *LdapAuthenticator) getLdapConnection(admin bool) (*ldap.Conn, error) {
conn, err := ldap.DialURL(la.config.Url)
if err != nil {
return nil, err

View File

@ -17,16 +17,27 @@ type LocalAuthenticator struct {
var _ Authenticator = (*LocalAuthenticator)(nil)
func (la *LocalAuthenticator) Init(auth *Authentication, _ interface{}) error {
func (la *LocalAuthenticator) Init(
auth *Authentication,
_ interface{}) error {
la.auth = auth
return nil
}
func (la *LocalAuthenticator) CanLogin(user *User, rw http.ResponseWriter, r *http.Request) bool {
func (la *LocalAuthenticator) CanLogin(
user *User,
rw http.ResponseWriter,
r *http.Request) bool {
return user != nil && user.AuthSource == AuthViaLocalPassword
}
func (la *LocalAuthenticator) Login(user *User, rw http.ResponseWriter, r *http.Request) (*User, error) {
func (la *LocalAuthenticator) Login(
user *User,
rw http.ResponseWriter,
r *http.Request) (*User, error) {
if e := bcrypt.CompareHashAndPassword([]byte(user.Password), []byte(r.FormValue("password"))); e != nil {
return nil, fmt.Errorf("user '%s' provided the wrong password (%w)", user.Username, e)
}
@ -34,6 +45,9 @@ func (la *LocalAuthenticator) Login(user *User, rw http.ResponseWriter, r *http.
return user, nil
}
func (la *LocalAuthenticator) Auth(rw http.ResponseWriter, r *http.Request) (*User, error) {
func (la *LocalAuthenticator) Auth(
rw http.ResponseWriter,
r *http.Request) (*User, error) {
return la.auth.AuthViaSession(rw, r)
}

View File

@ -19,6 +19,7 @@ import (
)
func (auth *Authentication) GetUser(username string) (*User, error) {
user := &User{Username: username}
var hashedPassword, name, rawRoles, email sql.NullString
if err := sq.Select("password", "ldap", "name", "roles", "email").From("user").
@ -40,6 +41,7 @@ func (auth *Authentication) GetUser(username string) (*User, error) {
}
func (auth *Authentication) AddUser(user *User) error {
rolesJson, _ := json.Marshal(user.Roles)
cols := []string{"username", "roles"}
@ -70,11 +72,13 @@ func (auth *Authentication) AddUser(user *User) error {
}
func (auth *Authentication) DelUser(username string) error {
_, err := auth.db.Exec(`DELETE FROM user WHERE user.username = ?`, username)
return err
}
func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
q := sq.Select("username", "name", "email", "roles").From("user")
if specialsOnly {
q = q.Where("(roles != '[\"user\"]' AND roles != '[]')")
@ -106,7 +110,11 @@ func (auth *Authentication) ListUsers(specialsOnly bool) ([]*User, error) {
return users, nil
}
func (auth *Authentication) AddRole(ctx context.Context, username string, role string) error {
func (auth *Authentication) AddRole(
ctx context.Context,
username string,
role string) error {
user, err := auth.GetUser(username)
if err != nil {
return err

View File

@ -5,298 +5,64 @@
package config
import (
"context"
"bytes"
"encoding/json"
"errors"
"fmt"
"net/http"
"log"
"os"
"path/filepath"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/jmoiron/sqlx"
)
var db *sqlx.DB
var lookupConfigStmt *sqlx.Stmt
var Keys schema.ProgramConfig = schema.ProgramConfig{
Addr: ":8080",
DisableAuthentication: false,
EmbedStaticFiles: true,
DBDriver: "sqlite3",
DB: "./var/job.db",
Archive: json.RawMessage(`{\"kind\":\"file\",\"path\":\"./var/job-archive\"}`),
DisableArchive: false,
Validate: false,
LdapConfig: nil,
SessionMaxAge: "168h",
StopJobsExceedingWalltime: 0,
UiDefaults: map[string]interface{}{
"analysis_view_histogramMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"analysis_view_scatterPlotMetrics": [][]string{{"flops_any", "mem_bw"}, {"flops_any", "cpu_load"}, {"cpu_load", "mem_bw"}},
"job_view_nodestats_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"job_view_polarPlotMetrics": []string{"flops_any", "mem_bw", "mem_used", "net_bw", "file_bw"},
"job_view_selectedMetrics": []string{"flops_any", "mem_bw", "mem_used"},
"plot_general_colorBackground": true,
"plot_general_colorscheme": []string{"#00bfff", "#0000ff", "#ff00ff", "#ff0000", "#ff8000", "#ffff00", "#80ff00"},
"plot_general_lineWidth": 3,
"plot_list_hideShortRunningJobs": 5 * 60,
"plot_list_jobsPerPage": 50,
"plot_list_selectedMetrics": []string{"cpu_load", "ipc", "mem_used", "flops_any", "mem_bw"},
"plot_view_plotsPerRow": 3,
"plot_view_showPolarplot": true,
"plot_view_showRoofline": true,
"plot_view_showStatTable": true,
"system_view_selectedMetric": "cpu_load",
},
}
var lock sync.RWMutex
var uiDefaults map[string]interface{}
var cache *lrucache.Cache = lrucache.New(1024)
var Clusters []*model.Cluster
var nodeLists map[string]map[string]NodeList
func Init(usersdb *sqlx.DB, authEnabled bool, uiConfig map[string]interface{}, jobArchive string) error {
db = usersdb
uiDefaults = uiConfig
entries, err := os.ReadDir(jobArchive)
func Init(flagConfigFile string) {
raw, err := os.ReadFile(flagConfigFile)
if err != nil {
return err
}
Clusters = []*model.Cluster{}
nodeLists = map[string]map[string]NodeList{}
for _, de := range entries {
raw, err := os.ReadFile(filepath.Join(jobArchive, de.Name(), "cluster.json"))
if err != nil {
return err
if !os.IsNotExist(err) {
log.Fatal(err)
}
} else {
if err := schema.Validate(schema.Config, bytes.NewReader(raw)); err != nil {
log.Fatalf("Validate config: %v\n", err)
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
log.Fatal(err)
}
var cluster model.Cluster
// Disabled because of the historic 'measurement' field.
// dec := json.NewDecoder(bytes.NewBuffer(raw))
// dec.DisallowUnknownFields()
// if err := dec.Decode(&cluster); err != nil {
// return err
// }
if err := json.Unmarshal(raw, &cluster); err != nil {
return err
}
if len(cluster.Name) == 0 || len(cluster.MetricConfig) == 0 || len(cluster.SubClusters) == 0 {
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
}
for _, mc := range cluster.MetricConfig {
if len(mc.Name) == 0 {
return errors.New("cluster.metricConfig.name should not be empty")
}
if mc.Timestep < 1 {
return errors.New("cluster.metricConfig.timestep should not be smaller than one")
}
// For backwards compability...
if mc.Scope == "" {
mc.Scope = schema.MetricScopeNode
}
if !mc.Scope.Valid() {
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
}
}
if cluster.FilterRanges.StartTime.To.IsZero() {
cluster.FilterRanges.StartTime.To = time.Unix(0, 0)
}
if cluster.Name != de.Name() {
return fmt.Errorf("the file '.../%s/cluster.json' contains the clusterId '%s'", de.Name(), cluster.Name)
}
Clusters = append(Clusters, &cluster)
nodeLists[cluster.Name] = make(map[string]NodeList)
for _, sc := range cluster.SubClusters {
if sc.Nodes == "" {
continue
}
nl, err := ParseNodeList(sc.Nodes)
if err != nil {
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
}
nodeLists[cluster.Name][sc.Name] = nl
if Keys.Clusters == nil || len(Keys.Clusters) < 1 {
log.Fatal("At least one cluster required in config!")
}
}
if authEnabled {
_, err := db.Exec(`
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
if err != nil {
return err
}
lookupConfigStmt, err = db.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
if err != nil {
return err
}
}
return nil
}
// Return the personalised UI config for the currently authenticated
// user or return the plain default config.
func GetUIConfig(r *http.Request) (map[string]interface{}, error) {
user := auth.GetUser(r.Context())
if user == nil {
lock.RLock()
copy := make(map[string]interface{}, len(uiDefaults))
for k, v := range uiDefaults {
copy[k] = v
}
lock.RUnlock()
return copy, nil
}
data := cache.Get(user.Username, func() (interface{}, time.Duration, int) {
config := make(map[string]interface{}, len(uiDefaults))
for k, v := range uiDefaults {
config[k] = v
}
rows, err := lookupConfigStmt.Query(user.Username)
if err != nil {
return err, 0, 0
}
size := 0
defer rows.Close()
for rows.Next() {
var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil {
return err, 0, 0
}
var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
return err, 0, 0
}
size += len(key)
size += len(rawval)
config[key] = val
}
return config, 24 * time.Hour, size
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(map[string]interface{}), nil
}
// If the context does not have a user, update the global ui configuration without persisting it!
// If there is a (authenticated) user, update only his configuration.
func UpdateConfig(key, value string, ctx context.Context) error {
user := auth.GetUser(ctx)
if user == nil {
var val interface{}
if err := json.Unmarshal([]byte(value), &val); err != nil {
return err
}
lock.Lock()
defer lock.Unlock()
uiDefaults[key] = val
return nil
}
// Disabled because now `plot_list_selectedMetrics:<cluster>` is possible.
// if _, ok := uiDefaults[key]; !ok {
// return errors.New("this configuration key does not exist")
// }
if _, err := db.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
user.Username, key, value); err != nil {
return err
}
cache.Del(user.Username)
return nil
}
func GetCluster(cluster string) *model.Cluster {
for _, c := range Clusters {
if c.Name == cluster {
return c
}
}
return nil
}
func GetSubCluster(cluster, subcluster string) *model.SubCluster {
for _, c := range Clusters {
if c.Name == cluster {
for _, p := range c.SubClusters {
if p.Name == subcluster {
return p
}
}
}
}
return nil
}
func GetMetricConfig(cluster, metric string) *model.MetricConfig {
for _, c := range Clusters {
if c.Name == cluster {
for _, m := range c.MetricConfig {
if m.Name == metric {
return m
}
}
}
}
return nil
}
// AssignSubCluster sets the `job.subcluster` property of the job based
// on its cluster and resources.
func AssignSubCluster(job *schema.BaseJob) error {
cluster := GetCluster(job.Cluster)
if cluster == nil {
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
}
if job.SubCluster != "" {
for _, sc := range cluster.SubClusters {
if sc.Name == job.SubCluster {
return nil
}
}
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
}
if len(job.Resources) == 0 {
return fmt.Errorf("job without any resources/hosts")
}
host0 := job.Resources[0].Hostname
for sc, nl := range nodeLists[job.Cluster] {
if nl != nil && nl.Contains(host0) {
job.SubCluster = sc
return nil
}
}
if cluster.SubClusters[0].Nodes == "" {
job.SubCluster = cluster.SubClusters[0].Name
return nil
}
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
}
func GetSubClusterByNode(cluster, hostname string) (string, error) {
for sc, nl := range nodeLists[cluster] {
if nl != nil && nl.Contains(hostname) {
return sc, nil
}
}
c := GetCluster(cluster)
if c == nil {
return "", fmt.Errorf("unkown cluster: %#v", cluster)
}
if c.SubClusters[0].Nodes == "" {
return c.SubClusters[0].Name, nil
}
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
}

View File

@ -0,0 +1,25 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package config
import (
"testing"
)
func TestInit(t *testing.T) {
fp := "../../configs/config.json"
Init(fp)
if Keys.Addr != "0.0.0.0:443" {
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:443", Keys.Addr)
}
}
func TestInitMinimal(t *testing.T) {
fp := "../../docs/config.json"
Init(fp)
if Keys.Addr != "0.0.0.0:8080" {
t.Errorf("wrong addr\ngot: %s \nwant: 0.0.0.0:8080", Keys.Addr)
}
}

File diff suppressed because it is too large Load Diff

View File

@ -3,124 +3,3 @@
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package model
import (
"encoding/json"
"strconv"
)
type Cluster struct {
Name string `json:"name"`
MetricConfig []*MetricConfig `json:"metricConfig"`
FilterRanges *FilterRanges `json:"filterRanges"`
SubClusters []*SubCluster `json:"subClusters"`
// NOT part of the GraphQL API. This has to be a JSON object with a field `"kind"`.
// All other fields depend on that kind (e.g. "cc-metric-store", "influxdb-v2").
MetricDataRepository json.RawMessage `json:"metricDataRepository"`
}
// Return a list of socket IDs given a list of hwthread IDs.
// Even if just one hwthread is in that socket, add it to the list.
// If no hwthreads other than those in the argument list are assigned to
// one of the sockets in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetSocketsFromHWThreads(hwthreads []int) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
for _, hwthread := range hwthreads {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthread == hwthreadInSocket {
socketsMap[socket] += 1
}
}
}
}
exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
}
return sockets, exclusive
}
// Return a list of core IDs given a list of hwthread IDs.
// Even if just one hwthread is in that core, add it to the list.
// If no hwthreads other than those in the argument list are assigned to
// one of the cores in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetCoresFromHWThreads(hwthreads []int) (cores []int, exclusive bool) {
coresMap := map[int]int{}
for _, hwthread := range hwthreads {
for core, hwthreadsInCore := range topo.Core {
for _, hwthreadInCore := range hwthreadsInCore {
if hwthread == hwthreadInCore {
coresMap[core] += 1
}
}
}
}
exclusive = true
hwthreadsPerCore := len(topo.Node) / len(topo.Core)
cores = make([]int, 0, len(coresMap))
for core, count := range coresMap {
cores = append(cores, core)
exclusive = exclusive && count == hwthreadsPerCore
}
return cores, exclusive
}
// Return a list of memory domain IDs given a list of hwthread IDs.
// Even if just one hwthread is in that memory domain, add it to the list.
// If no hwthreads other than those in the argument list are assigned to
// one of the memory domains in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetMemoryDomainsFromHWThreads(hwthreads []int) (memDoms []int, exclusive bool) {
memDomsMap := map[int]int{}
for _, hwthread := range hwthreads {
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
for _, hwthreadInmemDom := range hwthreadsInmemDom {
if hwthread == hwthreadInmemDom {
memDomsMap[memDom] += 1
}
}
}
}
exclusive = true
hwthreadsPermemDom := len(topo.Node) / len(topo.MemoryDomain)
memDoms = make([]int, 0, len(memDomsMap))
for memDom, count := range memDomsMap {
memDoms = append(memDoms, memDom)
exclusive = exclusive && count == hwthreadsPermemDom
}
return memDoms, exclusive
}
func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
accels := make([]int, 0)
for _, accel := range topo.Accelerators {
id, err := strconv.Atoi(accel.ID)
if err != nil {
return nil, err
}
accels = append(accels, id)
}
return accels, nil
}
func (topo *Topology) GetAcceleratorIndex(id string) (int, bool) {
for idx, accel := range topo.Accelerators {
if accel.ID == id {
return idx, true
}
}
return -1, false
}

View File

@ -11,23 +11,11 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
type Accelerator struct {
ID string `json:"id"`
Type string `json:"type"`
Model string `json:"model"`
}
type Count struct {
Name string `json:"name"`
Count int `json:"count"`
}
type FilterRanges struct {
Duration *IntRangeOutput `json:"duration"`
NumNodes *IntRangeOutput `json:"numNodes"`
StartTime *TimeRangeOutput `json:"startTime"`
}
type FloatRange struct {
From float64 `json:"from"`
To float64 `json:"to"`
@ -43,11 +31,6 @@ type HistoPoint struct {
Value int `json:"value"`
}
type IntRange struct {
From int `json:"from"`
To int `json:"to"`
}
type IntRangeOutput struct {
From int `json:"from"`
To int `json:"to"`
@ -61,12 +44,12 @@ type JobFilter struct {
Project *StringInput `json:"project"`
Cluster *StringInput `json:"cluster"`
Partition *StringInput `json:"partition"`
Duration *IntRange `json:"duration"`
Duration *schema.IntRange `json:"duration"`
MinRunningFor *int `json:"minRunningFor"`
NumNodes *IntRange `json:"numNodes"`
NumAccelerators *IntRange `json:"numAccelerators"`
NumHWThreads *IntRange `json:"numHWThreads"`
StartTime *TimeRange `json:"startTime"`
NumNodes *schema.IntRange `json:"numNodes"`
NumAccelerators *schema.IntRange `json:"numAccelerators"`
NumHWThreads *schema.IntRange `json:"numHWThreads"`
StartTime *schema.TimeRange `json:"startTime"`
State []schema.JobState `json:"state"`
FlopsAnyAvg *FloatRange `json:"flopsAnyAvg"`
MemBwAvg *FloatRange `json:"memBwAvg"`
@ -96,19 +79,6 @@ type JobsStatistics struct {
HistNumNodes []*HistoPoint `json:"histNumNodes"`
}
type MetricConfig struct {
Name string `json:"name"`
Unit string `json:"unit"`
Scope schema.MetricScope `json:"scope"`
Aggregation *string `json:"aggregation"`
Timestep int `json:"timestep"`
Peak *float64 `json:"peak"`
Normal *float64 `json:"normal"`
Caution *float64 `json:"caution"`
Alert *float64 `json:"alert"`
SubClusters []*SubClusterConfig `json:"subClusters"`
}
type MetricFootprints struct {
Metric string `json:"metric"`
Data []schema.Float `json:"data"`
@ -137,47 +107,11 @@ type StringInput struct {
EndsWith *string `json:"endsWith"`
}
type SubCluster struct {
Name string `json:"name"`
Nodes string `json:"nodes"`
NumberOfNodes int `json:"numberOfNodes"`
ProcessorType string `json:"processorType"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`
ThreadsPerCore int `json:"threadsPerCore"`
FlopRateScalar int `json:"flopRateScalar"`
FlopRateSimd int `json:"flopRateSimd"`
MemoryBandwidth int `json:"memoryBandwidth"`
Topology *Topology `json:"topology"`
}
type SubClusterConfig struct {
Name string `json:"name"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
}
type TimeRange struct {
From *time.Time `json:"from"`
To *time.Time `json:"to"`
}
type TimeRangeOutput struct {
From time.Time `json:"from"`
To time.Time `json:"to"`
}
type Topology struct {
Node []int `json:"node"`
Socket [][]int `json:"socket"`
MemoryDomain [][]int `json:"memoryDomain"`
Die [][]int `json:"die"`
Core [][]int `json:"core"`
Accelerators []*Accelerator `json:"accelerators"`
}
type User struct {
Username string `json:"username"`
Name string `json:"name"`

View File

@ -1,7 +1,3 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package graph
// This file will be automatically regenerated based on the schema, any resolver implementations
@ -15,29 +11,35 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/generated"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func (r *clusterResolver) Partitions(ctx context.Context, obj *model.Cluster) ([]string, error) {
// Partitions is the resolver for the partitions field.
func (r *clusterResolver) Partitions(ctx context.Context, obj *schema.Cluster) ([]string, error) {
return r.Repo.Partitions(obj.Name)
}
// Tags is the resolver for the tags field.
func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) {
return r.Repo.GetTags(&obj.ID)
}
// MetaData is the resolver for the metaData field.
func (r *jobResolver) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) {
return r.Repo.FetchMetadata(obj)
}
// UserData is the resolver for the userData field.
func (r *jobResolver) UserData(ctx context.Context, obj *schema.Job) (*model.User, error) {
return auth.FetchUser(ctx, r.DB, obj.User)
}
// CreateTag is the resolver for the createTag field.
func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name string) (*schema.Tag, error) {
id, err := r.Repo.CreateTag(typeArg, name)
if err != nil {
@ -47,11 +49,12 @@ func (r *mutationResolver) CreateTag(ctx context.Context, typeArg string, name s
return &schema.Tag{ID: id, Type: typeArg, Name: name}, nil
}
// DeleteTag is the resolver for the deleteTag field.
func (r *mutationResolver) DeleteTag(ctx context.Context, id string) (string, error) {
// The UI does not allow this currently anyways.
panic(fmt.Errorf("not implemented"))
panic(fmt.Errorf("not implemented: DeleteTag - deleteTag"))
}
// AddTagsToJob is the resolver for the addTagsToJob field.
func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
@ -73,6 +76,7 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
return tags, nil
}
// RemoveTagsFromJob is the resolver for the removeTagsFromJob field.
func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, tagIds []string) ([]*schema.Tag, error) {
jid, err := strconv.ParseInt(job, 10, 64)
if err != nil {
@ -94,26 +98,31 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
return tags, nil
}
// UpdateConfiguration is the resolver for the updateConfiguration field.
func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string, value string) (*string, error) {
if err := config.UpdateConfig(name, value, ctx); err != nil {
if err := repository.GetUserCfgRepo().UpdateConfig(name, value, auth.GetUser(ctx)); err != nil {
return nil, err
}
return nil, nil
}
func (r *queryResolver) Clusters(ctx context.Context) ([]*model.Cluster, error) {
return config.Clusters, nil
// Clusters is the resolver for the clusters field.
func (r *queryResolver) Clusters(ctx context.Context) ([]*schema.Cluster, error) {
return archive.Clusters, nil
}
// Tags is the resolver for the tags field.
func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
return r.Repo.GetTags(nil)
}
// User is the resolver for the user field.
func (r *queryResolver) User(ctx context.Context, username string) (*model.User, error) {
return auth.FetchUser(ctx, r.DB, username)
}
// AllocatedNodes is the resolver for the allocatedNodes field.
func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error) {
data, err := r.Repo.AllocatedNodes(cluster)
if err != nil {
@ -131,6 +140,7 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
return counts, nil
}
// Job is the resolver for the job field.
func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
numericId, err := strconv.ParseInt(id, 10, 64)
if err != nil {
@ -149,6 +159,7 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
return job, nil
}
// JobMetrics is the resolver for the jobMetrics field.
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
job, err := r.Query().Job(ctx, id)
if err != nil {
@ -177,10 +188,12 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
return res, err
}
// JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
return r.jobsFootprints(ctx, filter, metrics)
}
// Jobs is the resolver for the jobs field.
func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error) {
if page == nil {
page = &model.PageRequest{
@ -202,10 +215,12 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
return &model.JobResultList{Items: jobs, Count: &count}, nil
}
// JobsStatistics is the resolver for the jobsStatistics field.
func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
return r.jobsStatistics(ctx, filter, groupBy)
}
// JobsCount is the resolver for the jobsCount field.
func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter, groupBy model.Aggregate, weight *model.Weights, limit *int) ([]*model.Count, error) {
counts, err := r.Repo.CountGroupedJobs(ctx, groupBy, filter, weight, limit)
if err != nil {
@ -222,10 +237,12 @@ func (r *queryResolver) JobsCount(ctx context.Context, filter []*model.JobFilter
return res, nil
}
// RooflineHeatmap is the resolver for the rooflineHeatmap field.
func (r *queryResolver) RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
return r.rooflineHeatmap(ctx, filter, rows, cols, minX, minY, maxX, maxY)
}
// NodeMetrics is the resolver for the nodeMetrics field.
func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error) {
user := auth.GetUser(ctx)
if user != nil && !user.HasRole(auth.RoleAdmin) {
@ -233,7 +250,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
}
if metrics == nil {
for _, mc := range config.GetCluster(cluster).MetricConfig {
for _, mc := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, mc.Name)
}
}
@ -249,7 +266,7 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
Host: hostname,
Metrics: make([]*model.JobMetricWithName, 0, len(metrics)*len(scopes)),
}
host.SubCluster, _ = config.GetSubClusterByNode(cluster, hostname)
host.SubCluster, _ = archive.GetSubClusterByNode(cluster, hostname)
for metric, scopedMetrics := range metrics {
for _, scopedMetric := range scopedMetrics {

View File

@ -13,10 +13,10 @@ import (
"time"
"github.com/99designs/gqlgen/graphql"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
@ -36,7 +36,7 @@ func (r *queryResolver) jobsStatistics(ctx context.Context, filter []*model.JobF
stats := map[string]*model.JobsStatistics{}
// `socketsPerNode` and `coresPerSocket` can differ from cluster to cluster, so we need to explicitly loop over those.
for _, cluster := range config.Clusters {
for _, cluster := range archive.Clusters {
for _, subcluster := range cluster.SubClusters {
corehoursCol := fmt.Sprintf("CAST(ROUND(SUM(job.duration * job.num_nodes * %d * %d) / 3600) as int)", subcluster.SocketsPerNode, subcluster.CoresPerSocket)
var query sq.SelectBuilder
@ -200,7 +200,12 @@ func (r *queryResolver) jobsStatisticsHistogram(ctx context.Context, value strin
const MAX_JOBS_FOR_ANALYSIS = 500
// Helper function for the rooflineHeatmap GraphQL query placed here so that schema.resolvers.go is not too full.
func (r *Resolver) rooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
func (r *queryResolver) rooflineHeatmap(
ctx context.Context,
filter []*model.JobFilter,
rows int, cols int,
minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error) {
jobs, err := r.Repo.QueryJobs(ctx, filter, &model.PageRequest{Page: 1, ItemsPerPage: MAX_JOBS_FOR_ANALYSIS + 1}, nil)
if err != nil {
return nil, err

View File

@ -1,261 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricdata
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"math"
"os"
"path"
"path/filepath"
"strconv"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// For a given job, return the path of the `data.json`/`meta.json` file.
// TODO: Implement Issue ClusterCockpit/ClusterCockpit#97
func getPath(job *schema.Job, file string, checkLegacy bool) (string, error) {
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
if !checkLegacy {
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
}
legacyPath := filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, file)
if _, err := os.Stat(legacyPath); errors.Is(err, os.ErrNotExist) {
return filepath.Join(JobArchivePath, job.Cluster, lvl1, lvl2, strconv.FormatInt(job.StartTime.Unix(), 10), file), nil
}
return legacyPath, nil
}
// Assuming job is completed/archived, return the jobs metric data.
func loadFromArchive(job *schema.Job) (schema.JobData, error) {
filename, err := getPath(job, "data.json", true)
if err != nil {
return nil, err
}
data := cache.Get(filename, func() (value interface{}, ttl time.Duration, size int) {
f, err := os.Open(filename)
if err != nil {
return err, 0, 1000
}
defer f.Close()
var data schema.JobData
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&data); err != nil {
return err, 0, 1000
}
return data, 1 * time.Hour, data.Size()
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(schema.JobData), nil
}
func loadMetaJson(job *schema.Job) (*schema.JobMeta, error) {
filename, err := getPath(job, "meta.json", true)
if err != nil {
return nil, err
}
bytes, err := os.ReadFile(filename)
if err != nil {
return nil, err
}
var metaFile schema.JobMeta = schema.JobMeta{
BaseJob: schema.JobDefaults,
}
if err := json.Unmarshal(bytes, &metaFile); err != nil {
return nil, err
}
return &metaFile, nil
}
// If the job is archived, find its `meta.json` file and override the tags list
// in that JSON file. If the job is not archived, nothing is done.
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
if job.State == schema.JobStateRunning {
return nil
}
filename, err := getPath(job, "meta.json", true)
if err != nil {
return err
}
f, err := os.Open(filename)
if err != nil {
if os.IsNotExist(err) {
return nil
}
return err
}
var metaFile schema.JobMeta = schema.JobMeta{
BaseJob: schema.JobDefaults,
}
if err := json.NewDecoder(f).Decode(&metaFile); err != nil {
return err
}
f.Close()
metaFile.Tags = make([]*schema.Tag, 0)
for _, tag := range tags {
metaFile.Tags = append(metaFile.Tags, &schema.Tag{
Name: tag.Name,
Type: tag.Type,
})
}
bytes, err := json.Marshal(metaFile)
if err != nil {
return err
}
return os.WriteFile(filename, bytes, 0644)
}
// Helper to metricdata.LoadAverages().
func loadAveragesFromArchive(job *schema.Job, metrics []string, data [][]schema.Float) error {
metaFile, err := loadMetaJson(job)
if err != nil {
return err
}
for i, m := range metrics {
if stat, ok := metaFile.Statistics[m]; ok {
data[i] = append(data[i], schema.Float(stat.Avg))
} else {
data[i] = append(data[i], schema.NaN)
}
}
return nil
}
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := loadMetaJson(job)
if err != nil {
return nil, err
}
return metaFile.Statistics, nil
}
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := config.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
// TODO: Talk about this! What resolutions to store data at...
scopes := []schema.MetricScope{schema.MetricScopeNode}
if job.NumNodes <= 8 {
scopes = append(scopes, schema.MetricScopeCore)
}
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// TODO/FIXME: Calc average for non-node metrics as well!
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: config.GetMetricConfig(job.Cluster, metric).Unit,
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if !useArchive {
return jobMeta, nil
}
dir, err := getPath(job, "", false)
if err != nil {
return nil, err
}
return jobMeta, writeFiles(dir, jobMeta, &jobData)
}
func writeFiles(dir string, jobMeta *schema.JobMeta, jobData *schema.JobData) error {
if err := os.MkdirAll(dir, 0777); err != nil {
return err
}
f, err := os.Create(path.Join(dir, "meta.json"))
if err != nil {
return err
}
if err := json.NewEncoder(f).Encode(jobMeta); err != nil {
return err
}
if err := f.Close(); err != nil {
return err
}
f, err = os.Create(path.Join(dir, "data.json"))
if err != nil {
return err
}
if err := json.NewEncoder(f).Encode(jobData); err != nil {
return err
}
return f.Close()
}
// Used to import a non-running job into the job-archive.
func ImportJob(job *schema.JobMeta, jobData *schema.JobData) error {
dir, err := getPath(&schema.Job{
BaseJob: job.BaseJob,
StartTimeUnix: job.StartTime,
StartTime: time.Unix(job.StartTime, 0),
}, "", false)
if err != nil {
return err
}
return writeFiles(dir, job, jobData)
}

View File

@ -15,7 +15,7 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@ -75,6 +75,7 @@ type ApiMetricData struct {
}
func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
var config CCMetricStoreConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
return err
@ -117,7 +118,10 @@ func (ccms *CCMetricStore) toLocalName(metric string) string {
return metric
}
func (ccms *CCMetricStore) doRequest(ctx context.Context, body *ApiQueryRequest) (*ApiQueryResponse, error) {
func (ccms *CCMetricStore) doRequest(
ctx context.Context,
body *ApiQueryRequest) (*ApiQueryResponse, error) {
buf := &bytes.Buffer{}
if err := json.NewEncoder(buf).Encode(body); err != nil {
return nil, err
@ -148,8 +152,13 @@ func (ccms *CCMetricStore) doRequest(ctx context.Context, body *ApiQueryRequest)
return &resBody, nil
}
func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
topology := config.GetSubCluster(job.Cluster, job.SubCluster).Topology
func (ccms *CCMetricStore) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
if err != nil {
return nil, err
@ -175,7 +184,7 @@ func (ccms *CCMetricStore) LoadData(job *schema.Job, metrics []string, scopes []
query := req.Queries[i]
metric := ccms.toLocalName(query.Metric)
scope := assignedScope[i]
mc := config.GetMetricConfig(job.Cluster, metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if _, ok := jobData[metric]; !ok {
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
}
@ -250,14 +259,18 @@ var (
acceleratorString = string(schema.MetricScopeAccelerator)
)
func (ccms *CCMetricStore) buildQueries(job *schema.Job, metrics []string, scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) {
func (ccms *CCMetricStore) buildQueries(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope) ([]ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
topology := config.GetSubCluster(job.Cluster, job.SubCluster).Topology
topology := archive.GetSubCluster(job.Cluster, job.SubCluster).Topology
assignedScope := []schema.MetricScope{}
for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric)
mc := config.GetMetricConfig(job.Cluster, metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
// log.Printf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
@ -478,7 +491,11 @@ func (ccms *CCMetricStore) buildQueries(job *schema.Job, metrics []string, scope
return queries, assignedScope, nil
}
func (ccms *CCMetricStore) LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
func (ccms *CCMetricStore) LoadStats(
job *schema.Job,
metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode})
if err != nil {
return nil, err
@ -528,7 +545,13 @@ func (ccms *CCMetricStore) LoadStats(job *schema.Job, metrics []string, ctx cont
}
// TODO: Support sub-node-scope metrics! For this, the partition of a node needs to be known!
func (ccms *CCMetricStore) LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
func (ccms *CCMetricStore) LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
req := ApiQueryRequest{
Cluster: cluster,
From: from.Unix(),
@ -584,7 +607,7 @@ func (ccms *CCMetricStore) LoadNodeData(cluster string, metrics, nodes []string,
data[query.Hostname] = hostdata
}
mc := config.GetMetricConfig(cluster, metric)
mc := archive.GetMetricConfig(cluster, metric)
hostdata[metric] = append(hostdata[metric], &schema.JobMetric{
Unit: mc.Unit,
Scope: schema.MetricScopeNode,
@ -611,6 +634,7 @@ func (ccms *CCMetricStore) LoadNodeData(cluster string, metrics, nodes []string,
}
func intToStringSlice(is []int) []string {
ss := make([]string, len(is))
for i, x := range is {
ss[i] = strconv.Itoa(x)

View File

@ -14,7 +14,7 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
influxdb2Api "github.com/influxdata/influxdb-client-go/v2/api"
@ -55,7 +55,11 @@ func (idb *InfluxDBv2DataRepository) epochToTime(epoch int64) time.Time {
return time.Unix(epoch, 0)
}
func (idb *InfluxDBv2DataRepository) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
func (idb *InfluxDBv2DataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
measurementsConds := make([]string, 0, len(metrics))
for _, m := range metrics {
@ -124,7 +128,7 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *schema.Job, metrics []string,
for _, metric := range metrics {
jobMetric, ok := jobData[metric]
if !ok {
mc := config.GetMetricConfig(job.Cluster, metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
jobMetric = map[schema.MetricScope]*schema.JobMetric{
scope: { // uses scope var from above!
Unit: mc.Unit,
@ -234,7 +238,10 @@ func (idb *InfluxDBv2DataRepository) LoadData(job *schema.Job, metrics []string,
return jobData, nil
}
func (idb *InfluxDBv2DataRepository) LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
func (idb *InfluxDBv2DataRepository) LoadStats(
job *schema.Job,
metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
stats := map[string]map[string]schema.MetricStatistics{}
@ -304,7 +311,13 @@ func (idb *InfluxDBv2DataRepository) LoadStats(job *schema.Job, metrics []string
return stats, nil
}
func (idb *InfluxDBv2DataRepository) LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
func (idb *InfluxDBv2DataRepository) LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
// TODO : Implement to be used in Analysis- und System/Node-View
log.Println(fmt.Sprintf("LoadNodeData unimplemented for InfluxDBv2DataRepository, Args: cluster %s, metrics %v, nodes %v, scopes %v", cluster, metrics, nodes, scopes))

View File

@ -8,9 +8,11 @@ import (
"context"
"encoding/json"
"fmt"
"math"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
@ -33,14 +35,12 @@ type MetricDataRepository interface {
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
var JobArchivePath string
var useArchive bool
func Init(jobArchivePath string, disableArchive bool) error {
func Init(disableArchive bool) error {
useArchive = !disableArchive
JobArchivePath = jobArchivePath
for _, cluster := range config.Clusters {
for _, cluster := range config.Keys.Clusters {
if cluster.MetricDataRepository != nil {
var kind struct {
Kind string `json:"kind"`
@ -73,14 +73,20 @@ func Init(jobArchivePath string, disableArchive bool) error {
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
// Fetches the metric data for a job.
func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
if job.State == schema.JobStateRunning ||
job.MonitoringStatus == schema.MonitoringStatusRunningOrArchiving ||
!useArchive {
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster), 0, 0
}
@ -90,7 +96,7 @@ func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ct
}
if metrics == nil {
cluster := config.GetCluster(job.Cluster)
cluster := archive.GetCluster(job.Cluster)
for _, mc := range cluster.MetricConfig {
metrics = append(metrics, mc.Name)
}
@ -106,7 +112,7 @@ func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ct
}
size = jd.Size()
} else {
jd, err = loadFromArchive(job)
jd, err = archive.GetHandle().LoadJobData(job)
if err != nil {
return err, 0, 0
}
@ -141,7 +147,7 @@ func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ct
}
jd = res
}
size = 1 // loadFromArchive() caches in the same cache.
size = jd.Size()
}
ttl = 5 * time.Hour
@ -161,9 +167,14 @@ func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ct
}
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
func LoadAverages(
job *schema.Job,
metrics []string,
data [][]schema.Float,
ctx context.Context) error {
if job.State != schema.JobStateRunning && useArchive {
return loadAveragesFromArchive(job, metrics, data)
return archive.LoadAveragesFromArchive(job, metrics, data)
}
repo, ok := metricDataRepos[job.Cluster]
@ -194,14 +205,20 @@ func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
func LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
repo, ok := metricDataRepos[cluster]
if !ok {
return nil, fmt.Errorf("no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
for _, m := range config.GetCluster(cluster).MetricConfig {
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}
@ -222,17 +239,26 @@ func LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.Metri
return data, nil
}
func cacheKey(job *schema.Job, metrics []string, scopes []schema.MetricScope) string {
func cacheKey(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope) string {
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
return fmt.Sprintf("%d(%s):[%v],[%v]",
job.ID, job.State, metrics, scopes)
}
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need to be available at the scope 'node'.
// If a job has a lot of nodes, statisticsSeries should be available so that a min/mean/max Graph can be used instead of
// a lot of single lines.
func prepareJobData(job *schema.Job, jobData schema.JobData, scopes []schema.MetricScope) {
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/mean/max Graph can be
// used instead of a lot of single lines.
func prepareJobData(
job *schema.Job,
jobData schema.JobData,
scopes []schema.MetricScope) {
const maxSeriesSize int = 15
for _, scopes := range jobData {
for _, jm := range scopes {
@ -256,3 +282,61 @@ func prepareJobData(job *schema.Job, jobData schema.JobData, scopes []schema.Met
jobData.AddNodeScope("mem_bw")
}
}
// Writes a running job to the job-archive
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
allMetrics := make([]string, 0)
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
for _, mc := range metricConfigs {
allMetrics = append(allMetrics, mc.Name)
}
// TODO: Talk about this! What resolutions to store data at...
scopes := []schema.MetricScope{schema.MetricScopeNode}
if job.NumNodes <= 8 {
scopes = append(scopes, schema.MetricScopeCore)
}
jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil {
return nil, err
}
jobMeta := &schema.JobMeta{
BaseJob: job.BaseJob,
StartTime: job.StartTime.Unix(),
Statistics: make(map[string]schema.JobStatistics),
}
for metric, data := range jobData {
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
nodeData, ok := data["node"]
if !ok {
// TODO/FIXME: Calc average for non-node metrics as well!
continue
}
for _, series := range nodeData.Series {
avg += series.Statistics.Avg
min = math.Min(min, series.Statistics.Min)
max = math.Max(max, series.Statistics.Max)
}
jobMeta.Statistics[metric] = schema.JobStatistics{
Unit: archive.GetMetricConfig(job.Cluster, metric).Unit,
Avg: avg / float64(job.NumNodes),
Min: min,
Max: max,
}
}
// If the file based archive is disabled,
// only return the JobMeta structure as the
// statistics in there are needed.
if !useArchive {
return jobMeta, nil
}
return jobMeta, archive.GetHandle().ImportJob(jobMeta, &jobData)
}

View File

@ -23,14 +23,28 @@ func (tmdr *TestMetricDataRepository) Init(_ json.RawMessage) error {
return nil
}
func (tmdr *TestMetricDataRepository) LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
func (tmdr *TestMetricDataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
return TestLoadDataCallback(job, metrics, scopes, ctx)
}
func (tmdr *TestMetricDataRepository) LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
func (tmdr *TestMetricDataRepository) LoadStats(
job *schema.Job,
metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
panic("TODO")
}
func (tmdr *TestMetricDataRepository) LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
func (tmdr *TestMetricDataRepository) LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
ctx context.Context) (map[string]map[string][]*schema.JobMetric, error) {
panic("TODO")
}

View File

@ -1,159 +0,0 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"bytes"
"database/sql"
"encoding/json"
"fmt"
"os"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
// Import all jobs specified as `<path-to-meta.json>:<path-to-data.json>,...`
func (r *JobRepository) HandleImportFlag(flag string) error {
for _, pair := range strings.Split(flag, ",") {
files := strings.Split(pair, ":")
if len(files) != 2 {
return fmt.Errorf("invalid import flag format")
}
raw, err := os.ReadFile(files[0])
if err != nil {
return err
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := dec.Decode(&jobMeta); err != nil {
return err
}
raw, err = os.ReadFile(files[1])
if err != nil {
return err
}
dec = json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobData := schema.JobData{}
if err := dec.Decode(&jobData); err != nil {
return err
}
if err := r.ImportJob(&jobMeta, &jobData); err != nil {
return err
}
}
return nil
}
func (r *JobRepository) ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) (err error) {
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
if err := metricdata.ImportJob(jobMeta, jobData); err != nil {
return err
}
if job, err := r.Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
if err != nil {
return err
}
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
}
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return err
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
return err
}
res, err := r.DB.NamedExec(NamedJobInsert, job)
if err != nil {
return err
}
id, err := res.LastInsertId()
if err != nil {
return err
}
for _, tag := range job.Tags {
if _, err := r.AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
return err
}
}
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
return nil
}
// This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error {
if c := config.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %#v", job.Cluster)
}
if err := config.AssignSubCluster(job); err != nil {
return err
}
if !job.State.Valid() {
return fmt.Errorf("not a valid job state: %#v", job.State)
}
if len(job.Resources) == 0 || len(job.User) == 0 {
return fmt.Errorf("'resources' and 'user' should not be empty")
}
if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 {
return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
}
if len(job.Resources) != int(job.NumNodes) {
return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
}
return nil
}
func loadJobStat(job *schema.JobMeta, metric string) float64 {
if stats, ok := job.Statistics[metric]; ok {
return stats.Avg
}
return 0.0
}

View File

@ -5,16 +5,18 @@
package repository
import (
"bufio"
"bytes"
"database/sql"
"encoding/json"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/jmoiron/sqlx"
)
// `AUTO_INCREMENT` is in a comment because of this hack:
@ -78,31 +80,133 @@ const JobsDbIndexes string = `
CREATE INDEX job_by_job_id ON job (job_id);
CREATE INDEX job_by_state ON job (job_state);
`
const NamedJobInsert string = `INSERT INTO job (
job_id, user, project, cluster, subcluster, ` + "`partition`" + `, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, resources, meta_data,
mem_used_max, flops_any_avg, mem_bw_avg, load_avg, net_bw_avg, net_data_vol_total, file_bw_avg, file_data_vol_total
) VALUES (
:job_id, :user, :project, :cluster, :subcluster, :partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :resources, :meta_data,
:mem_used_max, :flops_any_avg, :mem_bw_avg, :load_avg, :net_bw_avg, :net_data_vol_total, :file_bw_avg, :file_data_vol_total
);`
// Import all jobs specified as `<path-to-meta.json>:<path-to-data.json>,...`
func HandleImportFlag(flag string) error {
for _, pair := range strings.Split(flag, ",") {
files := strings.Split(pair, ":")
if len(files) != 2 {
return fmt.Errorf("invalid import flag format")
}
raw, err := os.ReadFile(files[0])
if err != nil {
return err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Meta, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job meta: %v", err)
}
}
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := dec.Decode(&jobMeta); err != nil {
return err
}
raw, err = os.ReadFile(files[1])
if err != nil {
return err
}
if config.Keys.Validate {
if err := schema.Validate(schema.Data, bytes.NewReader(raw)); err != nil {
return fmt.Errorf("validate job data: %v", err)
}
}
dec = json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
jobData := schema.JobData{}
if err := dec.Decode(&jobData); err != nil {
return err
}
SanityChecks(&jobMeta.BaseJob)
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
if job, err := GetJobRepository().Find(&jobMeta.JobID, &jobMeta.Cluster, &jobMeta.StartTime); err != sql.ErrNoRows {
if err != nil {
return err
}
return fmt.Errorf("a job with that jobId, cluster and startTime does already exist (dbid: %d)", job.ID)
}
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return err
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
return err
}
if err := archive.GetHandle().ImportJob(&jobMeta, &jobData); err != nil {
return err
}
res, err := GetConnection().DB.NamedExec(NamedJobInsert, job)
if err != nil {
return err
}
id, err := res.LastInsertId()
if err != nil {
return err
}
for _, tag := range job.Tags {
if _, err := GetJobRepository().AddTagOrCreate(id, tag.Type, tag.Name); err != nil {
return err
}
}
log.Infof("Successfully imported a new job (jobId: %d, cluster: %s, dbid: %d)", job.JobID, job.Cluster, id)
}
return nil
}
// Delete the tables "job", "tag" and "jobtag" from the database and
// repopulate them using the jobs found in `archive`.
func InitDB(db *sqlx.DB, archive string) error {
func InitDB() error {
db := GetConnection()
starttime := time.Now()
log.Print("Building job table...")
// Basic database structure:
_, err := db.Exec(JobsDBSchema)
if err != nil {
return err
}
clustersDir, err := os.ReadDir(archive)
if err != nil {
return err
}
_, err := db.DB.Exec(JobsDBSchema)
if err != nil {
return err
}
// Inserts are bundled into transactions because in sqlite,
// that speeds up inserts A LOT.
tx, err := db.Beginx()
tx, err := db.DB.Beginx()
if err != nil {
return err
}
@ -111,23 +215,28 @@ func InitDB(db *sqlx.DB, archive string) error {
if err != nil {
return err
}
tags := make(map[string]int64)
// Not using log.Print because we want the line to end with `\r` and
// this function is only ever called when a special command line flag
// is passed anyways.
fmt.Printf("%d jobs inserted...\r", 0)
ar := archive.GetHandle()
i := 0
tags := make(map[string]int64)
handleDirectory := func(filename string) error {
// Bundle 100 inserts into one transaction for better performance:
if i%100 == 0 {
errorOccured := 0
for jobMeta := range ar.Iter() {
// // Bundle 100 inserts into one transaction for better performance:
if i%10 == 0 {
if tx != nil {
if err := tx.Commit(); err != nil {
return err
}
}
tx, err = db.Beginx()
tx, err = db.DB.Beginx()
if err != nil {
return err
}
@ -136,53 +245,80 @@ func InitDB(db *sqlx.DB, archive string) error {
fmt.Printf("%d jobs inserted...\r", i)
}
err := loadJob(tx, stmt, tags, filename)
if err == nil {
i += 1
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
return err
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(jobMeta, "file_bw")
for _, clusterDir := range clustersDir {
lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return err
log.Errorf("repository initDB()- %v", err)
errorOccured++
continue
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
// Could be the cluster.json file
continue
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
log.Errorf("repository initDB()- %v", err)
errorOccured++
continue
}
lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
if err != nil {
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
log.Errorf("repository initDB()- %v", err)
errorOccured++
continue
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
res, err := stmt.Exec(job)
if err != nil {
log.Errorf("repository initDB()- %v", err)
errorOccured++
continue
}
id, err := res.LastInsertId()
if err != nil {
log.Errorf("repository initDB()- %v", err)
errorOccured++
continue
}
for _, tag := range job.Tags {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
if !ok {
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil {
return err
}
// For compability with the old job-archive directory structure where
// there was no start time directory.
for _, startTimeDir := range startTimeDirs {
if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
if err := handleDirectory(dirpath); err != nil {
log.Errorf("in %s: %s", dirpath, err.Error())
}
} else if startTimeDir.IsDir() {
if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
}
}
tagId, err = res.LastInsertId()
if err != nil {
return err
}
tags[tagstr] = tagId
}
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
return err
}
}
if err == nil {
i += 1
}
}
if errorOccured > 0 {
log.Errorf("Error in import of %d jobs!", errorOccured)
}
if err := tx.Commit(); err != nil {
@ -191,7 +327,7 @@ func InitDB(db *sqlx.DB, archive string) error {
// Create indexes after inserts so that they do not
// need to be continually updated.
if _, err := db.Exec(JobsDbIndexes); err != nil {
if _, err := db.DB.Exec(JobsDbIndexes); err != nil {
return err
}
@ -199,77 +335,34 @@ func InitDB(db *sqlx.DB, archive string) error {
return nil
}
// TODO: Remove double logic, use repository/import.go!
// Read the `meta.json` file at `path` and insert it to the database using the prepared
// insert statement `stmt`. `tags` maps all existing tags to their database ID.
func loadJob(tx *sqlx.Tx, stmt *sqlx.NamedStmt, tags map[string]int64, path string) error {
f, err := os.Open(filepath.Join(path, "meta.json"))
if err != nil {
// This function also sets the subcluster if necessary!
func SanityChecks(job *schema.BaseJob) error {
if c := archive.GetCluster(job.Cluster); c == nil {
return fmt.Errorf("no such cluster: %#v", job.Cluster)
}
if err := archive.AssignSubCluster(job); err != nil {
return err
}
defer f.Close()
jobMeta := schema.JobMeta{BaseJob: schema.JobDefaults}
if err := json.NewDecoder(bufio.NewReader(f)).Decode(&jobMeta); err != nil {
return err
if !job.State.Valid() {
return fmt.Errorf("not a valid job state: %#v", job.State)
}
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
if len(job.Resources) == 0 || len(job.User) == 0 {
return fmt.Errorf("'resources' and 'user' should not be empty")
}
// TODO: Other metrics...
job.FlopsAnyAvg = loadJobStat(&jobMeta, "flops_any")
job.MemBwAvg = loadJobStat(&jobMeta, "mem_bw")
job.NetBwAvg = loadJobStat(&jobMeta, "net_bw")
job.FileBwAvg = loadJobStat(&jobMeta, "file_bw")
job.RawResources, err = json.Marshal(job.Resources)
if err != nil {
return err
if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 {
return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid")
}
job.RawMetaData, err = json.Marshal(job.MetaData)
if err != nil {
return err
}
if err := SanityChecks(&job.BaseJob); err != nil {
return err
}
res, err := stmt.Exec(job)
if err != nil {
return err
}
id, err := res.LastInsertId()
if err != nil {
return err
}
for _, tag := range job.Tags {
tagstr := tag.Name + ":" + tag.Type
tagId, ok := tags[tagstr]
if !ok {
res, err := tx.Exec(`INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)`, tag.Name, tag.Type)
if err != nil {
return err
}
tagId, err = res.LastInsertId()
if err != nil {
return err
}
tags[tagstr] = tagId
}
if _, err := tx.Exec(`INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)`, id, tagId); err != nil {
return err
}
if len(job.Resources) != int(job.NumNodes) {
return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes)
}
return nil
}
func loadJobStat(job *schema.JobMeta, metric string) float64 {
if stats, ok := job.Statistics[metric]; ok {
return stats.Avg
}
return 0.0
}

View File

@ -35,7 +35,7 @@ type JobRepository struct {
cache *lrucache.Cache
}
func GetRepository() *JobRepository {
func GetJobRepository() *JobRepository {
jobRepoOnce.Do(func() {
db := GetConnection()
@ -160,8 +160,7 @@ func (r *JobRepository) Find(
// The job is queried using the database id.
// It returns a pointer to a schema.Job data structure and an error variable.
// To check if no job was found test err == sql.ErrNoRows
func (r *JobRepository) FindById(
jobId int64) (*schema.Job, error) {
func (r *JobRepository) FindById(jobId int64) (*schema.Job, error) {
q := sq.Select(jobColumns...).
From("job").Where("job.id = ?", jobId)
return scanJob(q.RunWith(r.stmtCache).QueryRow())

View File

@ -8,18 +8,15 @@ import (
"fmt"
"testing"
"github.com/jmoiron/sqlx"
_ "github.com/mattn/go-sqlite3"
)
var db *sqlx.DB
func init() {
Connect("sqlite3", "../../test/test.db")
}
func setup(t *testing.T) *JobRepository {
return GetRepository()
return GetJobRepository()
}
func TestFind(t *testing.T) {

View File

@ -167,11 +167,11 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
return query
}
func buildIntCondition(field string, cond *model.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildTimeCondition(field string, cond *model.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
if cond.From != nil && cond.To != nil {
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
} else if cond.From != nil {

View File

@ -5,7 +5,7 @@
package repository
import (
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
sq "github.com/Masterminds/squirrel"
)
@ -26,7 +26,7 @@ func (r *JobRepository) AddTag(job int64, tag int64) ([]*schema.Tag, error) {
return nil, err
}
return tags, metricdata.UpdateTags(j, tags)
return tags, archive.UpdateTags(j, tags)
}
// Removes a tag from a job
@ -45,7 +45,7 @@ func (r *JobRepository) RemoveTag(job, tag int64) ([]*schema.Tag, error) {
return nil, err
}
return tags, metricdata.UpdateTags(j, tags)
return tags, archive.UpdateTags(j, tags)
}
// CreateTag creates a new tag with the specified type and name and returns its database id.

141
internal/repository/user.go Normal file
View File

@ -0,0 +1,141 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"encoding/json"
"log"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/jmoiron/sqlx"
)
var (
userCfgRepoOnce sync.Once
userCfgRepoInstance *UserCfgRepo
)
type UserCfgRepo struct {
DB *sqlx.DB
Lookup *sqlx.Stmt
lock sync.RWMutex
uiDefaults map[string]interface{}
cache *lrucache.Cache
}
func GetUserCfgRepo() *UserCfgRepo {
userCfgRepoOnce.Do(func() {
db := GetConnection()
_, err := db.DB.Exec(`
CREATE TABLE IF NOT EXISTS configuration (
username varchar(255),
confkey varchar(255),
value varchar(255),
PRIMARY KEY (username, confkey),
FOREIGN KEY (username) REFERENCES user (username) ON DELETE CASCADE ON UPDATE NO ACTION);`)
if err != nil {
log.Fatal(err)
}
lookupConfigStmt, err := db.DB.Preparex(`SELECT confkey, value FROM configuration WHERE configuration.username = ?`)
if err != nil {
log.Fatal(err)
}
userCfgRepoInstance = &UserCfgRepo{
DB: db.DB,
Lookup: lookupConfigStmt,
uiDefaults: config.Keys.UiDefaults,
cache: lrucache.New(1024),
}
})
return userCfgRepoInstance
}
// Return the personalised UI config for the currently authenticated
// user or return the plain default config.
func (uCfg *UserCfgRepo) GetUIConfig(user *auth.User) (map[string]interface{}, error) {
if user == nil {
uCfg.lock.RLock()
copy := make(map[string]interface{}, len(uCfg.uiDefaults))
for k, v := range uCfg.uiDefaults {
copy[k] = v
}
uCfg.lock.RUnlock()
return copy, nil
}
data := uCfg.cache.Get(user.Username, func() (interface{}, time.Duration, int) {
config := make(map[string]interface{}, len(uCfg.uiDefaults))
for k, v := range uCfg.uiDefaults {
config[k] = v
}
rows, err := uCfg.Lookup.Query(user.Username)
if err != nil {
return err, 0, 0
}
size := 0
defer rows.Close()
for rows.Next() {
var key, rawval string
if err := rows.Scan(&key, &rawval); err != nil {
return err, 0, 0
}
var val interface{}
if err := json.Unmarshal([]byte(rawval), &val); err != nil {
return err, 0, 0
}
size += len(key)
size += len(rawval)
config[key] = val
}
return config, 24 * time.Hour, size
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(map[string]interface{}), nil
}
// If the context does not have a user, update the global ui configuration
// without persisting it! If there is a (authenticated) user, update only his
// configuration.
func (uCfg *UserCfgRepo) UpdateConfig(
key, value string,
user *auth.User) error {
if user == nil {
var val interface{}
if err := json.Unmarshal([]byte(value), &val); err != nil {
return err
}
uCfg.lock.Lock()
defer uCfg.lock.Unlock()
uCfg.uiDefaults[key] = val
return nil
}
if _, err := uCfg.DB.Exec(`REPLACE INTO configuration (username, confkey, value) VALUES (?, ?, ?)`,
user, key, value); err != nil {
return err
}
uCfg.cache.Del(user.Username)
return nil
}

View File

@ -0,0 +1,62 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repository
import (
"os"
"path/filepath"
"testing"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
)
func init() {
Connect("sqlite3", "../../test/test.db")
}
func setupUserTest(t *testing.T) *UserCfgRepo {
const testconfig = `{
"addr": "0.0.0.0:8080",
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [
{
"name": "testcluster",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
} } ]
}`
tmpdir := t.TempDir()
cfgFilePath := filepath.Join(tmpdir, "config.json")
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
t.Fatal(err)
}
config.Init(cfgFilePath)
return GetUserCfgRepo()
}
func TestGetUIConfig(t *testing.T) {
r := setupUserTest(t)
u := auth.User{Username: "jan"}
cfg, err := r.GetUIConfig(&u)
if err != nil {
t.Fatal("No config")
}
tmp := cfg["plot_list_selectedMetrics"]
metrics := tmp.([]interface{})
str := metrics[2].(string)
if str != "mem_bw" {
t.Errorf("wrong config\ngot: %s \nwant: mem_bw", str)
}
}

View File

@ -13,10 +13,10 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/web"
@ -55,7 +55,7 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
TotalJobs int
RecentShortJobs int
}
jobRepo := repository.GetRepository()
jobRepo := repository.GetJobRepository()
runningJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
State: []schema.JobState{schema.JobStateRunning},
@ -71,8 +71,8 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
}
from := time.Now().Add(-24 * time.Hour)
recentShortJobs, err := jobRepo.CountGroupedJobs(r.Context(), model.AggregateCluster, []*model.JobFilter{{
StartTime: &model.TimeRange{From: &from, To: nil},
Duration: &model.IntRange{From: 0, To: graph.ShortJobDuration},
StartTime: &schema.TimeRange{From: &from, To: nil},
Duration: &schema.IntRange{From: 0, To: graph.ShortJobDuration},
}}, nil, nil)
if err != nil {
log.Errorf("failed to count jobs: %s", err.Error())
@ -80,7 +80,7 @@ func setupHomeRoute(i InfoType, r *http.Request) InfoType {
}
clusters := make([]cluster, 0)
for _, c := range config.Clusters {
for _, c := range archive.Clusters {
clusters = append(clusters, cluster{
Name: c.Name,
RunningJobs: runningJobs[c.Name],
@ -99,7 +99,7 @@ func setupJobRoute(i InfoType, r *http.Request) InfoType {
}
func setupUserRoute(i InfoType, r *http.Request) InfoType {
jobRepo := repository.GetRepository()
jobRepo := repository.GetJobRepository()
username := mux.Vars(r)["id"]
i["id"] = username
i["username"] = username
@ -142,7 +142,7 @@ func setupAnalysisRoute(i InfoType, r *http.Request) InfoType {
func setupTaglistRoute(i InfoType, r *http.Request) InfoType {
var username *string = nil
jobRepo := repository.GetRepository()
jobRepo := repository.GetJobRepository()
if user := auth.GetUser(r.Context()); user != nil && !user.HasRole(auth.RoleAdmin) {
username = &user.Username
}
@ -254,10 +254,11 @@ func buildFilterPresets(query url.Values) map[string]interface{} {
}
func SetupRoutes(router *mux.Router) {
userCfgRepo := repository.GetUserCfgRepo()
for _, route := range routes {
route := route
router.HandleFunc(route.Route, func(rw http.ResponseWriter, r *http.Request) {
conf, err := config.GetUIConfig(r)
conf, err := userCfgRepo.GetUIConfig(auth.GetUser(r.Context()))
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return

117
pkg/archive/archive.go Normal file
View File

@ -0,0 +1,117 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"encoding/json"
"fmt"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
type ArchiveBackend interface {
Init(rawConfig json.RawMessage) error
LoadJobMeta(job *schema.Job) (*schema.JobMeta, error)
LoadJobData(job *schema.Job) (schema.JobData, error)
LoadClusterCfg(name string) (*schema.Cluster, error)
StoreJobMeta(jobMeta *schema.JobMeta) error
ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) error
GetClusters() []string
Iter() <-chan *schema.JobMeta
}
var cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
var ar ArchiveBackend
func Init(rawConfig json.RawMessage) error {
var kind struct {
Kind string `json:"kind"`
}
if err := json.Unmarshal(rawConfig, &kind); err != nil {
return err
}
switch kind.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
default:
return fmt.Errorf("unkown archive backend '%s''", kind.Kind)
}
if err := ar.Init(rawConfig); err != nil {
return err
}
return initClusterConfig()
}
func GetHandle() ArchiveBackend {
return ar
}
// Helper to metricdata.LoadAverages().
func LoadAveragesFromArchive(
job *schema.Job,
metrics []string,
data [][]schema.Float) error {
metaFile, err := ar.LoadJobMeta(job)
if err != nil {
return err
}
for i, m := range metrics {
if stat, ok := metaFile.Statistics[m]; ok {
data[i] = append(data[i], schema.Float(stat.Avg))
} else {
data[i] = append(data[i], schema.NaN)
}
}
return nil
}
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
metaFile, err := ar.LoadJobMeta(job)
if err != nil {
return nil, err
}
return metaFile.Statistics, nil
}
// If the job is archived, find its `meta.json` file and override the tags list
// in that JSON file. If the job is not archived, nothing is done.
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
if job.State == schema.JobStateRunning {
return nil
}
jobMeta, err := ar.LoadJobMeta(job)
if err != nil {
return err
}
jobMeta.Tags = make([]*schema.Tag, 0)
for _, tag := range tags {
jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
Name: tag.Name,
Type: tag.Type,
})
}
return ar.StoreJobMeta(jobMeta)
}

View File

@ -0,0 +1,165 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"errors"
"fmt"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var Clusters []*schema.Cluster
var nodeLists map[string]map[string]NodeList
func initClusterConfig() error {
Clusters = []*schema.Cluster{}
nodeLists = map[string]map[string]NodeList{}
for _, c := range ar.GetClusters() {
cluster, err := ar.LoadClusterCfg(c)
if err != nil {
return err
}
if len(cluster.Name) == 0 ||
len(cluster.MetricConfig) == 0 ||
len(cluster.SubClusters) == 0 {
return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
}
for _, mc := range cluster.MetricConfig {
if len(mc.Name) == 0 {
return errors.New("cluster.metricConfig.name should not be empty")
}
if mc.Timestep < 1 {
return errors.New("cluster.metricConfig.timestep should not be smaller than one")
}
// For backwards compability...
if mc.Scope == "" {
mc.Scope = schema.MetricScopeNode
}
if !mc.Scope.Valid() {
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
}
}
Clusters = append(Clusters, cluster)
nodeLists[cluster.Name] = make(map[string]NodeList)
for _, sc := range cluster.SubClusters {
if sc.Nodes == "" {
continue
}
nl, err := ParseNodeList(sc.Nodes)
if err != nil {
return fmt.Errorf("in %s/cluster.json: %w", cluster.Name, err)
}
nodeLists[cluster.Name][sc.Name] = nl
}
}
return nil
}
func GetCluster(cluster string) *schema.Cluster {
for _, c := range Clusters {
if c.Name == cluster {
return c
}
}
return nil
}
func GetSubCluster(cluster, subcluster string) *schema.SubCluster {
for _, c := range Clusters {
if c.Name == cluster {
for _, p := range c.SubClusters {
if p.Name == subcluster {
return p
}
}
}
}
return nil
}
func GetMetricConfig(cluster, metric string) *schema.MetricConfig {
for _, c := range Clusters {
if c.Name == cluster {
for _, m := range c.MetricConfig {
if m.Name == metric {
return m
}
}
}
}
return nil
}
// AssignSubCluster sets the `job.subcluster` property of the job based
// on its cluster and resources.
func AssignSubCluster(job *schema.BaseJob) error {
cluster := GetCluster(job.Cluster)
if cluster == nil {
return fmt.Errorf("unkown cluster: %#v", job.Cluster)
}
if job.SubCluster != "" {
for _, sc := range cluster.SubClusters {
if sc.Name == job.SubCluster {
return nil
}
}
return fmt.Errorf("already assigned subcluster %#v unkown (cluster: %#v)", job.SubCluster, job.Cluster)
}
if len(job.Resources) == 0 {
return fmt.Errorf("job without any resources/hosts")
}
host0 := job.Resources[0].Hostname
for sc, nl := range nodeLists[job.Cluster] {
if nl != nil && nl.Contains(host0) {
job.SubCluster = sc
return nil
}
}
if cluster.SubClusters[0].Nodes == "" {
job.SubCluster = cluster.SubClusters[0].Name
return nil
}
return fmt.Errorf("no subcluster found for cluster %#v and host %#v", job.Cluster, host0)
}
func GetSubClusterByNode(cluster, hostname string) (string, error) {
for sc, nl := range nodeLists[cluster] {
if nl != nil && nl.Contains(hostname) {
return sc, nil
}
}
c := GetCluster(cluster)
if c == nil {
return "", fmt.Errorf("unkown cluster: %#v", cluster)
}
if c.SubClusters[0].Nodes == "" {
return c.SubClusters[0].Name, nil
}
return "", fmt.Errorf("no subcluster found for cluster %#v and host %#v", cluster, hostname)
}

228
pkg/archive/fsBackend.go Normal file
View File

@ -0,0 +1,228 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"os"
"path"
"path/filepath"
"strconv"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
type FsArchiveConfig struct {
Path string `json:"path"`
}
type FsArchive struct {
path string
clusters []string
}
func getPath(
job *schema.Job,
rootPath string,
file string) string {
lvl1, lvl2 := fmt.Sprintf("%d", job.JobID/1000), fmt.Sprintf("%03d", job.JobID%1000)
return filepath.Join(
rootPath,
job.Cluster,
lvl1, lvl2,
strconv.FormatInt(job.StartTime.Unix(), 10), file)
}
func loadJobMeta(filename string) (*schema.JobMeta, error) {
f, err := os.Open(filename)
if err != nil {
log.Errorf("fsBackend loadJobMeta()- %v", err)
return &schema.JobMeta{}, err
}
defer f.Close()
return DecodeJobMeta(bufio.NewReader(f))
}
func (fsa *FsArchive) Init(rawConfig json.RawMessage) error {
var config FsArchiveConfig
if err := json.Unmarshal(rawConfig, &config); err != nil {
log.Errorf("fsBackend Init()- %v", err)
return err
}
if config.Path == "" {
err := fmt.Errorf("fsBackend Init()- empty path")
log.Errorf("fsBackend Init()- %v", err)
return err
}
fsa.path = config.Path
entries, err := os.ReadDir(fsa.path)
if err != nil {
log.Errorf("fsBackend Init()- %v", err)
return err
}
for _, de := range entries {
fsa.clusters = append(fsa.clusters, de.Name())
}
return nil
}
func (fsa *FsArchive) LoadJobData(job *schema.Job) (schema.JobData, error) {
filename := getPath(job, fsa.path, "data.json")
f, err := os.Open(filename)
if err != nil {
log.Errorf("fsBackend LoadJobData()- %v", err)
return nil, err
}
defer f.Close()
return DecodeJobData(bufio.NewReader(f), filename)
}
func (fsa *FsArchive) LoadJobMeta(job *schema.Job) (*schema.JobMeta, error) {
filename := getPath(job, fsa.path, "meta.json")
return loadJobMeta(filename)
}
func (fsa *FsArchive) LoadClusterCfg(name string) (*schema.Cluster, error) {
b, err := os.ReadFile(filepath.Join(fsa.path, name, "cluster.json"))
if err != nil {
log.Errorf("fsBackend LoadClusterCfg()- %v", err)
return &schema.Cluster{}, err
}
if config.Keys.Validate {
if err := schema.Validate(schema.ClusterCfg, bytes.NewReader(b)); err != nil {
return &schema.Cluster{}, fmt.Errorf("Validate cluster config: %v\n", err)
}
}
return DecodeCluster(bytes.NewReader(b))
}
func (fsa *FsArchive) Iter() <-chan *schema.JobMeta {
ch := make(chan *schema.JobMeta)
go func() {
clustersDir, err := os.ReadDir(fsa.path)
if err != nil {
log.Fatalf("Reading clusters failed: %s", err.Error())
}
for _, clusterDir := range clustersDir {
lvl1Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name()))
if err != nil {
log.Fatalf("Reading jobs failed: %s", err.Error())
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
// Could be the cluster.json file
continue
}
lvl2Dirs, err := os.ReadDir(filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name()))
if err != nil {
log.Fatalf("Reading jobs failed: %s", err.Error())
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(fsa.path, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
if err != nil {
log.Fatalf("Reading jobs failed: %s", err.Error())
}
for _, startTimeDir := range startTimeDirs {
if startTimeDir.IsDir() {
job, err := loadJobMeta(filepath.Join(dirpath, startTimeDir.Name(), "meta.json"))
if err != nil {
log.Errorf("in %s: %s", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
} else {
ch <- job
}
}
}
}
}
}
close(ch)
}()
return ch
}
func (fsa *FsArchive) StoreJobMeta(jobMeta *schema.JobMeta) error {
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
f, err := os.Create(getPath(&job, fsa.path, "meta.json"))
if err != nil {
return err
}
if err := EncodeJobMeta(f, jobMeta); err != nil {
return err
}
if err := f.Close(); err != nil {
return err
}
return nil
}
func (fsa *FsArchive) GetClusters() []string {
return fsa.clusters
}
func (fsa *FsArchive) ImportJob(
jobMeta *schema.JobMeta,
jobData *schema.JobData) error {
job := schema.Job{
BaseJob: jobMeta.BaseJob,
StartTime: time.Unix(jobMeta.StartTime, 0),
StartTimeUnix: jobMeta.StartTime,
}
dir := getPath(&job, fsa.path, "")
if err := os.MkdirAll(dir, 0777); err != nil {
return err
}
f, err := os.Create(path.Join(dir, "meta.json"))
if err != nil {
return err
}
if err := EncodeJobMeta(f, jobMeta); err != nil {
return err
}
if err := f.Close(); err != nil {
return err
}
f, err = os.Create(path.Join(dir, "data.json"))
if err != nil {
return err
}
if err := EncodeJobData(f, jobData); err != nil {
return err
}
return f.Close()
}

View File

@ -0,0 +1,163 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"encoding/json"
"fmt"
"testing"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func TestInitEmptyPath(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"kind\":\"../../test/archive\"}"))
if err == nil {
t.Fatal(err)
}
}
func TestInitNoJson(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("\"path\":\"../../test/archive\"}"))
if err == nil {
t.Fatal(err)
}
}
func TestInitNotExists(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"path\":\"../../test/job-archive\"}"))
if err == nil {
t.Fatal(err)
}
}
func TestInit(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
if err != nil {
t.Fatal(err)
}
if fsa.path != "../../test/archive" {
t.Fail()
}
if len(fsa.clusters) != 1 || fsa.clusters[0] != "emmy" {
t.Fail()
}
}
func TestLoadJobMetaInternal(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
if err != nil {
t.Fatal(err)
}
job, err := loadJobMeta("../../test/archive/emmy/1404/397/1609300556/meta.json")
if err != nil {
t.Fatal(err)
}
if job.JobID != 1404397 {
t.Fail()
}
if int(job.NumNodes) != len(job.Resources) {
t.Fail()
}
if job.StartTime != 1609300556 {
t.Fail()
}
}
func TestLoadJobMeta(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
if err != nil {
t.Fatal(err)
}
jobIn := schema.Job{BaseJob: schema.JobDefaults}
jobIn.StartTime = time.Unix(1608923076, 0)
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
job, err := fsa.LoadJobMeta(&jobIn)
if err != nil {
t.Fatal(err)
}
if job.JobID != 1403244 {
t.Fail()
}
if int(job.NumNodes) != len(job.Resources) {
t.Fail()
}
if job.StartTime != 1608923076 {
t.Fail()
}
}
func TestLoadJobData(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
if err != nil {
t.Fatal(err)
}
jobIn := schema.Job{BaseJob: schema.JobDefaults}
jobIn.StartTime = time.Unix(1608923076, 0)
jobIn.JobID = 1403244
jobIn.Cluster = "emmy"
data, err := fsa.LoadJobData(&jobIn)
if err != nil {
t.Fatal(err)
}
for name, scopes := range data {
fmt.Printf("Metric name: %s\n", name)
if _, exists := scopes[schema.MetricScopeNode]; !exists {
t.Fail()
}
}
}
func TestLoadCluster(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
if err != nil {
t.Fatal(err)
}
cfg, err := fsa.LoadClusterCfg("emmy")
if err != nil {
t.Fatal(err)
}
if cfg.SubClusters[0].CoresPerSocket != 10 {
t.Fail()
}
}
func TestIter(t *testing.T) {
var fsa FsArchive
err := fsa.Init(json.RawMessage("{\"path\":\"../../test/archive\"}"))
if err != nil {
t.Fatal(err)
}
for job := range fsa.Iter() {
fmt.Printf("Job %d\n", job.JobID)
if job.Cluster != "emmy" {
t.Fail()
}
}
}

70
pkg/archive/json.go Normal file
View File

@ -0,0 +1,70 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
import (
"encoding/json"
"io"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {
data := cache.Get(k, func() (value interface{}, ttl time.Duration, size int) {
var d schema.JobData
if err := json.NewDecoder(r).Decode(&d); err != nil {
return err, 0, 1000
}
return d, 1 * time.Hour, d.Size()
})
if err, ok := data.(error); ok {
return nil, err
}
return data.(schema.JobData), nil
}
func DecodeJobMeta(r io.Reader) (*schema.JobMeta, error) {
var d schema.JobMeta
if err := json.NewDecoder(r).Decode(&d); err != nil {
return &d, err
}
// Sanitize parameters
return &d, nil
}
func DecodeCluster(r io.Reader) (*schema.Cluster, error) {
var c schema.Cluster
if err := json.NewDecoder(r).Decode(&c); err != nil {
return &c, err
}
// Sanitize parameters
return &c, nil
}
func EncodeJobData(w io.Writer, d *schema.JobData) error {
// Sanitize parameters
if err := json.NewEncoder(w).Encode(d); err != nil {
return err
}
return nil
}
func EncodeJobMeta(w io.Writer, d *schema.JobMeta) error {
// Sanitize parameters
if err := json.NewEncoder(w).Encode(d); err != nil {
return err
}
return nil
}

View File

@ -2,7 +2,7 @@
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package config
package archive
import (
"fmt"

View File

@ -2,7 +2,7 @@
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package config
package archive
import (
"testing"

13
pkg/archive/s3Backend.go Normal file
View File

@ -0,0 +1,13 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package archive
type S3ArchiveConfig struct {
Path string `json:"filePath"`
}
type S3Archive struct {
path string
}

View File

@ -116,4 +116,9 @@ to use `1` as size for every entry, in that case at most `maxMemory` entries wil
## Affects on GC
Because of the way a garbage collector decides when to run ([explained in the runtime package](https://pkg.go.dev/runtime)), having large amounts of data sitting in your cache might increase the memory consumption of your process by two times the maximum size of the cache. You can decrease the *target percentage* to reduce the effect, but then you might have negative performance effects when your cache is not filled.
Because of the way a garbage collector decides when to run ([explained in the
runtime package](https://pkg.go.dev/runtime)), having large amounts of data
sitting in your cache might increase the memory consumption of your process by
two times the maximum size of the cache. You can decrease the *target
percentage* to reduce the effect, but then you might have negative performance
effects when your cache is not filled.

174
pkg/schema/cluster.go Normal file
View File

@ -0,0 +1,174 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package schema
import "strconv"
type Accelerator struct {
ID string `json:"id"`
Type string `json:"type"`
Model string `json:"model"`
}
type Topology struct {
Node []int `json:"node"`
Socket [][]int `json:"socket"`
MemoryDomain [][]int `json:"memoryDomain"`
Die [][]int `json:"die"`
Core [][]int `json:"core"`
Accelerators []*Accelerator `json:"accelerators"`
}
type SubCluster struct {
Name string `json:"name"`
Nodes string `json:"nodes"`
NumberOfNodes int `json:"numberOfNodes"`
ProcessorType string `json:"processorType"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`
ThreadsPerCore int `json:"threadsPerCore"`
FlopRateScalar int `json:"flopRateScalar"`
FlopRateSimd int `json:"flopRateSimd"`
MemoryBandwidth int `json:"memoryBandwidth"`
Topology *Topology `json:"topology"`
}
type SubClusterConfig struct {
Name string `json:"name"`
Peak float64 `json:"peak"`
Normal float64 `json:"normal"`
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
}
type MetricConfig struct {
Name string `json:"name"`
Unit string `json:"unit"`
Scope MetricScope `json:"scope"`
Aggregation *string `json:"aggregation"`
Timestep int `json:"timestep"`
Peak *float64 `json:"peak"`
Normal *float64 `json:"normal"`
Caution *float64 `json:"caution"`
Alert *float64 `json:"alert"`
SubClusters []*SubClusterConfig `json:"subClusters"`
}
type Cluster struct {
Name string `json:"name"`
MetricConfig []*MetricConfig `json:"metricConfig"`
SubClusters []*SubCluster `json:"subClusters"`
}
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
// hwthread is in that socket, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetSocketsFromHWThreads(
hwthreads []int) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
for _, hwthread := range hwthreads {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthread == hwthreadInSocket {
socketsMap[socket] += 1
}
}
}
}
exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
}
return sockets, exclusive
}
// Return a list of core IDs given a list of hwthread IDs. Even if just one
// hwthread is in that core, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the cores in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetCoresFromHWThreads(
hwthreads []int) (cores []int, exclusive bool) {
coresMap := map[int]int{}
for _, hwthread := range hwthreads {
for core, hwthreadsInCore := range topo.Core {
for _, hwthreadInCore := range hwthreadsInCore {
if hwthread == hwthreadInCore {
coresMap[core] += 1
}
}
}
}
exclusive = true
hwthreadsPerCore := len(topo.Node) / len(topo.Core)
cores = make([]int, 0, len(coresMap))
for core, count := range coresMap {
cores = append(cores, core)
exclusive = exclusive && count == hwthreadsPerCore
}
return cores, exclusive
}
// Return a list of memory domain IDs given a list of hwthread IDs. Even if
// just one hwthread is in that memory domain, add it to the list. If no
// hwthreads other than those in the argument list are assigned to one of the
// memory domains in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetMemoryDomainsFromHWThreads(
hwthreads []int) (memDoms []int, exclusive bool) {
memDomsMap := map[int]int{}
for _, hwthread := range hwthreads {
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
for _, hwthreadInmemDom := range hwthreadsInmemDom {
if hwthread == hwthreadInmemDom {
memDomsMap[memDom] += 1
}
}
}
}
exclusive = true
hwthreadsPermemDom := len(topo.Node) / len(topo.MemoryDomain)
memDoms = make([]int, 0, len(memDomsMap))
for memDom, count := range memDomsMap {
memDoms = append(memDoms, memDom)
exclusive = exclusive && count == hwthreadsPermemDom
}
return memDoms, exclusive
}
func (topo *Topology) GetAcceleratorIDs() ([]int, error) {
accels := make([]int, 0)
for _, accel := range topo.Accelerators {
id, err := strconv.Atoi(accel.ID)
if err != nil {
return nil, err
}
accels = append(accels, id)
}
return accels, nil
}
func (topo *Topology) GetAcceleratorIndex(id string) (int, bool) {
for idx, accel := range topo.Accelerators {
if accel.ID == id {
return idx, true
}
}
return -1, false
}

111
pkg/schema/config.go Normal file
View File

@ -0,0 +1,111 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package schema
import (
"encoding/json"
"time"
)
type LdapConfig struct {
Url string `json:"url"`
UserBase string `json:"user_base"`
SearchDN string `json:"search_dn"`
UserBind string `json:"user_bind"`
UserFilter string `json:"user_filter"`
SyncInterval string `json:"sync_interval"` // Parsed using time.ParseDuration.
SyncDelOldUsers bool `json:"sync_del_old_users"`
}
type JWTAuthConfig struct {
// Specifies for how long a session or JWT shall be valid
// as a string parsable by time.ParseDuration().
MaxAge int64 `json:"max-age"`
}
type IntRange struct {
From int `json:"from"`
To int `json:"to"`
}
type TimeRange struct {
From *time.Time `json:"from"`
To *time.Time `json:"to"`
}
type FilterRanges struct {
Duration *IntRange `json:"duration"`
NumNodes *IntRange `json:"numNodes"`
StartTime *TimeRange `json:"startTime"`
}
type ClusterConfig struct {
Name string `json:"name"`
FilterRanges *FilterRanges `json:"filterRanges"`
MetricDataRepository json.RawMessage `json:"metricDataRepository"`
}
// Format of the configuration (file). See below for the defaults.
type ProgramConfig struct {
// Address where the http (or https) server will listen on (for example: 'localhost:80').
Addr string `json:"addr"`
// Drop root permissions once .env was read and the port was taken.
User string `json:"user"`
Group string `json:"group"`
// Disable authentication (for everything: API, Web-UI, ...)
DisableAuthentication bool `json:"disable-authentication"`
// If `embed-static-files` is true (default), the frontend files are directly
// embeded into the go binary and expected to be in web/frontend. Only if
// it is false the files in `static-files` are served instead.
EmbedStaticFiles bool `json:"embed-static-files"`
StaticFiles string `json:"static-files"`
// 'sqlite3' or 'mysql' (mysql will work for mariadb as well)
DBDriver string `json:"db-driver"`
// For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).
DB string `json:"db"`
// Config for job archive
Archive json.RawMessage `json:"archive"`
// Keep all metric data in the metric data repositories,
// do not write to the job-archive.
DisableArchive bool `json:"disable-archive"`
// Validate json input against schema
Validate bool `json:"validate"`
// For LDAP Authentication and user synchronisation.
LdapConfig *LdapConfig `json:"ldap"`
JwtConfig *JWTAuthConfig `json:"jwts"`
// If 0 or empty, the session/token does not expire!
SessionMaxAge string `json:"session-max-age"`
// If both those options are not empty, use HTTPS using those certificates.
HttpsCertFile string `json:"https-cert-file"`
HttpsKeyFile string `json:"https-key-file"`
// If not the empty string and `addr` does not end in ":80",
// redirect every request incoming at port 80 to that url.
RedirectHttpTo string `json:"redirect-http-to"`
// If overwriten, at least all the options in the defaults below must
// be provided! Most options here can be overwritten by the user.
UiDefaults map[string]interface{} `json:"ui-defaults"`
// Where to store MachineState files
MachineStateDir string `json:"machine-state-dir"`
// If not zero, automatically mark jobs as stopped running X seconds longer than their walltime.
StopJobsExceedingWalltime int `json:"stop-jobs-exceeding-walltime"`
// Array of Clusters
Clusters []*ClusterConfig `json:"clusters"`
}

View File

@ -11,57 +11,70 @@ import (
"time"
)
// Common subset of Job and JobMeta. Use one of those, not
// this type directly.
// Non-Swaggered Comment: BaseJob
// Non-Swaggered Comment: Common subset of Job and JobMeta. Use one of those, not this type directly.
type BaseJob struct {
JobID int64 `json:"jobId" db:"job_id"`
User string `json:"user" db:"user"`
Project string `json:"project" db:"project"`
Cluster string `json:"cluster" db:"cluster"`
SubCluster string `json:"subCluster" db:"subcluster"`
Partition string `json:"partition" db:"partition"`
ArrayJobId int32 `json:"arrayJobId" db:"array_job_id"`
NumNodes int32 `json:"numNodes" db:"num_nodes"`
NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads"`
NumAcc int32 `json:"numAcc" db:"num_acc"`
Exclusive int32 `json:"exclusive" db:"exclusive"`
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status"`
SMT int32 `json:"smt" db:"smt"`
State JobState `json:"jobState" db:"job_state"`
Duration int32 `json:"duration" db:"duration"`
Walltime int64 `json:"walltime" db:"walltime"`
Tags []*Tag `json:"tags"`
RawResources []byte `json:"-" db:"resources"`
Resources []*Resource `json:"resources"`
RawMetaData []byte `json:"-" db:"meta_data"`
MetaData map[string]string `json:"metaData"`
// The unique identifier of a job
JobID int64 `json:"jobId" db:"job_id" example:"123000"`
User string `json:"user" db:"user" example:"abcd100h"` // The unique identifier of a user
Project string `json:"project" db:"project" example:"abcd200"` // The unique identifier of a project
Cluster string `json:"cluster" db:"cluster" example:"fritz"` // The unique identifier of a cluster
SubCluster string `json:"subCluster" db:"subcluster" example:"main"` // The unique identifier of a sub cluster
Partition string `json:"partition" db:"partition" example:"main"` // The Slurm partition to which the job was submitted
ArrayJobId int64 `json:"arrayJobId" db:"array_job_id" example:"123000"` // The unique identifier of an array job
NumNodes int32 `json:"numNodes" db:"num_nodes" example:"2" minimum:"1"` // Number of nodes used (Min > 0)
NumHWThreads int32 `json:"numHwthreads" db:"num_hwthreads" example:"20" minimum:"1"` // Number of HWThreads used (Min > 0)
NumAcc int32 `json:"numAcc" db:"num_acc" example:"2" minimum:"1"` // Number of accelerators used (Min > 0)
Exclusive int32 `json:"exclusive" db:"exclusive" example:"1" minimum:"0" maximum:"2"` // Specifies how nodes are shared: 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive (Default), 2 - Shared among multiple jobs of same user
MonitoringStatus int32 `json:"monitoringStatus" db:"monitoring_status" example:"1" minimum:"0" maximum:"3"` // State of monitoring system during job run: 0 - Disabled, 1 - Running or Archiving (Default), 2 - Archiving Failed, 3 - Archiving Successfull
SMT int32 `json:"smt" db:"smt" example:"4"` // SMT threads used by job
State JobState `json:"jobState" db:"job_state" example:"completed" enums:"completed,failed,cancelled,stopped,timeout,out_of_memory"` // Final state of job
Duration int32 `json:"duration" db:"duration" example:"43200" minimum:"1"` // Duration of job in seconds (Min > 0)
Walltime int64 `json:"walltime" db:"walltime" example:"86400" minimum:"1"` // Requested walltime of job in seconds (Min > 0)
Tags []*Tag `json:"tags"` // List of tags
RawResources []byte `json:"-" db:"resources"` // Resources used by job [As Bytes]
Resources []*Resource `json:"resources"` // Resources used by job
RawMetaData []byte `json:"-" db:"meta_data"` // Additional information about the job [As Bytes]
MetaData map[string]string `json:"metaData"` // Additional information about the job
}
// This type is used as the GraphQL interface and using sqlx as a table row.
// Non-Swaggered Comment: Job
// Non-Swaggered Comment: This type is used as the GraphQL interface and using sqlx as a table row.
// Job model
// @Description Information of a HPC job.
type Job struct {
// The unique identifier of a job in the database
ID int64 `json:"id" db:"id"`
BaseJob
StartTimeUnix int64 `json:"-" db:"start_time"`
StartTime time.Time `json:"startTime"`
MemUsedMax float64 `json:"-" db:"mem_used_max"`
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"`
MemBwAvg float64 `json:"-" db:"mem_bw_avg"`
LoadAvg float64 `json:"-" db:"load_avg"`
NetBwAvg float64 `json:"-" db:"net_bw_avg"`
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"`
FileBwAvg float64 `json:"-" db:"file_bw_avg"`
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"`
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64
FileDataVolTotal float64 `json:"-" db:"file_data_vol_total"` // FileDataVolTotal as Float64
}
// When reading from the database or sending data via GraphQL, the start time can be in the much more
// convenient time.Time type. In the `meta.json` files, the start time is encoded as a unix epoch timestamp.
// This is why there is this struct, which contains all fields from the regular job struct, but "overwrites"
// the StartTime field with one of type int64.
// Non-Swaggered Comment: JobMeta
// Non-Swaggered Comment: When reading from the database or sending data via GraphQL, the start time can be in the much more
// Non-Swaggered Comment: convenient time.Time type. In the `meta.json` files, the start time is encoded as a unix epoch timestamp.
// Non-Swaggered Comment: This is why there is this struct, which contains all fields from the regular job struct, but "overwrites"
// Non-Swaggered Comment: the StartTime field with one of type int64.
// Non-Swaggered Comment: ID *int64 `json:"id,omitempty"` >> never used in the job-archive, only available via REST-API
// JobMeta model
// @Description Meta data information of a HPC job.
type JobMeta struct {
ID *int64 `json:"id,omitempty"` // never used in the job-archive, only available via REST-API
// The unique identifier of a job in the database
ID *int64 `json:"id,omitempty"`
BaseJob
StartTime int64 `json:"startTime" db:"start_time"`
Statistics map[string]JobStatistics `json:"statistics,omitempty"`
StartTime int64 `json:"startTime" db:"start_time" example:"1649723812" minimum:"1"` // Start epoch time stamp in seconds (Min > 0)
Statistics map[string]JobStatistics `json:"statistics,omitempty"` // Metric statistics of job
}
const (
@ -76,24 +89,32 @@ var JobDefaults BaseJob = BaseJob{
MonitoringStatus: MonitoringStatusRunningOrArchiving,
}
// JobStatistics model
// @Description Specification for job metric statistics.
type JobStatistics struct {
Unit string `json:"unit"`
Avg float64 `json:"avg"`
Min float64 `json:"min"`
Max float64 `json:"max"`
// Metric unit (see schema/unit.schema.json)
Unit string `json:"unit" example:"GHz"`
Avg float64 `json:"avg" example:"2500" minimum:"0"` // Job metric average
Min float64 `json:"min" example:"2000" minimum:"0"` // Job metric minimum
Max float64 `json:"max" example:"3000" minimum:"0"` // Job metric maximum
}
// Tag model
// @Description Defines a tag using name and type.
type Tag struct {
// The unique DB identifier of a tag
ID int64 `json:"id" db:"id"`
Type string `json:"type" db:"tag_type"`
Name string `json:"name" db:"tag_name"`
Type string `json:"type" db:"tag_type" example:"Debug"` // Tag Type
Name string `json:"name" db:"tag_name" example:"Testjob"` // Tag Name
}
// Resource model
// @Description A resource used by a job
type Resource struct {
Hostname string `json:"hostname"`
HWThreads []int `json:"hwthreads,omitempty"`
Accelerators []string `json:"accelerators,omitempty"`
Configuration string `json:"configuration,omitempty"`
Hostname string `json:"hostname"` // Name of the host (= node)
HWThreads []int `json:"hwthreads,omitempty"` // List of OS processor ids
Accelerators []string `json:"accelerators,omitempty"` // List of of accelerator device ids
Configuration string `json:"configuration,omitempty"` // The configuration options of the node
}
type JobState string

View File

@ -0,0 +1,236 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://cluster.schema.json",
"title": "HPC cluster description",
"description": "Meta data information of a HPC cluster",
"type": "object",
"properties":{
"name": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"metricConfig": {
"description": "Metric specifications",
"type": "array",
"items": {
"type": "object",
"properties":{
"name": {
"description": "Metric name",
"type": "string"
},
"unit": {
"description": "Metric unit",
"type": "string"
},
"scope": {
"description": "Native measurement resolution",
"type": "string"
},
"timestep": {
"description": "Frequency of timeseries points",
"type": "integer"
},
"aggregation": {
"description": "How the metric is aggregated",
"type": "string",
"enum": [
"sum",
"avg"
]
},
"subClusters": {
"description": "Array of cluster hardware partition metric thresholds",
"type": "array",
"items": {
"type": "object",
"properties":{
"name": {
"description": "Hardware partition name",
"type": "string"
},
"peak": {
"type": "number"
},
"normal": {
"type": "number"
},
"caution": {
"type": "number"
},
"alert": {
"type": "number"
}
},
"required": [
"name",
"peak",
"caution",
"alert"
]
}
}
},
"required": [
"name",
"unit",
"scope",
"timestep"
]
},
"minItems": 1
},
"subClusters": {
"description": "Array of cluster hardware partitions",
"type": "array",
"items": {
"type": "object",
"properties":{
"name": {
"description": "Hardware partition name",
"type": "string"
},
"processorType": {
"description": "Processor type",
"type": "string"
},
"socketsPerNode": {
"description": "Number of sockets per node",
"type": "integer"
},
"coresPerSocket": {
"description": "Number of cores per socket",
"type": "integer"
},
"threadsPerCore": {
"description": "Number of SMT threads per core",
"type": "integer"
},
"flopRateScalar": {
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
"type": "integer"
},
"flopRateSimd": {
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
"type": "integer"
},
"memoryBandwidth": {
"description": "Theoretical node peak memory bandwidth in GB/s",
"type": "integer"
},
"nodes": {
"description": "Node list expression",
"type": "string"
},
"topology": {
"description": "Node topology",
"type": "object",
"properties":{
"node": {
"description": "HwTread lists of node",
"type": "array",
"items": {
"type": "integer"
}
},
"socket": {
"description": "HwTread lists of sockets",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"memoryDomain": {
"description": "HwTread lists of memory domains",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"die": {
"description": "HwTread lists of dies",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"core": {
"description": "HwTread lists of cores",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"accelerators": {
"type": "array",
"description": "List of of accelerator devices",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique device id"
},
"type": {
"type": "string",
"description": "The accelerator type",
"enum": [
"Nvidia GPU",
"AMD GPU",
"Intel GPU"
]
},
"model": {
"type": "string",
"description": "The accelerator model"
}
},
"required": [
"id",
"type",
"model"
]
}
}
},
"required":[
"node",
"socket",
"memoryDomain"
]
}
},
"required":[
"name",
"topology",
"processorType",
"socketsPerNode",
"coresPerSocket",
"threadsPerCore",
"flopRateScalar",
"flopRateSimd",
"memoryBandwidth"
]
},
"minItems": 1
}
},
"required":[
"name",
"metricConfig",
"subClusters"
]
}

View File

@ -0,0 +1,353 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://config.schema.json",
"title": "cc-backend configuration file schema",
"type": "object",
"properties":{
"addr": {
"description": "Address where the http (or https) server will listen on (for example: 'localhost:80').",
"type": "string"
},
"user": {
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
"type": "string"
},
"group": {
"description": "Drop root permissions once .env was read and the port was taken. Only applicable if using privileged port.",
"type": "string"
},
"disable-authentication": {
"description": "Disable authentication (for everything: API, Web-UI, ...).",
"type": "boolean"
},
"embed-static-files": {
"description": "If all files in `web/frontend/public` should be served from within the binary itself (they are embedded) or not.",
"type": "boolean"
},
"static-files": {
"description": "Folder where static assets can be found, if embed-static-files is false.",
"type": "string"
},
"db-driver": {
"description": "sqlite3 or mysql (mysql will work for mariadb as well).",
"type": "string",
"enum": [
"sqlite3",
"mysql"
]
},
"db": {
"description": "For sqlite3 a filename, for mysql a DSN in this format: https://github.com/go-sql-driver/mysql#dsn-data-source-name (Without query parameters!).",
"type": "string"
},
"job-archive": {
"description": "Path to the job-archive.",
"type": "string"
},
"disable-archive": {
"description": "Keep all metric data in the metric data repositories, do not write to the job-archive.",
"type": "boolean"
},
"validate": {
"description": "Validate all input json documents against json schema.",
"type": "boolean"
},
"session-max-age": {
"description": "Specifies for how long a session shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!",
"type": "string"
},
"jwt-max-age": {
"description": "Specifies for how long a JWT token shall be valid as a string parsable by time.ParseDuration(). If 0 or empty, the session/token does not expire!",
"type": "string"
},
"https-cert-file": {
"description": "Filepath to SSL certificate. If also https-key-file is set use HTTPS using those certificates.",
"type": "string"
},
"https-key-file": {
"description": "Filepath to SSL key file. If also https-cert-file is set use HTTPS using those certificates.",
"type": "string"
},
"redirect-http-to": {
"description": "If not the empty string and addr does not end in :80, redirect every request incoming at port 80 to that url.",
"type": "string"
},
"stop-jobs-exceeding-walltime": {
"description": "If not zero, automatically mark jobs as stopped running X seconds longer than their walltime. Only applies if walltime is set for job.",
"type": "integer"
},
"": {
"description": "",
"type": "string"
},
"ldap": {
"description": "For LDAP Authentication and user synchronisation.",
"type": "object",
"properties": {
"url": {
"description": "URL of LDAP directory server.",
"type": "string"
},
"user_base": {
"description": "Base DN of user tree root.",
"type": "string"
},
"search_dn": {
"description": "DN for authenticating LDAP admin account with general read rights.",
"type": "string"
},
"user_bind": {
"description": "Expression used to authenticate users via LDAP bind. Must contain uid={username}.",
"type": "string"
},
"user_filter": {
"description": "Filter to extract users for syncing.",
"type": "string"
},
"sync_interval": {
"description": "Interval used for syncing local user table with LDAP directory. Parsed using time.ParseDuration.",
"type": "string"
},
"sync_del_old_users": {
"description": "Delete obsolete users in database.",
"type": "boolean"
}
},
"required": [
"url",
"user_base",
"search_dn",
"user_bind",
"user_filter"
]
},
"clusters": {
"description": "Configuration for the clusters to be displayed.",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"description": "The name of the cluster.",
"type": "string"
},
"metricDataRepository": {
"description": "Type of the metric data repository for this cluster",
"type": "object",
"properties": {
"kind": {
"type": "string",
"enum": [
"influxdb-v2",
"prometheus",
"cc-metric-store",
"test"
]
},
"url": {
"type": "string"
},
"token": {
"type": "string"
}
},
"required": [
"kind",
"url"
]
},
"filterRanges": {
"description": "This option controls the slider ranges for the UI controls of numNodes, duration, and startTime.",
"type": "object",
"properties": {
"numNodes": {
"description": "UI slider range for number of nodes",
"type": "object",
"properties": {
"from": {
"type": "integer"
},
"to": {
"type": "integer"
}
},
"required": [
"from",
"to"
]
},
"duration": {
"description": "UI slider range for duration",
"type": "object",
"properties": {
"from": {
"type": "integer"
},
"to": {
"type": "integer"
}
},
"required": [
"from",
"to"
]
},
"startTime": {
"description": "UI slider range for start time",
"type": "object",
"properties": {
"from": {
"type": "string",
"format": "date-time"
},
"to": {
"type": "null"
}
},
"required": [
"from",
"to"
]
}
},
"required": [
"numNodes",
"duration",
"startTime"
]
}
},
"required": [
"name",
"metricDataRepository",
"filterRanges"
],
"minItems": 1
}
},
"ui-defaults": {
"description": "Default configuration for web UI",
"type": "object",
"properties": {
"plot_general_colorBackground": {
"description": "Color plot background according to job average threshold limits",
"type": "boolean"
},
"plot_general_lineWidth": {
"description": "Initial linewidth",
"type": "integer"
},
"plot_list_jobsPerPage": {
"description": "Jobs shown per page in job lists",
"type": "integer"
},
"plot_list_hideShortRunningJobs": {
"description": "Do not show running jobs shorter than X seconds",
"type": "integer"
},
"plot_view_plotsPerRow": {
"description": "Number of plots per row in single job view",
"type": "integer"
},
"plot_view_showPolarplot": {
"description": "Option to toggle polar plot in single job view",
"type": "boolean"
},
"plot_view_showRoofline": {
"description": "Option to toggle roofline plot in single job view",
"type": "boolean"
},
"plot_view_showStatTable": {
"description": "Option to toggle the node statistic table in single job view",
"type": "boolean"
},
"system_view_selectedMetric": {
"description": "Initial metric shown in system view",
"type": "string"
},
"analysis_view_histogramMetrics": {
"description": "Metrics to show as job count histograms in analysis view",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
},
"analysis_view_scatterPlotMetrics": {
"description": "Initial scatter plto configuration in analysis view",
"type": "array",
"items": {
"type": "array",
"items": {
"type": "string",
"minItems": 2,
"maxItems": 2
},
"minItems": 1
}
},
"job_view_nodestats_selectedMetrics": {
"description": "Initial metrics shown in node statistics table of single job view",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
},
"job_view_polarPlotMetrics": {
"description": "Metrics shown in polar plot of single job view",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
},
"job_view_selectedMetrics": {
"description": "",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
},
"plot_general_colorscheme": {
"description": "Initial color scheme",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
},
"plot_list_selectedMetrics": {
"description": "Initial metric plots shown in jobs lists",
"type": "array",
"items": {
"type": "string",
"minItems": 1
}
}
},
"required": [
"plot_general_colorBackground",
"plot_general_lineWidth",
"plot_list_jobsPerPage",
"plot_view_plotsPerRow",
"plot_view_showPolarplot",
"plot_view_showRoofline",
"plot_view_showStatTable",
"system_view_selectedMetric",
"analysis_view_histogramMetrics",
"analysis_view_scatterPlotMetrics",
"job_view_nodestats_selectedMetrics",
"job_view_polarPlotMetrics",
"job_view_selectedMetrics",
"plot_general_colorscheme",
"plot_list_selectedMetrics",
"plot_list_hideShortRunningJobs"
]
}
},
"required": [
"clusters"
]
}

View File

@ -0,0 +1,489 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://job-data.schema.json",
"title": "Job metric data list",
"description": "Collection of metric data of a HPC job",
"type": "object",
"properties": {
"mem_used": {
"description": "Memory capacity used",
"type": "object",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"flops_any": {
"description": "Total flop rate with DP flops scaled up",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"mem_bw": {
"description": "Main memory bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"net_bw": {
"description": "Total fast interconnect network bandwidth",
"type": "object",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ipc": {
"description": "Instructions executed per cycle",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_used": {
"description": "CPU active core utilization",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_load": {
"description": "CPU requested core utilization (load 1m)",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"flops_dp": {
"description": "Double precision flop rate",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"flops_sp": {
"description": "Single precision flops rate",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"vectorization_ratio": {
"description": "Fraction of arithmetic instructions using SIMD instructions",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"cpu_power": {
"description": "CPU power consumption",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"mem_power": {
"description": "Memory power consumption",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"acc_utilization": {
"description": "GPU utilization",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"acc_mem_used": {
"description": "GPU memory capacity used",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"acc_power": {
"description": "GPU power consumption",
"properties": {
"accelerator": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"accelerator"
]
},
"clock": {
"description": "Average core frequency",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"socket": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"memoryDomain": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"core": {
"$ref": "embedfs://job-metric-data.schema.json"
},
"hwthread": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"minProperties": 1
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"filesystems": {
"description": "Array of filesystems",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string",
"enum": [
"nfs",
"lustre",
"gpfs",
"nvme",
"ssd",
"hdd",
"beegfs"
]
},
"read_bw": {
"description": "File system read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"write_bw": {
"description": "File system write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"read_req": {
"description": "File system read requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"write_req": {
"description": "File system write requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"inodes": {
"description": "File system write requests",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"accesses": {
"description": "File system open and close",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"fsync": {
"description": "File system fsync",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"create": {
"description": "File system create",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"open": {
"description": "File system open",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"close": {
"description": "File system close",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"seek": {
"description": "File system seek",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
}
},
"required": [
"name",
"type",
"read_bw",
"write_bw"
]
},
"minItems": 1
}
},
"ic_rcv_packets": {
"description": "Network interconnect read packets",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ic_send_packets": {
"description": "Network interconnect send packet",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ic_read_bw": {
"description": "Network interconnect read bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"ic_write_bw": {
"description": "Network interconnect write bandwidth",
"properties": {
"node": {
"$ref": "embedfs://job-metric-data.schema.json"
}
},
"required": [
"node"
]
},
"required": [
"cpu_used",
"mem_used",
"flops_any",
"mem_bw",
"net_bw",
"filesystems"
]
}

View File

@ -0,0 +1,355 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://job-meta.schema.json",
"title": "Job meta data",
"description": "Meta data information of a HPC job",
"type": "object",
"properties": {
"jobId": {
"description": "The unique identifier of a job",
"type": "integer"
},
"user": {
"description": "The unique identifier of a user",
"type": "string"
},
"project": {
"description": "The unique identifier of a project",
"type": "string"
},
"cluster": {
"description": "The unique identifier of a cluster",
"type": "string"
},
"subCluster": {
"description": "The unique identifier of a sub cluster",
"type": "string"
},
"partition": {
"description": "The Slurm partition to which the job was submitted",
"type": "string"
},
"arrayJobId": {
"description": "The unique identifier of an array job",
"type": "integer"
},
"numNodes": {
"description": "Number of nodes used",
"type": "integer",
"exclusiveMinimum": 0
},
"numHwthreads": {
"description": "Number of HWThreads used",
"type": "integer",
"exclusiveMinimum": 0
},
"numAcc": {
"description": "Number of accelerators used",
"type": "integer",
"exclusiveMinimum": 0
},
"exclusive": {
"description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user",
"type": "integer",
"minimum": 0,
"maximum": 2
},
"monitoringStatus": {
"description": "State of monitoring system during job run",
"type": "integer"
},
"smt": {
"description": "SMT threads used by job",
"type": "integer"
},
"walltime": {
"description": "Requested walltime of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"jobState": {
"description": "Final state of job",
"type": "string",
"enum": [
"completed",
"failed",
"cancelled",
"stopped",
"out_of_memory",
"timeout"
]
},
"startTime": {
"description": "Start epoch time stamp in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"stopTime": {
"description": "Stop epoch time stamp in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"duration": {
"description": "Duration of job in seconds",
"type": "integer",
"exclusiveMinimum": 0
},
"resources": {
"description": "Resources used by job",
"type": "array",
"items": {
"type": "object",
"properties": {
"hostname": {
"type": "string"
},
"hwthreads": {
"type": "array",
"description": "List of OS processor ids",
"items": {
"type": "integer"
}
},
"accelerators": {
"type": "array",
"description": "List of of accelerator device ids",
"items": {
"type": "string"
}
},
"configuration": {
"type": "string",
"description": "The configuration options of the node"
}
},
"required": [
"hostname"
],
"minItems": 1
}
},
"metaData": {
"description": "Additional information about the job",
"type": "object",
"properties": {
"jobScript": {
"type": "string",
"description": "The batch script of the job"
},
"jobName": {
"type": "string",
"description": "Slurm Job name"
},
"slurmInfo": {
"type": "string",
"description": "Additional slurm infos as show by scontrol show job"
}
}
},
"tags": {
"description": "List of tags",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string"
}
},
"required": [
"name",
"type"
]
},
"uniqueItems": true
},
"statistics": {
"description": "Job statistic data",
"type": "object",
"properties": {
"mem_used": {
"description": "Memory capacity used (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"cpu_load": {
"description": "CPU requested core utilization (load 1m) (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"flops_any": {
"description": "Total flop rate with DP flops scaled up (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"mem_bw": {
"description": "Main memory bandwidth (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"net_bw": {
"description": "Total fast interconnect network bandwidth (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"file_bw": {
"description": "Total file IO bandwidth (required)",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ipc": {
"description": "Instructions executed per cycle",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"cpu_used": {
"description": "CPU active core utilization",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"flops_dp": {
"description": "Double precision flop rate",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"flops_sp": {
"description": "Single precision flops rate",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"rapl_power": {
"description": "CPU power consumption",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_used": {
"description": "GPU utilization",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_mem_used": {
"description": "GPU memory capacity used",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"acc_power": {
"description": "GPU power consumption",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"clock": {
"description": "Average core frequency",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"eth_read_bw": {
"description": "Ethernet read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"eth_write_bw": {
"description": "Ethernet write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_rcv_packets": {
"description": "Network interconnect read packets",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_send_packets": {
"description": "Network interconnect send packet",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_read_bw": {
"description": "Network interconnect read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"ic_write_bw": {
"description": "Network interconnect write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"filesystems": {
"description": "Array of filesystems",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"type": {
"type": "string",
"enum": [
"nfs",
"lustre",
"gpfs",
"nvme",
"ssd",
"hdd",
"beegfs"
]
},
"read_bw": {
"description": "File system read bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"write_bw": {
"description": "File system write bandwidth",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"read_req": {
"description": "File system read requests",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"write_req": {
"description": "File system write requests",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"inodes": {
"description": "File system write requests",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"accesses": {
"description": "File system open and close",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"fsync": {
"description": "File system fsync",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"create": {
"description": "File system create",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"open": {
"description": "File system open",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"close": {
"description": "File system close",
"$ref": "embedfs://job-metric-statistics.schema.json"
},
"seek": {
"description": "File system seek",
"$ref": "embedfs://job-metric-statistics.schema.json"
}
},
"required": [
"name",
"type",
"read_bw",
"write_bw"
]
},
"minItems": 1
}
},
"required": [
"cpu_used",
"mem_used",
"flops_any",
"mem_bw"
]
}
},
"required": [
"jobId",
"user",
"project",
"cluster",
"numNodes",
"exclusive",
"startTime",
"jobState",
"duration",
"resources",
"tags",
"statistics"
]
}

View File

@ -0,0 +1,216 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://job-metric-data.schema.json",
"title": "Job metric data",
"description": "Metric data of a HPC job",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"timestep": {
"description": "Measurement interval in seconds",
"type": "integer"
},
"thresholds": {
"description": "Metric thresholds for specific system",
"type": "object",
"properties": {
"peak": {
"type": "number"
},
"normal": {
"type": "number"
},
"caution": {
"type": "number"
},
"alert": {
"type": "number"
}
}
},
"statisticsSeries": {
"type": "object",
"description": "Statistics series across topology",
"properties": {
"min": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"max": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"mean": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"percentiles": {
"type": "object",
"properties": {
"10": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"20": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"30": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"40": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"50": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"60": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"70": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"80": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"90": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"25": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
},
"75": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 3
}
}
}
}
},
"series": {
"type": "array",
"items": {
"type": "object",
"properties": {
"hostname": {
"type": "string"
},
"id": {
"type": "string"
},
"statistics": {
"type": "object",
"description": "Statistics across time dimension",
"properties": {
"avg": {
"description": "Series average",
"type": "number",
"minimum": 0
},
"min": {
"description": "Series minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Series maximum",
"type": "number",
"minimum": 0
}
},
"required": [
"avg",
"min",
"max"
]
},
"data": {
"type": "array",
"items": {
"type": "number",
"minimum": 0
},
"minItems": 1
}
},
"required": [
"hostname",
"statistics",
"data"
]
}
}
},
"required": [
"unit",
"timestep",
"series"
]
}

View File

@ -0,0 +1,34 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "schemafs://job-metric-statistics.schema.json",
"title": "Job statistics",
"description": "Format specification for job metric statistics",
"type": "object",
"properties": {
"unit": {
"description": "Metric unit",
"$ref": "embedfs://unit.schema.json"
},
"avg": {
"description": "Job metric average",
"type": "number",
"minimum": 0
},
"min": {
"description": "Job metric minimum",
"type": "number",
"minimum": 0
},
"max": {
"description": "Job metric maximum",
"type": "number",
"minimum": 0
}
},
"required": [
"unit",
"avg",
"min",
"max"
]
}

View File

@ -0,0 +1,41 @@
{
"$schema": "http://json-schema.org/draft/2020-12/schema",
"$id": "embedfs://unit.schema.json",
"title": "Metric unit",
"description": "Format specification for job metric units",
"type": "object",
"properties": {
"base_unit": {
"description": "Metric base unit",
"type": "string",
"enum": [
"B",
"F",
"B/s",
"F/s",
"CPI",
"IPC",
"load",
"Hz",
"W",
"°C",
""
]
},
"prefix": {
"description": "Unit prefix",
"type": "string",
"enum": [
"K",
"M",
"G",
"T",
"P",
"E"
]
}
},
"required": [
"base_unit"
]
}

66
pkg/schema/validate.go Normal file
View File

@ -0,0 +1,66 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package schema
import (
"embed"
"encoding/json"
"fmt"
"io"
"path/filepath"
"strings"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/santhosh-tekuri/jsonschema/v5"
)
type Kind int
const (
Meta Kind = iota + 1
Data
Config
ClusterCfg
)
//go:embed schemas/*
var schemaFiles embed.FS
func Validate(k Kind, r io.Reader) (err error) {
jsonschema.Loaders["embedfs"] = func(s string) (io.ReadCloser, error) {
f := filepath.Join("schemas", strings.Split(s, "//")[1])
return schemaFiles.Open(f)
}
var s *jsonschema.Schema
switch k {
case Meta:
s, err = jsonschema.Compile("embedfs://job-meta.schema.json")
case Data:
s, err = jsonschema.Compile("embedfs://job-data.schema.json")
case ClusterCfg:
s, err = jsonschema.Compile("embedfs://cluster.schema.json")
case Config:
s, err = jsonschema.Compile("embedfs://config.schema.json")
default:
return fmt.Errorf("unkown schema kind ")
}
if err != nil {
return err
}
var v interface{}
if err := json.NewDecoder(r).Decode(&v); err != nil {
log.Errorf("schema.Validate() - Failed to decode %v", err)
return err
}
if err = s.Validate(v); err != nil {
return fmt.Errorf("%#v", err)
}
return nil
}

View File

@ -0,0 +1,80 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package schema
import (
"bytes"
"testing"
)
func TestValidateConfig(t *testing.T) {
json := []byte(`{
"clusters": [
{
"name": "testcluster",
"metricDataRepository": {
"kind": "cc-metric-store",
"url": "localhost:8082"},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
}
]
}`)
if err := Validate(Config, bytes.NewReader(json)); err != nil {
t.Errorf("Error is not nil! %v", err)
}
}
func TestValidateJobMeta(t *testing.T) {
}
func TestValidateCluster(t *testing.T) {
json := []byte(`{
"name": "emmy",
"subClusters": [
{
"name": "main",
"processorType": "Intel IvyBridge",
"socketsPerNode": 2,
"coresPerSocket": 10,
"threadsPerCore": 2,
"flopRateScalar": 44,
"flopRateSimd": 704,
"memoryBandwidth": 80,
"topology": {
"node": [0,20,1,21,2,22,3,23,4,24,5,25,6,26,7,27,8,28,9,29,10,30,11,31,12,32,13,33,14,34,15,35,16,36,17,37,18,38,19,39],
"socket": [
[0,20,1,21,2,22,3,23,4,24,5,25,6,26,7,27,8,28,9,29],
[10,30,11,31,12,32,13,33,14,34,15,35,16,36,17,37,18,38,19,39]
],
"memoryDomain": [
[0,20,1,21,2,22,3,23,4,24,5,25,6,26,7,27,8,28,9,29],
[10,30,11,31,12,32,13,33,14,34,15,35,16,36,17,37,18,38,19,39]
],
"core": [
[0,20],[1,21],[2,22],[3,23],[4,24],[5,25],[6,26],[7,27],[8,28],[9,29],[10,30],[11,31],[12,32],[13,33],[14,34],[15,35],[16,36],[17,37],[18,38],[19,39]
]
}
}
],
"metricConfig": [
{
"name": "cpu_load",
"scope": "hwthread",
"unit": "load",
"timestep": 60
}
]
}`)
if err := Validate(ClusterCfg, bytes.NewReader(json)); err != nil {
t.Errorf("Error is not nil! %v", err)
}
}

174
pkg/units/README.md Normal file
View File

@ -0,0 +1,174 @@
# cc-units - A unit system for ClusterCockpit
When working with metrics, the problem comes up that they may use different unit name but have the same unit in fact. There are a lot of real world examples like 'kB' and 'Kbyte'. In [cc-metric-collector](https://github.com/ClusterCockpit/cc-metric-collector), the collectors read data from different sources which may use different units or the programmer specifies a unit for a metric by hand. The cc-units system is not comparable with the SI unit system. If you are looking for a package for the SI units, see [here](https://pkg.go.dev/github.com/gurre/si).
In order to enable unit comparison and conversion, the ccUnits package provides some helpers:
```go
NewUnit(unit string) Unit // create a new unit from some string like 'GHz', 'Mbyte' or 'kevents/s'
func GetUnitUnitFactor(in Unit, out Unit) (func(value float64) float64, error) // Get conversion function between two units
func GetPrefixFactor(in Prefix, out Prefix) func(value float64) float64 // Get conversion function between two prefixes
func GetUnitPrefixFactor(in Unit, out Prefix) (func(value float64) float64, Unit) // Get conversion function for prefix changes and the new unit for further use
type Unit interface {
Valid() bool
String() string
Short() string
AddUnitDenominator(div Measure)
}
```
In order to get the "normalized" string unit back or test for validity, you can use:
```go
u := NewUnit("MB")
fmt.Println(u.Valid()) // true
fmt.Printf("Long string %q", u.String()) // MegaBytes
fmt.Printf("Short string %q", u.Short()) // MBytes
v := NewUnit("foo")
fmt.Println(v.Valid()) // false
```
If you have two units or other components and need the conversion function:
```go
// Get conversion functions for 'kB' to 'MBytes'
u1 := NewUnit("kB")
u2 := NewUnit("MBytes")
convFunc, err := GetUnitUnitFactor(u1, u2) // Returns an error if the units have different measures
if err == nil {
v2 := convFunc(v1)
fmt.Printf("%f %s\n", v2, u2.Short())
}
// Get conversion function for 'kB' -> 'G' prefix.
// Returns the function and the new unit 'GBytes'
p1 := NewPrefix("G")
convFunc, u_p1 := GetUnitPrefixFactor(u1, p1)
// or
// convFunc, u_p1 := GetUnitPrefixStringFactor(u1, "G")
if convFunc != nil {
v2 := convFunc(v1)
fmt.Printf("%f %s\n", v2, u_p1.Short())
}
// Get conversion function for two prefixes: 'G' -> 'T'
p2 := NewPrefix("T")
convFunc = GetPrefixPrefixFactor(p1, p2)
if convFunc != nil {
v2 := convFunc(v1)
fmt.Printf("%f %s -> %f %s\n", v1, p1.Prefix(), v2, p2.Prefix())
}
```
(In the ClusterCockpit ecosystem the separation between values and units if useful since they are commonly not stored as a single entity but the value is a field in the CCMetric while unit is a tag or a meta information).
If you have a metric and want the derivation to a bandwidth or events per second, you can use the original unit:
```go
in_unit, err := metric.GetMeta("unit")
if err == nil {
value, ok := metric.GetField("value")
if ok {
out_unit = NewUnit(in_unit)
out_unit.AddUnitDenominator("seconds")
seconds := timeDiff.Seconds()
y, err := lp.New(metric.Name()+"_bw",
metric.Tags(),
metric.Meta(),
map[string]interface{"value": value/seconds},
metric.Time())
if err == nil {
y.AddMeta("unit", out_unit.Short())
}
}
}
```
## Special unit detection
Some used measures like Bytes and Flops are non-dividable. Consequently there prefixes like Milli, Micro and Nano are not useful. This is quite handy since a unit `mb` for `MBytes` is not uncommon but would by default be parsed as "MilliBytes".
Special parsing rules for the following measures: iff `prefix==Milli`, use `prefix==Mega`
- `Bytes`
- `Flops`
- `Packets`
- `Events`
- `Cycles`
- `Requests`
This means the prefixes `Micro` (like `ubytes`) and `Nano` like (`nflops/sec`) are not allowed and return an invalid unit. But you can specify `mflops` and `mb`.
Prefixes for `%` or `percent` are ignored.
## Supported prefixes
```go
const (
Base Prefix = 1
Exa = 1e18
Peta = 1e15
Tera = 1e12
Giga = 1e9
Mega = 1e6
Kilo = 1e3
Milli = 1e-3
Micro = 1e-6
Nano = 1e-9
Kibi = 1024
Mebi = 1024 * 1024
Gibi = 1024 * 1024 * 1024
Tebi = 1024 * 1024 * 1024 * 1024
)
```
The prefixes are detected using a regular expression `^([kKmMgGtTpP]?[i]?)(.*)` that splits the prefix from the measure. You probably don't need to deal with the prefixes in the code.
## Supported measures
```go
const (
None Measure = iota
Bytes
Flops
Percentage
TemperatureC
TemperatureF
Rotation
Hertz
Time
Watt
Joule
Cycles
Requests
Packets
Events
)
```
There a regular expression for each of the measures like `^([bB][yY]?[tT]?[eE]?[sS]?)` for the `Bytes` measure.
## New units
If the selected units are not suitable for your metric, feel free to send a PR.
### New prefix
For a new prefix, add it to the big `const` in `ccUnitPrefix.go` and adjust the prefix-unit-splitting regular expression. Afterwards, you have to add cases to the three functions `String()`, `Prefix()` and `NewPrefix()`. `NewPrefix()` contains the parser (`k` or `K` -> `Kilo`). The other one are used for output. `String()` outputs a longer version of the prefix (`Kilo`), while `Prefix()` returns only the short notation (`K`).
### New measure
Adding new prefixes is probably rare but adding a new measure is a more common task. At first, add it to the big `const` in `ccUnitMeasure.go`. Moreover, create a regular expression matching the measure (and pre-compile it like the others). Add the expression matching to `NewMeasure()`. The `String()` and `Short()` functions return descriptive strings for the measure in long form (like `Hertz`) and short form (like `Hz`).
If there are special conversation rules between measures and you want to convert one measure to another, like temperatures in Celsius to Fahrenheit, a special case in `GetUnitPrefixFactor()` is required.
### Special parsing rules
The two parsers for prefix and measure are called under the hood by `NewUnit()` and there might some special rules apply. Like in the above section about 'special unit detection', special rules for your new measure might be required. Currently there are two special cases:
- Measures that are non-dividable like Flops, Bytes, Events, ... cannot use `Milli`, `Micro` and `Nano`. The prefix `m` is forced to `M` for these measures
- If the prefix is `p`/`P` (`Peta`) or `e`/`E` (`Exa`) and the measure is not detectable, it retries detection with the prefix. So first round it tries, for example, prefix `p` and measure `ackets` which fails, so it retries the detection with measure `packets` and `<empty>` prefix (resolves to `Base` prefix).
## Limitations
The `ccUnits` package is a simple implemtation of a unit system and comes with some limitations:
- The unit denominator (like `s` in `Mbyte/s`) can only have the `Base` prefix, you cannot specify `Byte/ms` for "Bytes per milli second".

134
pkg/units/unitMeasure.go Normal file
View File

@ -0,0 +1,134 @@
package units
import "regexp"
type Measure int
const (
InvalidMeasure Measure = iota
Bytes
Flops
Percentage
TemperatureC
TemperatureF
Rotation
Frequency
Time
Watt
Joule
Cycles
Requests
Packets
Events
)
type MeasureData struct {
Long string
Short string
Regex string
}
// Different names and regex used for input and output
var InvalidMeasureLong string = "Invalid"
var InvalidMeasureShort string = "inval"
var MeasuresMap map[Measure]MeasureData = map[Measure]MeasureData{
Bytes: {
Long: "byte",
Short: "B",
Regex: "^([bB][yY]?[tT]?[eE]?[sS]?)",
},
Flops: {
Long: "Flops",
Short: "Flops",
Regex: "^([fF][lL]?[oO]?[pP]?[sS]?)",
},
Percentage: {
Long: "Percent",
Short: "%",
Regex: "^(%|[pP]ercent)",
},
TemperatureC: {
Long: "DegreeC",
Short: "degC",
Regex: "^(deg[Cc]|°[cC])",
},
TemperatureF: {
Long: "DegreeF",
Short: "degF",
Regex: "^(deg[fF]|°[fF])",
},
Rotation: {
Long: "RPM",
Short: "RPM",
Regex: "^([rR][pP][mM])",
},
Frequency: {
Long: "Hertz",
Short: "Hz",
Regex: "^([hH][eE]?[rR]?[tT]?[zZ])",
},
Time: {
Long: "Seconds",
Short: "s",
Regex: "^([sS][eE]?[cC]?[oO]?[nN]?[dD]?[sS]?)",
},
Cycles: {
Long: "Cycles",
Short: "cyc",
Regex: "^([cC][yY][cC]?[lL]?[eE]?[sS]?)",
},
Watt: {
Long: "Watts",
Short: "W",
Regex: "^([wW][aA]?[tT]?[tT]?[sS]?)",
},
Joule: {
Long: "Joules",
Short: "J",
Regex: "^([jJ][oO]?[uU]?[lL]?[eE]?[sS]?)",
},
Requests: {
Long: "Requests",
Short: "requests",
Regex: "^([rR][eE][qQ][uU]?[eE]?[sS]?[tT]?[sS]?)",
},
Packets: {
Long: "Packets",
Short: "packets",
Regex: "^([pP][aA]?[cC]?[kK][eE]?[tT][sS]?)",
},
Events: {
Long: "Events",
Short: "events",
Regex: "^([eE][vV]?[eE]?[nN][tT][sS]?)",
},
}
// String returns the long string for the measure like 'Percent' or 'Seconds'
func (m *Measure) String() string {
if data, ok := MeasuresMap[*m]; ok {
return data.Long
}
return InvalidMeasureLong
}
// Short returns the short string for the measure like 'B' (Bytes), 's' (Time) or 'W' (Watt). Is is recommened to use Short() over String().
func (m *Measure) Short() string {
if data, ok := MeasuresMap[*m]; ok {
return data.Short
}
return InvalidMeasureShort
}
// NewMeasure creates a new measure out of a string representing a measure like 'Bytes', 'Flops' and 'precent'.
// It uses regular expressions for matching.
func NewMeasure(unit string) Measure {
for m, data := range MeasuresMap {
regex := regexp.MustCompile(data.Regex)
match := regex.FindStringSubmatch(unit)
if match != nil {
return m
}
}
return InvalidMeasure
}

174
pkg/units/unitPrefix.go Normal file
View File

@ -0,0 +1,174 @@
package units
import (
"regexp"
)
type Prefix float64
const (
InvalidPrefix Prefix = iota
Base = 1
Yotta = 1e24
Zetta = 1e21
Exa = 1e18
Peta = 1e15
Tera = 1e12
Giga = 1e9
Mega = 1e6
Kilo = 1e3
Milli = 1e-3
Micro = 1e-6
Nano = 1e-9
Kibi = 1024
Mebi = 1024 * 1024
Gibi = 1024 * 1024 * 1024
Tebi = 1024 * 1024 * 1024 * 1024
Pebi = 1024 * 1024 * 1024 * 1024 * 1024
Exbi = 1024 * 1024 * 1024 * 1024 * 1024 * 1024
Zebi = 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024
Yobi = 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024 * 1024
)
const PrefixUnitSplitRegexStr = `^([kKmMgGtTpPeEzZyY]?[i]?)(.*)`
var prefixUnitSplitRegex = regexp.MustCompile(PrefixUnitSplitRegexStr)
type PrefixData struct {
Long string
Short string
Regex string
}
// Different names and regex used for input and output
var InvalidPrefixLong string = "Invalid"
var InvalidPrefixShort string = "inval"
var PrefixDataMap map[Prefix]PrefixData = map[Prefix]PrefixData{
Base: {
Long: "",
Short: "",
Regex: "^$",
},
Kilo: {
Long: "Kilo",
Short: "K",
Regex: "^[kK]$",
},
Mega: {
Long: "Mega",
Short: "M",
Regex: "^[M]$",
},
Giga: {
Long: "Giga",
Short: "G",
Regex: "^[gG]$",
},
Tera: {
Long: "Tera",
Short: "T",
Regex: "^[tT]$",
},
Peta: {
Long: "Peta",
Short: "P",
Regex: "^[pP]$",
},
Exa: {
Long: "Exa",
Short: "E",
Regex: "^[eE]$",
},
Zetta: {
Long: "Zetta",
Short: "Z",
Regex: "^[zZ]$",
},
Yotta: {
Long: "Yotta",
Short: "Y",
Regex: "^[yY]$",
},
Milli: {
Long: "Milli",
Short: "m",
Regex: "^[m]$",
},
Micro: {
Long: "Micro",
Short: "u",
Regex: "^[u]$",
},
Nano: {
Long: "Nano",
Short: "n",
Regex: "^[n]$",
},
Kibi: {
Long: "Kibi",
Short: "Ki",
Regex: "^[kK][i]$",
},
Mebi: {
Long: "Mebi",
Short: "Mi",
Regex: "^[M][i]$",
},
Gibi: {
Long: "Gibi",
Short: "Gi",
Regex: "^[gG][i]$",
},
Tebi: {
Long: "Tebi",
Short: "Ti",
Regex: "^[tT][i]$",
},
Pebi: {
Long: "Pebi",
Short: "Pi",
Regex: "^[pP][i]$",
},
Exbi: {
Long: "Exbi",
Short: "Ei",
Regex: "^[eE][i]$",
},
Zebi: {
Long: "Zebi",
Short: "Zi",
Regex: "^[zZ][i]$",
},
Yobi: {
Long: "Yobi",
Short: "Yi",
Regex: "^[yY][i]$",
},
}
// String returns the long string for the prefix like 'Kilo' or 'Mega'
func (p *Prefix) String() string {
if data, ok := PrefixDataMap[*p]; ok {
return data.Long
}
return InvalidMeasureLong
}
// Prefix returns the short string for the prefix like 'K', 'M' or 'G'. Is is recommened to use Prefix() over String().
func (p *Prefix) Prefix() string {
if data, ok := PrefixDataMap[*p]; ok {
return data.Short
}
return InvalidMeasureShort
}
// NewPrefix creates a new prefix out of a string representing a unit like 'k', 'K', 'M' or 'G'.
func NewPrefix(prefix string) Prefix {
for p, data := range PrefixDataMap {
regex := regexp.MustCompile(data.Regex)
match := regex.FindStringSubmatch(prefix)
if match != nil {
return p
}
}
return InvalidPrefix
}

250
pkg/units/units.go Normal file
View File

@ -0,0 +1,250 @@
// Unit system for cluster monitoring metrics like bytes, flops and events
package units
import (
"fmt"
"strings"
)
type unit struct {
prefix Prefix
measure Measure
divMeasure Measure
}
type Unit interface {
Valid() bool
String() string
Short() string
AddUnitDenominator(div Measure)
getPrefix() Prefix
getMeasure() Measure
getUnitDenominator() Measure
setPrefix(p Prefix)
}
var INVALID_UNIT = NewUnit("foobar")
// Valid checks whether a unit is a valid unit. A unit is valid if it has at least a prefix and a measure. The unit denominator is optional.
func (u *unit) Valid() bool {
return u.prefix != InvalidPrefix && u.measure != InvalidMeasure
}
// String returns the long string for the unit like 'KiloHertz' or 'MegaBytes'
func (u *unit) String() string {
if u.divMeasure != InvalidMeasure {
return fmt.Sprintf("%s%s/%s", u.prefix.String(), u.measure.String(), u.divMeasure.String())
} else {
return fmt.Sprintf("%s%s", u.prefix.String(), u.measure.String())
}
}
// Short returns the short string for the unit like 'kHz' or 'MByte'. Is is recommened to use Short() over String().
func (u *unit) Short() string {
if u.divMeasure != InvalidMeasure {
return fmt.Sprintf("%s%s/%s", u.prefix.Prefix(), u.measure.Short(), u.divMeasure.Short())
} else {
return fmt.Sprintf("%s%s", u.prefix.Prefix(), u.measure.Short())
}
}
// AddUnitDenominator adds a unit denominator to an exising unit. Can be used if you want to derive e.g. data volume to bandwidths.
// The data volume is in a Byte unit like 'kByte' and by dividing it by the runtime in seconds, we get the bandwidth. We can use the
// data volume unit and add 'Second' as unit denominator
func (u *unit) AddUnitDenominator(div Measure) {
u.divMeasure = div
}
func (u *unit) getPrefix() Prefix {
return u.prefix
}
func (u *unit) setPrefix(p Prefix) {
u.prefix = p
}
func (u *unit) getMeasure() Measure {
return u.measure
}
func (u *unit) getUnitDenominator() Measure {
return u.divMeasure
}
// GetPrefixPrefixFactor creates the default conversion function between two prefixes.
// It returns a conversation function for the value.
func GetPrefixPrefixFactor(in Prefix, out Prefix) func(value interface{}) interface{} {
var factor = 1.0
var in_prefix = float64(in)
var out_prefix = float64(out)
factor = in_prefix / out_prefix
conv := func(value interface{}) interface{} {
switch v := value.(type) {
case float64:
return v * factor
case float32:
return float32(float64(v) * factor)
case int:
return int(float64(v) * factor)
case int32:
return int32(float64(v) * factor)
case int64:
return int64(float64(v) * factor)
case uint:
return uint(float64(v) * factor)
case uint32:
return uint32(float64(v) * factor)
case uint64:
return uint64(float64(v) * factor)
}
return value
}
return conv
}
// This is the conversion function between temperatures in Celsius to Fahrenheit
func convertTempC2TempF(value interface{}) interface{} {
switch v := value.(type) {
case float64:
return (v * 1.8) + 32
case float32:
return (v * 1.8) + 32
case int:
return int((float64(v) * 1.8) + 32)
case int32:
return int32((float64(v) * 1.8) + 32)
case int64:
return int64((float64(v) * 1.8) + 32)
case uint:
return uint((float64(v) * 1.8) + 32)
case uint32:
return uint32((float64(v) * 1.8) + 32)
case uint64:
return uint64((float64(v) * 1.8) + 32)
}
return value
}
// This is the conversion function between temperatures in Fahrenheit to Celsius
func convertTempF2TempC(value interface{}) interface{} {
switch v := value.(type) {
case float64:
return (v - 32) / 1.8
case float32:
return (v - 32) / 1.8
case int:
return int(((float64(v) - 32) / 1.8))
case int32:
return int32(((float64(v) - 32) / 1.8))
case int64:
return int64(((float64(v) - 32) / 1.8))
case uint:
return uint(((float64(v) - 32) / 1.8))
case uint32:
return uint32(((float64(v) - 32) / 1.8))
case uint64:
return uint64(((float64(v) - 32) / 1.8))
}
return value
}
// GetPrefixStringPrefixStringFactor is a wrapper for GetPrefixPrefixFactor with string inputs instead
// of prefixes. It also returns a conversation function for the value.
func GetPrefixStringPrefixStringFactor(in string, out string) func(value interface{}) interface{} {
var i Prefix = NewPrefix(in)
var o Prefix = NewPrefix(out)
return GetPrefixPrefixFactor(i, o)
}
// GetUnitPrefixFactor gets the conversion function and resulting unit for a unit and a prefix. This is
// the most common case where you have some input unit and want to convert it to the same unit but with
// a different prefix. The returned unit represents the value after conversation.
func GetUnitPrefixFactor(in Unit, out Prefix) (func(value interface{}) interface{}, Unit) {
outUnit := NewUnit(in.Short())
if outUnit.Valid() {
outUnit.setPrefix(out)
conv := GetPrefixPrefixFactor(in.getPrefix(), out)
return conv, outUnit
}
return nil, INVALID_UNIT
}
// GetUnitPrefixStringFactor gets the conversion function and resulting unit for a unit and a prefix as string.
// It is a wrapper for GetUnitPrefixFactor
func GetUnitPrefixStringFactor(in Unit, out string) (func(value interface{}) interface{}, Unit) {
var o Prefix = NewPrefix(out)
return GetUnitPrefixFactor(in, o)
}
// GetUnitStringPrefixStringFactor gets the conversion function and resulting unit for a unit and a prefix when both are only string representations.
// This is just a wrapper for GetUnitPrefixFactor with the given input unit and the desired output prefix.
func GetUnitStringPrefixStringFactor(in string, out string) (func(value interface{}) interface{}, Unit) {
var i = NewUnit(in)
return GetUnitPrefixStringFactor(i, out)
}
// GetUnitUnitFactor gets the conversion function and (maybe) error for unit to unit conversion.
// It is basically a wrapper for GetPrefixPrefixFactor with some special cases for temperature
// conversion between Fahrenheit and Celsius.
func GetUnitUnitFactor(in Unit, out Unit) (func(value interface{}) interface{}, error) {
if in.getMeasure() == TemperatureC && out.getMeasure() == TemperatureF {
return convertTempC2TempF, nil
} else if in.getMeasure() == TemperatureF && out.getMeasure() == TemperatureC {
return convertTempF2TempC, nil
} else if in.getMeasure() != out.getMeasure() || in.getUnitDenominator() != out.getUnitDenominator() {
return func(value interface{}) interface{} { return 1.0 }, fmt.Errorf("invalid measures in in and out Unit")
}
return GetPrefixPrefixFactor(in.getPrefix(), out.getPrefix()), nil
}
// NewUnit creates a new unit out of a string representing a unit like 'Mbyte/s' or 'GHz'.
// It uses regular expressions to detect the prefix, unit and (maybe) unit denominator.
func NewUnit(unitStr string) Unit {
u := &unit{
prefix: InvalidPrefix,
measure: InvalidMeasure,
divMeasure: InvalidMeasure,
}
matches := prefixUnitSplitRegex.FindStringSubmatch(unitStr)
if len(matches) > 2 {
pre := NewPrefix(matches[1])
measures := strings.Split(matches[2], "/")
m := NewMeasure(measures[0])
// Special case for prefix 'p' or 'P' (Peta) and measures starting with 'p' or 'P'
// like 'packets' or 'percent'. Same for 'e' or 'E' (Exa) for measures starting with
// 'e' or 'E' like 'events'
if m == InvalidMeasure {
switch pre {
case Peta, Exa:
t := NewMeasure(matches[1] + measures[0])
if t != InvalidMeasure {
m = t
pre = Base
}
}
}
div := InvalidMeasure
if len(measures) > 1 {
div = NewMeasure(measures[1])
}
switch m {
// Special case for 'm' as prefix for Bytes and some others as thers is no unit like MilliBytes
case Bytes, Flops, Packets, Events, Cycles, Requests:
if pre == Milli {
pre = Mega
}
// Special case for percentage. No/ignore prefix
case Percentage:
pre = Base
}
if pre != InvalidPrefix && m != InvalidMeasure {
u.prefix = pre
u.measure = m
if div != InvalidMeasure {
u.divMeasure = div
}
}
}
return u
}

201
pkg/units/units_test.go Normal file
View File

@ -0,0 +1,201 @@
package units
import (
"fmt"
"regexp"
"testing"
)
func TestUnitsExact(t *testing.T) {
testCases := []struct {
in string
want Unit
}{
{"b", NewUnit("Bytes")},
{"B", NewUnit("Bytes")},
{"byte", NewUnit("Bytes")},
{"bytes", NewUnit("Bytes")},
{"BYtes", NewUnit("Bytes")},
{"Mb", NewUnit("MBytes")},
{"MB", NewUnit("MBytes")},
{"Mbyte", NewUnit("MBytes")},
{"Mbytes", NewUnit("MBytes")},
{"MbYtes", NewUnit("MBytes")},
{"Gb", NewUnit("GBytes")},
{"GB", NewUnit("GBytes")},
{"Hz", NewUnit("Hertz")},
{"MHz", NewUnit("MHertz")},
{"GHz", NewUnit("GHertz")},
{"pkts", NewUnit("Packets")},
{"packets", NewUnit("Packets")},
{"packet", NewUnit("Packets")},
{"flop", NewUnit("Flops")},
{"flops", NewUnit("Flops")},
{"floPS", NewUnit("Flops")},
{"Mflop", NewUnit("MFlops")},
{"Gflop", NewUnit("GFlops")},
{"gflop", NewUnit("GFlops")},
{"%", NewUnit("Percent")},
{"percent", NewUnit("Percent")},
{"degc", NewUnit("degC")},
{"degC", NewUnit("degC")},
{"degf", NewUnit("degF")},
{"°f", NewUnit("degF")},
{"events", NewUnit("events")},
{"event", NewUnit("events")},
{"EveNts", NewUnit("events")},
{"reqs", NewUnit("requests")},
{"reQuEsTs", NewUnit("requests")},
{"Requests", NewUnit("requests")},
{"cyc", NewUnit("cycles")},
{"cy", NewUnit("cycles")},
{"Cycles", NewUnit("cycles")},
{"J", NewUnit("Joules")},
{"Joule", NewUnit("Joules")},
{"joule", NewUnit("Joules")},
{"W", NewUnit("Watt")},
{"Watts", NewUnit("Watt")},
{"watt", NewUnit("Watt")},
{"s", NewUnit("seconds")},
{"sec", NewUnit("seconds")},
{"secs", NewUnit("seconds")},
{"RPM", NewUnit("rpm")},
{"rPm", NewUnit("rpm")},
{"watt/byte", NewUnit("W/B")},
{"watts/bytes", NewUnit("W/B")},
{"flop/byte", NewUnit("flops/Bytes")},
{"F/B", NewUnit("flops/Bytes")},
}
compareUnitExact := func(in, out Unit) bool {
if in.getMeasure() == out.getMeasure() && in.getUnitDenominator() == out.getUnitDenominator() && in.getPrefix() == out.getPrefix() {
return true
}
return false
}
for _, c := range testCases {
u := NewUnit(c.in)
if (!u.Valid()) || (!compareUnitExact(u, c.want)) {
t.Errorf("func NewUnit(%q) == %q, want %q", c.in, u.String(), c.want.String())
} else {
t.Logf("NewUnit(%q) == %q", c.in, u.String())
}
}
}
func TestUnitUnitConversion(t *testing.T) {
testCases := []struct {
in string
want Unit
prefixFactor float64
}{
{"kb", NewUnit("Bytes"), 1000},
{"Mb", NewUnit("Bytes"), 1000000},
{"Mb/s", NewUnit("Bytes/s"), 1000000},
{"Flops/s", NewUnit("MFlops/s"), 1e-6},
{"Flops/s", NewUnit("GFlops/s"), 1e-9},
{"MHz", NewUnit("Hertz"), 1e6},
{"kb", NewUnit("Kib"), 1000.0 / 1024},
{"Mib", NewUnit("MBytes"), (1024 * 1024.0) / (1e6)},
{"mb", NewUnit("MBytes"), 1.0},
}
compareUnitWithPrefix := func(in, out Unit, factor float64) bool {
if in.getMeasure() == out.getMeasure() && in.getUnitDenominator() == out.getUnitDenominator() {
if f := GetPrefixPrefixFactor(in.getPrefix(), out.getPrefix()); f(1.0) == factor {
return true
} else {
fmt.Println(f(1.0))
}
}
return false
}
for _, c := range testCases {
u := NewUnit(c.in)
if (!u.Valid()) || (!compareUnitWithPrefix(u, c.want, c.prefixFactor)) {
t.Errorf("GetPrefixPrefixFactor(%q, %q) invalid, want %q with factor %g", c.in, u.String(), c.want.String(), c.prefixFactor)
} else {
t.Logf("GetPrefixPrefixFactor(%q, %q) = %g", c.in, c.want.String(), c.prefixFactor)
}
}
}
func TestUnitPrefixConversion(t *testing.T) {
testCases := []struct {
in string
want string
prefixFactor float64
wantUnit Unit
}{
{"KBytes", "", 1000, NewUnit("Bytes")},
{"MBytes", "", 1e6, NewUnit("Bytes")},
{"MBytes", "G", 1e-3, NewUnit("GBytes")},
{"mb", "M", 1, NewUnit("MBytes")},
}
compareUnitPrefix := func(in Unit, out Prefix, factor float64, outUnit Unit) bool {
if in.Valid() {
conv, unit := GetUnitPrefixFactor(in, out)
value := conv(1.0)
if value == factor && unit.String() == outUnit.String() {
return true
}
}
return false
}
for _, c := range testCases {
u := NewUnit(c.in)
p := NewPrefix(c.want)
if (!u.Valid()) || (!compareUnitPrefix(u, p, c.prefixFactor, c.wantUnit)) {
t.Errorf("GetUnitPrefixFactor(%q, %q) invalid, want %q with factor %g", c.in, p.Prefix(), c.wantUnit.String(), c.prefixFactor)
} else {
t.Logf("GetUnitPrefixFactor(%q, %q) = %g", c.in, c.wantUnit.String(), c.prefixFactor)
}
}
}
func TestPrefixPrefixConversion(t *testing.T) {
testCases := []struct {
in string
want string
prefixFactor float64
}{
{"K", "", 1000},
{"M", "", 1e6},
{"M", "G", 1e-3},
{"", "M", 1e-6},
{"", "m", 1e3},
{"m", "n", 1e6},
//{"", "n", 1e9}, //does not work because of IEEE rounding problems
}
for _, c := range testCases {
i := NewPrefix(c.in)
o := NewPrefix(c.want)
if i != InvalidPrefix && o != InvalidPrefix {
conv := GetPrefixPrefixFactor(i, o)
value := conv(1.0)
if value != c.prefixFactor {
t.Errorf("GetPrefixPrefixFactor(%q, %q) invalid, want %q with factor %g but got %g", c.in, c.want, o.Prefix(), c.prefixFactor, value)
} else {
t.Logf("GetPrefixPrefixFactor(%q, %q) = %g", c.in, c.want, c.prefixFactor)
}
}
}
}
func TestMeasureRegex(t *testing.T) {
for _, data := range MeasuresMap {
_, err := regexp.Compile(data.Regex)
if err != nil {
t.Errorf("failed to compile regex '%s': %s", data.Regex, err.Error())
}
t.Logf("succussfully compiled regex '%s' for measure %s", data.Regex, data.Long)
}
}
func TestPrefixRegex(t *testing.T) {
for _, data := range PrefixDataMap {
_, err := regexp.Compile(data.Regex)
if err != nil {
t.Errorf("failed to compile regex '%s': %s", data.Regex, err.Error())
}
t.Logf("succussfully compiled regex '%s' for prefix %s", data.Regex, data.Long)
}
}

View File

@ -3,7 +3,7 @@
mkdir ./var
cd ./var
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive.tar.xz
wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar.xz
tar xJf job-archive.tar.xz
rm ./job-archive.tar.xz
@ -13,8 +13,8 @@ yarn install
yarn build
cd ../..
# Use your own keys in production!
cp ./configs/env-template.txt .env
cp ./docs/config.json config.json
go build ./cmd/cc-backend
./cc-backend --init-db --add-user demo:admin:AdminDev
./cc-backend --server --dev --init-db --add-user demo:admin:AdminDev

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"exclusive":1,"jobId":1403244,"statistics":{"mem_bw":{"avg":63.57,"min":0,"unit":"GB/s","max":74.5},"rapl_power":{"avg":228.07,"min":0,"unit":"W","max":258.56},"ipc":{"unit":"IPC","max":0.510204081632653,"avg":1.53846153846154,"min":0.0},"clock":{"min":1380.32,"avg":2599.39,"unit":"MHz","max":2634.46},"cpu_load":{"avg":18.4,"min":0,"max":23.58,"unit":"load"},"flops_any":{"max":404.62,"unit":"GF/s","avg":225.59,"min":0},"flops_dp":{"max":0.24,"unit":"GF/s","min":0,"avg":0},"mem_used":{"min":1.55,"avg":27.84,"unit":"GB","max":37.5},"flops_sp":{"min":0,"avg":225.59,"max":404.62,"unit":"GF/s"}},"resources":[{"hostname":"e0102"},{"hostname":"e0103"},{"hostname":"e0105"},{"hostname":"e0106"},{"hostname":"e0107"},{"hostname":"e0108"},{"hostname":"e0114"},{"hostname":"e0320"},{"hostname":"e0321"},{"hostname":"e0325"},{"hostname":"e0404"},{"hostname":"e0415"},{"hostname":"e0433"},{"hostname":"e0437"},{"hostname":"e0439"},{"hostname":"e0501"},{"hostname":"e0503"},{"hostname":"e0505"},{"hostname":"e0506"},{"hostname":"e0512"},{"hostname":"e0513"},{"hostname":"e0514"},{"hostname":"e0653"},{"hostname":"e0701"},{"hostname":"e0716"},{"hostname":"e0727"},{"hostname":"e0728"},{"hostname":"e0925"},{"hostname":"e0926"},{"hostname":"e0929"},{"hostname":"e0934"},{"hostname":"e0951"}],"walltime":10,"jobState":"completed","cluster":"emmy","stopTime":1609009562,"user":"emmyUser6","startTime":1608923076,"partition":"work","tags":[],"project":"no project","numNodes":32,"duration":86486}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
{"stopTime":1609387081,"resources":[{"hostname":"e0151"},{"hostname":"e0152"},{"hostname":"e0153"},{"hostname":"e0232"},{"hostname":"e0303"},{"hostname":"e0314"},{"hostname":"e0344"},{"hostname":"e0345"},{"hostname":"e0348"},{"hostname":"e0507"},{"hostname":"e0518"},{"hostname":"e0520"},{"hostname":"e0522"},{"hostname":"e0526"},{"hostname":"e0527"},{"hostname":"e0528"},{"hostname":"e0530"},{"hostname":"e0551"},{"hostname":"e0604"},{"hostname":"e0613"},{"hostname":"e0634"},{"hostname":"e0639"},{"hostname":"e0640"},{"hostname":"e0651"},{"hostname":"e0653"},{"hostname":"e0701"},{"hostname":"e0704"},{"hostname":"e0751"},{"hostname":"e0809"},{"hostname":"e0814"},{"hostname":"e0819"},{"hostname":"e0908"}],"walltime":10,"cluster":"emmy","jobState":"completed","statistics":{"clock":{"max":2634.9,"unit":"MHz","min":0,"avg":2597.8},"cpu_load":{"max":27.41,"unit":"load","min":0,"avg":18.39},"mem_bw":{"min":0,"avg":63.23,"unit":"GB/s","max":75.06},"ipc":{"min":0.0,"avg":1.53846153846154,"unit":"IPC","max":0.490196078431373},"rapl_power":{"min":0,"avg":227.32,"unit":"W","max":256.22},"mem_used":{"min":1.5,"avg":27.77,"unit":"GB","max":37.43},"flops_sp":{"unit":"GF/s","max":413.21,"min":0,"avg":224.41},"flops_dp":{"max":5.72,"unit":"GF/s","min":0,"avg":0},"flops_any":{"min":0,"avg":224.42,"max":413.21,"unit":"GF/s"}},"exclusive":1,"jobId":1404397,"tags":[],"partition":"work","project":"no project","user":"emmyUser6","startTime":1609300556,"duration":86525,"numNodes":32}

View File

@ -0,0 +1,141 @@
{
"name": "emmy",
"subClusters": [
{
"name": "main",
"processorType": "Intel IvyBridge",
"socketsPerNode": 2,
"coresPerSocket": 10,
"threadsPerCore": 2,
"flopRateScalar": 44,
"flopRateSimd": 704,
"memoryBandwidth": 80,
"topology": {
"node": [0,20,1,21,2,22,3,23,4,24,5,25,6,26,7,27,8,28,9,29,10,30,11,31,12,32,13,33,14,34,15,35,16,36,17,37,18,38,19,39],
"socket": [
[0,20,1,21,2,22,3,23,4,24,5,25,6,26,7,27,8,28,9,29],
[10,30,11,31,12,32,13,33,14,34,15,35,16,36,17,37,18,38,19,39]
],
"memoryDomain": [
[0,20,1,21,2,22,3,23,4,24,5,25,6,26,7,27,8,28,9,29],
[10,30,11,31,12,32,13,33,14,34,15,35,16,36,17,37,18,38,19,39]
],
"core": [
[0,20],[1,21],[2,22],[3,23],[4,24],[5,25],[6,26],[7,27],[8,28],[9,29],[10,30],[11,31],[12,32],[13,33],[14,34],[15,35],[16,36],[17,37],[18,38],[19,39]
]
}
}
],
"metricConfig": [
{
"name": "cpu_load",
"scope": "hwthread",
"unit": "load",
"timestep": 60,
"peak": 40,
"normal": 20,
"caution": 15,
"alert": 10
},
{
"name": "mem_used",
"scope": "node",
"unit": "GB",
"timestep": 60,
"peak": 64,
"normal": 20,
"caution": 40,
"alert": 55
},
{
"name": "flops_any",
"scope": "hwthread",
"unit": "GF/s",
"timestep": 60,
"peak": 704,
"normal": 100,
"caution": 20,
"alert": 2
},
{
"name": "flops_sp",
"scope": "hwthread",
"unit": "GF/s",
"timestep": 60,
"peak": 704,
"normal": 100,
"caution": 20,
"alert": 2
},
{
"name": "flops_dp",
"scope": "hwthread",
"unit": "GF/s",
"timestep": 60,
"peak": 350,
"normal": 50,
"caution": 10,
"alert": 2
},
{
"name": "mem_bw",
"scope": "memoryDomain",
"unit": "GB/s",
"timestep": 60,
"peak": 80,
"normal": 30,
"caution": 10,
"alert": 5
},
{
"name": "ipc",
"scope": "hwthread",
"unit": "IPC",
"timestep": 60,
"peak": 80,
"normal": 30,
"caution": 10,
"alert": 5
},
{
"name": "clock",
"scope": "hwthread",
"unit": "MHz",
"timestep": 60,
"peak": 80,
"normal": 30,
"caution": 10,
"alert": 5
},
{
"name": "rapl_power",
"scope": "socket",
"unit": "W",
"timestep": 60,
"peak": 80,
"normal": 30,
"caution": 10,
"alert": 5
},
{
"name": "ib_bw",
"scope": "node",
"unit": "GB/s",
"timestep": 60,
"peak": 40,
"normal": 20,
"caution": 15,
"alert": 10
},
{
"name": "lustre_bw",
"scope": "node",
"unit": "GB/s",
"timestep": 60,
"peak": 40,
"normal": 20,
"caution": 15,
"alert": 10
}
]
}

488
test/data.json Normal file
View File

@ -0,0 +1,488 @@
{
"cpu_used": {
"core": {
"unit": "cpu used",
"scope": "core",
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"id": 0,
"statistics": {
"min": 0.09090909090909093,
"avg": 0.9173553719008265,
"max": 1.0000000000000002
},
"data": [
0.09090909090909093,
0.9999999999999999,
1.0,
1.0000000000000002,
1.0,
1.0000000000000002,
0.9999999999999999,
1.0,
1.0,
1.0,
1.0
]
},
{
"hostname": "taurusi6489",
"id": 1,
"statistics": {
"min": 0.03694102397926118,
"avg": 0.045968409230268584,
"max": 0.08809840425531917
},
"data": [
0.08809840425531917,
0.05710659898477157,
0.04034861200774694,
0.037962362102530824,
0.03976721629485936,
0.04163976759199483,
0.03694102397926118,
0.03821243523316062,
0.03851132686084142,
0.044752092723760455,
0.04231266149870802
]
},
{
"hostname": "taurusi6490",
"id": 10,
"statistics": {
"min": 0.10505319148936171,
"avg": 0.9186411992263056,
"max": 1.0000000000000002
},
"data": [
0.10505319148936171,
1.0000000000000002,
1.0,
1.0,
1.0,
0.9999999999999999,
1.0,
0.9999999999999999,
1.0,
1.0,
1.0
]
},
{
"hostname": "taurusi6490",
"id": 11,
"statistics": {
"min": 0.05286048845767815,
"avg": 0.07053823838706144,
"max": 0.075148113501715
},
"data": [
0.05286048845767815,
0.06936597614563718,
0.07254534083802376,
0.075148113501715,
0.06909547738693468,
0.07372696032489846,
0.07077983088005012,
0.07082419304293325,
0.07424812030075188,
0.07285803627267043,
0.07446808510638298
]
}
],
"statisticsSeries": null
}
},
"ipc": {
"core": {
"unit": "IPC",
"scope": "core",
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": 0,
"statistics": {
"min": 1.3808406263195592,
"avg": 1.3960848578375105,
"max": 1.4485575599350569
},
"data": [
1.4485575599350569,
1.3808406263195592,
1.3830284413690626,
1.3836692663348698,
1.3843283952290035
]
},
{
"hostname": "taurusi6489",
"id": 1,
"statistics": {
"min": 0.30469640475234366,
"avg": 0.8816944294664065,
"max": 1.797623522191001
},
"data": [
1.797623522191001,
0.954395633726228,
1.0019972349956185,
0.30469640475234366,
0.3497593516668412
]
},
{
"hostname": "taurusi6490",
"id": 10,
"statistics": {
"min": 1.3791232173760588,
"avg": 1.3850247295506815,
"max": 1.386710405495511
},
"data": [
1.3791232173760588,
1.38619977419787,
1.386397917938246,
1.3866923327457215,
1.386710405495511
]
},
{
"hostname": "taurusi6490",
"id": 11,
"statistics": {
"min": 0.6424094604392216,
"avg": 0.9544442638400293,
"max": 1.2706704244636826
},
"data": [
1.2706704244636826,
0.6424094604392216,
0.9249973908234796,
0.6940110823242276,
1.2401329611495353
]
}
],
"statisticsSeries": null
}
},
"flops_any": {
"core": {
"unit": "F/s",
"scope": "core",
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": 0,
"statistics": {
"min": 0.0,
"avg": 184.2699002412084,
"max": 921.3495012060421
},
"data": [
921.3495012060421,
0.0,
0.0,
0.0,
0.0
]
},
{
"hostname": "taurusi6489",
"id": 1,
"statistics": {
"min": 0.13559227208748068,
"avg": 273.2997868356056,
"max": 1355.9227390817396
},
"data": [
1355.9227390817396,
8.94908797747172,
0.6779613312519499,
0.13559227208748068,
0.8135535154771758
]
},
{
"hostname": "taurusi6490",
"id": 10,
"statistics": {
"min": 0.0,
"avg": 1678.8419461262179,
"max": 4346.591400350933
},
"data": [
4346.591400350933,
0.0,
578.4248288199713,
0.0,
3469.193501460185
]
},
{
"hostname": "taurusi6490",
"id": 11,
"statistics": {
"min": 45.28689133054866,
"avg": 609.6644949204072,
"max": 2582.7080822873186
},
"data": [
2582.7080822873186,
45.28689133054866,
48.67663233623293,
47.591911855555026,
324.0589567923803
]
}
],
"statisticsSeries": null
}
},
"mem_bw": {
"socket": {
"unit": "B/s",
"scope": "socket",
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": 0,
"statistics": {
"min": 653671812.1661415,
"avg": 1637585527.5854635,
"max": 2614718291.9554267
},
"data": [
653671812.1661415,
2614718291.9554267,
1732453371.7073724,
1612865229.8704093,
1574218932.2279677
]
},
{
"hostname": "taurusi6490",
"id": 0,
"statistics": {
"min": 1520190251.61048,
"avg": 1572477682.3850098,
"max": 1688960732.2760606
},
"data": [
1688960732.2760606,
1580140679.8216474,
1520190251.61048,
1541841829.6250021,
1531254918.591859
]
}
],
"statisticsSeries": null
}
},
"file_bw": {
"node": {
"unit": "B/s",
"scope": "node",
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"statistics": {
"min": 0.0,
"avg": 190352.6328851857,
"max": 2093878.361723524
},
"data": [
0.0,
0.0,
0.0,
0.6000135186380174,
0.0,
0.0,
2093878.361723524,
0.0,
0.0,
0.0,
0.0
]
},
{
"hostname": "taurusi6490",
"statistics": {
"min": 0.0,
"avg": 1050832.4509396513,
"max": 11559156.360352296
},
"data": [
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
11559156.360352296,
0.0,
0.5999838690326298,
0.0,
0.0
]
}
],
"statisticsSeries": null
}
},
"net_bw": {
"node": {
"unit": "B/s",
"scope": "node",
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"statistics": {
"min": 126779.89655880642,
"avg": 653834.5091507058,
"max": 1285639.5107541133
},
"data": [
1158202.7403032137,
126779.89655880642,
419017.91939583793,
345766.3974972795,
645419.3296982117,
644667.7333333333,
1285639.5107541133,
643481.2108874657,
640025.3562553325,
643241.4875354709,
639938.0184386979
]
},
{
"hostname": "taurusi6490",
"statistics": {
"min": 640156.9862985397,
"avg": 872367.6551257868,
"max": 1916309.7075416835
},
"data": [
1774843.146788355,
643218.3646426039,
641681.1031071587,
644690.1512268113,
647183.5650609672,
644439.3303402043,
1916309.7075416835,
643748.3241006166,
757189.8273227927,
642583.6999539217,
640156.9862985397
]
}
],
"statisticsSeries": null
}
},
"mem_used": {
"node": {
"unit": "B",
"scope": "node",
"timestep": 30,
"series": [
{
"hostname": "taurusi6489",
"statistics": {
"min": 2779066368.0,
"avg": 9282117259.636364,
"max": 10202595328.0
},
"data": [
2779066368.0,
8518217728.0,
9852760064.0,
9979805696.0,
10039619584.0,
10087104512.0,
10136084480.0,
10202595328.0,
10154196992.0,
10177409024.0,
10176430080.0
]
},
{
"hostname": "taurusi6490",
"statistics": {
"min": 9993277440.0,
"avg": 10013080110.545454,
"max": 10039676928.0
},
"data": [
10001317888.0,
10013028352.0,
10006728704.0,
10039676928.0,
10035838976.0,
10033356800.0,
10006577152.0,
10005659648.0,
9993277440.0,
9993564160.0,
10014855168.0
]
}
],
"statisticsSeries": null
}
},
"cpu_power": {
"socket": {
"unit": "W",
"scope": "socket",
"timestep": 60,
"series": [
{
"hostname": "taurusi6489",
"id": 0,
"statistics": {
"min": 35.50647456742635,
"avg": 72.08313211552377,
"max": 83.33799371150049
},
"data": [
35.50647456742635,
75.65022009482759,
83.33799371150049,
83.00405043233219,
82.9169217715322
]
},
{
"hostname": "taurusi6490",
"id": 0,
"statistics": {
"min": 83.8466923147859,
"avg": 85.18572681122097,
"max": 85.83909286117324
},
"data": [
83.8466923147859,
85.58816979864088,
85.31266819129794,
85.83909286117324,
85.34201089020692
]
}
],
"statisticsSeries": null
}
}
}

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"os"
@ -18,6 +19,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/gorilla/mux"
@ -25,8 +27,36 @@ import (
)
func setup(t *testing.T) *api.RestApi {
const testconfig = `{
"addr": "0.0.0.0:8080",
"validate": false,
"archive": {
"kind": "file",
"path": "./var/job-archive"
},
"clusters": [
{
"name": "testcluster",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 64 },
"duration": { "from": 0, "to": 86400 },
"startTime": { "from": "2022-01-01T00:00:00Z", "to": null }
}
},
{
"name": "taurus",
"metricDataRepository": {"kind": "test", "url": "bla:8081"},
"filterRanges": {
"numNodes": { "from": 1, "to": 4000 },
"duration": { "from": 0, "to": 604800 },
"startTime": { "from": "2010-01-01T00:00:00Z", "to": null }
}
}
]
}`
const testclusterJson = `{
"name": "testcluster",
"name": "testcluster",
"subClusters": [
{
"name": "sc0",
@ -47,12 +77,10 @@ func setup(t *testing.T) *api.RestApi {
"socket": [[0, 1, 2, 3, 4, 5, 6, 7]],
"memoryDomain": [[0, 1, 2, 3, 4, 5, 6, 7]],
"die": [[0, 1, 2, 3, 4, 5, 6, 7]],
"core": [[0], [1], [2], [3], [4], [5], [6], [7]],
"accelerators": []
"core": [[0], [1], [2], [3], [4], [5], [6], [7]]
}
}
],
"metricDataRepository": {"kind": "test"},
"metricConfig": [
{
"name": "load_one",
@ -64,12 +92,157 @@ func setup(t *testing.T) *api.RestApi {
"caution": 0,
"alert": 0
}
]
}`
const taurusclusterJson = `{
"name": "taurus",
"SubClusters": [
{
"name": "haswell",
"processorType": "Intel Haswell",
"socketsPerNode": 2,
"coresPerSocket": 12,
"threadsPerCore": 1,
"flopRateScalar": 32,
"flopRateSimd": 512,
"memoryBandwidth": 60,
"topology": {
"node": [ 0, 1 ],
"socket": [
[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ],
[ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 ]
],
"memoryDomain": [
[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ],
[ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 ]
],
"core": [ [ 0 ], [ 1 ], [ 2 ], [ 3 ], [ 4 ], [ 5 ], [ 6 ], [ 7 ], [ 8 ], [ 9 ], [ 10 ], [ 11 ], [ 12 ], [ 13 ], [ 14 ], [ 15 ], [ 16 ], [ 17 ], [ 18 ], [ 19 ], [ 20 ], [ 21 ], [ 22 ], [ 23 ] ]
}
}
],
"filterRanges": {
"numNodes": { "from": 1, "to": 1 },
"duration": { "from": 0, "to": 172800 },
"startTime": { "from": "2010-01-01T00:00:00Z", "to": null }
}
"metricConfig": [
{
"name": "cpu_used",
"scope": "core",
"unit": "",
"timestep": 30,
"subClusters": [
{
"name": "haswell",
"peak": 1,
"normal": 0.5,
"caution": 2e-07,
"alert": 1e-07
}
]
},
{
"name": "ipc",
"scope": "core",
"unit": "IPC",
"timestep": 60,
"subClusters": [
{
"name": "haswell",
"peak": 2,
"normal": 1,
"caution": 0.1,
"alert": 0.5
}
]
},
{
"name": "flops_any",
"scope": "core",
"unit": "F/s",
"timestep": 60,
"subClusters": [
{
"name": "haswell",
"peak": 40000000000,
"normal": 20000000000,
"caution": 30000000000,
"alert": 35000000000
}
]
},
{
"name": "mem_bw",
"scope": "socket",
"unit": "B/s",
"timestep": 60,
"subClusters": [
{
"name": "haswell",
"peak": 58800000000,
"normal": 28800000000,
"caution": 38800000000,
"alert": 48800000000
}
]
},
{
"name": "file_bw",
"scope": "node",
"unit": "B/s",
"timestep": 30,
"subClusters": [
{
"name": "haswell",
"peak": 20000000000,
"normal": 5000000000,
"caution": 9000000000,
"alert": 19000000000
}
]
},
{
"name": "net_bw",
"scope": "node",
"unit": "B/s",
"timestep": 30,
"subClusters": [
{
"name": "haswell",
"peak": 7000000000,
"normal": 5000000000,
"caution": 6000000000,
"alert": 6500000000
}
]
},
{
"name": "mem_used",
"scope": "node",
"unit": "B",
"timestep": 30,
"subClusters": [
{
"name": "haswell",
"peak": 32000000000,
"normal": 2000000000,
"caution": 31000000000,
"alert": 30000000000
}
]
},
{
"name": "cpu_power",
"scope": "socket",
"unit": "W",
"timestep": 60,
"subClusters": [
{
"name": "haswell",
"peak": 100,
"normal": 80,
"caution": 90,
"alert": 90
}
]
}
]
}`
tmpdir := t.TempDir()
@ -86,6 +259,13 @@ func setup(t *testing.T) *api.RestApi {
t.Fatal(err)
}
if err := os.Mkdir(filepath.Join(jobarchive, "taurus"), 0777); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(jobarchive, "taurus", "cluster.json"), []byte(taurusclusterJson), 0666); err != nil {
t.Fatal(err)
}
dbfilepath := filepath.Join(tmpdir, "test.db")
f, err := os.Create(dbfilepath)
if err != nil {
@ -93,22 +273,30 @@ func setup(t *testing.T) *api.RestApi {
}
f.Close()
cfgFilePath := filepath.Join(tmpdir, "config.json")
if err := os.WriteFile(cfgFilePath, []byte(testconfig), 0666); err != nil {
t.Fatal(err)
}
config.Init(cfgFilePath)
archiveCfg := fmt.Sprintf("{\"kind\": \"file\",\"path\": \"%s\"}", jobarchive)
repository.Connect("sqlite3", dbfilepath)
db := repository.GetConnection()
if err := archive.Init(json.RawMessage(archiveCfg)); err != nil {
t.Fatal(err)
}
if err := metricdata.Init(config.Keys.DisableArchive); err != nil {
t.Fatal(err)
}
if _, err := db.DB.Exec(repository.JobsDBSchema); err != nil {
t.Fatal(err)
}
if err := config.Init(db.DB, false, map[string]interface{}{}, jobarchive); err != nil {
t.Fatal(err)
}
if err := metricdata.Init(jobarchive, false); err != nil {
t.Fatal(err)
}
jobRepo := repository.GetRepository()
jobRepo := repository.GetJobRepository()
resolver := &graph.Resolver{DB: db.DB, Repo: jobRepo}
return &api.RestApi{
@ -122,9 +310,9 @@ func cleanup() {
}
/*
* This function starts a job, stops it, and then reads its data from the job-archive.
* Do not run sub-tests in parallel! Tests should not be run in parallel at all, because
* at least `setup` modifies global state. Log-Output is redirected to /dev/null on purpose.
* This function starts a job, stops it, and then reads its data from the job-archive.
* Do not run sub-tests in parallel! Tests should not be run in parallel at all, because
* at least `setup` modifies global state. Log-Output is redirected to /dev/null on purpose.
*/
func TestRestApi(t *testing.T) {
restapi := setup(t)
@ -155,28 +343,28 @@ func TestRestApi(t *testing.T) {
restapi.MountRoutes(r)
const startJobBody string = `{
"jobId": 123,
"user": "testuser",
"project": "testproj",
"cluster": "testcluster",
"partition": "default",
"jobId": 123,
"user": "testuser",
"project": "testproj",
"cluster": "testcluster",
"partition": "default",
"walltime": 3600,
"arrayJobId": 0,
"numNodes": 1,
"numHwthreads": 8,
"numAcc": 0,
"exclusive": 1,
"monitoringStatus": 1,
"smt": 1,
"tags": [{ "type": "testTagType", "name": "testTagName" }],
"resources": [
{
"hostname": "host123",
"hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]
}
],
"metaData": { "jobScript": "blablabla..." },
"startTime": 123456789
"arrayJobId": 0,
"numNodes": 1,
"numHwthreads": 8,
"numAcc": 0,
"exclusive": 1,
"monitoringStatus": 1,
"smt": 1,
"tags": [{ "type": "testTagType", "name": "testTagName" }],
"resources": [
{
"hostname": "host123",
"hwthreads": [0, 1, 2, 3, 4, 5, 6, 7]
}
],
"metaData": { "jobScript": "blablabla..." },
"startTime": 123456789
}`
var dbid int64
@ -234,7 +422,7 @@ func TestRestApi(t *testing.T) {
}
const stopJobBody string = `{
"jobId": 123,
"jobId": 123,
"startTime": 123456789,
"cluster": "testcluster",
@ -309,30 +497,34 @@ func TestRestApi(t *testing.T) {
t.Run("FailedJob", func(t *testing.T) {
subtestLetJobFail(t, restapi, r)
})
t.Run("ImportJob", func(t *testing.T) {
testImportFlag(t)
})
}
func subtestLetJobFail(t *testing.T, restapi *api.RestApi, r *mux.Router) {
const startJobBody string = `{
"jobId": 12345,
"user": "testuser",
"project": "testproj",
"cluster": "testcluster",
"partition": "default",
"jobId": 12345,
"user": "testuser",
"project": "testproj",
"cluster": "testcluster",
"partition": "default",
"walltime": 3600,
"arrayJobId": 0,
"numNodes": 1,
"numAcc": 0,
"exclusive": 1,
"monitoringStatus": 1,
"smt": 1,
"tags": [],
"resources": [
{
"hostname": "host123"
}
],
"metaData": {},
"startTime": 12345678
"arrayJobId": 0,
"numNodes": 1,
"numAcc": 0,
"exclusive": 1,
"monitoringStatus": 1,
"smt": 1,
"tags": [],
"resources": [
{
"hostname": "host123"
}
],
"metaData": {},
"startTime": 12345678
}`
ok := t.Run("StartJob", func(t *testing.T) {
@ -350,7 +542,7 @@ func subtestLetJobFail(t *testing.T, restapi *api.RestApi, r *mux.Router) {
}
const stopJobBody string = `{
"jobId": 12345,
"jobId": 12345,
"cluster": "testcluster",
"jobState": "failed",
@ -382,3 +574,32 @@ func subtestLetJobFail(t *testing.T, restapi *api.RestApi, r *mux.Router) {
t.Fatal("subtest failed")
}
}
func testImportFlag(t *testing.T) {
if err := repository.HandleImportFlag("meta.json:data.json"); err != nil {
t.Fatal(err)
}
repo := repository.GetJobRepository()
jobId := int64(20639587)
cluster := "taurus"
startTime := int64(1635856524)
job, err := repo.Find(&jobId, &cluster, &startTime)
if err != nil {
t.Fatal(err)
}
if job.NumNodes != 2 {
t.Errorf("NumNode: Received %d, expected 2", job.NumNodes)
}
ar := archive.GetHandle()
data, err := ar.LoadJobData(job)
if err != nil {
t.Fatal(err)
}
if len(data) != 8 {
t.Errorf("Job data length: Got %d, want 8", len(data))
}
}

78
test/meta.json Normal file
View File

@ -0,0 +1,78 @@
{
"jobId": 20639587,
"user": "s3804552",
"project": "p_speichersysteme",
"cluster": "taurus",
"subCluster": "haswell",
"partition": "haswell64",
"arrayJobId": 0,
"numNodes": 2,
"numHwthreads": 4,
"numAcc": 0,
"exclusive": 0,
"startTime": 1635856524,
"jobState": "completed",
"duration": 310,
"walltime": 3600,
"smt": 0,
"resources": [
{
"hostname": "taurusi6489",
"hwthreads": [ 0, 1 ]
},
{
"hostname": "taurusi6490",
"hwthreads": [ 10, 11 ]
}
],
"statistics": {
"cpu_used": {
"min": 0.03694102397926118,
"avg": 0.48812580468611544,
"max": 1.0000000000000002,
"unit": "cpu used"
},
"ipc": {
"min": 0.30469640475234366,
"avg": 1.154312070173657,
"max": 1.797623522191001,
"unit": "IPC"
},
"flops_any": {
"min": 0.0,
"avg": 686.5190320308598,
"max": 4346.591400350933,
"unit": "F/s"
},
"mem_bw": {
"min": 653671812.1661415,
"avg": 1605031604.9852366,
"max": 2614718291.9554267,
"unit": "B/s"
},
"file_bw": {
"min": 0.0,
"avg": 620592.5419124186,
"max": 11559156.360352296,
"unit": "B/s"
},
"net_bw": {
"min": 126779.89655880642,
"avg": 763101.082138246,
"max": 1916309.7075416835,
"unit": "B/s"
},
"mem_used": {
"min": 2779066368.0,
"avg": 9647598685.09091,
"max": 10202595328.0,
"unit": "B"
},
"cpu_power": {
"min": 35.50647456742635,
"avg": 78.63442946337237,
"max": 85.83909286117324,
"unit": "W"
}
}
}

9
tools.go Normal file
View File

@ -0,0 +1,9 @@
//go:build tools
// +build tools
package tools
import (
_ "github.com/99designs/gqlgen"
_ "github.com/swaggo/swag/cmd/swag"
)

View File

@ -0,0 +1,9 @@
// Copyright (C) 2022 NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package main
func main() {
}

View File

@ -5,7 +5,8 @@ import resolve from '@rollup/plugin-node-resolve';
import { terser } from 'rollup-plugin-terser';
import css from 'rollup-plugin-css-only';
const production = !process.env.ROLLUP_WATCH;
// const production = !process.env.ROLLUP_WATCH;
const production = true
const plugins = [
svelte({

View File

@ -42,8 +42,8 @@
</DropdownToggle>
<DropdownMenu>
{#each clusters as cluster}
<DropdownItem href={item.href + cluster} active={window.location.pathname == item.href + cluster}>
{cluster}
<DropdownItem href={item.href + cluster.name} active={window.location.pathname == item.href + cluster.name}>
{cluster.name}
</DropdownItem>
{/each}
</DropdownMenu>

View File

@ -38,7 +38,7 @@
{cluster.name}
</ListGroupItem>
{/each}
</ListGroup>
</ListGroup>
{/if}
{#if $initialized && pendingCluster != null}
<br/>

View File

@ -1,6 +1,7 @@
<script>
import { createEventDispatcher, getContext } from 'svelte'
import { Button, Modal, ModalBody, ModalHeader, ModalFooter } from 'sveltestrap'
import Header from '../Header.svelte';
import DoubleRangeSlider from './DoubleRangeSlider.svelte'
const clusters = getContext('clusters'),
@ -22,20 +23,22 @@
const findMaxNumAccels = clusters => clusters.reduce((max, cluster) => Math.max(max,
cluster.subClusters.reduce((max, sc) => Math.max(max, sc.topology.accelerators?.length || 0), 0)), 0)
console.log(header)
let minNumNodes = 1, maxNumNodes = 0, minNumHWThreads = 1, maxNumHWThreads = 0, minNumAccelerators = 0, maxNumAccelerators = 0
$: {
if ($initialized) {
if (cluster != null) {
const { filterRanges, subClusters } = clusters.find(c => c.name == cluster)
const { subClusters } = clusters.find(c => c.name == cluster)
const { filterRanges } = header.clusters.find(c => c.name == cluster)
minNumNodes = filterRanges.numNodes.from
maxNumNodes = filterRanges.numNodes.to
maxNumAccelerators = findMaxNumAccels([{ subClusters }])
} else if (clusters.length > 0) {
const { filterRanges } = clusters[0]
const { filterRanges } = header.clusters[0]
minNumNodes = filterRanges.numNodes.from
maxNumNodes = filterRanges.numNodes.to
maxNumAccelerators = findMaxNumAccels(clusters)
for (let cluster of clusters) {
for (let cluster of header.clusters) {
const { filterRanges } = cluster
minNumNodes = Math.min(minNumNodes, filterRanges.numNodes.from)
maxNumNodes = Math.max(maxNumNodes, filterRanges.numNodes.to)

View File

@ -9,7 +9,7 @@ import { readable } from 'svelte/store'
*
* It does several things:
* - Initialize the GraphQL client
* - Creates a readable store 'initialization' which indicates when the values below can be used.
* - Creates a readable store 'initialization' which indicates when the values below can be used.
* - Adds 'tags' to the context (list of all tags)
* - Adds 'clusters' to the context (object with cluster names as keys)
* - Adds 'metrics' to the context, a function that takes a cluster and metric name and returns the MetricConfig (or undefined)
@ -43,11 +43,6 @@ export function init(extraInitQuery = '') {
aggregation,
subClusters { name, peak, normal, caution, alert }
}
filterRanges {
duration { from, to }
numNodes { from, to }
startTime { from, to }
}
partitions
subClusters {
name, processorType
@ -104,7 +99,7 @@ export function init(extraInitQuery = '') {
for (let tag of data.tags)
tags.push(tag)
for (let cluster of data.clusters)
clusters.push(cluster)
@ -241,7 +236,7 @@ export async function fetchMetrics(job, metrics, scopes) {
if (res.status != 200) {
return { error: { status: res.status, message: await res.text() } }
}
return await res.json()
} catch (e) {
return { error: e }

View File

@ -13,6 +13,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
/// Go's embed is only allowed to embed files in a subdirectory of the embedding package ([see here](https://github.com/golang/go/issues/46056)).
@ -63,7 +64,7 @@ type Page struct {
Error string // For generic use (e.g. the exact error message on /login)
Info string // For generic use (e.g. "Logout successfull" on /login)
User User // Information about the currently logged in user
Clusters []string // List of all clusters for use in the Header
Clusters []schema.ClusterConfig // List of all clusters for use in the Header
FilterPresets map[string]interface{} // For pages with the Filter component, this can be used to set initial filters.
Infos map[string]interface{} // For generic use (e.g. username for /monitoring/user/<id>, job id for /monitoring/job/<id>)
Config map[string]interface{} // UI settings for the currently logged in user (e.g. line width, ...)
@ -76,11 +77,12 @@ func RenderTemplate(rw http.ResponseWriter, r *http.Request, file string, page *
}
if page.Clusters == nil {
for _, c := range config.Clusters {
page.Clusters = append(page.Clusters, c.Name)
for _, c := range config.Keys.Clusters {
page.Clusters = append(page.Clusters, schema.ClusterConfig{Name: c.Name, FilterRanges: c.FilterRanges, MetricDataRepository: nil})
}
}
log.Infof("%v\n", page.Config)
if err := t.Execute(rw, page); err != nil {
log.Errorf("template error: %s", err.Error())
}