mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-11-10 08:57:25 +01:00
feat: Add jobs endpoint to retrieve job meta and all job metric data
203 make full jobarchive available per simple api call
This commit is contained in:
commit
5004e44934
339
api/swagger.json
339
api/swagger.json
@ -17,6 +17,63 @@
|
||||
"host": "localhost:8080",
|
||||
"basePath": "/api",
|
||||
"paths": {
|
||||
"/clusters/": {
|
||||
"get": {
|
||||
"security": [
|
||||
{
|
||||
"ApiKeyAuth": []
|
||||
}
|
||||
],
|
||||
"description": "Get a list of all cluster configs. Specific cluster can be requested using query parameter.",
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"Cluster query"
|
||||
],
|
||||
"summary": "Lists all cluster configs",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Job Cluster",
|
||||
"name": "cluster",
|
||||
"in": "query"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Array of clusters",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.GetClustersApiResponse"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"401": {
|
||||
"description": "Unauthorized",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"403": {
|
||||
"description": "Forbidden",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/jobs/": {
|
||||
"get": {
|
||||
"security": [
|
||||
@ -694,6 +751,80 @@
|
||||
}
|
||||
},
|
||||
"/jobs/{id}": {
|
||||
"get": {
|
||||
"security": [
|
||||
{
|
||||
"ApiKeyAuth": []
|
||||
}
|
||||
],
|
||||
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"Job query"
|
||||
],
|
||||
"summary": "Get job meta and optional all metric data",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Database ID of Job",
|
||||
"name": "id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "boolean",
|
||||
"description": "Include all available metrics",
|
||||
"name": "all-metrics",
|
||||
"in": "query"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Job resource",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.GetJobApiResponse"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"401": {
|
||||
"description": "Unauthorized",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"403": {
|
||||
"description": "Forbidden",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"404": {
|
||||
"description": "Resource not found",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"422": {
|
||||
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"post": {
|
||||
"security": [
|
||||
{
|
||||
@ -710,7 +841,7 @@
|
||||
"tags": [
|
||||
"Job query"
|
||||
],
|
||||
"summary": "Get complete job meta and metric data",
|
||||
"summary": "Get job meta and configurable metric data",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "integer",
|
||||
@ -1210,6 +1341,18 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"api.GetClustersApiResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"clusters": {
|
||||
"description": "Array of clusters",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.Cluster"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"api.GetJobApiResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@ -1305,6 +1448,40 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Accelerator": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Cluster": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metricConfig": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.MetricConfig"
|
||||
}
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"subClusters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.SubCluster"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Job": {
|
||||
"description": "Information of a HPC job.",
|
||||
"type": "object",
|
||||
@ -1703,6 +1880,44 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.MetricConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"aggregation": {
|
||||
"type": "string"
|
||||
},
|
||||
"alert": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"scope": {
|
||||
"$ref": "#/definitions/schema.MetricScope"
|
||||
},
|
||||
"subClusters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.SubClusterConfig"
|
||||
}
|
||||
},
|
||||
"timestep": {
|
||||
"type": "integer"
|
||||
},
|
||||
"unit": {
|
||||
"$ref": "#/definitions/schema.Unit"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.MetricScope": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
@ -1738,6 +1953,17 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.MetricValue": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"$ref": "#/definitions/schema.Unit"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Resource": {
|
||||
"description": "A resource used by a job",
|
||||
"type": "object",
|
||||
@ -1818,6 +2044,64 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SubCluster": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"coresPerSocket": {
|
||||
"type": "integer"
|
||||
},
|
||||
"flopRateScalar": {
|
||||
"$ref": "#/definitions/schema.MetricValue"
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"$ref": "#/definitions/schema.MetricValue"
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"$ref": "#/definitions/schema.MetricValue"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"nodes": {
|
||||
"type": "string"
|
||||
},
|
||||
"processorType": {
|
||||
"type": "string"
|
||||
},
|
||||
"socketsPerNode": {
|
||||
"type": "integer"
|
||||
},
|
||||
"threadsPerCore": {
|
||||
"type": "integer"
|
||||
},
|
||||
"topology": {
|
||||
"$ref": "#/definitions/schema.Topology"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SubClusterConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"alert": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"remove": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Tag": {
|
||||
"description": "Defines a tag using name and type.",
|
||||
"type": "object",
|
||||
@ -1838,6 +2122,59 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Topology": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"accelerators": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.Accelerator"
|
||||
}
|
||||
},
|
||||
"core": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"die": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memoryDomain": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"node": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"socket": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Unit": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
225
api/swagger.yaml
225
api/swagger.yaml
@ -68,6 +68,14 @@ definitions:
|
||||
description: Statustext of Errorcode
|
||||
type: string
|
||||
type: object
|
||||
api.GetClustersApiResponse:
|
||||
properties:
|
||||
clusters:
|
||||
description: Array of clusters
|
||||
items:
|
||||
$ref: '#/definitions/schema.Cluster'
|
||||
type: array
|
||||
type: object
|
||||
api.GetJobApiResponse:
|
||||
properties:
|
||||
data:
|
||||
@ -133,6 +141,28 @@ definitions:
|
||||
- jobState
|
||||
- stopTime
|
||||
type: object
|
||||
schema.Accelerator:
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
type: object
|
||||
schema.Cluster:
|
||||
properties:
|
||||
metricConfig:
|
||||
items:
|
||||
$ref: '#/definitions/schema.MetricConfig'
|
||||
type: array
|
||||
name:
|
||||
type: string
|
||||
subClusters:
|
||||
items:
|
||||
$ref: '#/definitions/schema.SubCluster'
|
||||
type: array
|
||||
type: object
|
||||
schema.Job:
|
||||
description: Information of a HPC job.
|
||||
properties:
|
||||
@ -448,6 +478,31 @@ definitions:
|
||||
unit:
|
||||
$ref: '#/definitions/schema.Unit'
|
||||
type: object
|
||||
schema.MetricConfig:
|
||||
properties:
|
||||
aggregation:
|
||||
type: string
|
||||
alert:
|
||||
type: number
|
||||
caution:
|
||||
type: number
|
||||
name:
|
||||
type: string
|
||||
normal:
|
||||
type: number
|
||||
peak:
|
||||
type: number
|
||||
scope:
|
||||
$ref: '#/definitions/schema.MetricScope'
|
||||
subClusters:
|
||||
items:
|
||||
$ref: '#/definitions/schema.SubClusterConfig'
|
||||
type: array
|
||||
timestep:
|
||||
type: integer
|
||||
unit:
|
||||
$ref: '#/definitions/schema.Unit'
|
||||
type: object
|
||||
schema.MetricScope:
|
||||
enum:
|
||||
- invalid_scope
|
||||
@ -475,6 +530,13 @@ definitions:
|
||||
min:
|
||||
type: number
|
||||
type: object
|
||||
schema.MetricValue:
|
||||
properties:
|
||||
unit:
|
||||
$ref: '#/definitions/schema.Unit'
|
||||
value:
|
||||
type: number
|
||||
type: object
|
||||
schema.Resource:
|
||||
description: A resource used by a job
|
||||
properties:
|
||||
@ -529,6 +591,44 @@ definitions:
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
schema.SubCluster:
|
||||
properties:
|
||||
coresPerSocket:
|
||||
type: integer
|
||||
flopRateScalar:
|
||||
$ref: '#/definitions/schema.MetricValue'
|
||||
flopRateSimd:
|
||||
$ref: '#/definitions/schema.MetricValue'
|
||||
memoryBandwidth:
|
||||
$ref: '#/definitions/schema.MetricValue'
|
||||
name:
|
||||
type: string
|
||||
nodes:
|
||||
type: string
|
||||
processorType:
|
||||
type: string
|
||||
socketsPerNode:
|
||||
type: integer
|
||||
threadsPerCore:
|
||||
type: integer
|
||||
topology:
|
||||
$ref: '#/definitions/schema.Topology'
|
||||
type: object
|
||||
schema.SubClusterConfig:
|
||||
properties:
|
||||
alert:
|
||||
type: number
|
||||
caution:
|
||||
type: number
|
||||
name:
|
||||
type: string
|
||||
normal:
|
||||
type: number
|
||||
peak:
|
||||
type: number
|
||||
remove:
|
||||
type: boolean
|
||||
type: object
|
||||
schema.Tag:
|
||||
description: Defines a tag using name and type.
|
||||
properties:
|
||||
@ -544,6 +644,41 @@ definitions:
|
||||
example: Debug
|
||||
type: string
|
||||
type: object
|
||||
schema.Topology:
|
||||
properties:
|
||||
accelerators:
|
||||
items:
|
||||
$ref: '#/definitions/schema.Accelerator'
|
||||
type: array
|
||||
core:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
die:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
memoryDomain:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
node:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
socket:
|
||||
items:
|
||||
items:
|
||||
type: integer
|
||||
type: array
|
||||
type: array
|
||||
type: object
|
||||
schema.Unit:
|
||||
properties:
|
||||
base:
|
||||
@ -564,6 +699,43 @@ info:
|
||||
title: ClusterCockpit REST API
|
||||
version: 1.0.0
|
||||
paths:
|
||||
/clusters/:
|
||||
get:
|
||||
description: Get a list of all cluster configs. Specific cluster can be requested
|
||||
using query parameter.
|
||||
parameters:
|
||||
- description: Job Cluster
|
||||
in: query
|
||||
name: cluster
|
||||
type: string
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Array of clusters
|
||||
schema:
|
||||
$ref: '#/definitions/api.GetClustersApiResponse'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"403":
|
||||
description: Forbidden
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Lists all cluster configs
|
||||
tags:
|
||||
- Cluster query
|
||||
/jobs/:
|
||||
get:
|
||||
description: |-
|
||||
@ -630,6 +802,57 @@ paths:
|
||||
tags:
|
||||
- Job query
|
||||
/jobs/{id}:
|
||||
get:
|
||||
description: |-
|
||||
Job to get is specified by database ID
|
||||
Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||
parameters:
|
||||
- description: Database ID of Job
|
||||
in: path
|
||||
name: id
|
||||
required: true
|
||||
type: integer
|
||||
- description: Include all available metrics
|
||||
in: query
|
||||
name: all-metrics
|
||||
type: boolean
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Job resource
|
||||
schema:
|
||||
$ref: '#/definitions/api.GetJobApiResponse'
|
||||
"400":
|
||||
description: Bad Request
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"401":
|
||||
description: Unauthorized
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"403":
|
||||
description: Forbidden
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"404":
|
||||
description: Resource not found
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"422":
|
||||
description: 'Unprocessable Entity: finding job failed: sql: no rows in
|
||||
result set'
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Get job meta and optional all metric data
|
||||
tags:
|
||||
- Job query
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
@ -684,7 +907,7 @@ paths:
|
||||
$ref: '#/definitions/api.ErrorResponse'
|
||||
security:
|
||||
- ApiKeyAuth: []
|
||||
summary: Get complete job meta and metric data
|
||||
summary: Get job meta and configurable metric data
|
||||
tags:
|
||||
- Job query
|
||||
/jobs/delete_job/:
|
||||
|
@ -23,6 +23,63 @@ const docTemplate = `{
|
||||
"host": "{{.Host}}",
|
||||
"basePath": "{{.BasePath}}",
|
||||
"paths": {
|
||||
"/clusters/": {
|
||||
"get": {
|
||||
"security": [
|
||||
{
|
||||
"ApiKeyAuth": []
|
||||
}
|
||||
],
|
||||
"description": "Get a list of all cluster configs. Specific cluster can be requested using query parameter.",
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"Cluster query"
|
||||
],
|
||||
"summary": "Lists all cluster configs",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Job Cluster",
|
||||
"name": "cluster",
|
||||
"in": "query"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Array of clusters",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.GetClustersApiResponse"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"401": {
|
||||
"description": "Unauthorized",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"403": {
|
||||
"description": "Forbidden",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/jobs/": {
|
||||
"get": {
|
||||
"security": [
|
||||
@ -700,6 +757,80 @@ const docTemplate = `{
|
||||
}
|
||||
},
|
||||
"/jobs/{id}": {
|
||||
"get": {
|
||||
"security": [
|
||||
{
|
||||
"ApiKeyAuth": []
|
||||
}
|
||||
],
|
||||
"description": "Job to get is specified by database ID\nReturns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.",
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"Job query"
|
||||
],
|
||||
"summary": "Get job meta and optional all metric data",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Database ID of Job",
|
||||
"name": "id",
|
||||
"in": "path",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"type": "boolean",
|
||||
"description": "Include all available metrics",
|
||||
"name": "all-metrics",
|
||||
"in": "query"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Job resource",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.GetJobApiResponse"
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"401": {
|
||||
"description": "Unauthorized",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"403": {
|
||||
"description": "Forbidden",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"404": {
|
||||
"description": "Resource not found",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"422": {
|
||||
"description": "Unprocessable Entity: finding job failed: sql: no rows in result set",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error",
|
||||
"schema": {
|
||||
"$ref": "#/definitions/api.ErrorResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"post": {
|
||||
"security": [
|
||||
{
|
||||
@ -716,7 +847,7 @@ const docTemplate = `{
|
||||
"tags": [
|
||||
"Job query"
|
||||
],
|
||||
"summary": "Get complete job meta and metric data",
|
||||
"summary": "Get job meta and configurable metric data",
|
||||
"parameters": [
|
||||
{
|
||||
"type": "integer",
|
||||
@ -1216,6 +1347,18 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"api.GetClustersApiResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"clusters": {
|
||||
"description": "Array of clusters",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.Cluster"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"api.GetJobApiResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@ -1311,6 +1454,40 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Accelerator": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Cluster": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metricConfig": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.MetricConfig"
|
||||
}
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"subClusters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.SubCluster"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Job": {
|
||||
"description": "Information of a HPC job.",
|
||||
"type": "object",
|
||||
@ -1709,6 +1886,44 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.MetricConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"aggregation": {
|
||||
"type": "string"
|
||||
},
|
||||
"alert": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"scope": {
|
||||
"$ref": "#/definitions/schema.MetricScope"
|
||||
},
|
||||
"subClusters": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.SubClusterConfig"
|
||||
}
|
||||
},
|
||||
"timestep": {
|
||||
"type": "integer"
|
||||
},
|
||||
"unit": {
|
||||
"$ref": "#/definitions/schema.Unit"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.MetricScope": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
@ -1744,6 +1959,17 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.MetricValue": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"$ref": "#/definitions/schema.Unit"
|
||||
},
|
||||
"value": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Resource": {
|
||||
"description": "A resource used by a job",
|
||||
"type": "object",
|
||||
@ -1824,6 +2050,64 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SubCluster": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"coresPerSocket": {
|
||||
"type": "integer"
|
||||
},
|
||||
"flopRateScalar": {
|
||||
"$ref": "#/definitions/schema.MetricValue"
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"$ref": "#/definitions/schema.MetricValue"
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"$ref": "#/definitions/schema.MetricValue"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"nodes": {
|
||||
"type": "string"
|
||||
},
|
||||
"processorType": {
|
||||
"type": "string"
|
||||
},
|
||||
"socketsPerNode": {
|
||||
"type": "integer"
|
||||
},
|
||||
"threadsPerCore": {
|
||||
"type": "integer"
|
||||
},
|
||||
"topology": {
|
||||
"$ref": "#/definitions/schema.Topology"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.SubClusterConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"alert": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"remove": {
|
||||
"type": "boolean"
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Tag": {
|
||||
"description": "Defines a tag using name and type.",
|
||||
"type": "object",
|
||||
@ -1844,6 +2128,59 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Topology": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"accelerators": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/schema.Accelerator"
|
||||
}
|
||||
},
|
||||
"core": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"die": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memoryDomain": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"node": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"socket": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"schema.Unit": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -70,6 +70,7 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||
|
||||
r.HandleFunc("/jobs/", api.getJobs).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/{id}", api.getJobById).Methods(http.MethodPost)
|
||||
r.HandleFunc("/jobs/{id}", api.getCompleteJobById).Methods(http.MethodGet)
|
||||
r.HandleFunc("/jobs/tag_job/{id}", api.tagJob).Methods(http.MethodPost, http.MethodPatch)
|
||||
r.HandleFunc("/jobs/edit_meta/{id}", api.editMeta).Methods(http.MethodPost, http.MethodPatch)
|
||||
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
|
||||
@ -77,6 +78,8 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
|
||||
r.HandleFunc("/jobs/delete_job/{id}", api.deleteJobById).Methods(http.MethodDelete)
|
||||
r.HandleFunc("/jobs/delete_job_before/{ts}", api.deleteJobBefore).Methods(http.MethodDelete)
|
||||
|
||||
r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet)
|
||||
|
||||
if api.MachineStateDir != "" {
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
|
||||
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
|
||||
@ -133,6 +136,11 @@ type GetJobsApiResponse struct {
|
||||
Page int `json:"page"` // Page id returned
|
||||
}
|
||||
|
||||
// GetClustersApiResponse model: JSON body written by the getClusters handler (GET /clusters/).
|
||||
type GetClustersApiResponse struct {
|
||||
Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
|
||||
}
|
||||
|
||||
// ErrorResponse model
|
||||
type ErrorResponse struct {
|
||||
// Statustext of Errorcode
|
||||
@ -162,6 +170,11 @@ type GetJobApiResponse struct {
|
||||
Data []*JobMetricWithName
|
||||
}
|
||||
|
||||
// GetCompleteJobApiResponse model: JSON body written by the getCompleteJobById handler (GET /jobs/{id}).
// Meta is the job found by its database ID. Data is the metric data loaded for
// that job; it is left at its zero value when the request did not ask for metrics.
type GetCompleteJobApiResponse struct {
|
||||
Meta *schema.Job
|
||||
Data schema.JobData
|
||||
}
|
||||
|
||||
type JobMetricWithName struct {
|
||||
Name string `json:"name"`
|
||||
Scope schema.MetricScope `json:"scope"`
|
||||
@ -230,6 +243,55 @@ func securedCheck(r *http.Request) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// getClusters godoc
|
||||
// @summary Lists all cluster configs
|
||||
// @tags Cluster query
|
||||
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
|
||||
// @produce json
|
||||
// @param cluster query string false "Job Cluster"
|
||||
// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /clusters/ [get]
|
||||
func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
bw := bufio.NewWriter(rw)
|
||||
defer bw.Flush()
|
||||
|
||||
var clusters []*schema.Cluster
|
||||
|
||||
if r.URL.Query().Has("cluster") {
|
||||
name := r.URL.Query().Get("cluster")
|
||||
cluster := archive.GetCluster(name)
|
||||
if cluster == nil {
|
||||
handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
clusters = append(clusters, cluster)
|
||||
} else {
|
||||
clusters = archive.Clusters
|
||||
}
|
||||
|
||||
payload := GetClustersApiResponse{
|
||||
Clusters: clusters,
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// getJobs godoc
|
||||
// @summary Lists all jobs
|
||||
// @tags Job query
|
||||
@ -348,10 +410,8 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
|
||||
res.Statistics, err = archive.GetStatistics(job)
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@ -376,14 +436,95 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// getCompleteJobById godoc
|
||||
// @summary Get complete job meta and metric data
|
||||
// @summary Get job meta and optional all metric data
|
||||
// @tags Job query
|
||||
// @description Job to get is specified by database ID
|
||||
// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||
// @produce json
|
||||
// @param id path int true "Database ID of Job"
|
||||
// @param all-metrics query bool false "Include all available metrics"
|
||||
// @success 200 {object} api.GetJobApiResponse "Job resource"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
// @failure 403 {object} api.ErrorResponse "Forbidden"
|
||||
// @failure 404 {object} api.ErrorResponse "Resource not found"
|
||||
// @failure 422 {object} api.ErrorResponse "Unprocessable Entity: finding job failed: sql: no rows in result set"
|
||||
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
|
||||
// @security ApiKeyAuth
|
||||
// @router /jobs/{id} [get]
|
||||
func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request) {
|
||||
if user := repository.GetUserFromContext(r.Context()); user != nil &&
|
||||
!user.HasRole(schema.RoleApi) {
|
||||
|
||||
handleError(fmt.Errorf("missing role: %v",
|
||||
schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch job from db
|
||||
id, ok := mux.Vars(r)["id"]
|
||||
var job *schema.Job
|
||||
var err error
|
||||
if ok {
|
||||
id, e := strconv.ParseInt(id, 10, 64)
|
||||
if e != nil {
|
||||
handleError(fmt.Errorf("integer expected in path for id: %w", e), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
|
||||
job, err = api.JobRepository.FindById(id)
|
||||
} else {
|
||||
handleError(errors.New("the parameter 'id' is required"), http.StatusBadRequest, rw)
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
handleError(fmt.Errorf("finding job failed: %w", err), http.StatusUnprocessableEntity, rw)
|
||||
return
|
||||
}
|
||||
|
||||
var scopes []schema.MetricScope
|
||||
|
||||
if job.NumNodes == 1 {
|
||||
scopes = []schema.MetricScope{"core"}
|
||||
} else {
|
||||
scopes = []schema.MetricScope{"node"}
|
||||
}
|
||||
|
||||
var data schema.JobData
|
||||
|
||||
if r.URL.Query().Has("all-metrics") {
|
||||
data, err = metricdata.LoadData(job, nil, scopes, r.Context())
|
||||
if err != nil {
|
||||
log.Warn("Error while loading job data")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
log.Debugf("/api/job/%s: get job %d", id, job.JobID)
|
||||
rw.Header().Add("Content-Type", "application/json")
|
||||
bw := bufio.NewWriter(rw)
|
||||
defer bw.Flush()
|
||||
|
||||
payload := GetCompleteJobApiResponse{
|
||||
Meta: job,
|
||||
Data: data,
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(bw).Encode(payload); err != nil {
|
||||
handleError(err, http.StatusInternalServerError, rw)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// getJobById godoc
|
||||
// @summary Get job meta and configurable metric data
|
||||
// @tags Job query
|
||||
// @description Job to get is specified by database ID
|
||||
// @description Returns full job resource information according to 'JobMeta' scheme and all metrics according to 'JobData'.
|
||||
// @accept json
|
||||
// @produce json
|
||||
// @param id path int true "Database ID of Job"
|
||||
// @param request body api.GetJobApiRequest true "Array of metric names"
|
||||
// @param id path int true "Database ID of Job"
|
||||
// @param request body api.GetJobApiRequest true "Array of metric names"
|
||||
// @success 200 {object} api.GetJobApiResponse "Job resource"
|
||||
// @failure 400 {object} api.ErrorResponse "Bad Request"
|
||||
// @failure 401 {object} api.ErrorResponse "Unauthorized"
|
||||
|
Loading…
Reference in New Issue
Block a user