feat: Add cluster config endpoint to rest api

This commit is contained in:
Jan Eitzinger 2024-03-08 16:35:30 +01:00
parent 9fd839fad8
commit 99d55f05f8
4 changed files with 756 additions and 4 deletions

View File

@ -17,6 +17,63 @@
"host": "localhost:8080",
"basePath": "/api",
"paths": {
"/clusters/": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Get a list of all cluster configs. Specific cluster can be requested using query parameter.",
"produces": [
"application/json"
],
"tags": [
"Cluster query"
],
"summary": "Lists all cluster configs",
"parameters": [
{
"type": "string",
"description": "Job Cluster",
"name": "cluster",
"in": "query"
}
],
"responses": {
"200": {
"description": "Array of clusters",
"schema": {
"$ref": "#/definitions/api.GetClustersApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/": {
"get": {
"security": [
@ -1284,6 +1341,18 @@
}
}
},
"api.GetClustersApiResponse": {
"type": "object",
"properties": {
"clusters": {
"description": "Array of clusters",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Cluster"
}
}
}
},
"api.GetJobApiResponse": {
"type": "object",
"properties": {
@ -1379,6 +1448,40 @@
}
}
},
"schema.Accelerator": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"type": {
"type": "string"
}
}
},
"schema.Cluster": {
"type": "object",
"properties": {
"metricConfig": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.MetricConfig"
}
},
"name": {
"type": "string"
},
"subClusters": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.SubCluster"
}
}
}
},
"schema.Job": {
"description": "Information of a HPC job.",
"type": "object",
@ -1777,6 +1880,44 @@
}
}
},
"schema.MetricConfig": {
"type": "object",
"properties": {
"aggregation": {
"type": "string"
},
"alert": {
"type": "number"
},
"caution": {
"type": "number"
},
"name": {
"type": "string"
},
"normal": {
"type": "number"
},
"peak": {
"type": "number"
},
"scope": {
"$ref": "#/definitions/schema.MetricScope"
},
"subClusters": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.SubClusterConfig"
}
},
"timestep": {
"type": "integer"
},
"unit": {
"$ref": "#/definitions/schema.Unit"
}
}
},
"schema.MetricScope": {
"type": "string",
"enum": [
@ -1812,6 +1953,17 @@
}
}
},
"schema.MetricValue": {
"type": "object",
"properties": {
"unit": {
"$ref": "#/definitions/schema.Unit"
},
"value": {
"type": "number"
}
}
},
"schema.Resource": {
"description": "A resource used by a job",
"type": "object",
@ -1892,6 +2044,64 @@
}
}
},
"schema.SubCluster": {
"type": "object",
"properties": {
"coresPerSocket": {
"type": "integer"
},
"flopRateScalar": {
"$ref": "#/definitions/schema.MetricValue"
},
"flopRateSimd": {
"$ref": "#/definitions/schema.MetricValue"
},
"memoryBandwidth": {
"$ref": "#/definitions/schema.MetricValue"
},
"name": {
"type": "string"
},
"nodes": {
"type": "string"
},
"processorType": {
"type": "string"
},
"socketsPerNode": {
"type": "integer"
},
"threadsPerCore": {
"type": "integer"
},
"topology": {
"$ref": "#/definitions/schema.Topology"
}
}
},
"schema.SubClusterConfig": {
"type": "object",
"properties": {
"alert": {
"type": "number"
},
"caution": {
"type": "number"
},
"name": {
"type": "string"
},
"normal": {
"type": "number"
},
"peak": {
"type": "number"
},
"remove": {
"type": "boolean"
}
}
},
"schema.Tag": {
"description": "Defines a tag using name and type.",
"type": "object",
@ -1912,6 +2122,59 @@
}
}
},
"schema.Topology": {
"type": "object",
"properties": {
"accelerators": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Accelerator"
}
},
"core": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"die": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"memoryDomain": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"node": {
"type": "array",
"items": {
"type": "integer"
}
},
"socket": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
}
}
},
"schema.Unit": {
"type": "object",
"properties": {

View File

@ -68,6 +68,14 @@ definitions:
description: Statustext of Errorcode
type: string
type: object
api.GetClustersApiResponse:
properties:
clusters:
description: Array of clusters
items:
$ref: '#/definitions/schema.Cluster'
type: array
type: object
api.GetJobApiResponse:
properties:
data:
@ -133,6 +141,28 @@ definitions:
- jobState
- stopTime
type: object
schema.Accelerator:
properties:
id:
type: string
model:
type: string
type:
type: string
type: object
schema.Cluster:
properties:
metricConfig:
items:
$ref: '#/definitions/schema.MetricConfig'
type: array
name:
type: string
subClusters:
items:
$ref: '#/definitions/schema.SubCluster'
type: array
type: object
schema.Job:
description: Information of a HPC job.
properties:
@ -448,6 +478,31 @@ definitions:
unit:
$ref: '#/definitions/schema.Unit'
type: object
schema.MetricConfig:
properties:
aggregation:
type: string
alert:
type: number
caution:
type: number
name:
type: string
normal:
type: number
peak:
type: number
scope:
$ref: '#/definitions/schema.MetricScope'
subClusters:
items:
$ref: '#/definitions/schema.SubClusterConfig'
type: array
timestep:
type: integer
unit:
$ref: '#/definitions/schema.Unit'
type: object
schema.MetricScope:
enum:
- invalid_scope
@ -475,6 +530,13 @@ definitions:
min:
type: number
type: object
schema.MetricValue:
properties:
unit:
$ref: '#/definitions/schema.Unit'
value:
type: number
type: object
schema.Resource:
description: A resource used by a job
properties:
@ -529,6 +591,44 @@ definitions:
type: array
type: object
type: object
schema.SubCluster:
properties:
coresPerSocket:
type: integer
flopRateScalar:
$ref: '#/definitions/schema.MetricValue'
flopRateSimd:
$ref: '#/definitions/schema.MetricValue'
memoryBandwidth:
$ref: '#/definitions/schema.MetricValue'
name:
type: string
nodes:
type: string
processorType:
type: string
socketsPerNode:
type: integer
threadsPerCore:
type: integer
topology:
$ref: '#/definitions/schema.Topology'
type: object
schema.SubClusterConfig:
properties:
alert:
type: number
caution:
type: number
name:
type: string
normal:
type: number
peak:
type: number
remove:
type: boolean
type: object
schema.Tag:
description: Defines a tag using name and type.
properties:
@ -544,6 +644,41 @@ definitions:
example: Debug
type: string
type: object
schema.Topology:
properties:
accelerators:
items:
$ref: '#/definitions/schema.Accelerator'
type: array
core:
items:
items:
type: integer
type: array
type: array
die:
items:
items:
type: integer
type: array
type: array
memoryDomain:
items:
items:
type: integer
type: array
type: array
node:
items:
type: integer
type: array
socket:
items:
items:
type: integer
type: array
type: array
type: object
schema.Unit:
properties:
base:
@ -564,6 +699,43 @@ info:
title: ClusterCockpit REST API
version: 1.0.0
paths:
/clusters/:
get:
description: Get a list of all cluster configs. Specific cluster can be requested
using query parameter.
parameters:
- description: Job Cluster
in: query
name: cluster
type: string
produces:
- application/json
responses:
"200":
description: Array of clusters
schema:
$ref: '#/definitions/api.GetClustersApiResponse'
"400":
description: Bad Request
schema:
$ref: '#/definitions/api.ErrorResponse'
"401":
description: Unauthorized
schema:
$ref: '#/definitions/api.ErrorResponse'
"403":
description: Forbidden
schema:
$ref: '#/definitions/api.ErrorResponse'
"500":
description: Internal Server Error
schema:
$ref: '#/definitions/api.ErrorResponse'
security:
- ApiKeyAuth: []
summary: Lists all cluster configs
tags:
- Cluster query
/jobs/:
get:
description: |-

View File

@ -23,6 +23,63 @@ const docTemplate = `{
"host": "{{.Host}}",
"basePath": "{{.BasePath}}",
"paths": {
"/clusters/": {
"get": {
"security": [
{
"ApiKeyAuth": []
}
],
"description": "Get a list of all cluster configs. Specific cluster can be requested using query parameter.",
"produces": [
"application/json"
],
"tags": [
"Cluster query"
],
"summary": "Lists all cluster configs",
"parameters": [
{
"type": "string",
"description": "Job Cluster",
"name": "cluster",
"in": "query"
}
],
"responses": {
"200": {
"description": "Array of clusters",
"schema": {
"$ref": "#/definitions/api.GetClustersApiResponse"
}
},
"400": {
"description": "Bad Request",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"401": {
"description": "Unauthorized",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"403": {
"description": "Forbidden",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"$ref": "#/definitions/api.ErrorResponse"
}
}
}
}
},
"/jobs/": {
"get": {
"security": [
@ -1290,6 +1347,18 @@ const docTemplate = `{
}
}
},
"api.GetClustersApiResponse": {
"type": "object",
"properties": {
"clusters": {
"description": "Array of clusters",
"type": "array",
"items": {
"$ref": "#/definitions/schema.Cluster"
}
}
}
},
"api.GetJobApiResponse": {
"type": "object",
"properties": {
@ -1385,6 +1454,40 @@ const docTemplate = `{
}
}
},
"schema.Accelerator": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"model": {
"type": "string"
},
"type": {
"type": "string"
}
}
},
"schema.Cluster": {
"type": "object",
"properties": {
"metricConfig": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.MetricConfig"
}
},
"name": {
"type": "string"
},
"subClusters": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.SubCluster"
}
}
}
},
"schema.Job": {
"description": "Information of a HPC job.",
"type": "object",
@ -1783,6 +1886,44 @@ const docTemplate = `{
}
}
},
"schema.MetricConfig": {
"type": "object",
"properties": {
"aggregation": {
"type": "string"
},
"alert": {
"type": "number"
},
"caution": {
"type": "number"
},
"name": {
"type": "string"
},
"normal": {
"type": "number"
},
"peak": {
"type": "number"
},
"scope": {
"$ref": "#/definitions/schema.MetricScope"
},
"subClusters": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.SubClusterConfig"
}
},
"timestep": {
"type": "integer"
},
"unit": {
"$ref": "#/definitions/schema.Unit"
}
}
},
"schema.MetricScope": {
"type": "string",
"enum": [
@ -1818,6 +1959,17 @@ const docTemplate = `{
}
}
},
"schema.MetricValue": {
"type": "object",
"properties": {
"unit": {
"$ref": "#/definitions/schema.Unit"
},
"value": {
"type": "number"
}
}
},
"schema.Resource": {
"description": "A resource used by a job",
"type": "object",
@ -1898,6 +2050,64 @@ const docTemplate = `{
}
}
},
"schema.SubCluster": {
"type": "object",
"properties": {
"coresPerSocket": {
"type": "integer"
},
"flopRateScalar": {
"$ref": "#/definitions/schema.MetricValue"
},
"flopRateSimd": {
"$ref": "#/definitions/schema.MetricValue"
},
"memoryBandwidth": {
"$ref": "#/definitions/schema.MetricValue"
},
"name": {
"type": "string"
},
"nodes": {
"type": "string"
},
"processorType": {
"type": "string"
},
"socketsPerNode": {
"type": "integer"
},
"threadsPerCore": {
"type": "integer"
},
"topology": {
"$ref": "#/definitions/schema.Topology"
}
}
},
"schema.SubClusterConfig": {
"type": "object",
"properties": {
"alert": {
"type": "number"
},
"caution": {
"type": "number"
},
"name": {
"type": "string"
},
"normal": {
"type": "number"
},
"peak": {
"type": "number"
},
"remove": {
"type": "boolean"
}
}
},
"schema.Tag": {
"description": "Defines a tag using name and type.",
"type": "object",
@ -1918,6 +2128,59 @@ const docTemplate = `{
}
}
},
"schema.Topology": {
"type": "object",
"properties": {
"accelerators": {
"type": "array",
"items": {
"$ref": "#/definitions/schema.Accelerator"
}
},
"core": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"die": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"memoryDomain": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
},
"node": {
"type": "array",
"items": {
"type": "integer"
}
},
"socket": {
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer"
}
}
}
}
},
"schema.Unit": {
"type": "object",
"properties": {

View File

@ -78,6 +78,8 @@ func (api *RestApi) MountRoutes(r *mux.Router) {
r.HandleFunc("/jobs/delete_job/{id}", api.deleteJobById).Methods(http.MethodDelete)
r.HandleFunc("/jobs/delete_job_before/{ts}", api.deleteJobBefore).Methods(http.MethodDelete)
r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet)
if api.MachineStateDir != "" {
r.HandleFunc("/machine_state/{cluster}/{host}", api.getMachineState).Methods(http.MethodGet)
r.HandleFunc("/machine_state/{cluster}/{host}", api.putMachineState).Methods(http.MethodPut, http.MethodPost)
@ -134,6 +136,11 @@ type GetJobsApiResponse struct {
Page int `json:"page"` // Page id returned
}
// GetClustersApiResponse model
type GetClustersApiResponse struct {
Clusters []*schema.Cluster `json:"clusters"` // Array of clusters
}
// ErrorResponse model
type ErrorResponse struct {
// Statustext of Errorcode
@ -236,6 +243,55 @@ func securedCheck(r *http.Request) error {
return nil
}
// getClusters godoc
// @summary Lists all cluster configs
// @tags Cluster query
// @description Get a list of all cluster configs. Specific cluster can be requested using query parameter.
// @produce json
// @param cluster query string false "Job Cluster"
// @success 200 {object} api.GetClustersApiResponse "Array of clusters"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /clusters/ [get]
func (api *RestApi) getClusters(rw http.ResponseWriter, r *http.Request) {
if user := repository.GetUserFromContext(r.Context()); user != nil &&
!user.HasRole(schema.RoleApi) {
handleError(fmt.Errorf("missing role: %v", schema.GetRoleString(schema.RoleApi)), http.StatusForbidden, rw)
return
}
rw.Header().Add("Content-Type", "application/json")
bw := bufio.NewWriter(rw)
defer bw.Flush()
var clusters []*schema.Cluster
if r.URL.Query().Has("cluster") {
name := r.URL.Query().Get("cluster")
cluster := archive.GetCluster(name)
if cluster == nil {
handleError(fmt.Errorf("unknown cluster: %s", name), http.StatusBadRequest, rw)
return
}
clusters = append(clusters, cluster)
} else {
clusters = archive.Clusters
}
payload := GetClustersApiResponse{
Clusters: clusters,
}
if err := json.NewEncoder(bw).Encode(payload); err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
}
// getJobs godoc
// @summary Lists all jobs
// @tags Job query
@ -354,10 +410,8 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
if res.MonitoringStatus == schema.MonitoringStatusArchivingSuccessful {
res.Statistics, err = archive.GetStatistics(job)
if err != nil {
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
handleError(err, http.StatusInternalServerError, rw)
return
}
}