mirror of
https://github.com/ClusterCockpit/cc-specifications.git
synced 2024-12-26 13:29:05 +01:00
Restructure repo
This commit is contained in:
parent
403f74ddee
commit
d762e3e52b
4
datastructures/README.md
Normal file
4
datastructures/README.md
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
## Generic database specification
|
||||||
|
|
||||||
|
This collection of data structure descriptions is intended to be used
|
||||||
|
as data structures in applications, payloads in APIs, and file formats.
|
3
interfaces/graphql/README.md
Normal file
3
interfaces/graphql/README.md
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
## GraphQL Schema
|
||||||
|
|
||||||
|
This schema is intended for communication between web-frontend and web-backend.
|
261
interfaces/graphql/schema.graphqls
Normal file
261
interfaces/graphql/schema.graphqls
Normal file
@ -0,0 +1,261 @@
|
|||||||
|
scalar Time
|
||||||
|
scalar Any
|
||||||
|
|
||||||
|
scalar NullableFloat
|
||||||
|
scalar MetricScope
|
||||||
|
scalar JobState
|
||||||
|
|
||||||
|
type Job {
|
||||||
|
id: ID!
|
||||||
|
jobId: Int!
|
||||||
|
user: String!
|
||||||
|
project: String!
|
||||||
|
cluster: String!
|
||||||
|
subCluster: String!
|
||||||
|
startTime: Time!
|
||||||
|
duration: Int!
|
||||||
|
walltime: Int!
|
||||||
|
numNodes: Int!
|
||||||
|
numHWThreads: Int!
|
||||||
|
numAcc: Int!
|
||||||
|
SMT: Int!
|
||||||
|
exclusive: Int!
|
||||||
|
partition: String!
|
||||||
|
arrayJobId: Int!
|
||||||
|
monitoringStatus: Int!
|
||||||
|
state: JobState!
|
||||||
|
tags: [Tag!]!
|
||||||
|
resources: [Resource!]!
|
||||||
|
|
||||||
|
metaData: Any
|
||||||
|
userData: User
|
||||||
|
}
|
||||||
|
|
||||||
|
type Cluster {
|
||||||
|
name: String!
|
||||||
|
partitions: [String!]! # Slurm partitions
|
||||||
|
metricConfig: [MetricConfig!]!
|
||||||
|
filterRanges: FilterRanges!
|
||||||
|
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||||
|
}
|
||||||
|
|
||||||
|
type SubCluster {
|
||||||
|
name: String!
|
||||||
|
nodes: String!
|
||||||
|
processorType: String!
|
||||||
|
socketsPerNode: Int!
|
||||||
|
coresPerSocket: Int!
|
||||||
|
threadsPerCore: Int!
|
||||||
|
flopRateScalar: Int!
|
||||||
|
flopRateSimd: Int!
|
||||||
|
memoryBandwidth: Int!
|
||||||
|
topology: Topology!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Topology {
|
||||||
|
node: [Int!]
|
||||||
|
socket: [[Int!]!]
|
||||||
|
memoryDomain: [[Int!]!]
|
||||||
|
die: [[Int!]!]
|
||||||
|
core: [[Int!]!]
|
||||||
|
accelerators: [Accelerator!]
|
||||||
|
}
|
||||||
|
|
||||||
|
type Accelerator {
|
||||||
|
id: String!
|
||||||
|
type: String!
|
||||||
|
model: String!
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricConfig {
|
||||||
|
name: String!
|
||||||
|
unit: String!
|
||||||
|
scope: MetricScope!
|
||||||
|
timestep: Int!
|
||||||
|
peak: Float!
|
||||||
|
normal: Float!
|
||||||
|
caution: Float!
|
||||||
|
alert: Float!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Tag {
|
||||||
|
id: ID!
|
||||||
|
type: String!
|
||||||
|
name: String!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Resource {
|
||||||
|
hostname: String!
|
||||||
|
hwthreads: [Int!]
|
||||||
|
accelerators: [Int!]
|
||||||
|
configuration: String
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobMetricWithName {
|
||||||
|
name: String!
|
||||||
|
metric: JobMetric!
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobMetric {
|
||||||
|
unit: String!
|
||||||
|
scope: MetricScope!
|
||||||
|
timestep: Int!
|
||||||
|
series: [Series!]
|
||||||
|
statisticsSeries: StatsSeries
|
||||||
|
}
|
||||||
|
|
||||||
|
type Series {
|
||||||
|
hostname: String!
|
||||||
|
id: Int
|
||||||
|
statistics: MetricStatistics
|
||||||
|
data: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricStatistics {
|
||||||
|
avg: Float!
|
||||||
|
min: Float!
|
||||||
|
max: Float!
|
||||||
|
}
|
||||||
|
|
||||||
|
type StatsSeries {
|
||||||
|
mean: [NullableFloat!]!
|
||||||
|
min: [NullableFloat!]!
|
||||||
|
max: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type MetricFootprints {
|
||||||
|
metric: String!
|
||||||
|
data: [NullableFloat!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Footprints {
|
||||||
|
nodehours: [NullableFloat!]!
|
||||||
|
metrics: [MetricFootprints!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
# Grouping key accepted by the `groupBy` argument of the `jobsStatistics`
# and `jobsCount` queries.
enum Aggregate { USER, PROJECT, CLUSTER }
|
||||||
|
|
||||||
|
type NodeMetrics {
|
||||||
|
host: String!
|
||||||
|
metrics: [JobMetricWithName!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Count {
|
||||||
|
name: String!
|
||||||
|
count: Int!
|
||||||
|
}
|
||||||
|
|
||||||
|
type User {
|
||||||
|
username: String!
|
||||||
|
name: String!
|
||||||
|
email: String!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Query {
|
||||||
|
clusters: [Cluster!]! # List of all clusters
|
||||||
|
tags: [Tag!]! # List of all tags
|
||||||
|
|
||||||
|
user(username: String!): User
|
||||||
|
|
||||||
|
job(id: ID!): Job
|
||||||
|
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||||
|
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||||
|
|
||||||
|
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||||
|
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
|
||||||
|
jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, limit: Int): [Count!]!
|
||||||
|
|
||||||
|
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
||||||
|
|
||||||
|
nodeMetrics(cluster: String!, partition: String, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
||||||
|
}
|
||||||
|
|
||||||
|
type Mutation {
|
||||||
|
createTag(type: String!, name: String!): Tag!
|
||||||
|
deleteTag(id: ID!): ID!
|
||||||
|
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
|
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||||
|
|
||||||
|
updateConfiguration(name: String!, value: String!): String
|
||||||
|
}
|
||||||
|
|
||||||
|
# Integer range returned by the server; used by `FilterRanges.duration` and
# `FilterRanges.numNodes`. Both bounds are always present.
type IntRangeOutput { from: Int!, to: Int! }
|
||||||
|
# Time range returned by the server; used by `FilterRanges.startTime`.
# Both bounds are always present.
type TimeRangeOutput { from: Time!, to: Time! }
|
||||||
|
|
||||||
|
type FilterRanges {
|
||||||
|
duration: IntRangeOutput!
|
||||||
|
numNodes: IntRangeOutput!
|
||||||
|
startTime: TimeRangeOutput!
|
||||||
|
}
|
||||||
|
|
||||||
|
input JobFilter {
|
||||||
|
tags: [ID!]
|
||||||
|
jobId: StringInput
|
||||||
|
arrayJobId: Int
|
||||||
|
user: StringInput
|
||||||
|
project: StringInput
|
||||||
|
cluster: StringInput
|
||||||
|
partition: StringInput
|
||||||
|
duration: IntRange
|
||||||
|
|
||||||
|
minRunningFor: Int
|
||||||
|
|
||||||
|
numNodes: IntRange
|
||||||
|
numAccelerators: IntRange
|
||||||
|
numHWThreads: IntRange
|
||||||
|
|
||||||
|
startTime: TimeRange
|
||||||
|
state: [JobState!]
|
||||||
|
flopsAnyAvg: FloatRange
|
||||||
|
memBwAvg: FloatRange
|
||||||
|
loadAvg: FloatRange
|
||||||
|
memUsedMax: FloatRange
|
||||||
|
}
|
||||||
|
|
||||||
|
input OrderByInput {
|
||||||
|
field: String!
|
||||||
|
order: SortDirectionEnum! = ASC
|
||||||
|
}
|
||||||
|
|
||||||
|
enum SortDirectionEnum {
|
||||||
|
DESC
|
||||||
|
ASC
|
||||||
|
}
|
||||||
|
|
||||||
|
input StringInput {
|
||||||
|
eq: String
|
||||||
|
contains: String
|
||||||
|
startsWith: String
|
||||||
|
endsWith: String
|
||||||
|
}
|
||||||
|
|
||||||
|
# Integer range filter used by `JobFilter` (`duration`, `numNodes`,
# `numAccelerators`, `numHWThreads`). Both bounds are required.
input IntRange { from: Int!, to: Int! }
|
||||||
|
# Floating-point range filter used by `JobFilter` (`flopsAnyAvg`, `memBwAvg`,
# `loadAvg`, `memUsedMax`). Both bounds are required.
input FloatRange { from: Float!, to: Float! }
|
||||||
|
# Time range filter used by `JobFilter.startTime`. Unlike `IntRange` and
# `FloatRange`, both bounds are nullable.
# NOTE(review): presumably a null bound means "unbounded on that side" —
# confirm against the resolver.
input TimeRange { from: Time, to: Time }
|
||||||
|
|
||||||
|
type JobResultList {
|
||||||
|
items: [Job!]!
|
||||||
|
offset: Int
|
||||||
|
limit: Int
|
||||||
|
count: Int
|
||||||
|
}
|
||||||
|
|
||||||
|
type HistoPoint {
|
||||||
|
count: Int!
|
||||||
|
value: Int!
|
||||||
|
}
|
||||||
|
|
||||||
|
type JobsStatistics {
|
||||||
|
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
||||||
|
totalJobs: Int! # Number of jobs that matched
|
||||||
|
shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
|
||||||
|
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||||
|
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
||||||
|
histWalltime: [HistoPoint!]! # value: duration in hours, count: number of jobs whose rounded duration equals value
|
||||||
|
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||||
|
}
|
||||||
|
|
||||||
|
input PageRequest {
|
||||||
|
itemsPerPage: Int!
|
||||||
|
page: Int!
|
||||||
|
}
|
1
interfaces/rest/README.md
Normal file
1
interfaces/rest/README.md
Normal file
@ -0,0 +1 @@
|
|||||||
|
## REST API interfaces
|
221
interfaces/rest/openapi.yaml
Normal file
221
interfaces/rest/openapi.yaml
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
#
|
||||||
|
# ClusterCockpit's API spec can be exported via:
|
||||||
|
# docker exec -it cc-php php bin/console api:openapi:export --yaml
|
||||||
|
#
|
||||||
|
# This spec is written by hand and hopefully up to date with the API.
|
||||||
|
#
|
||||||
|
|
||||||
|
openapi: 3.0.3
|
||||||
|
info:
|
||||||
|
title: 'ClusterCockpit REST API'
|
||||||
|
description: 'API for batch job control'
|
||||||
|
version: 0.0.2
|
||||||
|
servers:
|
||||||
|
- url: /
|
||||||
|
description: ''
|
||||||
|
paths:
|
||||||
|
'/api/jobs/':
|
||||||
|
get:
|
||||||
|
operationId: 'getJobs'
|
||||||
|
summary: 'List all jobs'
|
||||||
|
description: 'Get a list of all jobs. Filters can be applied using query parameters.'
|
||||||
|
parameters:
|
||||||
|
- name: state
|
||||||
|
in: query
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
|
||||||
|
- name: cluster
|
||||||
|
in: query
|
||||||
|
schema: { type: string }
|
||||||
|
- name: start-time
|
||||||
|
description: 'Syntax: "<from>-<to>", where <from> and <to> are unix timestamps in seconds'
|
||||||
|
in: query
|
||||||
|
schema: { type: string }
|
||||||
|
- name: page
|
||||||
|
in: query
|
||||||
|
schema: { type: integer }
|
||||||
|
- name: items-per-page
|
||||||
|
in: query
|
||||||
|
schema: { type: integer }
|
||||||
|
- name: with-metadata
|
||||||
|
in: query
|
||||||
|
schema: { type: boolean }
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Array of jobs'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
jobs:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
'/api/jobs/tag_job/{id}':
|
||||||
|
post:
|
||||||
|
operationId: 'tagJob'
|
||||||
|
summary: 'Add a tag to a job'
|
||||||
|
parameters:
|
||||||
|
- name: id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema: { type: integer }
|
||||||
|
description: 'Job ID'
|
||||||
|
requestBody:
|
||||||
|
description: 'Array of tags to add'
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/Tag'
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Job resource'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
404:
|
||||||
|
description: 'Job or tag does not exist'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
'/api/jobs/start_job/':
|
||||||
|
post:
|
||||||
|
operationId: 'startJob'
|
||||||
|
summary: 'Add a newly started job'
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
responses:
|
||||||
|
201:
|
||||||
|
description: 'Job successfully added'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: integer
|
||||||
|
description: 'The database ID assigned to this job'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
422:
|
||||||
|
description: 'The combination of jobId, clusterId and startTime already exists'
|
||||||
|
'/api/jobs/stop_job/':
|
||||||
|
post:
|
||||||
|
operationId: stopJobViaJobID
|
||||||
|
summary: 'Mark a job as stopped. Which job to stop is specified by the request body.'
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
required: [jobId, cluster, stopTime, jobState]
|
||||||
|
properties:
|
||||||
|
jobId: { type: integer }
|
||||||
|
cluster: { type: string }
|
||||||
|
startTime: { type: integer }
|
||||||
|
stopTime: { type: integer }
|
||||||
|
jobState:
|
||||||
|
type: string
|
||||||
|
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Job resource'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
404:
|
||||||
|
description: 'Resource not found'
|
||||||
|
'/api/jobs/stop_job/{id}':
|
||||||
|
post:
|
||||||
|
operationId: 'stopJobViaDBID'
|
||||||
|
summary: 'Mark a job as stopped.'
|
||||||
|
parameters:
|
||||||
|
- name: id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema: { type: integer }
|
||||||
|
description: 'Database ID (Resource Identifier)'
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
required: [stopTime, jobState]
|
||||||
|
properties:
|
||||||
|
stopTime: { type: integer }
|
||||||
|
jobState:
|
||||||
|
type: string
|
||||||
|
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Job resource'
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Job'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
404:
|
||||||
|
description: 'Resource not found'
|
||||||
|
'/api/jobs/import/':
|
||||||
|
post:
|
||||||
|
operationId: 'importJob'
|
||||||
|
summary: 'Imports a job and its metric data'
|
||||||
|
requestBody:
|
||||||
|
required: true
|
||||||
|
content:
|
||||||
|
'application/json':
|
||||||
|
schema:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
meta:
|
||||||
|
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
|
||||||
|
data:
|
||||||
|
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-data.schema.json
|
||||||
|
responses:
|
||||||
|
200:
|
||||||
|
description: 'Import successful'
|
||||||
|
400:
|
||||||
|
description: 'Bad request'
|
||||||
|
422:
|
||||||
|
description: 'Unprocessable Entity'
|
||||||
|
components:
|
||||||
|
schemas:
|
||||||
|
Tag:
|
||||||
|
description: 'A job tag'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
description: 'Database ID'
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
description: 'Tag type'
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
description: 'Tag name'
|
||||||
|
Job:
|
||||||
|
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
|
||||||
|
securitySchemes:
|
||||||
|
bearerAuth:
|
||||||
|
type: http
|
||||||
|
scheme: bearer
|
||||||
|
bearerFormat: JWT
|
||||||
|
security:
|
||||||
|
- bearerAuth: [] # Applies `bearerAuth` globally
|
@ -1,35 +0,0 @@
|
|||||||
# Overview
|
|
||||||
|
|
||||||
ClusterCockpit uses the InfluxData line-protocol for collecting the node metric
|
|
||||||
data.
|
|
||||||
|
|
||||||
```
|
|
||||||
<measurement>,<tag set> <field set> <timestamp [s]>
|
|
||||||
```
|
|
||||||
|
|
||||||
Supported measurements:
|
|
||||||
* node – Tags: host
|
|
||||||
* socket – Tags: host, socket
|
|
||||||
* cpu – Tags: host, cpu
|
|
||||||
|
|
||||||
## Supported node level fields
|
|
||||||
|
|
||||||
* `load`
|
|
||||||
* `mem_used`
|
|
||||||
* `net_bw` - split into `ib_bw` and `eth_bw` if required
|
|
||||||
* `file_bw` - split into multiple file systems if required
|
|
||||||
|
|
||||||
## Supported socket fields
|
|
||||||
|
|
||||||
All socket metrics can be aggregated to coarser granularity.
|
|
||||||
|
|
||||||
* `power`
|
|
||||||
* `mem_bw`
|
|
||||||
|
|
||||||
## Supported cpu level fields
|
|
||||||
|
|
||||||
All cpu metrics can be aggregated to coarser granularity.
|
|
||||||
|
|
||||||
* `ipc`
|
|
||||||
* `flops_any`
|
|
||||||
* `clock`
|
|
0
schemas/README.md
Normal file
0
schemas/README.md
Normal file
Loading…
Reference in New Issue
Block a user