mirror of
https://github.com/ClusterCockpit/cc-specifications.git
synced 2024-12-26 05:19:05 +01:00
Restructure repo
This commit is contained in:
parent
403f74ddee
commit
d762e3e52b
4
datastructures/README.md
Normal file
4
datastructures/README.md
Normal file
@ -0,0 +1,4 @@
|
||||
## Generic database specification
|
||||
|
||||
This collection of datastructure descriptions is intended to be used
|
||||
as datastructures in applications, payloads in APIs, and file formats.
|
3
interfaces/graphql/README.md
Normal file
3
interfaces/graphql/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
## GraphQL Schema
|
||||
|
||||
This schema is intended for communication between web-frontend and web-backend.
|
261
interfaces/graphql/schema.graphqls
Normal file
261
interfaces/graphql/schema.graphqls
Normal file
@ -0,0 +1,261 @@
|
||||
scalar Time
|
||||
scalar Any
|
||||
|
||||
scalar NullableFloat
|
||||
scalar MetricScope
|
||||
scalar JobState
|
||||
|
||||
type Job {
|
||||
id: ID!
|
||||
jobId: Int!
|
||||
user: String!
|
||||
project: String!
|
||||
cluster: String!
|
||||
subCluster: String!
|
||||
startTime: Time!
|
||||
duration: Int!
|
||||
walltime: Int!
|
||||
numNodes: Int!
|
||||
numHWThreads: Int!
|
||||
numAcc: Int!
|
||||
SMT: Int!
|
||||
exclusive: Int!
|
||||
partition: String!
|
||||
arrayJobId: Int!
|
||||
monitoringStatus: Int!
|
||||
state: JobState!
|
||||
tags: [Tag!]!
|
||||
resources: [Resource!]!
|
||||
|
||||
metaData: Any
|
||||
userData: User
|
||||
}
|
||||
|
||||
type Cluster {
|
||||
name: String!
|
||||
partitions: [String!]! # Slurm partitions
|
||||
metricConfig: [MetricConfig!]!
|
||||
filterRanges: FilterRanges!
|
||||
subClusters: [SubCluster!]! # Hardware partitions/subclusters
|
||||
}
|
||||
|
||||
type SubCluster {
|
||||
name: String!
|
||||
nodes: String!
|
||||
processorType: String!
|
||||
socketsPerNode: Int!
|
||||
coresPerSocket: Int!
|
||||
threadsPerCore: Int!
|
||||
flopRateScalar: Int!
|
||||
flopRateSimd: Int!
|
||||
memoryBandwidth: Int!
|
||||
topology: Topology!
|
||||
}
|
||||
|
||||
type Topology {
|
||||
node: [Int!]
|
||||
socket: [[Int!]!]
|
||||
memoryDomain: [[Int!]!]
|
||||
die: [[Int!]!]
|
||||
core: [[Int!]!]
|
||||
accelerators: [Accelerator!]
|
||||
}
|
||||
|
||||
type Accelerator {
|
||||
id: String!
|
||||
type: String!
|
||||
model: String!
|
||||
}
|
||||
|
||||
type MetricConfig {
|
||||
name: String!
|
||||
unit: String!
|
||||
scope: MetricScope!
|
||||
timestep: Int!
|
||||
peak: Float!
|
||||
normal: Float!
|
||||
caution: Float!
|
||||
alert: Float!
|
||||
}
|
||||
|
||||
type Tag {
|
||||
id: ID!
|
||||
type: String!
|
||||
name: String!
|
||||
}
|
||||
|
||||
type Resource {
|
||||
hostname: String!
|
||||
hwthreads: [Int!]
|
||||
accelerators: [Int!]
|
||||
configuration: String
|
||||
}
|
||||
|
||||
type JobMetricWithName {
|
||||
name: String!
|
||||
metric: JobMetric!
|
||||
}
|
||||
|
||||
type JobMetric {
|
||||
unit: String!
|
||||
scope: MetricScope!
|
||||
timestep: Int!
|
||||
series: [Series!]
|
||||
statisticsSeries: StatsSeries
|
||||
}
|
||||
|
||||
type Series {
|
||||
hostname: String!
|
||||
id: Int
|
||||
statistics: MetricStatistics
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type MetricStatistics {
|
||||
avg: Float!
|
||||
min: Float!
|
||||
max: Float!
|
||||
}
|
||||
|
||||
type StatsSeries {
|
||||
mean: [NullableFloat!]!
|
||||
min: [NullableFloat!]!
|
||||
max: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type MetricFootprints {
|
||||
metric: String!
|
||||
data: [NullableFloat!]!
|
||||
}
|
||||
|
||||
type Footprints {
|
||||
nodehours: [NullableFloat!]!
|
||||
metrics: [MetricFootprints!]!
|
||||
}
|
||||
|
||||
enum Aggregate { USER, PROJECT, CLUSTER }
|
||||
|
||||
type NodeMetrics {
|
||||
host: String!
|
||||
metrics: [JobMetricWithName!]!
|
||||
}
|
||||
|
||||
type Count {
|
||||
name: String!
|
||||
count: Int!
|
||||
}
|
||||
|
||||
type User {
|
||||
username: String!
|
||||
name: String!
|
||||
email: String!
|
||||
}
|
||||
|
||||
type Query {
|
||||
clusters: [Cluster!]! # List of all clusters
|
||||
tags: [Tag!]! # List of all tags
|
||||
|
||||
user(username: String!): User
|
||||
|
||||
job(id: ID!): Job
|
||||
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
|
||||
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
|
||||
|
||||
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
|
||||
jobsStatistics(filter: [JobFilter!], groupBy: Aggregate): [JobsStatistics!]!
|
||||
jobsCount(filter: [JobFilter]!, groupBy: Aggregate!, limit: Int): [Count!]!
|
||||
|
||||
rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
|
||||
|
||||
nodeMetrics(cluster: String!, partition: String, nodes: [String!], scopes: [MetricScope!], metrics: [String!], from: Time!, to: Time!): [NodeMetrics!]!
|
||||
}
|
||||
|
||||
type Mutation {
|
||||
createTag(type: String!, name: String!): Tag!
|
||||
deleteTag(id: ID!): ID!
|
||||
addTagsToJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
removeTagsFromJob(job: ID!, tagIds: [ID!]!): [Tag!]!
|
||||
|
||||
updateConfiguration(name: String!, value: String!): String
|
||||
}
|
||||
|
||||
type IntRangeOutput { from: Int!, to: Int! }
|
||||
type TimeRangeOutput { from: Time!, to: Time! }
|
||||
|
||||
type FilterRanges {
|
||||
duration: IntRangeOutput!
|
||||
numNodes: IntRangeOutput!
|
||||
startTime: TimeRangeOutput!
|
||||
}
|
||||
|
||||
input JobFilter {
|
||||
tags: [ID!]
|
||||
jobId: StringInput
|
||||
arrayJobId: Int
|
||||
user: StringInput
|
||||
project: StringInput
|
||||
cluster: StringInput
|
||||
partition: StringInput
|
||||
duration: IntRange
|
||||
|
||||
minRunningFor: Int
|
||||
|
||||
numNodes: IntRange
|
||||
numAccelerators: IntRange
|
||||
numHWThreads: IntRange
|
||||
|
||||
startTime: TimeRange
|
||||
state: [JobState!]
|
||||
flopsAnyAvg: FloatRange
|
||||
memBwAvg: FloatRange
|
||||
loadAvg: FloatRange
|
||||
memUsedMax: FloatRange
|
||||
}
|
||||
|
||||
input OrderByInput {
|
||||
field: String!
|
||||
order: SortDirectionEnum! = ASC
|
||||
}
|
||||
|
||||
enum SortDirectionEnum {
|
||||
DESC
|
||||
ASC
|
||||
}
|
||||
|
||||
input StringInput {
|
||||
eq: String
|
||||
contains: String
|
||||
startsWith: String
|
||||
endsWith: String
|
||||
}
|
||||
|
||||
input IntRange { from: Int!, to: Int! }
|
||||
input FloatRange { from: Float!, to: Float! }
|
||||
input TimeRange { from: Time, to: Time }
|
||||
|
||||
type JobResultList {
|
||||
items: [Job!]!
|
||||
offset: Int
|
||||
limit: Int
|
||||
count: Int
|
||||
}
|
||||
|
||||
type HistoPoint {
|
||||
count: Int!
|
||||
value: Int!
|
||||
}
|
||||
|
||||
type JobsStatistics {
|
||||
id: ID! # If `groupBy` was used, ID of the user/project/cluster
|
||||
totalJobs: Int! # Number of jobs that matched
|
||||
shortJobs: Int! # Number of jobs with a duration of less than 2 minutes
|
||||
totalWalltime: Int! # Sum of the duration of all matched jobs in hours
|
||||
totalCoreHours: Int! # Sum of the core hours of all matched jobs
|
||||
histWalltime: [HistoPoint!]! # value: hour, count: number of jobs with a rounded duration of value
|
||||
histNumNodes: [HistoPoint!]! # value: number of nodes, count: number of jobs with that number of nodes
|
||||
}
|
||||
|
||||
input PageRequest {
|
||||
itemsPerPage: Int!
|
||||
page: Int!
|
||||
}
|
1
interfaces/rest/README.md
Normal file
1
interfaces/rest/README.md
Normal file
@ -0,0 +1 @@
|
||||
## REST API interfaces
|
221
interfaces/rest/openapi.yaml
Normal file
221
interfaces/rest/openapi.yaml
Normal file
@ -0,0 +1,221 @@
|
||||
#
|
||||
# ClusterCockpit's API spec can be exported via:
|
||||
# docker exec -it cc-php php bin/console api:openapi:export --yaml
|
||||
#
|
||||
# This spec is written by hand and hopefully up to date with the API.
|
||||
#
|
||||
|
||||
openapi: 3.0.3
|
||||
info:
|
||||
title: 'ClusterCockpit REST API'
|
||||
description: 'API for batch job control'
|
||||
version: 0.0.2
|
||||
servers:
|
||||
- url: /
|
||||
description: ''
|
||||
paths:
|
||||
'/api/jobs/':
|
||||
get:
|
||||
operationId: 'getJobs'
|
||||
summary: 'List all jobs'
|
||||
description: 'Get a list of all jobs. Filters can be applied using query parameters.'
|
||||
parameters:
|
||||
- name: state
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
|
||||
- name: cluster
|
||||
in: query
|
||||
schema: { type: string }
|
||||
- name: start-time
|
||||
description: 'Syntax: "<from>-<to>", where <from> and <to> are unix timestamps in seconds'
|
||||
in: query
|
||||
schema: { type: string }
|
||||
- name: page
|
||||
in: query
|
||||
schema: { type: integer }
|
||||
- name: items-per-page
|
||||
in: query
|
||||
schema: { type: integer }
|
||||
- name: with-metadata
|
||||
in: query
|
||||
schema: { type: boolean }
|
||||
responses:
|
||||
200:
|
||||
description: 'Array of jobs'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
jobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Job'
|
||||
400:
|
||||
description: 'Bad Request'
|
||||
'/api/jobs/tag_job/{id}':
|
||||
post:
|
||||
operationId: 'tagJob'
|
||||
summary: 'Add a tag to a job'
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema: { type: integer }
|
||||
description: 'Job ID'
|
||||
requestBody:
|
||||
description: 'Array of tags to add'
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/Tag'
|
||||
responses:
|
||||
200:
|
||||
description: 'Job resource'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
404:
|
||||
description: 'Job or tag does not exist'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
'/api/jobs/start_job/':
|
||||
post:
|
||||
operationId: 'startJob'
|
||||
summary: 'Add a newly started job'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
responses:
|
||||
201:
|
||||
description: 'Job added successfully'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: integer
|
||||
description: 'The database ID assigned to this job'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
422:
|
||||
description: 'The combination of jobId, clusterId and startTime already exists'
|
||||
'/api/jobs/stop_job/':
|
||||
post:
|
||||
operationId: stopJobViaJobID
|
||||
summary: 'Mark a job as stopped. Which job to stop is specified by the request body.'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
required: [jobId, cluster, stopTime, jobState]
|
||||
properties:
|
||||
jobId: { type: integer }
|
||||
cluster: { type: string }
|
||||
startTime: { type: integer }
|
||||
stopTime: { type: integer }
|
||||
jobState:
|
||||
type: string
|
||||
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
|
||||
responses:
|
||||
200:
|
||||
description: 'Job resource'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
404:
|
||||
description: 'Resource not found'
|
||||
'/api/jobs/stop_job/{id}':
|
||||
post:
|
||||
operationId: 'stopJobViaDBID'
|
||||
summary: 'Mark a job as stopped.'
|
||||
parameters:
|
||||
- name: id
|
||||
in: path
|
||||
required: true
|
||||
schema: { type: integer }
|
||||
description: 'Database ID (Resource Identifier)'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
required: [stopTime, jobState]
|
||||
properties:
|
||||
stopTime: { type: integer }
|
||||
jobState:
|
||||
type: string
|
||||
enum: ["running", "completed", "failed", "canceled", "stopped", "timeout"]
|
||||
responses:
|
||||
200:
|
||||
description: 'Job resource'
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
$ref: '#/components/schemas/Job'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
404:
|
||||
description: 'Resource not found'
|
||||
'/api/jobs/import/':
|
||||
post:
|
||||
operationId: 'importJob'
|
||||
summary: 'Imports a job and its metric data'
|
||||
requestBody:
|
||||
required: true
|
||||
content:
|
||||
'application/json':
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
meta:
|
||||
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
|
||||
data:
|
||||
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-data.schema.json
|
||||
responses:
|
||||
200:
|
||||
description: 'Import successful'
|
||||
400:
|
||||
description: 'Bad request'
|
||||
422:
|
||||
description: 'Unprocessable Entity'
|
||||
components:
|
||||
schemas:
|
||||
Tag:
|
||||
description: 'A job tag'
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: 'Database ID'
|
||||
type:
|
||||
type: string
|
||||
description: 'Tag type'
|
||||
name:
|
||||
type: string
|
||||
description: 'Tag name'
|
||||
Job:
|
||||
$ref: https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/schema/json/job-meta.schema.json
|
||||
securitySchemes:
|
||||
bearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
security:
|
||||
- bearerAuth: [] # Applies `bearerAuth` globally
|
@ -1,35 +0,0 @@
|
||||
# Overview
|
||||
|
||||
ClusterCockpit uses the InfluxData line-protocol for collecting the node metric
|
||||
data.
|
||||
|
||||
```
|
||||
<measurement>,<tag set> <field set> <timestamp [s]>
|
||||
```
|
||||
|
||||
Supported measurements:
|
||||
* node – Tags: host
|
||||
* socket – Tags: host, socket
|
||||
* cpu – Tags: host, cpu
|
||||
|
||||
## Supported node level fields
|
||||
|
||||
* `load`
|
||||
* `mem_used`
|
||||
* `net_bw` - split into `ib_bw` and `eth_bw` if required
|
||||
* `file_bw` - split into multiple file systems if required
|
||||
|
||||
## Supported socket fields
|
||||
|
||||
All socket metrics can be aggregated to coarser granularity.
|
||||
|
||||
* `power`
|
||||
* `mem_bw`
|
||||
|
||||
## Supported cpu level fields
|
||||
|
||||
All cpu metrics can be aggregated to coarser granularity.
|
||||
|
||||
* `ipc`
|
||||
* `flops_any`
|
||||
* `clock`
|
0
schemas/README.md
Normal file
0
schemas/README.md
Normal file
Loading…
Reference in New Issue
Block a user