mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2024-12-25 12:59:06 +01:00
Embed schema files for validation
This commit is contained in:
parent
7e0f2a2e06
commit
207254a9e2
234
pkg/schema/schemas/cluster.schema.json
Normal file
234
pkg/schema/schemas/cluster.schema.json
Normal file
@ -0,0 +1,234 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedFS://schemas/cluster.schema.json",
|
||||
"title": "HPC cluster description",
|
||||
"description": "Meta data information of a HPC cluster",
|
||||
"type": "object",
|
||||
"properties":{
|
||||
"name": {
|
||||
"description": "The unique identifier of a cluster",
|
||||
"type": "string"
|
||||
},
|
||||
"metricConfig": {
|
||||
"description": "Metric specifications",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties":{
|
||||
"name": {
|
||||
"description": "Metric name",
|
||||
"type": "string"
|
||||
},
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"type": "string"
|
||||
},
|
||||
"scope": {
|
||||
"description": "Native measurement resolution",
|
||||
"type": "string"
|
||||
},
|
||||
"timestep": {
|
||||
"description": "Frequency of timeseries points",
|
||||
"type": "integer"
|
||||
},
|
||||
"aggregation": {
|
||||
"description": "How the metric is aggregated",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"sum",
|
||||
"avg"
|
||||
]
|
||||
|
||||
},
|
||||
"subClusters": {
|
||||
"description": "Array of cluster hardware partition metric thresholds",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties":{
|
||||
"name": {
|
||||
"description": "Hardware partition name",
|
||||
"type": "string"
|
||||
},
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"alert": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"peak",
|
||||
"caution",
|
||||
"alert"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"unit",
|
||||
"scope",
|
||||
"timestep"
|
||||
]
|
||||
}
|
||||
},
|
||||
"subClusters": {
|
||||
"description": "Array of cluster hardware partitions",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties":{
|
||||
"name": {
|
||||
"description": "Hardware partition name",
|
||||
"type": "string"
|
||||
},
|
||||
"processorType": {
|
||||
"description": "Processor type",
|
||||
"type": "string"
|
||||
},
|
||||
"socketsPerNode": {
|
||||
"description": "Number of sockets per node",
|
||||
"type": "integer"
|
||||
},
|
||||
"coresPerSocket": {
|
||||
"description": "Number of cores per socket",
|
||||
"type": "integer"
|
||||
},
|
||||
"threadsPerCore": {
|
||||
"description": "Number of SMT threads per core",
|
||||
"type": "integer"
|
||||
},
|
||||
"flopRateScalar": {
|
||||
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
|
||||
"type": "integer"
|
||||
},
|
||||
"flopRateSimd": {
|
||||
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
|
||||
"type": "integer"
|
||||
},
|
||||
"memoryBandwidth": {
|
||||
"description": "Theoretical node peak memory bandwidth in GB/s",
|
||||
"type": "integer"
|
||||
},
|
||||
"nodes": {
|
||||
"description": "Node list expression",
|
||||
"type": "string"
|
||||
},
|
||||
"topology": {
|
||||
"description": "Node topology",
|
||||
"type": "object",
|
||||
"properties":{
|
||||
"node": {
|
||||
"description": "HwThread list of node",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"socket": {
|
||||
"description": "HwThread lists of sockets",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"memoryDomain": {
|
||||
"description": "HwThread lists of memory domains",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"die": {
|
||||
"description": "HwThread lists of dies",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"core": {
|
||||
"description": "HwThread lists of cores",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
},
|
||||
"accelerators": {
|
||||
"type": "array",
|
||||
"description": "List of accelerator devices",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique device id"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The accelerator type",
|
||||
"enum": [
|
||||
"Nvidia GPU",
|
||||
"AMD GPU",
|
||||
"Intel GPU"
|
||||
]
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The accelerator model"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"type",
|
||||
"model"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required":[
|
||||
"node",
|
||||
"socket",
|
||||
"memoryDomain"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required":[
|
||||
"name",
|
||||
"topology",
|
||||
"processorType",
|
||||
"socketsPerNode",
|
||||
"coresPerSocket",
|
||||
"threadsPerCore",
|
||||
"flopRateScalar",
|
||||
"flopRateSimd",
|
||||
"memoryBandwidth"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required":[
|
||||
"name",
|
||||
"metricConfig",
|
||||
"subClusters"
|
||||
]
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedFS://schemas/config.schema.json",
|
||||
"title": "cc-backend configuration file schema",
|
||||
"type": "object",
|
||||
"properties":{
|
489
pkg/schema/schemas/job-data.schema.json
Normal file
489
pkg/schema/schemas/job-data.schema.json
Normal file
@ -0,0 +1,489 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedFS://schemas/job-data.schema.json",
|
||||
"title": "Job metric data list",
|
||||
"description": "Collection of metric data of a HPC job",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"mem_used": {
|
||||
"description": "Memory capacity used",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"flops_any": {
|
||||
"description": "Total flop rate with DP flops scaled up",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"core": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"hwthread": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"mem_bw": {
|
||||
"description": "Main memory bandwidth",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"net_bw": {
|
||||
"description": "Total fast interconnect network bandwidth",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"ipc": {
|
||||
"description": "Instructions executed per cycle",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"core": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"hwthread": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"cpu_used": {
|
||||
"description": "CPU active core utilization",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"core": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"hwthread": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"cpu_load": {
|
||||
"description": "CPU requested core utilization (load 1m)",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"flops_dp": {
|
||||
"description": "Double precision flop rate",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"core": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"hwthread": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"flops_sp": {
|
||||
"description": "Single precision flop rate",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"core": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"hwthread": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"vectorization_ratio": {
|
||||
"description": "Fraction of arithmetic instructions using SIMD instructions",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"core": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"hwthread": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"cpu_power": {
|
||||
"description": "CPU power consumption",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"mem_power": {
|
||||
"description": "Memory power consumption",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"acc_utilization": {
|
||||
"description": "GPU utilization",
|
||||
"properties": {
|
||||
"accelerator": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"accelerator"
|
||||
]
|
||||
},
|
||||
"acc_mem_used": {
|
||||
"description": "GPU memory capacity used",
|
||||
"properties": {
|
||||
"accelerator": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"accelerator"
|
||||
]
|
||||
},
|
||||
"acc_power": {
|
||||
"description": "GPU power consumption",
|
||||
"properties": {
|
||||
"accelerator": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"accelerator"
|
||||
]
|
||||
},
|
||||
"clock": {
|
||||
"description": "Average core frequency",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"socket": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"memoryDomain": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"core": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
},
|
||||
"hwthread": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
},
|
||||
"eth_read_bw": {
|
||||
"description": "Ethernet read bandwidth",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"eth_write_bw": {
|
||||
"description": "Ethernet write bandwidth",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"filesystems": {
|
||||
"description": "Array of filesystems",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"nfs",
|
||||
"lustre",
|
||||
"gpfs",
|
||||
"nvme",
|
||||
"ssd",
|
||||
"hdd",
|
||||
"beegfs"
|
||||
]
|
||||
},
|
||||
"read_bw": {
|
||||
"description": "File system read bandwidth",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"write_bw": {
|
||||
"description": "File system write bandwidth",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"read_req": {
|
||||
"description": "File system read requests",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"write_req": {
|
||||
"description": "File system write requests",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"inodes": {
|
||||
"description": "File system inodes",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"accesses": {
|
||||
"description": "File system open and close",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"fsync": {
|
||||
"description": "File system fsync",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"create": {
|
||||
"description": "File system create",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"open": {
|
||||
"description": "File system open",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"close": {
|
||||
"description": "File system close",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"seek": {
|
||||
"description": "File system seek",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"type",
|
||||
"read_bw",
|
||||
"write_bw"
|
||||
]
|
||||
},
|
||||
"minItems": 1
|
||||
}
|
||||
},
|
||||
"ic_rcv_packets": {
|
||||
"description": "Network interconnect read packets",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"ic_send_packets": {
|
||||
"description": "Network interconnect send packets",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"ic_read_bw": {
|
||||
"description": "Network interconnect read bandwidth",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"ic_write_bw": {
|
||||
"description": "Network interconnect write bandwidth",
|
||||
"properties": {
|
||||
"node": {
|
||||
"$ref": "embedFS://schemas/job-metric-data.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"node"
|
||||
]
|
||||
},
|
||||
"required": [
|
||||
"cpu_used",
|
||||
"mem_used",
|
||||
"flops_any",
|
||||
"mem_bw",
|
||||
"net_bw",
|
||||
"filesystems"
|
||||
]
|
||||
}
|
355
pkg/schema/schemas/job-meta.schema.json
Normal file
355
pkg/schema/schemas/job-meta.schema.json
Normal file
@ -0,0 +1,355 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedFS://schemas/job-meta.schema.json",
|
||||
"title": "Job meta data",
|
||||
"description": "Meta data information of a HPC job",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"jobId": {
|
||||
"description": "The unique identifier of a job",
|
||||
"type": "integer"
|
||||
},
|
||||
"user": {
|
||||
"description": "The unique identifier of a user",
|
||||
"type": "string"
|
||||
},
|
||||
"project": {
|
||||
"description": "The unique identifier of a project",
|
||||
"type": "string"
|
||||
},
|
||||
"cluster": {
|
||||
"description": "The unique identifier of a cluster",
|
||||
"type": "string"
|
||||
},
|
||||
"subCluster": {
|
||||
"description": "The unique identifier of a sub cluster",
|
||||
"type": "string"
|
||||
},
|
||||
"partition": {
|
||||
"description": "The Slurm partition to which the job was submitted",
|
||||
"type": "string"
|
||||
},
|
||||
"arrayJobId": {
|
||||
"description": "The unique identifier of an array job",
|
||||
"type": "integer"
|
||||
},
|
||||
"numNodes": {
|
||||
"description": "Number of nodes used",
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0
|
||||
},
|
||||
"numHwthreads": {
|
||||
"description": "Number of HWThreads used",
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0
|
||||
},
|
||||
"numAcc": {
|
||||
"description": "Number of accelerators used",
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0
|
||||
},
|
||||
"exclusive": {
|
||||
"description": "Specifies how nodes are shared. 0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user",
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"maximum": 2
|
||||
},
|
||||
"monitoringStatus": {
|
||||
"description": "State of monitoring system during job run",
|
||||
"type": "integer"
|
||||
},
|
||||
"smt": {
|
||||
"description": "SMT threads used by job",
|
||||
"type": "integer"
|
||||
},
|
||||
"walltime": {
|
||||
"description": "Requested walltime of job in seconds",
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0
|
||||
},
|
||||
"jobState": {
|
||||
"description": "Final state of job",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"completed",
|
||||
"failed",
|
||||
"cancelled",
|
||||
"stopped",
|
||||
"out_of_memory",
|
||||
"timeout"
|
||||
]
|
||||
},
|
||||
"startTime": {
|
||||
"description": "Start epoch time stamp in seconds",
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0
|
||||
},
|
||||
"stopTime": {
|
||||
"description": "Stop epoch time stamp in seconds",
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0
|
||||
},
|
||||
"duration": {
|
||||
"description": "Duration of job in seconds",
|
||||
"type": "integer",
|
||||
"exclusiveMinimum": 0
|
||||
},
|
||||
"resources": {
|
||||
"description": "Resources used by job",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"hostname": {
|
||||
"type": "string"
|
||||
},
|
||||
"hwthreads": {
|
||||
"type": "array",
|
||||
"description": "List of OS processor ids",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"accelerators": {
|
||||
"type": "array",
|
||||
"description": "List of accelerator device ids",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"configuration": {
|
||||
"type": "string",
|
||||
"description": "The configuration options of the node"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"hostname"
|
||||
],
|
||||
"minItems": 1
|
||||
}
|
||||
},
|
||||
"metaData": {
|
||||
"description": "Additional information about the job",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"jobScript": {
|
||||
"type": "string",
|
||||
"description": "The batch script of the job"
|
||||
},
|
||||
"jobName": {
|
||||
"type": "string",
|
||||
"description": "Slurm Job name"
|
||||
},
|
||||
"slurmInfo": {
|
||||
"type": "string",
|
||||
"description": "Additional Slurm information as shown by scontrol show job"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": {
|
||||
"description": "List of tags",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"type"
|
||||
]
|
||||
},
|
||||
"uniqueItems": true
|
||||
},
|
||||
"statistics": {
|
||||
"description": "Job statistic data",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"mem_used": {
|
||||
"description": "Memory capacity used (required)",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"cpu_load": {
|
||||
"description": "CPU requested core utilization (load 1m) (required)",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"flops_any": {
|
||||
"description": "Total flop rate with DP flops scaled up (required)",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"mem_bw": {
|
||||
"description": "Main memory bandwidth (required)",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"net_bw": {
|
||||
"description": "Total fast interconnect network bandwidth (required)",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"file_bw": {
|
||||
"description": "Total file IO bandwidth (required)",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"ipc": {
|
||||
"description": "Instructions executed per cycle",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"cpu_used": {
|
||||
"description": "CPU active core utilization",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"flops_dp": {
|
||||
"description": "Double precision flop rate",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"flops_sp": {
|
||||
"description": "Single precision flop rate",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"rapl_power": {
|
||||
"description": "CPU power consumption",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"gpu_used": {
|
||||
"description": "GPU utilization",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"gpu_mem_used": {
|
||||
"description": "GPU memory capacity used",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"gpu_power": {
|
||||
"description": "GPU power consumption",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"clock": {
|
||||
"description": "Average core frequency",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"eth_read_bw": {
|
||||
"description": "Ethernet read bandwidth",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"eth_write_bw": {
|
||||
"description": "Ethernet write bandwidth",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"ic_rcv_packets": {
|
||||
"description": "Network interconnect read packets",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"ic_send_packets": {
|
||||
"description": "Network interconnect send packets",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"ic_read_bw": {
|
||||
"description": "Network interconnect read bandwidth",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"ic_write_bw": {
|
||||
"description": "Network interconnect write bandwidth",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"filesystems": {
|
||||
"description": "Array of filesystems",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"nfs",
|
||||
"lustre",
|
||||
"gpfs",
|
||||
"nvme",
|
||||
"ssd",
|
||||
"hdd",
|
||||
"beegfs"
|
||||
]
|
||||
},
|
||||
"read_bw": {
|
||||
"description": "File system read bandwidth",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"write_bw": {
|
||||
"description": "File system write bandwidth",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"read_req": {
|
||||
"description": "File system read requests",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"write_req": {
|
||||
"description": "File system write requests",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"inodes": {
|
||||
"description": "File system inodes",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"accesses": {
|
||||
"description": "File system open and close",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"fsync": {
|
||||
"description": "File system fsync",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"create": {
|
||||
"description": "File system create",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"open": {
|
||||
"description": "File system open",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"close": {
|
||||
"description": "File system close",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
},
|
||||
"seek": {
|
||||
"description": "File system seek",
|
||||
"$ref": "embedFS://schemas/job-metric-statistics.schema.json"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"type",
|
||||
"read_bw",
|
||||
"write_bw"
|
||||
]
|
||||
},
|
||||
"minItems": 1
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"cpu_used",
|
||||
"mem_used",
|
||||
"flops_any",
|
||||
"mem_bw"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"jobId",
|
||||
"user",
|
||||
"project",
|
||||
"cluster",
|
||||
"numNodes",
|
||||
"exclusive",
|
||||
"startTime",
|
||||
"jobState",
|
||||
"duration",
|
||||
"resources",
|
||||
"tags",
|
||||
"statistics"
|
||||
]
|
||||
}
|
216
pkg/schema/schemas/job-metric-data.schema.json
Normal file
216
pkg/schema/schemas/job-metric-data.schema.json
Normal file
@ -0,0 +1,216 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedFS://schemas/job-metric-data.schema.json",
|
||||
"title": "Job metric data",
|
||||
"description": "Metric data of a HPC job",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedFS://schemas/unit.schema.json"
|
||||
},
|
||||
"timestep": {
|
||||
"description": "Measurement interval in seconds",
|
||||
"type": "integer"
|
||||
},
|
||||
"thresholds": {
|
||||
"description": "Metric thresholds for specific system",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"peak": {
|
||||
"type": "number"
|
||||
},
|
||||
"normal": {
|
||||
"type": "number"
|
||||
},
|
||||
"caution": {
|
||||
"type": "number"
|
||||
},
|
||||
"alert": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
},
|
||||
"statisticsSeries": {
|
||||
"type": "object",
|
||||
"description": "Statistics series across topology",
|
||||
"properties": {
|
||||
"min": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"max": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"mean": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"percentiles": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"10": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"20": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"30": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"40": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"50": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"60": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"70": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"80": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"90": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"25": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
},
|
||||
"75": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"series": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"hostname": {
|
||||
"type": "string"
|
||||
},
|
||||
"id": {
|
||||
"type": "integer"
|
||||
},
|
||||
"statistics": {
|
||||
"type": "object",
|
||||
"description": "Statistics across time dimension",
|
||||
"properties": {
|
||||
"avg": {
|
||||
"description": "Series average",
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"min": {
|
||||
"description": "Series minimum",
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"max": {
|
||||
"description": "Series maximum",
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"avg",
|
||||
"min",
|
||||
"max"
|
||||
]
|
||||
},
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"minItems": 1
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"hostname",
|
||||
"statistics",
|
||||
"data"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"unit",
|
||||
"timestep",
|
||||
"series"
|
||||
]
|
||||
}
|
34
pkg/schema/schemas/job-metric-statistics.schema.json
Normal file
34
pkg/schema/schemas/job-metric-statistics.schema.json
Normal file
@ -0,0 +1,34 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedFS://schemas/job-metric-statistics.schema.json",
|
||||
"title": "Job statistics",
|
||||
"description": "Format specification for job metric statistics",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"unit": {
|
||||
"description": "Metric unit",
|
||||
"$ref": "embedFS://schemas/unit.schema.json"
|
||||
},
|
||||
"avg": {
|
||||
"description": "Job metric average",
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"min": {
|
||||
"description": "Job metric minimum",
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
},
|
||||
"max": {
|
||||
"description": "Job metric maximum",
|
||||
"type": "number",
|
||||
"minimum": 0
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"unit",
|
||||
"avg",
|
||||
"min",
|
||||
"max"
|
||||
]
|
||||
}
|
40
pkg/schema/schemas/unit.schema.json
Normal file
40
pkg/schema/schemas/unit.schema.json
Normal file
@ -0,0 +1,40 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "embedFS://schemas/unit.schema.json",
|
||||
"title": "Metric unit",
|
||||
"description": "Format specification for job metric units",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"base_unit": {
|
||||
"description": "Metric base unit",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"B",
|
||||
"F",
|
||||
"B/s",
|
||||
"F/s",
|
||||
"CPI",
|
||||
"IPC",
|
||||
"cpu_used",
|
||||
"cpu_load",
|
||||
"Hz",
|
||||
"W"
|
||||
]
|
||||
},
|
||||
"prefix": {
|
||||
"description": "Unit prefix",
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"K",
|
||||
"M",
|
||||
"G",
|
||||
"T",
|
||||
"P",
|
||||
"E"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"base_unit"
|
||||
]
|
||||
}
|
@ -5,9 +5,11 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
||||
"github.com/santhosh-tekuri/jsonschema/v5"
|
||||
@ -22,18 +24,34 @@ const (
|
||||
ClusterCfg
|
||||
)
|
||||
|
||||
// schemaFiles holds the files matched by the pattern schemas/*, embedded
// into the binary at build time by the go:embed directive below.
//go:embed schemas/*
|
||||
var schemaFiles embed.FS
|
||||
|
||||
func Load(s string) (io.ReadCloser, error) {
|
||||
u, err := url.Parse(s)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f := u.Path
|
||||
return schemaFiles.Open(f)
|
||||
}
|
||||
|
||||
func init() {
|
||||
jsonschema.Loaders["embedFS"] = Load
|
||||
}
|
||||
|
||||
func Validate(k Kind, r io.Reader) (err error) {
|
||||
var s *jsonschema.Schema
|
||||
|
||||
switch k {
|
||||
case Meta:
|
||||
s, err = jsonschema.Compile("https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/datastructures/job-meta.schema.json")
|
||||
s, err = jsonschema.Compile("embedFS://schemas/job-meta.schema.json")
|
||||
case Data:
|
||||
s, err = jsonschema.Compile("https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/datastructures/job-data.schema.json")
|
||||
s, err = jsonschema.Compile("embedFS://schemas/job-data.schema.json")
|
||||
case ClusterCfg:
|
||||
s, err = jsonschema.Compile("https://raw.githubusercontent.com/ClusterCockpit/cc-specifications/master/datastructures/cluster.schema.json")
|
||||
s, err = jsonschema.Compile("embedFS://schemas/cluster.schema.json")
|
||||
case Config:
|
||||
s, err = jsonschema.Compile("../../configs/config.schema.json")
|
||||
s, err = jsonschema.Compile("embedFS://schemas/config.schema.json")
|
||||
default:
|
||||
return fmt.Errorf("unkown schema kind ")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user