Restructure repo

This commit is contained in:
Jan Eitzinger
2022-03-18 14:47:39 +01:00
parent 403f74ddee
commit d762e3e52b
15 changed files with 490 additions and 35 deletions

4
datastructures/README.md Normal file
View File

@@ -0,0 +1,4 @@
## Generic database specification
This collection of data structure descriptions is intended to be used
as data structures in applications, payloads in APIs, and file formats.

View File

@@ -0,0 +1,221 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "HPC Cluster description",
  "description": "Meta data information of a HPC cluster",
  "type": "object",
  "properties": {
    "name": {
      "description": "The unique identifier of a cluster",
      "type": "string"
    },
    "metricDataRepository": {
      "description": "Type of the metric data repository for this cluster",
      "type": "object",
      "properties": {
        "kind": {
          "type": "string",
          "enum": [
            "influxdb-v1",
            "influxdb-v2",
            "prometheus",
            "cc-metric-store"
          ]
        },
        "url": {
          "type": "string"
        },
        "token": {
          "type": "string"
        }
      },
      "required": [
        "kind",
        "url"
      ]
    },
    "metricConfig": {
      "description": "Metric specifications",
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "description": "Metric name",
            "type": "string"
          },
          "unit": {
            "description": "Metric unit",
            "type": "string"
          },
          "scope": {
            "description": "Native measurement resolution",
            "type": "string"
          },
          "timestep": {
            "description": "Frequency of timeseries points",
            "type": "integer"
          },
          "peak": {
            "type": "number"
          },
          "normal": {
            "type": "number"
          },
          "caution": {
            "type": "number"
          },
          "alert": {
            "type": "number"
          }
        }
      }
    },
    "subClusters": {
      "description": "Array of cluster hardware partitions",
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "description": "Hardware partition name",
            "type": "string"
          },
          "processorType": {
            "description": "Processor type",
            "type": "string"
          },
          "socketsPerNode": {
            "description": "Number of sockets per node",
            "type": "integer"
          },
          "coresPerSocket": {
            "description": "Number of cores per socket",
            "type": "integer"
          },
          "threadsPerCore": {
            "description": "Number of SMT threads per core",
            "type": "integer"
          },
          "flopRateScalar": {
            "description": "Theoretical node peak flop rate for scalar code in GFlops/s",
            "type": "integer"
          },
          "flopRateSimd": {
            "description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
            "type": "integer"
          },
          "memoryBandwidth": {
            "description": "Theoretical node peak memory bandwidth in GB/s",
            "type": "integer"
          },
          "nodes": {
            "description": "Node list expression",
            "type": "string"
          },
          "topology": {
            "description": "Node topology",
            "type": "object",
            "properties": {
              "node": {
                "description": "HwThread lists of node",
                "type": "array",
                "items": {
                  "type": "integer"
                }
              },
              "socket": {
                "description": "HwThread lists of sockets",
                "type": "array",
                "items": {
                  "type": "array",
                  "items": {
                    "type": "integer"
                  }
                }
              },
              "memoryDomain": {
                "description": "HwThread lists of memory domains",
                "type": "array",
                "items": {
                  "type": "array",
                  "items": {
                    "type": "integer"
                  }
                }
              },
              "die": {
                "description": "HwThread lists of dies",
                "type": "array",
                "items": {
                  "type": "array",
                  "items": {
                    "type": "integer"
                  }
                }
              },
              "core": {
                "description": "HwThread lists of cores",
                "type": "array",
                "items": {
                  "type": "array",
                  "items": {
                    "type": "integer"
                  }
                }
              },
              "accelerators": {
                "type": "array",
                "description": "List of accelerator devices",
                "items": {
                  "type": "object",
                  "properties": {
                    "id": {
                      "type": "string",
                      "description": "The unique device id"
                    },
                    "type": {
                      "type": "string",
                      "description": "The accelerator type",
                      "enum": [
                        "Nvidia GPU",
                        "AMD GPU",
                        "Intel GPU"
                      ]
                    },
                    "model": {
                      "type": "string",
                      "description": "The accelerator model"
                    }
                  },
                  "required": [
                    "id",
                    "type",
                    "model"
                  ]
                }
              }
            },
            "required": [
              "node",
              "socket",
              "memoryDomain"
            ]
          }
        },
        "required": [
          "name",
          "topology",
          "processorType",
          "socketsPerNode",
          "coresPerSocket",
          "threadsPerCore",
          "flopRateScalar",
          "flopRateSimd",
          "memoryBandwidth"
        ]
      }
    }
  },
  "required": [
    "name",
    "metricDataRepository",
    "metricConfig",
    "subClusters"
  ]
}

View File

@@ -0,0 +1,545 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Job metric data list",
  "description": "Collection of metric data of a HPC job",
  "type": "object",
  "properties": {
    "mem_used": {
      "description": "Memory capacity used (required)",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "flops_any": {
      "description": "Total flop rate with DP flops scaled up (required)",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "mem_bw": {
      "description": "Main memory bandwidth (required)",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "net_bw": {
      "description": "Total fast interconnect network bandwidth (required)",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "file_bw": {
      "description": "Total file IO bandwidth (required)",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "ipc": {
      "description": "Instructions executed per cycle",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "cpu_used": {
      "description": "CPU active core utilization",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "cpu_load": {
      "description": "CPU requested core utilization (load 1m)",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "flops_dp": {
      "description": "Double precision flop rate",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "flops_sp": {
      "description": "Single precision flops rate",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "vectorization_ratio": {
      "description": "Fraction of arithmetic instructions using SIMD instructions",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "cpu_power": {
      "description": "CPU power consumption",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "mem_power": {
      "description": "Memory power consumption",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "acc_utilization": {
      "description": "GPU utilization",
      "type": "object",
      "properties": {
        "accelerator": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "accelerator"
      ]
    },
    "acc_mem_used": {
      "description": "GPU memory capacity used",
      "type": "object",
      "properties": {
        "accelerator": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "accelerator"
      ]
    },
    "acc_power": {
      "description": "GPU power consumption",
      "type": "object",
      "properties": {
        "accelerator": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "accelerator"
      ]
    },
    "clock": {
      "description": "Average core frequency",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "socket": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "memoryDomain": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "core": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        },
        "hwthread": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "eth_read_bw": {
      "description": "Ethernet read bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "eth_write_bw": {
      "description": "Ethernet write bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_read_bw": {
      "description": "Parallel file system read bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_write_bw": {
      "description": "Parallel file system write bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_read_req": {
      "description": "Parallel file system read requests",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_write_req": {
      "description": "Parallel file system write requests",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_inodes": {
      "description": "Parallel file system inodes used",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_accesses": {
      "description": "Parallel file system open and close",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_fsync": {
      "description": "Parallel file system fsync",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_create": {
      "description": "Parallel file system create",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_open": {
      "description": "Parallel file system open",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_close": {
      "description": "Parallel file system close",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "pfs_seek": {
      "description": "Parallel file system seek",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "fs_read_bw": {
      "description": "Local file system read bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "fs_write_bw": {
      "description": "Local file system write bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "fs_inodes": {
      "description": "Local file system inodes used",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "fs_accesses": {
      "description": "Local file system open and close",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "ic_rcv_packets": {
      "description": "Network interconnect read packets",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "ic_send_packets": {
      "description": "Network interconnect send packet",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "ic_read_bw": {
      "description": "Network interconnect read bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    },
    "ic_write_bw": {
      "description": "Network interconnect write bandwidth",
      "type": "object",
      "properties": {
        "node": {
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-data.schema.json"
        }
      },
      "required": [
        "node"
      ]
    }
  },
  "required": [
    "cpu_used",
    "mem_used",
    "flops_any",
    "mem_bw",
    "net_bw",
    "file_bw"
  ]
}

View File

@@ -0,0 +1,342 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Job meta data",
  "description": "Meta data information of a HPC job",
  "type": "object",
  "properties": {
    "jobId": {
      "description": "The unique identifier of a job",
      "type": "integer"
    },
    "user": {
      "description": "The unique identifier of a user",
      "type": "string"
    },
    "project": {
      "description": "The unique identifier of a project",
      "type": "string"
    },
    "cluster": {
      "description": "The unique identifier of a cluster",
      "type": "string"
    },
    "subCluster": {
      "description": "The unique identifier of a sub cluster",
      "type": "string"
    },
    "partition": {
      "description": "The Slurm partition to which the job was submitted",
      "type": "string"
    },
    "arrayJobId": {
      "description": "The unique identifier of an array job",
      "type": "integer"
    },
    "numNodes": {
      "description": "Number of nodes used",
      "type": "integer",
      "exclusiveMinimum": 0
    },
    "numHwthreads": {
      "description": "Number of HWThreads used",
      "type": "integer",
      "exclusiveMinimum": 0
    },
    "numAcc": {
      "description": "Number of accelerators used",
      "type": "integer",
      "exclusiveMinimum": 0
    },
    "exclusive": {
      "description": "Job uses only exclusive nodes",
      "type": "integer"
    },
    "monitoringStatus": {
      "description": "State of monitoring system during job run",
      "type": "integer"
    },
    "smt": {
      "description": "SMT threads used by job",
      "type": "integer"
    },
    "walltime": {
      "description": "Requested walltime of job in seconds",
      "type": "integer",
      "exclusiveMinimum": 0
    },
    "jobState": {
      "description": "Final state of job",
      "type": "string",
      "enum": [
        "completed",
        "failed",
        "cancelled",
        "stopped",
        "out_of_memory",
        "timeout"
      ]
    },
    "startTime": {
      "description": "Start epoch time stamp in seconds",
      "type": "integer",
      "exclusiveMinimum": 0
    },
    "stopTime": {
      "description": "Stop epoch time stamp in seconds",
      "type": "integer",
      "exclusiveMinimum": 0
    },
    "duration": {
      "description": "Duration of job in seconds",
      "type": "integer",
      "exclusiveMinimum": 0
    },
    "resources": {
      "description": "Resources used by job",
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "hostname": {
            "type": "string"
          },
          "hwthreads": {
            "type": "array",
            "description": "List of OS processor ids",
            "items": {
              "type": "integer"
            }
          },
          "accelerators": {
            "type": "array",
            "description": "List of accelerator ids",
            "items": {
              "type": "integer"
            }
          },
          "configuration": {
            "type": "string",
            "description": "The configuration options of the node"
          }
        },
        "required": [
          "hostname"
        ]
      },
      "minItems": 1
    },
    "metaData": {
      "description": "Additional information about the job",
      "type": "object",
      "properties": {
        "jobScript": {
          "type": "string",
          "description": "The batch script of the job"
        },
        "jobName": {
          "type": "string",
          "description": "Slurm Job name"
        },
        "slurmInfo": {
          "type": "string",
          "description": "Additional Slurm info as shown by scontrol show job"
        }
      }
    },
    "tags": {
      "description": "List of tags",
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "name": {
            "type": "string"
          },
          "type": {
            "type": "string"
          }
        },
        "required": [
          "name",
          "type"
        ]
      },
      "uniqueItems": true
    },
    "statistics": {
      "description": "Job statistic data",
      "type": "object",
      "properties": {
        "mem_used": {
          "description": "Memory capacity used (required)",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "cpu_load": {
          "description": "CPU requested core utilization (load 1m) (required)",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "flops_any": {
          "description": "Total flop rate with DP flops scaled up (required)",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "mem_bw": {
          "description": "Main memory bandwidth (required)",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "net_bw": {
          "description": "Total fast interconnect network bandwidth (required)",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "file_bw": {
          "description": "Total file IO bandwidth (required)",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "ipc": {
          "description": "Instructions executed per cycle",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "cpu_used": {
          "description": "CPU active core utilization",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "flops_dp": {
          "description": "Double precision flop rate",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "flops_sp": {
          "description": "Single precision flops rate",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "rapl_power": {
          "description": "CPU power consumption",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "gpu_used": {
          "description": "GPU utilization",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "gpu_mem_used": {
          "description": "GPU memory capacity used",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "gpu_power": {
          "description": "GPU power consumption",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "clock": {
          "description": "Average core frequency",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "eth_read_bw": {
          "description": "Ethernet read bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "eth_write_bw": {
          "description": "Ethernet write bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_read_bw": {
          "description": "Parallel file system read bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_write_bw": {
          "description": "Parallel file system write bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_read_req": {
          "description": "Parallel file system read requests",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_write_req": {
          "description": "Parallel file system write requests",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_inodes": {
          "description": "Parallel file system inodes used",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_accesses": {
          "description": "Parallel file system open and close",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_fsync": {
          "description": "Parallel file system fsync",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_create": {
          "description": "Parallel file system create",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_open": {
          "description": "Parallel file system open",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_close": {
          "description": "Parallel file system close",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "pfs_seek": {
          "description": "Parallel file system seek",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "fs_read_bw": {
          "description": "Local file system read bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "fs_write_bw": {
          "description": "Local file system write bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "fs_inodes": {
          "description": "Local file system inodes used",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "fs_accesses": {
          "description": "Local file system open and close",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "ic_rcv_packets": {
          "description": "Network interconnect read packets",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "ic_send_packets": {
          "description": "Network interconnect send packet",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "ic_read_bw": {
          "description": "Network interconnect read bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        },
        "ic_write_bw": {
          "description": "Network interconnect write bandwidth",
          "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/job-metric-statistics.schema.json"
        }
      },
      "required": [
        "mem_used",
        "cpu_load",
        "flops_any",
        "mem_bw",
        "net_bw",
        "file_bw"
      ]
    }
  },
  "required": [
    "jobId",
    "user",
    "project",
    "cluster",
    "subCluster",
    "partition",
    "numNodes",
    "exclusive",
    "startTime",
    "stopTime",
    "jobState",
    "duration",
    "resources",
    "tags",
    "statistics"
  ]
}

View File

@@ -0,0 +1,226 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Job metric data",
  "description": "Metric data of a HPC job",
  "type": "object",
  "properties": {
    "unit": {
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/unit.schema.json"
    },
    "scope": {
      "type": "string",
      "enum": [
        "node",
        "hwthread",
        "core",
        "memoryDomain",
        "die",
        "socket",
        "accelerator"
      ]
    },
    "timestep": {
      "description": "Measurement interval in seconds",
      "type": "integer"
    },
    "thresholds": {
      "description": "Metric thresholds for specific system",
      "type": "object",
      "properties": {
        "peak": {
          "type": "number"
        },
        "normal": {
          "type": "number"
        },
        "caution": {
          "type": "number"
        },
        "alert": {
          "type": "number"
        }
      }
    },
    "statisticsSeries": {
      "type": "object",
      "description": "Statistics series across topology",
      "properties": {
        "min": {
          "type": "array",
          "items": {
            "type": "number",
            "minimum": 0
          },
          "minItems": 3
        },
        "max": {
          "type": "array",
          "items": {
            "type": "number",
            "minimum": 0
          },
          "minItems": 3
        },
        "mean": {
          "type": "array",
          "items": {
            "type": "number",
            "minimum": 0
          },
          "minItems": 3
        },
        "percentiles": {
          "type": "object",
          "properties": {
            "10": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "20": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "30": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "40": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "50": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "60": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "70": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "80": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "90": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "25": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            },
            "75": {
              "type": "array",
              "items": {
                "type": "number",
                "minimum": 0
              },
              "minItems": 3
            }
          }
        }
      }
    },
    "series": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "hostname": {
            "type": "string"
          },
          "id": {
            "type": "integer"
          },
          "statistics": {
            "type": "object",
            "description": "Statistics across time dimension",
            "properties": {
              "avg": {
                "description": "Series average",
                "type": "number",
                "minimum": 0
              },
              "min": {
                "description": "Series minimum",
                "type": "number",
                "minimum": 0
              },
              "max": {
                "description": "Series maximum",
                "type": "number",
                "minimum": 0
              }
            },
            "required": [
              "avg",
              "min",
              "max"
            ]
          },
          "data": {
            "type": "array",
            "items": {
              "type": "number",
              "minimum": 0
            },
            "minItems": 1
          }
        },
        "required": [
          "hostname",
          "data"
        ]
      }
    }
  },
  "required": [
    "unit",
    "scope",
    "timestep",
    "series"
  ]
}

View File

@@ -0,0 +1,33 @@
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Job statistics",
  "description": "Format specification for job metric statistics",
  "type": "object",
  "properties": {
    "unit": {
      "description": "Metric unit",
      "$ref": "https://github.com/ClusterCockpit/cc-specifications/blob/master/schema/json/unit.schema.json"
    },
    "avg": {
      "description": "Job metric average",
      "type": "number",
      "minimum": 0
    },
    "min": {
      "description": "Job metric minimum",
      "type": "number",
      "minimum": 0
    },
    "max": {
      "description": "Job metric maximum",
      "type": "number",
      "minimum": 0
    }
  },
  "required": [
    "unit",
    "avg",
    "min",
    "max"
  ]
}

View File

@@ -0,0 +1,36 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Metric unit",
"description": "Format specification for job metric units",
"type": "object",
"properties": {
"base_unit": {
"description": "Metric base unit",
"type": "string",
"enum": [
"B",
"F",
"B/s",
"F/s",
"CPI",
"IPC",
"Hz"
]
},
"prefix": {
"description": "Unit prefix",
"type": "string",
"enum": [
"K",
"M",
"G",
"T",
"P",
"E"
]
}
},
"required": [
"base_unit"
]
}