From 119050b4b38bde301c58f9803565c85c26cf9b8f Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 20 Dec 2024 08:56:49 +0100 Subject: [PATCH] Update JSON schemas --- datastructures/cluster.schema.json | 605 ++++++----- datastructures/job-data.schema.json | 958 +++++++++--------- datastructures/job-meta.schema.json | 672 ++++++------ datastructures/job-metric-data.schema.json | 426 ++++---- .../job-metric-statistics.schema.json | 62 +- datastructures/unit.schema.json | 74 +- 6 files changed, 1420 insertions(+), 1377 deletions(-) diff --git a/datastructures/cluster.schema.json b/datastructures/cluster.schema.json index 1d00962..66b7ba1 100644 --- a/datastructures/cluster.schema.json +++ b/datastructures/cluster.schema.json @@ -1,284 +1,327 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "cluster.schema.json", - "title": "HPC cluster description", - "description": "Meta data information of a HPC cluster", - "type": "object", - "properties": { - "name": { - "description": "The unique identifier of a cluster", - "type": "string" - }, - "metricConfig": { - "description": "Metric specifications", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "description": "Metric name", - "type": "string" - }, - "unit": { - "description": "Metric unit", - "$ref": "embedfs://unit.schema.json" - }, - "scope": { - "description": "Native measurement resolution", - "type": "string" - }, - "timestep": { - "description": "Frequency of timeseries points", - "type": "integer" - }, - "aggregation": { - "description": "How the metric is aggregated", - "type": "string", - "enum": [ - "sum", - "avg" - ] - }, - "peak": { - "description": "Metric peak threshold (Upper metric limit)", - "type": "number" - }, - "normal": { - "description": "Metric normal threshold", - "type": "number" - }, - "caution": { - "description": "Metric caution threshold (Suspicious but does not require immediate action)", - "type": "number" - }, - "alert": { - 
"description": "Metric alert threshold (Requires immediate action)", - "type": "number" - }, - "subClusters": { - "description": "Array of cluster hardware partition metric thresholds", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "description": "Hardware partition name", - "type": "string" - }, - "peak": { - "type": "number" - }, - "normal": { - "type": "number" - }, - "caution": { - "type": "number" - }, - "alert": { - "type": "number" - }, - "remove": { - "type": "boolean" - } - }, - "required": [ - "name" - ] - } - } - }, - "required": [ - "name", - "unit", - "scope", - "timestep", - "aggregation", - "peak", - "normal", - "caution", - "alert" - ] - }, - "minItems": 1 - }, - "subClusters": { - "description": "Array of cluster hardware partitions", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "description": "Hardware partition name", - "type": "string" - }, - "processorType": { - "description": "Processor type", - "type": "string" - }, - "socketsPerNode": { - "description": "Number of sockets per node", - "type": "integer" - }, - "coresPerSocket": { - "description": "Number of cores per socket", - "type": "integer" - }, - "threadsPerCore": { - "description": "Number of SMT threads per core", - "type": "integer" - }, - "flopRateScalar": { - "description": "Theoretical node peak flop rate for scalar code in GFlops/s", - "type": "object", - "properties": { - "unit": { - "description": "Metric unit", - "$ref": "embedfs://unit.schema.json" - }, - "value": { - "type": "number" - } - } - }, - "flopRateSimd": { - "description": "Theoretical node peak flop rate for SIMD code in GFlops/s", - "type": "object", - "properties": { - "unit": { - "description": "Metric unit", - "$ref": "embedfs://unit.schema.json" - }, - "value": { - "type": "number" - } - } - }, - "memoryBandwidth": { - "description": "Theoretical node peak memory bandwidth in GB/s", - "type": "object", - "properties": { - "unit": { - 
"description": "Metric unit", - "$ref": "embedfs://unit.schema.json" - }, - "value": { - "type": "number" - } - } - }, - "nodes": { - "description": "Node list expression", - "type": "string" - }, - "topology": { - "description": "Node topology", - "type": "object", - "properties": { - "node": { - "description": "HwTread lists of node", - "type": "array", - "items": { - "type": "integer" - } - }, - "socket": { - "description": "HwTread lists of sockets", - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer" - } - } - }, - "memoryDomain": { - "description": "HwTread lists of memory domains", - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer" - } - } - }, - "die": { - "description": "HwTread lists of dies", - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer" - } - } - }, - "core": { - "description": "HwTread lists of cores", - "type": "array", - "items": { - "type": "array", - "items": { - "type": "integer" - } - } - }, - "accelerators": { - "type": "array", - "description": "List of of accelerator devices", - "items": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The unique device id" - }, - "type": { - "type": "string", - "description": "The accelerator type", - "enum": [ - "Nvidia GPU", - "AMD GPU", - "Intel GPU" - ] - }, - "model": { - "type": "string", - "description": "The accelerator model" - } - }, - "required": [ - "id", - "type", - "model" - ] - } - } - }, - "required": [ - "node", - "socket", - "memoryDomain" - ] - } - }, - "required": [ - "name", - "nodes", - "topology", - "processorType", - "socketsPerNode", - "coresPerSocket", - "threadsPerCore", - "flopRateScalar", - "flopRateSimd", - "memoryBandwidth" - ] - }, - "minItems": 1 - } + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://cluster.schema.json", + "title": "HPC cluster description", + "description": "Meta data information of a HPC 
cluster", + "type": "object", + "properties": { + "name": { + "description": "The unique identifier of a cluster", + "type": "string" }, - "required": [ - "name", - "metricConfig", - "subClusters" - ] + "metricConfig": { + "description": "Metric specifications", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "description": "Metric name", + "type": "string" + }, + "unit": { + "description": "Metric unit", + "$ref": "embedfs://unit.schema.json" + }, + "scope": { + "description": "Native measurement resolution", + "type": "string" + }, + "timestep": { + "description": "Frequency of timeseries points", + "type": "integer" + }, + "aggregation": { + "description": "How the metric is aggregated", + "type": "string", + "enum": [ + "sum", + "avg" + ] + }, + "footprint": { + "description": "Is it a footprint metric and what type", + "type": "string", + "enum": [ + "avg", + "max", + "min" + ] + }, + "energy": { + "description": "Is it used to calculate job energy", + "type": "string", + "enum": [ + "power", + "energy" + ] + }, + "lowerIsBetter": { + "description": "Is lower better.", + "type": "boolean" + }, + "peak": { + "description": "Metric peak threshold (Upper metric limit)", + "type": "number" + }, + "normal": { + "description": "Metric normal threshold", + "type": "number" + }, + "caution": { + "description": "Metric caution threshold (Suspicious but does not require immediate action)", + "type": "number" + }, + "alert": { + "description": "Metric alert threshold (Requires immediate action)", + "type": "number" + }, + "subClusters": { + "description": "Array of cluster hardware partition metric thresholds", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "description": "Hardware partition name", + "type": "string" + }, + "footprint": { + "description": "Is it a footprint metric and what type. 
Overwrite global setting", + "type": "string", + "enum": [ + "avg", + "max", + "min" + ] + }, + "energy": { + "description": "Is it used to calculate job energy. Overwrite global", + "type": "string", + "enum": [ + "power", + "energy" + ] + }, + "lowerIsBetter": { + "description": "Is lower better. Overwrite global", + "type": "boolean" + }, + "peak": { + "type": "number" + }, + "normal": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "alert": { + "type": "number" + }, + "remove": { + "description": "Remove this metric for this subcluster", + "type": "boolean" + } + }, + "required": [ + "name" + ] + } + } + }, + "required": [ + "name", + "unit", + "scope", + "timestep", + "aggregation", + "peak", + "normal", + "caution", + "alert" + ] + }, + "minItems": 1 + }, + "subClusters": { + "description": "Array of cluster hardware partitions", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "description": "Hardware partition name", + "type": "string" + }, + "processorType": { + "description": "Processor type", + "type": "string" + }, + "socketsPerNode": { + "description": "Number of sockets per node", + "type": "integer" + }, + "coresPerSocket": { + "description": "Number of cores per socket", + "type": "integer" + }, + "threadsPerCore": { + "description": "Number of SMT threads per core", + "type": "integer" + }, + "flopRateScalar": { + "description": "Theoretical node peak flop rate for scalar code in GFlops/s", + "type": "object", + "properties": { + "unit": { + "description": "Metric unit", + "$ref": "embedfs://unit.schema.json" + }, + "value": { + "type": "number" + } + } + }, + "flopRateSimd": { + "description": "Theoretical node peak flop rate for SIMD code in GFlops/s", + "type": "object", + "properties": { + "unit": { + "description": "Metric unit", + "$ref": "embedfs://unit.schema.json" + }, + "value": { + "type": "number" + } + } + }, + "memoryBandwidth": { + "description": "Theoretical node peak memory 
bandwidth in GB/s", + "type": "object", + "properties": { + "unit": { + "description": "Metric unit", + "$ref": "embedfs://unit.schema.json" + }, + "value": { + "type": "number" + } + } + }, + "nodes": { + "description": "Node list expression", + "type": "string" + }, + "topology": { + "description": "Node topology", + "type": "object", + "properties": { + "node": { + "description": "HwThread lists of node", + "type": "array", + "items": { + "type": "integer" + } + }, + "socket": { + "description": "HwThread lists of sockets", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "memoryDomain": { + "description": "HwThread lists of memory domains", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "die": { + "description": "HwThread lists of dies", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "core": { + "description": "HwThread lists of cores", + "type": "array", + "items": { + "type": "array", + "items": { + "type": "integer" + } + } + }, + "accelerators": { + "type": "array", + "description": "List of accelerator devices", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique device id" + }, + "type": { + "type": "string", + "description": "The accelerator type", + "enum": [ + "Nvidia GPU", + "AMD GPU", + "Intel GPU" + ] + }, + "model": { + "type": "string", + "description": "The accelerator model" + } + }, + "required": [ + "id", + "type", + "model" + ] + } + } + }, + "required": [ + "node", + "socket", + "memoryDomain" + ] + } + }, + "required": [ + "name", + "nodes", + "topology", + "processorType", + "socketsPerNode", + "coresPerSocket", + "threadsPerCore", + "flopRateScalar", + "flopRateSimd", + "memoryBandwidth" + ] + }, + "minItems": 1 + } + }, + "required": [ + "name", + "metricConfig", + "subClusters" + ] } diff --git
a/datastructures/job-data.schema.json b/datastructures/job-data.schema.json index d2526b1..c0c492b 100644 --- a/datastructures/job-data.schema.json +++ b/datastructures/job-data.schema.json @@ -1,490 +1,490 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "job-data.schema.json", - "title": "Job metric data list", - "description": "Collection of metric data of a HPC job", - "type": "object", - "properties": { - "mem_used": { - "description": "Memory capacity used", - "type": "object", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "flops_any": { - "description": "Total flop rate with DP flops scaled up", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - }, - "core": { - "$ref": "job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "mem_bw": { - "description": "Main memory bandwidth", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "net_bw": { - "description": "Total fast interconnect network bandwidth", - "type": "object", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "ipc": { - "description": "Instructions executed per cycle", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - }, - "core": { - "$ref": "job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "cpu_user": { - "description": "CPU user 
active core utilization", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - }, - "core": { - "$ref": "job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "cpu_load": { - "description": "CPU requested core utilization (load 1m)", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "flops_dp": { - "description": "Double precision flop rate", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - }, - "core": { - "$ref": "job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "flops_sp": { - "description": "Single precision flops rate", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - }, - "core": { - "$ref": "job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "vectorization_ratio": { - "description": "Fraction of arithmetic instructions using SIMD instructions", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - }, - "core": { - "$ref": "job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "cpu_power": { - "description": "CPU power consumption", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": 
"job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "mem_power": { - "description": "Memory power consumption", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "acc_utilization": { - "description": "GPU utilization", - "properties": { - "accelerator": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "accelerator" - ] - }, - "acc_mem_used": { - "description": "GPU memory capacity used", - "properties": { - "accelerator": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "accelerator" - ] - }, - "acc_power": { - "description": "GPU power consumption", - "properties": { - "accelerator": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "accelerator" - ] - }, - "clock": { - "description": "Average core frequency", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - }, - "socket": { - "$ref": "job-metric-data.schema.json" - }, - "memoryDomain": { - "$ref": "job-metric-data.schema.json" - }, - "core": { - "$ref": "job-metric-data.schema.json" - }, - "hwthread": { - "$ref": "job-metric-data.schema.json" - } - }, - "minProperties": 1 - }, - "eth_read_bw": { - "description": "Ethernet read bandwidth", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "eth_write_bw": { - "description": "Ethernet write bandwidth", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "filesystems": { - "description": "Array of filesystems", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "nfs", - "lustre", - "gpfs", - "nvme", - "ssd", - "hdd", - "beegfs" - ] - }, - "read_bw": { - "description": "File system read bandwidth", - "properties": { - "node": { - 
"$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "write_bw": { - "description": "File system write bandwidth", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "read_req": { - "description": "File system read requests", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "write_req": { - "description": "File system write requests", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "inodes": { - "description": "File system write requests", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "accesses": { - "description": "File system open and close", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "fsync": { - "description": "File system fsync", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "create": { - "description": "File system create", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "open": { - "description": "File system open", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "close": { - "description": "File system close", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "seek": { - "description": "File system seek", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - } - }, - "required": [ - "name", - "type", - "read_bw", - "write_bw" - ] - }, - "minItems": 1 + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-data.schema.json", + "title": "Job metric data 
list", + "description": "Collection of metric data of a HPC job", + "type": "object", + "properties": { + "mem_used": { + "description": "Memory capacity used", + "type": "object", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" } + }, + "required": [ + "node" + ] }, - "ic_rcv_packets": { - "description": "Network interconnect read packets", + "flops_any": { + "description": "Total flop rate with DP flops scaled up", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "mem_bw": { + "description": "Main memory bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "net_bw": { + "description": "Total fast interconnect network bandwidth", + "type": "object", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "ipc": { + "description": "Instructions executed per cycle", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "cpu_user": { + "description": "CPU user active core utilization", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + 
"socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "cpu_load": { + "description": "CPU requested core utilization (load 1m)", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "flops_dp": { + "description": "Double precision flop rate", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "flops_sp": { + "description": "Single precision flops rate", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "vectorization_ratio": { + "description": "Fraction of arithmetic instructions using SIMD instructions", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "cpu_power": { + "description": "CPU power consumption", + 
"properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "mem_power": { + "description": "Memory power consumption", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "acc_utilization": { + "description": "GPU utilization", + "properties": { + "accelerator": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "accelerator" + ] + }, + "acc_mem_used": { + "description": "GPU memory capacity used", + "properties": { + "accelerator": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "accelerator" + ] + }, + "acc_power": { + "description": "GPU power consumption", + "properties": { + "accelerator": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "accelerator" + ] + }, + "clock": { + "description": "Average core frequency", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "socket": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "memoryDomain": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "core": { + "$ref": "embedfs://job-metric-data.schema.json" + }, + "hwthread": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "minProperties": 1 + }, + "eth_read_bw": { + "description": "Ethernet read bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "eth_write_bw": { + "description": "Ethernet write bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "filesystems": { + "description": "Array of filesystems", + "type": "array", + "items": { + "type": "object", "properties": { - "node": { - "$ref": 
"job-metric-data.schema.json" - } + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "nfs", + "lustre", + "gpfs", + "nvme", + "ssd", + "hdd", + "beegfs" + ] + }, + "read_bw": { + "description": "File system read bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "write_bw": { + "description": "File system write bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "read_req": { + "description": "File system read requests", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "write_req": { + "description": "File system write requests", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "inodes": { + "description": "File system write requests", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "accesses": { + "description": "File system open and close", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "fsync": { + "description": "File system fsync", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "create": { + "description": "File system create", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "open": { + "description": "File system open", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "close": { + "description": "File system close", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + 
}, + "seek": { + "description": "File system seek", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + } }, "required": [ - "node" - ] - }, - "ic_send_packets": { - "description": "Network interconnect send packet", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "ic_read_bw": { - "description": "Network interconnect read bandwidth", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" - ] - }, - "ic_write_bw": { - "description": "Network interconnect write bandwidth", - "properties": { - "node": { - "$ref": "job-metric-data.schema.json" - } - }, - "required": [ - "node" + "name", + "type", + "read_bw", + "write_bw" ] + }, + "minItems": 1 + } + }, + "ic_rcv_packets": { + "description": "Network interconnect read packets", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } }, "required": [ - "cpu_user", - "cpu_load", - "mem_used", - "flops_any", - "mem_bw", - "net_bw", - "filesystems" + "node" ] + }, + "ic_send_packets": { + "description": "Network interconnect send packet", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "ic_read_bw": { + "description": "Network interconnect read bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "ic_write_bw": { + "description": "Network interconnect write bandwidth", + "properties": { + "node": { + "$ref": "embedfs://job-metric-data.schema.json" + } + }, + "required": [ + "node" + ] + }, + "required": [ + "cpu_user", + "cpu_load", + "mem_used", + "flops_any", + "mem_bw", + "net_bw", + "filesystems" + ] } diff --git a/datastructures/job-meta.schema.json b/datastructures/job-meta.schema.json index aa8255f..db7475c 100644 --- 
a/datastructures/job-meta.schema.json +++ b/datastructures/job-meta.schema.json @@ -1,351 +1,351 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "job-meta.schema.json", - "title": "Job meta data", - "description": "Meta data information of a HPC job", - "type": "object", - "properties": { - "jobId": { - "description": "The unique identifier of a job", - "type": "integer" - }, - "user": { - "description": "The unique identifier of a user", + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-meta.schema.json", + "title": "Job meta data", + "description": "Meta data information of a HPC job", + "type": "object", + "properties": { + "jobId": { + "description": "The unique identifier of a job", + "type": "integer" + }, + "user": { + "description": "The unique identifier of a user", + "type": "string" + }, + "project": { + "description": "The unique identifier of a project", + "type": "string" + }, + "cluster": { + "description": "The unique identifier of a cluster", + "type": "string" + }, + "subCluster": { + "description": "The unique identifier of a sub cluster", + "type": "string" + }, + "partition": { + "description": "The Slurm partition to which the job was submitted", + "type": "string" + }, + "arrayJobId": { + "description": "The unique identifier of an array job", + "type": "integer" + }, + "numNodes": { + "description": "Number of nodes used", + "type": "integer", + "exclusiveMinimum": 0 + }, + "numHwthreads": { + "description": "Number of HWThreads used", + "type": "integer", + "exclusiveMinimum": 0 + }, + "numAcc": { + "description": "Number of accelerators used", + "type": "integer", + "exclusiveMinimum": 0 + }, + "exclusive": { + "description": "Specifies how nodes are shared. 
0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user", + "type": "integer", + "minimum": 0, + "maximum": 2 + }, + "monitoringStatus": { + "description": "State of monitoring system during job run", + "type": "integer" + }, + "smt": { + "description": "SMT threads used by job", + "type": "integer" + }, + "walltime": { + "description": "Requested walltime of job in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "jobState": { + "description": "Final state of job", + "type": "string", + "enum": [ + "completed", + "failed", + "cancelled", + "stopped", + "out_of_memory", + "timeout" + ] + }, + "startTime": { + "description": "Start epoch time stamp in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "duration": { + "description": "Duration of job in seconds", + "type": "integer", + "exclusiveMinimum": 0 + }, + "resources": { + "description": "Resources used by job", + "type": "array", + "items": { + "type": "object", + "properties": { + "hostname": { "type": "string" - }, - "project": { - "description": "The unique identifier of a project", - "type": "string" - }, - "cluster": { - "description": "The unique identifier of a cluster", - "type": "string" - }, - "subCluster": { - "description": "The unique identifier of a sub cluster", - "type": "string" - }, - "partition": { - "description": "The Slurm partition to which the job was submitted", - "type": "string" - }, - "arrayJobId": { - "description": "The unique identifier of an array job", - "type": "integer" - }, - "numNodes": { - "description": "Number of nodes used", - "type": "integer", - "exclusiveMinimum": 0 - }, - "numHwthreads": { - "description": "Number of HWThreads used", - "type": "integer", - "exclusiveMinimum": 0 - }, - "numAcc": { - "description": "Number of accelerators used", - "type": "integer", - "exclusiveMinimum": 0 - }, - "exclusive": { - "description": "Specifies how nodes are shared. 
0 - Shared among multiple jobs of multiple users, 1 - Job exclusive, 2 - Shared among multiple jobs of same user", - "type": "integer", - "minimum": 0, - "maximum": 2 - }, - "monitoringStatus": { - "description": "State of monitoring system during job run", - "type": "integer" - }, - "smt": { - "description": "SMT threads used by job", - "type": "integer" - }, - "walltime": { - "description": "Requested walltime of job in seconds", - "type": "integer", - "exclusiveMinimum": 0 - }, - "jobState": { - "description": "Final state of job", + }, + "hwthreads": { + "type": "array", + "description": "List of OS processor ids", + "items": { + "type": "integer" + } + }, + "accelerators": { + "type": "array", + "description": "List of accelerator device ids", + "items": { + "type": "string" + } + }, + "configuration": { "type": "string", - "enum": [ - "completed", - "failed", - "cancelled", - "stopped", - "out_of_memory", - "timeout" - ] + "description": "The configuration options of the node" + } }, - "startTime": { - "description": "Start epoch time stamp in seconds", - "type": "integer", - "exclusiveMinimum": 0 + "required": [ + "hostname" + ], + "minItems": 1 + } + }, + "metaData": { + "description": "Additional information about the job", + "type": "object", + "properties": { + "jobScript": { + "type": "string", + "description": "The batch script of the job" }, - "duration": { - "description": "Duration of job in seconds", - "type": "integer", - "exclusiveMinimum": 0 + "jobName": { + "type": "string", + "description": "Slurm Job name" }, - "resources": { - "description": "Resources used by job", - "type": "array", - "items": { - "type": "object", - "properties": { - "hostname": { - "type": "string" - }, - "hwthreads": { - "type": "array", - "description": "List of OS processor ids", - "items": { - "type": "integer" - } - }, - "accelerators": { - "type": "array", - "description": "List of of accelerator device ids", - "items": { - "type": "string" - } - }, - 
"configuration": { - "type": "string", - "description": "The configuration options of the node" - } - }, - "required": [ - "hostname" - ], - "minItems": 1 - } + "slurmInfo": { + "type": "string", + "description": "Additional Slurm information as shown by scontrol show job" + } + } + }, + "tags": { + "description": "List of tags", + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "type": { + "type": "string" + } }, - "metaData": { - "description": "Additional information about the job", + "required": [ + "name", + "type" + ] + }, + "uniqueItems": true + }, + "statistics": { + "description": "Job statistic data", + "type": "object", + "properties": { + "mem_used": { + "description": "Memory capacity used (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "cpu_load": { + "description": "CPU requested core utilization (load 1m) (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "flops_any": { + "description": "Total flop rate with DP flops scaled up (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "mem_bw": { + "description": "Main memory bandwidth (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "net_bw": { + "description": "Total fast interconnect network bandwidth (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "file_bw": { + "description": "Total file IO bandwidth (required)", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ipc": { + "description": "Instructions executed per cycle", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "cpu_user": { + "description": "CPU user active core utilization", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "flops_dp": { + "description": "Double precision flop rate", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "flops_sp": { + "description": "Single precision flops rate", + "$ref": 
"embedfs://job-metric-statistics.schema.json" + }, + "rapl_power": { + "description": "CPU power consumption", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "acc_used": { + "description": "GPU utilization", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "acc_mem_used": { + "description": "GPU memory capacity used", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "acc_power": { + "description": "GPU power consumption", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "clock": { + "description": "Average core frequency", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "eth_read_bw": { + "description": "Ethernet read bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "eth_write_bw": { + "description": "Ethernet write bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_rcv_packets": { + "description": "Network interconnect read packets", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_send_packets": { + "description": "Network interconnect send packet", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_read_bw": { + "description": "Network interconnect read bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "ic_write_bw": { + "description": "Network interconnect write bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "filesystems": { + "description": "Array of filesystems", + "type": "array", + "items": { "type": "object", "properties": { - "jobScript": { - "type": "string", - "description": "The batch script of the job" - }, - "jobName": { - "type": "string", - "description": "Slurm Job name" - }, - "slurmInfo": { - "type": "string", - "description": "Additional slurm infos as show by scontrol show job" - } - } - }, - "tags": { - "description": "List of tags", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" 
- }, - "type": { - "type": "string" - } - }, - "required": [ - "name", - "type" + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "nfs", + "lustre", + "gpfs", + "nvme", + "ssd", + "hdd", + "beegfs" ] - }, - "uniqueItems": true - }, - "statistics": { - "description": "Job statistic data", - "type": "object", - "properties": { - "mem_used": { - "description": "Memory capacity used (required)", - "$ref": "job-metric-statistics.schema.json" - }, - "cpu_load": { - "description": "CPU requested core utilization (load 1m) (required)", - "$ref": "job-metric-statistics.schema.json" - }, - "flops_any": { - "description": "Total flop rate with DP flops scaled up (required)", - "$ref": "job-metric-statistics.schema.json" - }, - "mem_bw": { - "description": "Main memory bandwidth (required)", - "$ref": "job-metric-statistics.schema.json" - }, - "net_bw": { - "description": "Total fast interconnect network bandwidth (required)", - "$ref": "job-metric-statistics.schema.json" - }, - "file_bw": { - "description": "Total file IO bandwidth (required)", - "$ref": "job-metric-statistics.schema.json" - }, - "ipc": { - "description": "Instructions executed per cycle", - "$ref": "job-metric-statistics.schema.json" - }, - "cpu_user": { - "description": "CPU user active core utilization", - "$ref": "job-metric-statistics.schema.json" - }, - "flops_dp": { - "description": "Double precision flop rate", - "$ref": "job-metric-statistics.schema.json" - }, - "flops_sp": { - "description": "Single precision flops rate", - "$ref": "job-metric-statistics.schema.json" - }, - "rapl_power": { - "description": "CPU power consumption", - "$ref": "job-metric-statistics.schema.json" - }, - "acc_used": { - "description": "GPU utilization", - "$ref": "job-metric-statistics.schema.json" - }, - "acc_mem_used": { - "description": "GPU memory capacity used", - "$ref": "job-metric-statistics.schema.json" - }, - "acc_power": { - "description": "GPU power consumption", - "$ref": 
"job-metric-statistics.schema.json" - }, - "clock": { - "description": "Average core frequency", - "$ref": "job-metric-statistics.schema.json" - }, - "eth_read_bw": { - "description": "Ethernet read bandwidth", - "$ref": "job-metric-statistics.schema.json" - }, - "eth_write_bw": { - "description": "Ethernet write bandwidth", - "$ref": "job-metric-statistics.schema.json" - }, - "ic_rcv_packets": { - "description": "Network interconnect read packets", - "$ref": "job-metric-statistics.schema.json" - }, - "ic_send_packets": { - "description": "Network interconnect send packet", - "$ref": "job-metric-statistics.schema.json" - }, - "ic_read_bw": { - "description": "Network interconnect read bandwidth", - "$ref": "job-metric-statistics.schema.json" - }, - "ic_write_bw": { - "description": "Network interconnect write bandwidth", - "$ref": "job-metric-statistics.schema.json" - }, - "filesystems": { - "description": "Array of filesystems", - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "nfs", - "lustre", - "gpfs", - "nvme", - "ssd", - "hdd", - "beegfs" - ] - }, - "read_bw": { - "description": "File system read bandwidth", - "$ref": "job-metric-statistics.schema.json" - }, - "write_bw": { - "description": "File system write bandwidth", - "$ref": "job-metric-statistics.schema.json" - }, - "read_req": { - "description": "File system read requests", - "$ref": "job-metric-statistics.schema.json" - }, - "write_req": { - "description": "File system write requests", - "$ref": "job-metric-statistics.schema.json" - }, - "inodes": { - "description": "File system write requests", - "$ref": "job-metric-statistics.schema.json" - }, - "accesses": { - "description": "File system open and close", - "$ref": "job-metric-statistics.schema.json" - }, - "fsync": { - "description": "File system fsync", - "$ref": "job-metric-statistics.schema.json" - }, - "create": { - "description": "File 
system create", - "$ref": "job-metric-statistics.schema.json" - }, - "open": { - "description": "File system open", - "$ref": "job-metric-statistics.schema.json" - }, - "close": { - "description": "File system close", - "$ref": "job-metric-statistics.schema.json" - }, - "seek": { - "description": "File system seek", - "$ref": "job-metric-statistics.schema.json" - } - }, - "required": [ - "name", - "type", - "read_bw", - "write_bw" - ] - }, - "minItems": 1 - } + }, + "read_bw": { + "description": "File system read bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "write_bw": { + "description": "File system write bandwidth", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "read_req": { + "description": "File system read requests", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "write_req": { + "description": "File system write requests", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "inodes": { + "description": "File system inodes", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "accesses": { + "description": "File system open and close", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "fsync": { + "description": "File system fsync", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "create": { + "description": "File system create", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "open": { + "description": "File system open", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "close": { + "description": "File system close", + "$ref": "embedfs://job-metric-statistics.schema.json" + }, + "seek": { + "description": "File system seek", + "$ref": "embedfs://job-metric-statistics.schema.json" + } }, "required": [ - "cpu_user", - "cpu_load", - "mem_used", - "flops_any", - "mem_bw" + "name", + "type", + "read_bw", + "write_bw" ] + }, + "minItems": 1 } - }, - "required": [ - "jobId", - "user", - "project", - "cluster", - 
"subCluster", - "numNodes", - "exclusive", - "startTime", - "jobState", - "duration", - "resources", - "statistics" - ] + }, + "required": [ + "cpu_user", + "cpu_load", + "mem_used", + "flops_any", + "mem_bw" + ] + } + }, + "required": [ + "jobId", + "user", + "project", + "cluster", + "subCluster", + "numNodes", + "exclusive", + "startTime", + "jobState", + "duration", + "resources", + "statistics" + ] } diff --git a/datastructures/job-metric-data.schema.json b/datastructures/job-metric-data.schema.json index f616f8a..ad499bf 100644 --- a/datastructures/job-metric-data.schema.json +++ b/datastructures/job-metric-data.schema.json @@ -1,216 +1,216 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "job-metric-data.schema.json", - "title": "Job metric data", - "description": "Metric data of a HPC job", - "type": "object", - "properties": { - "unit": { - "description": "Metric unit", - "$ref": "unit.schema.json" - }, - "timestep": { - "description": "Measurement interval in seconds", - "type": "integer" - }, - "thresholds": { - "description": "Metric thresholds for specific system", - "type": "object", - "properties": { - "peak": { - "type": "number" - }, - "normal": { - "type": "number" - }, - "caution": { - "type": "number" - }, - "alert": { - "type": "number" - } - } - }, - "statisticsSeries": { - "type": "object", - "description": "Statistics series across topology", - "properties": { - "min": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "max": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "mean": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "percentiles": { - "type": "object", - "properties": { - "10": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "20": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - 
"minItems": 3 - }, - "30": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "40": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "50": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "60": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "70": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "80": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "90": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "25": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - }, - "75": { - "type": "array", - "items": { - "type": "number", - "minimum": 0 - }, - "minItems": 3 - } - } - } - } - }, - "series": { - "type": "array", - "items": { - "type": "object", - "properties": { - "hostname": { - "type": "string" - }, - "id": { - "type": "string" - }, - "statistics": { - "type": "object", - "description": "Statistics across time dimension", - "properties": { - "avg": { - "description": "Series average", - "type": "number", - "minimum": 0 - }, - "min": { - "description": "Series minimum", - "type": "number", - "minimum": 0 - }, - "max": { - "description": "Series maximum", - "type": "number", - "minimum": 0 - } - }, - "required": [ - "avg", - "min", - "max" - ] - }, - "data": { - "type": "array", - "contains": { - "type": "number", - "minimum": 0 - }, - "minItems": 1 - } - }, - "required": [ - "hostname", - "statistics", - "data" - ] - } - } + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-metric-data.schema.json", + "title": "Job metric data", + "description": "Metric data of a HPC job", + "type": "object", + "properties": { + "unit": { + "description": "Metric unit", + "$ref": 
"embedfs://unit.schema.json" }, - "required": [ - "unit", - "timestep", - "series" - ] + "timestep": { + "description": "Measurement interval in seconds", + "type": "integer" + }, + "thresholds": { + "description": "Metric thresholds for specific system", + "type": "object", + "properties": { + "peak": { + "type": "number" + }, + "normal": { + "type": "number" + }, + "caution": { + "type": "number" + }, + "alert": { + "type": "number" + } + } + }, + "statisticsSeries": { + "type": "object", + "description": "Statistics series across topology", + "properties": { + "min": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "max": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "mean": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "percentiles": { + "type": "object", + "properties": { + "10": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "20": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "30": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "40": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "50": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "60": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "70": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "80": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "90": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "25": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + }, + "75": { + "type": "array", + 
"items": { + "type": "number", + "minimum": 0 + }, + "minItems": 3 + } + } + } + } + }, + "series": { + "type": "array", + "items": { + "type": "object", + "properties": { + "hostname": { + "type": "string" + }, + "id": { + "type": "string" + }, + "statistics": { + "type": "object", + "description": "Statistics across time dimension", + "properties": { + "avg": { + "description": "Series average", + "type": "number", + "minimum": 0 + }, + "min": { + "description": "Series minimum", + "type": "number", + "minimum": 0 + }, + "max": { + "description": "Series maximum", + "type": "number", + "minimum": 0 + } + }, + "required": [ + "avg", + "min", + "max" + ] + }, + "data": { + "type": "array", + "contains": { + "type": "number", + "minimum": 0 + }, + "minItems": 1 + } + }, + "required": [ + "hostname", + "statistics", + "data" + ] + } + } + }, + "required": [ + "unit", + "timestep", + "series" + ] } diff --git a/datastructures/job-metric-statistics.schema.json b/datastructures/job-metric-statistics.schema.json index 321f100..f753ed3 100644 --- a/datastructures/job-metric-statistics.schema.json +++ b/datastructures/job-metric-statistics.schema.json @@ -1,34 +1,34 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "job-metric-statistics.schema.json", - "title": "Job statistics", - "description": "Format specification for job metric statistics", - "type": "object", - "properties": { - "unit": { - "description": "Metric unit", - "$ref": "unit.schema.json" - }, - "avg": { - "description": "Job metric average", - "type": "number", - "minimum": 0 - }, - "min": { - "description": "Job metric minimum", - "type": "number", - "minimum": 0 - }, - "max": { - "description": "Job metric maximum", - "type": "number", - "minimum": 0 - } + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://job-metric-statistics.schema.json", + "title": "Job statistics", + "description": "Format specification for job metric statistics", + "type": 
"object", + "properties": { + "unit": { + "description": "Metric unit", + "$ref": "embedfs://unit.schema.json" }, - "required": [ - "unit", - "avg", - "min", - "max" - ] + "avg": { + "description": "Job metric average", + "type": "number", + "minimum": 0 + }, + "min": { + "description": "Job metric minimum", + "type": "number", + "minimum": 0 + }, + "max": { + "description": "Job metric maximum", + "type": "number", + "minimum": 0 + } + }, + "required": [ + "unit", + "avg", + "min", + "max" + ] } diff --git a/datastructures/unit.schema.json b/datastructures/unit.schema.json index aa31084..c0a3df3 100644 --- a/datastructures/unit.schema.json +++ b/datastructures/unit.schema.json @@ -1,40 +1,40 @@ { - "$schema": "http://json-schema.org/draft/2020-12/schema", - "$id": "unit.schema.json", - "title": "Metric unit", - "description": "Format specification for job metric units", - "type": "object", - "properties": { - "base": { - "description": "Metric base unit", - "type": "string", - "enum": [ - "B", - "F", - "B/s", - "F/s", - "CPI", - "IPC", - "Hz", - "W", - "°C", - "" - ] - }, - "prefix": { - "description": "Unit prefix", - "type": "string", - "enum": [ - "K", - "M", - "G", - "T", - "P", - "E" - ] - } + "$schema": "http://json-schema.org/draft/2020-12/schema", + "$id": "embedfs://unit.schema.json", + "title": "Metric unit", + "description": "Format specification for job metric units", + "type": "object", + "properties": { + "base": { + "description": "Metric base unit", + "type": "string", + "enum": [ + "B", + "F", + "B/s", + "F/s", + "CPI", + "IPC", + "Hz", + "W", + "°C", + "" + ] }, - "required": [ - "base" - ] + "prefix": { + "description": "Unit prefix", + "type": "string", + "enum": [ + "K", + "M", + "G", + "T", + "P", + "E" + ] + } + }, + "required": [ + "base" + ] }