2019-10-21 14:50:16 +02:00
|
|
|
{
|
|
|
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
|
|
"title": "HPC Cluster description",
|
|
|
|
"description": "Metadata information of an HPC cluster",
|
|
|
|
"type": "object",
|
|
|
|
"properties":{
|
2021-12-17 06:54:16 +01:00
|
|
|
"name": {
|
2019-10-21 14:50:16 +02:00
|
|
|
"description": "The unique identifier of a cluster",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"metricDataRepository": {
|
2021-11-29 10:26:00 +01:00
|
|
|
"description": "Type of the metric data repository for this cluster",
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
2021-12-17 06:54:16 +01:00
|
|
|
"kind": {
|
2021-11-29 10:26:00 +01:00
|
|
|
"type": "string",
|
|
|
|
"enum": [
|
|
|
|
"influxdb-v1",
|
|
|
|
"influxdb-v2",
|
|
|
|
"prometheus",
|
|
|
|
"cc-metric-store"
|
|
|
|
]
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"url": {
|
2021-12-16 08:10:36 +01:00
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"token": {
|
2021-11-29 10:26:00 +01:00
|
|
|
"type": "string"
|
|
|
|
}
|
2021-12-16 08:10:36 +01:00
|
|
|
},
|
|
|
|
"required": [
|
2021-12-17 06:54:16 +01:00
|
|
|
"kind",
|
|
|
|
"url"
|
2021-12-16 08:10:36 +01:00
|
|
|
]
|
2021-11-29 10:26:00 +01:00
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"metricConfig": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"description": "Metric specifications",
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "object",
|
|
|
|
"properties":{
|
2021-12-17 06:54:16 +01:00
|
|
|
"name": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"description": "Metric name",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"unit": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"description": "Metric unit",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-17 09:38:41 +01:00
|
|
|
"scope": {
|
|
|
|
"description": "Native measurement resolution",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"timestep": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"description": "Frequency of timeseries points",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"peak": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"type": "number"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"normal": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"type": "number"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"caution": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"type": "number"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"alert": {
|
2021-04-23 07:14:53 +02:00
|
|
|
"type": "number"
|
|
|
|
}
|
|
|
|
}
|
2021-11-29 10:26:00 +01:00
|
|
|
},
|
2022-03-10 11:47:10 +01:00
|
|
|
"subClusters": {
|
|
|
|
"description": "Array of cluster hardware partitions",
|
2021-12-17 06:54:16 +01:00
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "object",
|
|
|
|
"properties":{
|
|
|
|
"name": {
|
2022-03-10 11:47:10 +01:00
|
|
|
"description": "Hardware partition name",
|
2021-12-17 06:54:16 +01:00
|
|
|
"type": "string"
|
|
|
|
},
|
|
|
|
"processorType": {
|
|
|
|
"description": "Processor type",
|
|
|
|
"type": "string"
|
|
|
|
},
|
|
|
|
"socketsPerNode": {
|
|
|
|
"description": "Number of sockets per node",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
|
|
|
"coresPerSocket": {
|
|
|
|
"description": "Number of cores per socket",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
|
|
|
"threadsPerCore": {
|
|
|
|
"description": "Number of SMT threads per core",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
|
|
|
"flopRateScalar": {
|
|
|
|
"description": "Theoretical node peak flop rate for scalar code in GFlops/s",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
|
|
|
"flopRateSimd": {
|
|
|
|
"description": "Theoretical node peak flop rate for SIMD code in GFlops/s",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
|
|
|
"memoryBandwidth": {
|
|
|
|
"description": "Theoretical node peak memory bandwidth in GB/s",
|
|
|
|
"type": "integer"
|
|
|
|
},
|
2022-03-10 11:47:10 +01:00
|
|
|
"nodes": {
|
|
|
|
"description": "Node list expression",
|
|
|
|
"type": "string"
|
|
|
|
},
|
2021-12-17 06:54:16 +01:00
|
|
|
"topology": {
|
|
|
|
"description": "Node topology",
|
|
|
|
"type": "object",
|
|
|
|
"properties":{
|
|
|
|
"node": {
|
|
|
|
"description": "HwThread lists of node",
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "integer"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"socket": {
|
|
|
|
"description": "HwThread lists of sockets",
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "integer"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"memoryDomain": {
|
|
|
|
"description": "HwThread lists of memory domains",
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "integer"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"die": {
|
|
|
|
"description": "HwThread lists of dies",
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "integer"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"core": {
|
|
|
|
"description": "HwThread lists of cores",
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "array",
|
|
|
|
"items": {
|
|
|
|
"type": "integer"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"accelerators": {
|
|
|
|
"type": "array",
|
|
|
|
"description": "List of accelerator devices",
|
|
|
|
"items": {
|
|
|
|
"type": "object",
|
|
|
|
"properties": {
|
|
|
|
"id": {
|
|
|
|
"type": "string",
|
|
|
|
"description": "The unique device id"
|
|
|
|
},
|
|
|
|
"type": {
|
|
|
|
"type": "string",
|
|
|
|
"description": "The accelerator type",
|
|
|
|
"enum": [
|
|
|
|
"Nvidia GPU",
|
|
|
|
"AMD GPU",
|
|
|
|
"Intel GPU"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"model": {
|
|
|
|
"type": "string",
|
|
|
|
"description": "The accelerator model"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required": [
|
|
|
|
"id",
|
|
|
|
"type",
|
|
|
|
"model"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required":[
|
|
|
|
"node",
|
|
|
|
"socket",
|
|
|
|
"memoryDomain"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
"required":[
|
|
|
|
"name",
|
|
|
|
"topology",
|
|
|
|
"processorType",
|
|
|
|
"socketsPerNode",
|
|
|
|
"coresPerSocket",
|
|
|
|
"threadsPerCore",
|
|
|
|
"flopRateScalar",
|
|
|
|
"flopRateSimd",
|
|
|
|
"memoryBandwidth"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"required":[
|
|
|
|
"name",
|
|
|
|
"metricDataRepository",
|
|
|
|
"metricConfig",
|
|
|
|
"subClusters"
|
|
|
|
]
|
|
|
|
}
|