Introduce hierarchical metric lists and thresholds

This commit is contained in:
Jan Eitzinger 2023-03-18 08:15:11 +01:00
parent d2b97c9e2f
commit efde2cbb8e
2 changed files with 36 additions and 12 deletions

View File

@ -38,6 +38,7 @@ type SubCluster struct {
FlopRateSimd *MetricValue `json:"flopRateSimd"`
MemoryBandwidth *MetricValue `json:"memoryBandwidth"`
Topology *Topology `json:"topology"`
MetricConfig []*MetricConfig `json:"metricConfig"`
}
type SubClusterConfig struct {
@ -54,6 +55,10 @@ type MetricConfig struct {
Scope MetricScope `json:"scope"`
Aggregation *string `json:"aggregation"`
Timestep int `json:"timestep"`
Peak *float64 `json:"peak"`
Normal *float64 `json:"normal"`
Caution *float64 `json:"caution"`
Alert *float64 `json:"alert"`
SubClusters []*SubClusterConfig `json:"subClusters"`
}

View File

@ -39,6 +39,22 @@
"avg"
]
},
"peak": {
"description": "Metric peak threshold (Upper metric limit)",
"type": "number"
},
"normal": {
"description": "Metric normal threshold",
"type": "number"
},
"caution": {
"description": "Metric caution threshold (Suspicious but does not require immediate action)",
"type": "number"
},
"alert": {
"description": "Metric alert threshold (Requires immediate action)",
"type": "number"
},
"subClusters": {
"description": "Array of cluster hardware partition metric thresholds",
"type": "array",
@ -76,7 +92,10 @@
"unit",
"scope",
"timestep",
"subClusters"
"aggregation",
"peak",
"caution",
"alert"
]
},
"minItems": 1