Introduce hierarchical metric lists and thresholds

This commit is contained in:
Jan Eitzinger 2023-03-18 08:15:11 +01:00
parent d2b97c9e2f
commit efde2cbb8e
2 changed files with 36 additions and 12 deletions

View File

@ -38,6 +38,7 @@ type SubCluster struct {
FlopRateSimd *MetricValue `json:"flopRateSimd"` FlopRateSimd *MetricValue `json:"flopRateSimd"`
MemoryBandwidth *MetricValue `json:"memoryBandwidth"` MemoryBandwidth *MetricValue `json:"memoryBandwidth"`
Topology *Topology `json:"topology"` Topology *Topology `json:"topology"`
MetricConfig []*MetricConfig `json:"metricConfig"`
} }
type SubClusterConfig struct { type SubClusterConfig struct {
@ -54,6 +55,10 @@ type MetricConfig struct {
Scope MetricScope `json:"scope"` Scope MetricScope `json:"scope"`
Aggregation *string `json:"aggregation"` Aggregation *string `json:"aggregation"`
Timestep int `json:"timestep"` Timestep int `json:"timestep"`
Peak *float64 `json:"peak"`
Normal *float64 `json:"normal"`
Caution *float64 `json:"caution"`
Alert *float64 `json:"alert"`
SubClusters []*SubClusterConfig `json:"subClusters"` SubClusters []*SubClusterConfig `json:"subClusters"`
} }

View File

@ -39,6 +39,22 @@
"avg" "avg"
] ]
}, },
"peak": {
"description": "Metric peak threshold (Upper metric limit)",
"type": "number"
},
"normal": {
"description": "Metric normal threshold",
"type": "number"
},
"caution": {
"description": "Metric caution threshold (Suspicious but does not require immediate action)",
"type": "number"
},
"alert": {
"description": "Metric alert threshold (Requires immediate action)",
"type": "number"
},
"subClusters": { "subClusters": {
"description": "Array of cluster hardware partition metric thresholds", "description": "Array of cluster hardware partition metric thresholds",
"type": "array", "type": "array",
@ -76,7 +92,10 @@
"unit", "unit",
"scope", "scope",
"timestep", "timestep",
"subClusters" "aggregation",
"peak",
"caution",
"alert"
] ]
}, },
"minItems": 1 "minItems": 1