Add global metric list including graphQL query

This commit is contained in:
2024-07-11 11:09:14 +02:00
parent bf6b87d65c
commit f1427d5272
9 changed files with 770 additions and 50 deletions

View File

@@ -7,6 +7,7 @@ package archive
import (
"encoding/json"
"fmt"
"sync"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
@@ -53,40 +54,48 @@ type JobContainer struct {
}
var (
initOnce sync.Once
cache *lrucache.Cache = lrucache.New(128 * 1024 * 1024)
ar ArchiveBackend
useArchive bool
)
func Init(rawConfig json.RawMessage, disableArchive bool) error {
useArchive = !disableArchive
var err error
var cfg struct {
Kind string `json:"kind"`
}
initOnce.Do(func() {
useArchive = !disableArchive
if err := json.Unmarshal(rawConfig, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
return err
}
var cfg struct {
Kind string `json:"kind"`
}
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
default:
return fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
}
if err = json.Unmarshal(rawConfig, &cfg); err != nil {
log.Warn("Error while unmarshaling raw config json")
return
}
version, err := ar.Init(rawConfig)
if err != nil {
log.Error("Error while initializing archiveBackend")
return err
}
log.Infof("Load archive version %d", version)
switch cfg.Kind {
case "file":
ar = &FsArchive{}
// case "s3":
// ar = &S3Archive{}
default:
err = fmt.Errorf("ARCHIVE/ARCHIVE > unkown archive backend '%s''", cfg.Kind)
}
return initClusterConfig()
var version uint64
version, err = ar.Init(rawConfig)
if err != nil {
log.Error("Error while initializing archiveBackend")
return
}
log.Infof("Load archive version %d", version)
err = initClusterConfig()
})
return err
}
func GetHandle() ArchiveBackend {

View File

@@ -13,13 +13,15 @@ import (
)
var (
Clusters []*schema.Cluster
nodeLists map[string]map[string]NodeList
Clusters []*schema.Cluster
GlobalMetricList []*schema.GlobalMetricListItem
nodeLists map[string]map[string]NodeList
)
func initClusterConfig() error {
Clusters = []*schema.Cluster{}
nodeLists = map[string]map[string]NodeList{}
metricLookup := make(map[string]schema.GlobalMetricListItem)
for _, c := range ar.GetClusters() {
@@ -51,6 +53,12 @@ func initClusterConfig() error {
return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
}
ml, ok := metricLookup[mc.Name]
if !ok {
metricLookup[mc.Name] = schema.GlobalMetricListItem{Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit}
ml = metricLookup[mc.Name]
}
availability := schema.ClusterSupport{Cluster: cluster.Name}
scLookup := make(map[string]*schema.SubClusterConfig)
for _, scc := range mc.SubClusters {
@@ -63,6 +71,7 @@ func initClusterConfig() error {
if cfg, ok := scLookup[sc.Name]; ok {
if !cfg.Remove {
availability.SubClusters = append(availability.SubClusters, sc.Name)
newMetric.Peak = cfg.Peak
newMetric.Peak = cfg.Peak
newMetric.Normal = cfg.Normal
@@ -74,16 +83,24 @@ func initClusterConfig() error {
if newMetric.Footprint {
sc.Footprint = append(sc.Footprint, newMetric.Name)
}
if newMetric.Energy {
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
}
}
} else {
availability.SubClusters = append(availability.SubClusters, sc.Name)
sc.MetricConfig = append(sc.MetricConfig, *newMetric)
if newMetric.Footprint {
sc.Footprint = append(sc.Footprint, newMetric.Name)
}
if newMetric.Energy {
sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
}
}
}
ml.Availability = append(metricLookup[mc.Name].Availability, availability)
metricLookup[mc.Name] = ml
}
Clusters = append(Clusters, cluster)
@@ -102,6 +119,10 @@ func initClusterConfig() error {
}
}
for _, ml := range metricLookup {
GlobalMetricList = append(GlobalMetricList, &ml)
}
return nil
}

View File

@@ -27,4 +27,6 @@ func TestClusterConfig(t *testing.T) {
if len(sc.MetricConfig) != 15 {
t.Fail()
}
// spew.Dump(archive.GlobalMetricList)
}

View File

@@ -39,6 +39,7 @@ type SubCluster struct {
MemoryBandwidth MetricValue `json:"memoryBandwidth"`
MetricConfig []MetricConfig `json:"metricConfig,omitempty"`
Footprint []string `json:"footprint,omitempty"`
EnergyFootprint []string `json:"energyFootprint,omitempty"`
SocketsPerNode int `json:"socketsPerNode"`
CoresPerSocket int `json:"coresPerSocket"`
ThreadsPerCore int `json:"threadsPerCore"`
@@ -66,6 +67,7 @@ type MetricConfig struct {
Caution float64 `json:"caution"`
Alert float64 `json:"alert"`
Footprint bool `json:"footprint"`
Energy bool `json:"energy"`
}
type Cluster struct {
@@ -74,6 +76,18 @@ type Cluster struct {
SubClusters []*SubCluster `json:"subClusters"`
}
type ClusterSupport struct {
Cluster string `json:"cluster"`
SubClusters []string `json:"subclusters"`
}
type GlobalMetricListItem struct {
Name string `json:"name"`
Unit Unit `json:"unit"`
Scope MetricScope `json:"scope"`
Availability []ClusterSupport `json:"availability"`
}
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
// hwthread is in that socket, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the sockets in the first