diff --git a/go.mod b/go.mod
index 75e62f1..df8e1fb 100644
--- a/go.mod
+++ b/go.mod
@@ -11,7 +11,7 @@ tool (
 
 require (
 	github.com/99designs/gqlgen v0.17.84
-	github.com/ClusterCockpit/cc-lib v1.0.0
+	github.com/ClusterCockpit/cc-lib v1.0.2
 	github.com/Masterminds/squirrel v1.5.4
 	github.com/aws/aws-sdk-go-v2 v1.41.0
 	github.com/aws/aws-sdk-go-v2/config v1.31.20
diff --git a/go.sum b/go.sum
index e8630b7..711c555 100644
--- a/go.sum
+++ b/go.sum
@@ -6,8 +6,8 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25
 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
-github.com/ClusterCockpit/cc-lib v1.0.0 h1:/8DFRomt4BpVWKWrsEZ/ru4K8x76QTVnEgdwHc5eSps=
-github.com/ClusterCockpit/cc-lib v1.0.0/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k=
+github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg=
+github.com/ClusterCockpit/cc-lib v1.0.2/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=
diff --git a/internal/graph/schema.resolvers.go b/internal/graph/schema.resolvers.go
index 9747479..cd4af05 100644
--- a/internal/graph/schema.resolvers.go
+++ b/internal/graph/schema.resolvers.go
@@ -88,14 +88,14 @@ func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*
 	res := []*model.EnergyFootprintValue{}
 	for name, value := range rawEnergyFootprint {
 		// Suboptimal: Nearly hardcoded metric name expectations
-		matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
+		matchCPU := regexp.MustCompile(`cpu|Cpu|CPU`)
 		matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
 		matchMem := regexp.MustCompile(`mem|Mem|MEM`)
 		matchCore := regexp.MustCompile(`core|Core|CORE`)
 
 		hwType := ""
 		switch test := name; { // NOtice ';' for var declaration
-		case matchCpu.MatchString(test):
+		case matchCPU.MatchString(test):
 			hwType = "CPU"
 		case matchAcc.MatchString(test):
 			hwType = "Accelerator"
@@ -175,9 +175,9 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
 	}
 
 	tags := []*schema.Tag{}
-	for _, tagId := range tagIds {
+	for _, tagID := range tagIds {
 		// Get ID
-		tid, err := strconv.ParseInt(tagId, 10, 64)
+		tid, err := strconv.ParseInt(tagID, 10, 64)
 		if err != nil {
 			cclog.Warn("Error while parsing tag id")
 			return nil, err
@@ -222,9 +222,9 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
 	}
 
 	tags := []*schema.Tag{}
-	for _, tagId := range tagIds {
+	for _, tagID := range tagIds {
 		// Get ID
-		tid, err := strconv.ParseInt(tagId, 10, 64)
+		tid, err := strconv.ParseInt(tagID, 10, 64)
 		if err != nil {
 			cclog.Warn("Error while parsing tag id")
 			return nil, err
@@ -265,9 +265,9 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
 	}
 
 	tags := []int{}
-	for _, tagId := range tagIds {
+	for _, tagID := range tagIds {
 		// Get ID
-		tid, err := strconv.ParseInt(tagId, 10, 64)
+		tid, err := strconv.ParseInt(tagID, 10, 64)
 		if err != nil {
 			cclog.Warn("Error while parsing tag id for removal")
 			return nil, err
@@ -317,7 +317,7 @@ func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (sc
 	if obj.NodeState != "" {
 		return obj.NodeState, nil
 	} else {
-		return "", fmt.Errorf("No SchedulerState (NodeState) on Object")
+		return "", fmt.Errorf("no SchedulerState (NodeState) on Object")
 	}
 }
 
@@ -343,6 +343,14 @@ func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
 
 // GlobalMetrics is the resolver for the globalMetrics field.
 func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
+	user := repository.GetUserFromContext(ctx)
+
+	if user != nil {
+		if user.HasRole(schema.RoleUser) || user.HasRole(schema.RoleManager) {
+			return archive.GlobalUserMetricList, nil
+		}
+	}
+
 	return archive.GlobalMetricList, nil
 }
 
@@ -373,12 +381,12 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
 // Node is the resolver for the node field.
 func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) {
 	repo := repository.GetNodeRepository()
-	numericId, err := strconv.ParseInt(id, 10, 64)
+	numericID, err := strconv.ParseInt(id, 10, 64)
 	if err != nil {
 		cclog.Warn("Error while parsing job id")
 		return nil, err
 	}
-	return repo.GetNodeByID(numericId, false)
+	return repo.GetNodeByID(numericID, false)
 }
 
 // Nodes is the resolver for the nodes field.
@@ -405,8 +413,7 @@ func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilt
 		return nil, herr
 	}
 
-	allCounts := make([]*model.NodeStates, 0)
-	allCounts = append(stateCounts, healthCounts...)
+	allCounts := append(stateCounts, healthCounts...)
 
 	return allCounts, nil
 }
@@ -433,18 +440,18 @@ func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.Nod
 		return healthCounts, nil
 	}
 
-	return nil, errors.New("Unknown Node State Query Type")
+	return nil, errors.New("unknown Node State Query Type")
 }
 
 // Job is the resolver for the job field.
 func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
-	numericId, err := strconv.ParseInt(id, 10, 64)
+	numericID, err := strconv.ParseInt(id, 10, 64)
 	if err != nil {
 		cclog.Warn("Error while parsing job id")
 		return nil, err
 	}
 
-	job, err := r.Repo.FindByID(ctx, numericId)
+	job, err := r.Repo.FindByID(ctx, numericID)
 	if err != nil {
 		cclog.Warn("Error while finding job by id")
 		return nil, err
@@ -809,7 +816,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
 	nodeRepo := repository.GetNodeRepository()
 	nodes, stateMap, countNodes, hasNextPage, nerr := nodeRepo.GetNodesForList(ctx, cluster, subCluster, stateFilter, nodeFilter, page)
 	if nerr != nil {
-		return nil, errors.New("Could not retrieve node list required for resolving NodeMetricsList")
+		return nil, errors.New("could not retrieve node list required for resolving NodeMetricsList")
 	}
 
 	if metrics == nil {
@@ -898,9 +905,7 @@ func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metr
 				collectorUnit[metric] = scopedMetric.Unit
 				// Collect Initial Data
 				for _, ser := range scopedMetric.Series {
-					for _, val := range ser.Data {
-						collectorData[metric] = append(collectorData[metric], val)
-					}
+					collectorData[metric] = append(collectorData[metric], ser.Data...)
 				}
 			}
 		} else {
diff --git a/pkg/archive/clusterConfig.go b/pkg/archive/clusterConfig.go
index 13890c9..696601b 100644
--- a/pkg/archive/clusterConfig.go
+++ b/pkg/archive/clusterConfig.go
@@ -6,7 +6,6 @@ package archive
 
 import (
-	"errors"
 	"fmt"
 
 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
@@ -14,13 +13,16 @@ import (
 )
 
 var (
-	Clusters         []*schema.Cluster
-	GlobalMetricList []*schema.GlobalMetricListItem
-	NodeLists        map[string]map[string]NodeList
+	Clusters             []*schema.Cluster
+	GlobalMetricList     []*schema.GlobalMetricListItem
+	GlobalUserMetricList []*schema.GlobalMetricListItem
+	NodeLists            map[string]map[string]NodeList
 )
 
 func initClusterConfig() error {
 	Clusters = []*schema.Cluster{}
+	GlobalMetricList = []*schema.GlobalMetricListItem{}
+	GlobalUserMetricList = []*schema.GlobalMetricListItem{}
 	NodeLists = map[string]map[string]NodeList{}
 
 	metricLookup := make(map[string]schema.GlobalMetricListItem)
@@ -29,38 +31,41 @@ func initClusterConfig() error {
 		cluster, err := ar.LoadClusterCfg(c)
 		if err != nil {
 			cclog.Warnf("Error while loading cluster config for cluster '%v'", c)
-			return err
+			return fmt.Errorf("failed to load cluster config for '%s': %w", c, err)
 		}
 
-		if len(cluster.Name) == 0 ||
-			len(cluster.MetricConfig) == 0 ||
-			len(cluster.SubClusters) == 0 {
-			return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
+		if len(cluster.Name) == 0 {
+			return fmt.Errorf("cluster name is empty in config for '%s'", c)
+		}
+		if len(cluster.MetricConfig) == 0 {
+			return fmt.Errorf("cluster '%s' has no metric configurations", cluster.Name)
+		}
+		if len(cluster.SubClusters) == 0 {
+			return fmt.Errorf("cluster '%s' has no subclusters defined", cluster.Name)
 		}
 
 		for _, mc := range cluster.MetricConfig {
 			if len(mc.Name) == 0 {
-				return errors.New("cluster.metricConfig.name should not be empty")
+				return fmt.Errorf("cluster '%s' has a metric config with empty name", cluster.Name)
 			}
 			if mc.Timestep < 1 {
-				return errors.New("cluster.metricConfig.timestep should not be smaller than one")
+				return fmt.Errorf("metric '%s' in cluster '%s' has invalid timestep %d (must be >= 1)", mc.Name, cluster.Name, mc.Timestep)
 			}
 
-			// For backwards compability...
+			// For backwards compatibility...
 			if mc.Scope == "" {
 				mc.Scope = schema.MetricScopeNode
 			}
 			if !mc.Scope.Valid() {
-				return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
+				return fmt.Errorf("metric '%s' in cluster '%s' has invalid scope '%s' (must be 'node', 'socket', 'core', etc.)", mc.Name, cluster.Name, mc.Scope)
 			}
 
-			ml, ok := metricLookup[mc.Name]
-			if !ok {
+			if _, ok := metricLookup[mc.Name]; !ok {
 				metricLookup[mc.Name] = schema.GlobalMetricListItem{
-					Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
+					Name: mc.Name, Scope: mc.Scope, Restrict: mc.Restrict, Unit: mc.Unit, Footprint: mc.Footprint,
 				}
-				ml = metricLookup[mc.Name]
 			}
+
 			availability := schema.ClusterSupport{Cluster: cluster.Name}
 			scLookup := make(map[string]*schema.SubClusterConfig)
@@ -90,39 +95,35 @@ func initClusterConfig() error {
 				}
 
 				if cfg, ok := scLookup[sc.Name]; ok {
-					if !cfg.Remove {
-						availability.SubClusters = append(availability.SubClusters, sc.Name)
-						newMetric.Peak = cfg.Peak
-						newMetric.Normal = cfg.Normal
-						newMetric.Caution = cfg.Caution
-						newMetric.Alert = cfg.Alert
-						newMetric.Footprint = cfg.Footprint
-						newMetric.Energy = cfg.Energy
-						newMetric.LowerIsBetter = cfg.LowerIsBetter
-						sc.MetricConfig = append(sc.MetricConfig, *newMetric)
+					if cfg.Remove {
+						continue
+					}
+					newMetric.Peak = cfg.Peak
+					newMetric.Normal = cfg.Normal
+					newMetric.Caution = cfg.Caution
+					newMetric.Alert = cfg.Alert
+					newMetric.Footprint = cfg.Footprint
+					newMetric.Energy = cfg.Energy
+					newMetric.LowerIsBetter = cfg.LowerIsBetter
+				}
 
-						if newMetric.Footprint != "" {
-							sc.Footprint = append(sc.Footprint, newMetric.Name)
-							ml.Footprint = newMetric.Footprint
-						}
-						if newMetric.Energy != "" {
-							sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
-						}
-					}
-				} else {
-					availability.SubClusters = append(availability.SubClusters, sc.Name)
-					sc.MetricConfig = append(sc.MetricConfig, *newMetric)
+				availability.SubClusters = append(availability.SubClusters, sc.Name)
+				sc.MetricConfig = append(sc.MetricConfig, *newMetric)
 
-					if newMetric.Footprint != "" {
-						sc.Footprint = append(sc.Footprint, newMetric.Name)
-					}
-					if newMetric.Energy != "" {
-						sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
-					}
+				if newMetric.Footprint != "" {
+					sc.Footprint = append(sc.Footprint, newMetric.Name)
+					item := metricLookup[mc.Name]
+					item.Footprint = newMetric.Footprint
+					metricLookup[mc.Name] = item
+				}
+				if newMetric.Energy != "" {
+					sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
 				}
 			}
-			ml.Availability = append(metricLookup[mc.Name].Availability, availability)
-			metricLookup[mc.Name] = ml
+
+			item := metricLookup[mc.Name]
+			item.Availability = append(item.Availability, availability)
+			metricLookup[mc.Name] = item
 		}
 
 		Clusters = append(Clusters, cluster)
@@ -141,8 +142,11 @@ func initClusterConfig() error {
 		}
 	}
 
-	for _, ml := range metricLookup {
-		GlobalMetricList = append(GlobalMetricList, &ml)
+	for _, metric := range metricLookup {
+		GlobalMetricList = append(GlobalMetricList, &metric)
+		if !metric.Restrict {
+			GlobalUserMetricList = append(GlobalUserMetricList, &metric)
+		}
 	}
 
 	return nil
diff --git a/pkg/nats/client.go b/pkg/nats/client.go
index e61d060..822a7b2 100644
--- a/pkg/nats/client.go
+++ b/pkg/nats/client.go
@@ -83,7 +83,7 @@ func Connect() {
 
 	client, err := NewClient(nil)
 	if err != nil {
-		cclog.Errorf("NATS connection failed: %v", err)
+		cclog.Warnf("NATS connection failed: %v", err)
 		return
 	}
 
diff --git a/startDemo.sh b/startDemo.sh
index e709db2..108c95f 100755
--- a/startDemo.sh
+++ b/startDemo.sh
@@ -1,22 +1,18 @@
 #!/bin/sh
-# rm -rf var
-
 if [ -d './var' ]; then
     echo 'Directory ./var already exists! Skipping initialization.'
-    ./cc-backend -server -dev
+    ./cc-backend -server -dev -loglevel info
 else
     make
-    wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar
-    tar xf job-archive-dev.tar
-    rm ./job-archive-dev.tar
-
-    cp ./configs/env-template.txt .env
+    ./cc-backend --init
     cp ./configs/config-demo.json config.json
-    ./cc-backend -migrate-db
+    wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
+    tar xf job-archive-demo.tar
+    rm ./job-archive-demo.tar
+    ./cc-backend -dev -init-db -add-user demo:admin,api:demo
+    ./cc-backend -server -dev -loglevel info
+fi
-    ./cc-backend -server -dev
-
-fi
\ No newline at end of file
diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte
index d2c71ff..bc93a32 100644
--- a/web/frontend/src/systems/nodelist/NodeListRow.svelte
+++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte
@@ -148,13 +148,19 @@
       hoststate={nodeData?.state? nodeData.state: 'notindb'}/>
   {/if}
-  {#each refinedData as metricData (metricData.data.name)}
+  {#each refinedData as metricData, i (metricData?.data?.name || i)}
     {#key metricData}
-      {metricData.data.name}:{nodeData.subCluster}
+      {metricData?.data?.name ? metricData.data.name : `Metric Index ${i}`}:{nodeData.subCluster}
+      {`Metric Index ${i}`}:{nodeData.subCluster}