Merge pull request #454 from ClusterCockpit/dev

Dev
Jan Eitzinger authored on 2025-12-18 15:52:06 +01:00, committed by GitHub
7 changed files with 97 additions and 86 deletions

go.mod

@@ -11,7 +11,7 @@ tool (
 require (
 	github.com/99designs/gqlgen v0.17.84
-	github.com/ClusterCockpit/cc-lib v1.0.0
+	github.com/ClusterCockpit/cc-lib v1.0.2
 	github.com/Masterminds/squirrel v1.5.4
 	github.com/aws/aws-sdk-go-v2 v1.41.0
 	github.com/aws/aws-sdk-go-v2/config v1.31.20

go.sum

@@ -6,8 +6,8 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25
 github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8=
 github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
-github.com/ClusterCockpit/cc-lib v1.0.0 h1:/8DFRomt4BpVWKWrsEZ/ru4K8x76QTVnEgdwHc5eSps=
-github.com/ClusterCockpit/cc-lib v1.0.0/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k=
+github.com/ClusterCockpit/cc-lib v1.0.2 h1:ZWn3oZkXgxrr3zSigBdlOOfayZ4Om4xL20DhmritPPg=
+github.com/ClusterCockpit/cc-lib v1.0.2/go.mod h1:UGdOvXEnjFqlnPSxtvtFwO6BtXYW6NnXFoud9FtN93k=
 github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
 github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE=
 github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8afzqM=


@@ -88,14 +88,14 @@ func (r *jobResolver) EnergyFootprint(ctx context.Context, obj *schema.Job) ([]*
 	res := []*model.EnergyFootprintValue{}
 	for name, value := range rawEnergyFootprint {
 		// Suboptimal: Nearly hardcoded metric name expectations
-		matchCpu := regexp.MustCompile(`cpu|Cpu|CPU`)
+		matchCPU := regexp.MustCompile(`cpu|Cpu|CPU`)
 		matchAcc := regexp.MustCompile(`acc|Acc|ACC`)
 		matchMem := regexp.MustCompile(`mem|Mem|MEM`)
 		matchCore := regexp.MustCompile(`core|Core|CORE`)
 		hwType := ""
 		switch test := name; { // NOtice ';' for var declaration
-		case matchCpu.MatchString(test):
+		case matchCPU.MatchString(test):
 			hwType = "CPU"
 		case matchAcc.MatchString(test):
 			hwType = "Accelerator"
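For context, the switch above classifies a raw energy-footprint metric name into a hardware-type label via substring regexps; this hunk only renames matchCpu to matchCPU to follow Go initialism conventions. A minimal standalone sketch of the same pattern (the hwTypeFor helper is illustrative, not part of the codebase, and only covers the two cases visible in the hunk):

package main

import (
	"fmt"
	"regexp"
)

// hwTypeFor mirrors the resolver's switch: the first matching
// substring pattern decides the reported hardware type.
func hwTypeFor(name string) string {
	matchCPU := regexp.MustCompile(`cpu|Cpu|CPU`)
	matchAcc := regexp.MustCompile(`acc|Acc|ACC`)

	switch {
	case matchCPU.MatchString(name):
		return "CPU"
	case matchAcc.MatchString(name):
		return "Accelerator"
	default:
		return ""
	}
}

func main() {
	fmt.Println(hwTypeFor("cpu_power"))  // CPU
	fmt.Println(hwTypeFor("acc_energy")) // Accelerator
}

A case-insensitive pattern such as (?i)cpu would cover the same spelling variants in a single alternative; the change keeps the existing three-case form and only fixes the name.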
@@ -175,9 +175,9 @@ func (r *mutationResolver) AddTagsToJob(ctx context.Context, job string, tagIds
 	}
 	tags := []*schema.Tag{}
-	for _, tagId := range tagIds {
+	for _, tagID := range tagIds {
 		// Get ID
-		tid, err := strconv.ParseInt(tagId, 10, 64)
+		tid, err := strconv.ParseInt(tagID, 10, 64)
 		if err != nil {
 			cclog.Warn("Error while parsing tag id")
 			return nil, err
@@ -222,9 +222,9 @@ func (r *mutationResolver) RemoveTagsFromJob(ctx context.Context, job string, ta
 	}
 	tags := []*schema.Tag{}
-	for _, tagId := range tagIds {
+	for _, tagID := range tagIds {
 		// Get ID
-		tid, err := strconv.ParseInt(tagId, 10, 64)
+		tid, err := strconv.ParseInt(tagID, 10, 64)
 		if err != nil {
 			cclog.Warn("Error while parsing tag id")
 			return nil, err
@@ -265,9 +265,9 @@ func (r *mutationResolver) RemoveTagFromList(ctx context.Context, tagIds []strin
 	}
 	tags := []int{}
-	for _, tagId := range tagIds {
+	for _, tagID := range tagIds {
 		// Get ID
-		tid, err := strconv.ParseInt(tagId, 10, 64)
+		tid, err := strconv.ParseInt(tagID, 10, 64)
 		if err != nil {
 			cclog.Warn("Error while parsing tag id for removal")
 			return nil, err
@@ -317,7 +317,7 @@ func (r *nodeResolver) SchedulerState(ctx context.Context, obj *schema.Node) (sc
 	if obj.NodeState != "" {
 		return obj.NodeState, nil
 	} else {
-		return "", fmt.Errorf("No SchedulerState (NodeState) on Object")
+		return "", fmt.Errorf("no SchedulerState (NodeState) on Object")
 	}
 }
@@ -343,6 +343,14 @@ func (r *queryResolver) Tags(ctx context.Context) ([]*schema.Tag, error) {
 // GlobalMetrics is the resolver for the globalMetrics field.
 func (r *queryResolver) GlobalMetrics(ctx context.Context) ([]*schema.GlobalMetricListItem, error) {
+	user := repository.GetUserFromContext(ctx)
+	if user != nil {
+		if user.HasRole(schema.RoleUser) || user.HasRole(schema.RoleManager) {
+			return archive.GlobalUserMetricList, nil
+		}
+	}
 	return archive.GlobalMetricList, nil
 }
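The new guard above makes GlobalMetrics return the user-facing metric list for accounts holding the user or manager role, while other callers (for example admins or support) keep the full list. A self-contained sketch of that selection logic, with simplified stand-ins for the project's schema types (Role, User, and the two lists are stubbed for illustration only):

package main

import "fmt"

// Simplified stand-ins for the project's schema.User and role constants.
type Role int

const (
	RoleUser Role = iota
	RoleManager
	RoleAdmin
)

type User struct{ Roles []Role }

func (u *User) HasRole(r Role) bool {
	for _, have := range u.Roles {
		if have == r {
			return true
		}
	}
	return false
}

// Stubbed metric lists; in cc-backend these live in the archive package.
var (
	globalMetricList     = []string{"cpu_load", "mem_used", "acc_power"}
	globalUserMetricList = []string{"cpu_load", "mem_used"} // restricted metrics filtered out
)

// pickMetricList mirrors the resolver change: users and managers get the
// user list, everyone else (including an unauthenticated context) the full list.
func pickMetricList(u *User) []string {
	if u != nil && (u.HasRole(RoleUser) || u.HasRole(RoleManager)) {
		return globalUserMetricList
	}
	return globalMetricList
}

func main() {
	fmt.Println(pickMetricList(&User{Roles: []Role{RoleUser}}))  // [cpu_load mem_used]
	fmt.Println(pickMetricList(&User{Roles: []Role{RoleAdmin}})) // [cpu_load mem_used acc_power]
}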
@@ -373,12 +381,12 @@ func (r *queryResolver) AllocatedNodes(ctx context.Context, cluster string) ([]*
 // Node is the resolver for the node field.
 func (r *queryResolver) Node(ctx context.Context, id string) (*schema.Node, error) {
 	repo := repository.GetNodeRepository()
-	numericId, err := strconv.ParseInt(id, 10, 64)
+	numericID, err := strconv.ParseInt(id, 10, 64)
 	if err != nil {
 		cclog.Warn("Error while parsing job id")
 		return nil, err
 	}
-	return repo.GetNodeByID(numericId, false)
+	return repo.GetNodeByID(numericID, false)
 }

 // Nodes is the resolver for the nodes field.
@@ -405,8 +413,7 @@ func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilt
 		return nil, herr
 	}
-	allCounts := make([]*model.NodeStates, 0)
-	allCounts = append(stateCounts, healthCounts...)
+	allCounts := append(stateCounts, healthCounts...)
 	return allCounts, nil
 }
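A small side note on the simplification above, not a behavioural change introduced by the diff: append(stateCounts, healthCounts...) may reuse the backing array of stateCounts when it has spare capacity, so the result can alias the first slice. Both slices here appear to come straight from the repository queries and are not used afterwards, so that is harmless in this resolver, but the general behaviour is worth keeping in mind:

package main

import "fmt"

func main() {
	// base has spare capacity, so append can reuse its backing array.
	base := make([]int, 2, 4)
	base[0], base[1] = 1, 2

	combined := append(base, 3, 4)
	combined[0] = 99

	// base[0] is now 99 too, because both slices share storage.
	fmt.Println(base, combined) // [99 2] [99 2 3 4]
}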
@@ -433,18 +440,18 @@ func (r *queryResolver) NodeStatesTimed(ctx context.Context, filter []*model.Nod
 		return healthCounts, nil
 	}
-	return nil, errors.New("Unknown Node State Query Type")
+	return nil, errors.New("unknown Node State Query Type")
 }

 // Job is the resolver for the job field.
 func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error) {
-	numericId, err := strconv.ParseInt(id, 10, 64)
+	numericID, err := strconv.ParseInt(id, 10, 64)
 	if err != nil {
 		cclog.Warn("Error while parsing job id")
 		return nil, err
 	}
-	job, err := r.Repo.FindByID(ctx, numericId)
+	job, err := r.Repo.FindByID(ctx, numericID)
 	if err != nil {
 		cclog.Warn("Error while finding job by id")
 		return nil, err
@@ -809,7 +816,7 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
 	nodeRepo := repository.GetNodeRepository()
 	nodes, stateMap, countNodes, hasNextPage, nerr := nodeRepo.GetNodesForList(ctx, cluster, subCluster, stateFilter, nodeFilter, page)
 	if nerr != nil {
-		return nil, errors.New("Could not retrieve node list required for resolving NodeMetricsList")
+		return nil, errors.New("could not retrieve node list required for resolving NodeMetricsList")
 	}
 	if metrics == nil {
@@ -898,9 +905,7 @@ func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metr
 				collectorUnit[metric] = scopedMetric.Unit
 				// Collect Initial Data
 				for _, ser := range scopedMetric.Series {
-					for _, val := range ser.Data {
-						collectorData[metric] = append(collectorData[metric], val)
-					}
+					collectorData[metric] = append(collectorData[metric], ser.Data...)
 				}
 			}
 		} else {


@@ -6,7 +6,6 @@
 package archive

 import (
-	"errors"
 	"fmt"

 	cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
@@ -16,11 +15,14 @@ import (
 var (
 	Clusters             []*schema.Cluster
 	GlobalMetricList     []*schema.GlobalMetricListItem
+	GlobalUserMetricList []*schema.GlobalMetricListItem
 	NodeLists            map[string]map[string]NodeList
 )

 func initClusterConfig() error {
 	Clusters = []*schema.Cluster{}
+	GlobalMetricList = []*schema.GlobalMetricListItem{}
+	GlobalUserMetricList = []*schema.GlobalMetricListItem{}
 	NodeLists = map[string]map[string]NodeList{}
 	metricLookup := make(map[string]schema.GlobalMetricListItem)
@@ -29,38 +31,41 @@ func initClusterConfig() error {
 		cluster, err := ar.LoadClusterCfg(c)
 		if err != nil {
 			cclog.Warnf("Error while loading cluster config for cluster '%v'", c)
-			return err
+			return fmt.Errorf("failed to load cluster config for '%s': %w", c, err)
 		}
-		if len(cluster.Name) == 0 ||
-			len(cluster.MetricConfig) == 0 ||
-			len(cluster.SubClusters) == 0 {
-			return errors.New("cluster.name, cluster.metricConfig and cluster.SubClusters should not be empty")
+		if len(cluster.Name) == 0 {
+			return fmt.Errorf("cluster name is empty in config for '%s'", c)
+		}
+		if len(cluster.MetricConfig) == 0 {
+			return fmt.Errorf("cluster '%s' has no metric configurations", cluster.Name)
+		}
+		if len(cluster.SubClusters) == 0 {
+			return fmt.Errorf("cluster '%s' has no subclusters defined", cluster.Name)
 		}
 		for _, mc := range cluster.MetricConfig {
 			if len(mc.Name) == 0 {
-				return errors.New("cluster.metricConfig.name should not be empty")
+				return fmt.Errorf("cluster '%s' has a metric config with empty name", cluster.Name)
 			}
 			if mc.Timestep < 1 {
-				return errors.New("cluster.metricConfig.timestep should not be smaller than one")
+				return fmt.Errorf("metric '%s' in cluster '%s' has invalid timestep %d (must be >= 1)", mc.Name, cluster.Name, mc.Timestep)
 			}
-			// For backwards compability...
+			// For backwards compatibility...
 			if mc.Scope == "" {
 				mc.Scope = schema.MetricScopeNode
 			}
 			if !mc.Scope.Valid() {
-				return errors.New("cluster.metricConfig.scope must be a valid scope ('node', 'scocket', ...)")
+				return fmt.Errorf("metric '%s' in cluster '%s' has invalid scope '%s' (must be 'node', 'socket', 'core', etc.)", mc.Name, cluster.Name, mc.Scope)
 			}
-			ml, ok := metricLookup[mc.Name]
-			if !ok {
+			if _, ok := metricLookup[mc.Name]; !ok {
 				metricLookup[mc.Name] = schema.GlobalMetricListItem{
-					Name: mc.Name, Scope: mc.Scope, Unit: mc.Unit, Footprint: mc.Footprint,
+					Name: mc.Name, Scope: mc.Scope, Restrict: mc.Restrict, Unit: mc.Unit, Footprint: mc.Footprint,
 				}
-				ml = metricLookup[mc.Name]
 			}
 			availability := schema.ClusterSupport{Cluster: cluster.Name}
 			scLookup := make(map[string]*schema.SubClusterConfig)
@@ -90,8 +95,9 @@ func initClusterConfig() error {
 			}
 			if cfg, ok := scLookup[sc.Name]; ok {
-				if !cfg.Remove {
-					availability.SubClusters = append(availability.SubClusters, sc.Name)
+				if cfg.Remove {
+					continue
+				}
 				newMetric.Peak = cfg.Peak
 				newMetric.Normal = cfg.Normal
 				newMetric.Caution = cfg.Caution
@@ -99,30 +105,25 @@ func initClusterConfig() error {
 				newMetric.Footprint = cfg.Footprint
 				newMetric.Energy = cfg.Energy
 				newMetric.LowerIsBetter = cfg.LowerIsBetter
-				sc.MetricConfig = append(sc.MetricConfig, *newMetric)
-				if newMetric.Footprint != "" {
-					sc.Footprint = append(sc.Footprint, newMetric.Name)
-					ml.Footprint = newMetric.Footprint
-				}
-				if newMetric.Energy != "" {
-					sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
-				}
-				}
-			} else {
+			}
 			availability.SubClusters = append(availability.SubClusters, sc.Name)
 			sc.MetricConfig = append(sc.MetricConfig, *newMetric)
 			if newMetric.Footprint != "" {
 				sc.Footprint = append(sc.Footprint, newMetric.Name)
+				item := metricLookup[mc.Name]
+				item.Footprint = newMetric.Footprint
+				metricLookup[mc.Name] = item
 			}
 			if newMetric.Energy != "" {
 				sc.EnergyFootprint = append(sc.EnergyFootprint, newMetric.Name)
 			}
 			}
-			}
-			ml.Availability = append(metricLookup[mc.Name].Availability, availability)
-			metricLookup[mc.Name] = ml
+			item := metricLookup[mc.Name]
+			item.Availability = append(item.Availability, availability)
+			metricLookup[mc.Name] = item
 		}
Clusters = append(Clusters, cluster) Clusters = append(Clusters, cluster)
@@ -141,8 +142,11 @@ func initClusterConfig() error {
 		}
 	}

-	for _, ml := range metricLookup {
-		GlobalMetricList = append(GlobalMetricList, &ml)
+	for _, metric := range metricLookup {
+		GlobalMetricList = append(GlobalMetricList, &metric)
+		if !metric.Restrict {
+			GlobalUserMetricList = append(GlobalUserMetricList, &metric)
+		}
 	}

 	return nil
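Taken together with the resolver change, the new Restrict flag drives two lists: every metric still lands in GlobalMetricList, while restricted metrics are left out of the new GlobalUserMetricList. A compact, self-contained sketch of that split (the GlobalMetricListItem type and the buildLists helper are simplified stand-ins, not the actual archive package API):

package main

import "fmt"

// Simplified stand-in for schema.GlobalMetricListItem.
type GlobalMetricListItem struct {
	Name     string
	Restrict bool
}

// buildLists mirrors the loop above: all metrics go into the global list,
// only non-restricted ones into the user-facing list. With Go 1.22+ the
// range variable is per-iteration, so taking its address is safe here.
func buildLists(lookup map[string]GlobalMetricListItem) (all, user []*GlobalMetricListItem) {
	for _, metric := range lookup {
		all = append(all, &metric)
		if !metric.Restrict {
			user = append(user, &metric)
		}
	}
	return all, user
}

func main() {
	lookup := map[string]GlobalMetricListItem{
		"cpu_load": {Name: "cpu_load"},
		"ipc":      {Name: "ipc", Restrict: true},
	}
	all, user := buildLists(lookup)
	fmt.Println(len(all), len(user)) // 2 1
}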


@@ -83,7 +83,7 @@ func Connect() {
 	client, err := NewClient(nil)
 	if err != nil {
-		cclog.Errorf("NATS connection failed: %v", err)
+		cclog.Warnf("NATS connection failed: %v", err)
 		return
 	}


@@ -1,22 +1,18 @@
 #!/bin/sh

-# rm -rf var
 if [ -d './var' ]; then
 	echo 'Directory ./var already exists! Skipping initialization.'
-	./cc-backend -server -dev
+	./cc-backend -server -dev -loglevel info
 else
 	make
-	wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-dev.tar
-	tar xf job-archive-dev.tar
-	rm ./job-archive-dev.tar
-	cp ./configs/env-template.txt .env
+	./cc-backend --init
 	cp ./configs/config-demo.json config.json
-	./cc-backend -migrate-db
+	wget https://hpc-mover.rrze.uni-erlangen.de/HPC-Data/0x7b58aefb/eig7ahyo6fo2bais0ephuf2aitohv1ai/job-archive-demo.tar
+	tar xf job-archive-demo.tar
+	rm ./job-archive-demo.tar
 	./cc-backend -dev -init-db -add-user demo:admin,api:demo
-	./cc-backend -server -dev
+	./cc-backend -server -dev -loglevel info
 fi


@@ -148,13 +148,19 @@
 						hoststate={nodeData?.state? nodeData.state: 'notindb'}/>
 					{/if}
 				</td>
-				{#each refinedData as metricData (metricData.data.name)}
+				{#each refinedData as metricData, i (metricData?.data?.name || i)}
 					{#key metricData}
 						<td>
 							{#if metricData?.disabled}
 								<Card body class="mx-3" color="info"
 									>Metric disabled for subcluster <code
-										>{metricData.data.name}:{nodeData.subCluster}</code
+										>{metricData?.data?.name ? metricData.data.name : `Metric Index ${i}`}:{nodeData.subCluster}</code
+									></Card
+								>
+							{:else if !metricData?.data?.name}
+								<Card body class="mx-3" color="warning"
+									>Metric without name for subcluster <code
+										>{`Metric Index ${i}`}:{nodeData.subCluster}</code
 									></Card
 								>
 							{:else if !!metricData.data?.metric.statisticsSeries}