diff --git a/internal/api/rest.go b/internal/api/rest.go index b76da0b..fd2f86d 100644 --- a/internal/api/rest.go +++ b/internal/api/rest.go @@ -1008,8 +1008,8 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo return } - if job == nil || job.StartTime.Unix() >= req.StopTime { - handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw) + if job == nil || job.StartTime.Unix() > req.StopTime { + handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than or equal to startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw) return } diff --git a/internal/config/default_metrics.go b/internal/config/default_metrics.go index 83015d4..b0a0cc5 100644 --- a/internal/config/default_metrics.go +++ b/internal/config/default_metrics.go @@ -16,7 +16,7 @@ type DefaultMetricsConfig struct { } func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) { - filePath := "configs/default_metrics.json" + filePath := "default_metrics.json" if _, err := os.Stat(filePath); os.IsNotExist(err) { return nil, nil } diff --git a/internal/importer/handleImport.go b/internal/importer/handleImport.go index 01773a5..623291c 100644 --- a/internal/importer/handleImport.go +++ b/internal/importer/handleImport.go @@ -96,27 +96,35 @@ func HandleImportFlag(flag string) error { } job.EnergyFootprint = make(map[string]float64) - var totalEnergy float64 - var energy float64 + // Total Job Energy Outside Loop + totalEnergy := 0.0 for _, fp := range sc.EnergyFootprint { + // Always Init Metric Energy Inside Loop + metricEnergy := 0.0 if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil { // Note: For DB data, calculate and save as kWh - // Energy: Power (in Watts) * Time (in Seconds) if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules) + log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, fp, job.Cluster) + // FIXME: Needs sum as stats type } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt) - // Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits - energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100 + // Energy: Power (in Watts) * Time (in Seconds) + // Unit: (W * (s / 3600)) / 1000 = kWh + // Round 2 Digits: round(Energy * 100) / 100 + // Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000 + // Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1 + rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0 + metricEnergy = math.Round(rawEnergy*100.0) / 100.0 } } else { log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID) } - job.EnergyFootprint[fp] = energy - totalEnergy += energy + job.EnergyFootprint[fp] = metricEnergy + totalEnergy += metricEnergy } - job.Energy = (math.Round(totalEnergy*100) / 100) + job.Energy = (math.Round(totalEnergy*100.0) / 100.0) if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil { log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID) return err
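Note on the revised energy footprint calculation above (the same formula is applied again in initDB.go and internal/repository/job.go below): the job energy in kWh is now derived from the per-node power average scaled by the node count and the runtime in hours, whereas the importer previously omitted the node-count factor. The following standalone Go sketch restates that formula with made-up numbers; the helper name jobEnergyKWh and the sample values are illustrative only and not part of the patch.

```go
package main

import (
    "fmt"
    "math"
)

// jobEnergyKWh mirrors the formula used in the hunks above:
// (node-average power [W] * number of nodes) * (duration [s] / 3600) / 1000,
// rounded to two decimal places.
func jobEnergyKWh(avgPowerWatts float64, numNodes int, durationSec int64) float64 {
    rawEnergy := ((avgPowerWatts * float64(numNodes)) * (float64(durationSec) / 3600.0)) / 1000.0
    return math.Round(rawEnergy*100.0) / 100.0
}

func main() {
    // Example: 250 W node average, 4 nodes, 2 h runtime -> 2 kWh
    fmt.Println(jobEnergyKWh(250.0, 4, 7200))
}
```

For shared single-node jobs the numNodes factor is 1 and the node average already reflects only the partial resources, as the in-code note points out.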
diff --git a/internal/importer/initDB.go b/internal/importer/initDB.go index fa2ee6e..9a2ccdf 100644 --- a/internal/importer/initDB.go +++ b/internal/importer/initDB.go @@ -93,27 +93,35 @@ func InitDB() error { } job.EnergyFootprint = make(map[string]float64) - var totalEnergy float64 - var energy float64 + // Total Job Energy Outside Loop + totalEnergy := 0.0 for _, fp := range sc.EnergyFootprint { + // Always Init Metric Energy Inside Loop + metricEnergy := 0.0 if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil { // Note: For DB data, calculate and save as kWh - // Energy: Power (in Watts) * Time (in Seconds) if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules) + log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, fp, jobMeta.Cluster) + // FIXME: Needs sum as stats type } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt) - // Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits - energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100 + // Energy: Power (in Watts) * Time (in Seconds) + // Unit: (W * (s / 3600)) / 1000 = kWh + // Round 2 Digits: round(Energy * 100) / 100 + // Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000 + // Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1 + rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0 + metricEnergy = math.Round(rawEnergy*100.0) / 100.0 } } else { log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID) } - job.EnergyFootprint[fp] = energy - totalEnergy += energy + job.EnergyFootprint[fp] = metricEnergy + totalEnergy += metricEnergy } - job.Energy = (math.Round(totalEnergy*100) / 100) + job.Energy = (math.Round(totalEnergy*100.0) / 100.0) if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil { log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID) return err diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index 2b92fbb..fb6aca1 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -40,6 +40,7 @@ type CCMetricStore struct { jwt string url string queryEndpoint string + topologyCache map[string]*schema.Topology // cluster -> topology cache } type ApiQueryRequest struct { @@ -92,6 +93,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error { ccms.client = http.Client{ Timeout: 10 * time.Second, } + ccms.topologyCache = make(map[string]*schema.Topology) if config.Renamings != nil { ccms.here2there = config.Renamings @@ -181,6 +183,12 @@ func (ccms *CCMetricStore) LoadData( return nil, err } + // Verify assignment is correct - log any inconsistencies for debugging + if len(queries) != len(assignedScope) { + log.Errorf("Critical error: queries and assignedScope have different lengths after buildQueries: %d vs %d", + len(queries), len(assignedScope)) + } + req := ApiQueryRequest{ Cluster: job.Cluster, From: job.StartTime.Unix(), @@ -198,11 +206,36 @@ func (ccms *CCMetricStore) LoadData( var errors []string jobData := make(schema.JobData) + + // Add safety check for potential index out of 
range errors + if len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) { + log.Warnf("Mismatch in query results count: queries=%d, results=%d, assignedScope=%d", + len(req.Queries), len(resBody.Results), len(assignedScope)) + if len(resBody.Results) > len(req.Queries) { + resBody.Results = resBody.Results[:len(req.Queries)] + } + if len(assignedScope) > len(req.Queries) { + assignedScope = assignedScope[:len(req.Queries)] + } + } + for i, row := range resBody.Results { + // Safety check to prevent index out of range errors + if i >= len(req.Queries) || i >= len(assignedScope) { + log.Warnf("Index out of range prevented: i=%d, queries=%d, assignedScope=%d", + i, len(req.Queries), len(assignedScope)) + continue + } + query := req.Queries[i] metric := ccms.toLocalName(query.Metric) scope := assignedScope[i] mc := archive.GetMetricConfig(job.Cluster, metric) + if mc == nil { + log.Warnf("Metric config not found for %s on cluster %s", metric, job.Cluster) + continue + } + if _, ok := jobData[metric]; !ok { jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric) } @@ -231,8 +264,15 @@ func (ccms *CCMetricStore) LoadData( id := (*string)(nil) if query.Type != nil { - id = new(string) - *id = query.TypeIds[ndx] + // Check if ndx is within the bounds of TypeIds slice + if ndx < len(query.TypeIds) { + id = new(string) + *id = query.TypeIds[ndx] + } else { + // Log the error but continue processing + log.Warnf("TypeIds index out of range: %d with length %d for metric %s on host %s", + ndx, len(query.TypeIds), query.Metric, query.Hostname) + } } if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { @@ -284,20 +324,19 @@ func (ccms *CCMetricStore) buildQueries( scopes []schema.MetricScope, resolution int, ) ([]ApiQuery, []schema.MetricScope, error) { + // Initialize both slices together queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources)) - assignedScope := []schema.MetricScope{} + assignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(job.Resources)) - subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster) - if scerr != nil { - return nil, nil, scerr + topology, err := ccms.getTopology(job.Cluster, job.SubCluster) + if err != nil { + return nil, nil, err } - topology := subcluster.Topology for _, metric := range metrics { remoteName := ccms.toRemoteName(metric) mc := archive.GetMetricConfig(job.Cluster, metric) if mc == nil { - // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster) log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster) continue } @@ -329,7 +368,6 @@ func (ccms *CCMetricStore) buildQueries( // Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node) if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) { if scope != schema.MetricScopeAccelerator { - // Skip all other catched cases continue } @@ -502,6 +540,31 @@ func (ccms *CCMetricStore) buildQueries( continue } + // MemoryDomain -> Socket + if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket { + memDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads) + socketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains) + if err != nil { + log.Errorf("Error mapping memory domains to sockets: %v", err) + continue + } + + // Create a query for each socket + for _, domains := range socketToDomains { + queries = append(queries, ApiQuery{ + 
Metric: remoteName, + Hostname: host.Hostname, + Aggregate: true, + Type: &memoryDomainString, + TypeIds: intToStringSlice(domains), + Resolution: resolution, + }) + // Add scope for each query, not just once + assignedScope = append(assignedScope, scope) + } + continue + } + // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(hwthreads) @@ -772,6 +835,12 @@ func (ccms *CCMetricStore) LoadNodeListData( return nil, totalNodes, hasNextPage, err } + // Verify assignment is correct - log any inconsistencies for debugging + if len(queries) != len(assignedScope) { + log.Errorf("Critical error: queries and assignedScope have different lengths after buildNodeQueries: %d vs %d", + len(queries), len(assignedScope)) + } + req := ApiQueryRequest{ Cluster: cluster, Queries: queries, @@ -789,17 +858,48 @@ func (ccms *CCMetricStore) LoadNodeListData( var errors []string data := make(map[string]schema.JobData) + + // Add safety check for index out of range issues + if len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) { + log.Warnf("Mismatch in query results count: queries=%d, results=%d, assignedScope=%d", + len(req.Queries), len(resBody.Results), len(assignedScope)) + if len(resBody.Results) > len(req.Queries) { + resBody.Results = resBody.Results[:len(req.Queries)] + } + if len(assignedScope) > len(req.Queries) { + assignedScope = assignedScope[:len(req.Queries)] + } + } + for i, row := range resBody.Results { + // Safety check to prevent index out of range errors + if i >= len(req.Queries) || i >= len(assignedScope) { + log.Warnf("Index out of range prevented: i=%d, queries=%d, assignedScope=%d", + i, len(req.Queries), len(assignedScope)) + continue + } + var query ApiQuery if resBody.Queries != nil { - query = resBody.Queries[i] + if i < len(resBody.Queries) { + query = resBody.Queries[i] + } else { + log.Warnf("Index out of range prevented for resBody.Queries: i=%d, len=%d", + i, len(resBody.Queries)) + continue + } } else { query = req.Queries[i] } + // qdata := res[0] metric := ccms.toLocalName(query.Metric) scope := assignedScope[i] mc := archive.GetMetricConfig(cluster, metric) + if mc == nil { + log.Warnf("Metric config not found for %s on cluster %s", metric, cluster) + continue + } res := mc.Timestep if len(row) > 0 { @@ -838,8 +938,15 @@ func (ccms *CCMetricStore) LoadNodeListData( id := (*string)(nil) if query.Type != nil { - id = new(string) - *id = query.TypeIds[ndx] + // Check if ndx is within the bounds of TypeIds slice + if ndx < len(query.TypeIds) { + id = new(string) + *id = query.TypeIds[ndx] + } else { + // Log the error but continue processing + log.Warnf("TypeIds index out of range: %d with length %d for metric %s on host %s", + ndx, len(query.TypeIds), query.Metric, query.Hostname) + } } if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() { @@ -878,26 +985,14 @@ func (ccms *CCMetricStore) buildNodeQueries( scopes []schema.MetricScope, resolution int, ) ([]ApiQuery, []schema.MetricScope, error) { - + // Initialize both slices together with the same capacity queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes)) - assignedScope := []schema.MetricScope{} - - // Get Topol before loop if subCluster given - var subClusterTopol *schema.SubCluster - var scterr error - if subCluster != "" { - subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster) - if scterr != nil { - // TODO: Log - return nil, nil, scterr - } - } + 
assignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(nodes)) for _, metric := range metrics { remoteName := ccms.toRemoteName(metric) mc := archive.GetMetricConfig(cluster, metric) if mc == nil { - // return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster) log.Infof("metric '%s' is not specified for cluster '%s'", metric, cluster) continue } @@ -918,22 +1013,22 @@ func (ccms *CCMetricStore) buildNodeQueries( handledScopes = append(handledScopes, scope) for _, hostname := range nodes { + var topology *schema.Topology + var err error // If no subCluster given, get it by node if subCluster == "" { - subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname) - if scnerr != nil { - return nil, nil, scnerr - } - subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName) - if scterr != nil { - return nil, nil, scterr - } + topology, err = ccms.getTopologyByNode(cluster, hostname) + } else { + topology, err = ccms.getTopology(cluster, subCluster) + } + + if err != nil { + return nil, nil, err } // Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable // Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable - topology := subClusterTopol.Topology acceleratorIds := topology.GetAcceleratorIDs() // Moved check here if metric matches hardware specs @@ -944,7 +1039,6 @@ func (ccms *CCMetricStore) buildNodeQueries( // Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node) if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) { if scope != schema.MetricScopeAccelerator { - // Skip all other catched cases continue } @@ -1117,6 +1211,31 @@ func (ccms *CCMetricStore) buildNodeQueries( continue } + // MemoryDomain -> Socket + if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket { + memDomains, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node) + socketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains) + if err != nil { + log.Errorf("Error mapping memory domains to sockets: %v", err) + continue + } + + // Create a query for each socket + for _, domains := range socketToDomains { + queries = append(queries, ApiQuery{ + Metric: remoteName, + Hostname: hostname, + Aggregate: true, + Type: &memoryDomainString, + TypeIds: intToStringSlice(domains), + Resolution: resolution, + }) + // Add scope for each query, not just once + assignedScope = append(assignedScope, scope) + } + continue + } + // Socket -> Socket if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket { sockets, _ := topology.GetSocketsFromHWThreads(topology.Node) @@ -1173,3 +1292,29 @@ func intToStringSlice(is []int) []string { } return ss } + +// getTopology returns the topology for a given cluster and subcluster, caching it if not already present +func (ccms *CCMetricStore) getTopology(cluster, subCluster string) (*schema.Topology, error) { + cacheKey := fmt.Sprintf("%s:%s", cluster, subCluster) + if topology, ok := ccms.topologyCache[cacheKey]; ok { + return topology, nil + } + + subcluster, err := archive.GetSubCluster(cluster, subCluster) + if err != nil { + return nil, err + } + + ccms.topologyCache[cacheKey] = &subcluster.Topology + return &subcluster.Topology, nil +} + +// getTopologyByNode returns the topology for a given cluster and node, caching it if not already present +func (ccms 
*CCMetricStore) getTopologyByNode(cluster, node string) (*schema.Topology, error) { + subCluster, err := archive.GetSubClusterByNode(cluster, node) + if err != nil { + return nil, err + } + + return ccms.getTopology(cluster, subCluster) +} diff --git a/internal/repository/job.go b/internal/repository/job.go index 020c3c2..84de6f7 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -590,28 +590,34 @@ func (r *JobRepository) UpdateEnergy( return stmt, err } energyFootprint := make(map[string]float64) - var totalEnergy float64 - var energy float64 + // Total Job Energy Outside Loop + totalEnergy := 0.0 for _, fp := range sc.EnergyFootprint { + // Always Init Metric Energy Inside Loop + metricEnergy := 0.0 if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil { // Note: For DB data, calculate and save as kWh if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh) + log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, fp, jobMeta.Cluster) // FIXME: Needs sum as stats type } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt) // Energy: Power (in Watts) * Time (in Seconds) - // Unit: (( W * s ) / 3600) / 1000 = kWh ; Rounded to 2 nearest digits: (Energy * 100) / 100 - // Here: All-Node Metric Average * Number of Nodes * Job Runtime + // Unit: (W * (s / 3600)) / 1000 = kWh + // Round 2 Digits: round(Energy * 100) / 100 + // Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000 // Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1 - metricNodeSum := LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes) * float64(jobMeta.Duration) - energy = math.Round(((metricNodeSum/3600)/1000)*100) / 100 + rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0 + metricEnergy = math.Round(rawEnergy*100.0) / 100.0 } } else { log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID) } - energyFootprint[fp] = energy - totalEnergy += energy + energyFootprint[fp] = metricEnergy + totalEnergy += metricEnergy + + // log.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), metricEnergy, jobMeta.JobID, totalEnergy) } var rawFootprint []byte @@ -620,7 +626,7 @@ func (r *JobRepository) UpdateEnergy( return stmt, err } - return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil + return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil } func (r *JobRepository) UpdateFootprint( diff --git a/pkg/schema/cluster.go b/pkg/schema/cluster.go index 322f308..54f50a0 100644 --- a/pkg/schema/cluster.go +++ b/pkg/schema/cluster.go @@ -22,6 +22,13 @@ type Topology struct { Die [][]*int `json:"die,omitempty"` Core [][]int `json:"core"` Accelerators []*Accelerator `json:"accelerators,omitempty"` + + // Cache maps for faster lookups + hwthreadToSocket map[int][]int + hwthreadToCore map[int][]int + hwthreadToMemoryDomain map[int][]int + coreToSocket map[int][]int + memoryDomainToSocket map[int]int // New: Direct mapping from memory domain to socket } type MetricValue struct { @@ -92,156 +99,233 @@ type GlobalMetricListItem struct { Availability 
[]ClusterSupport `json:"availability"` } -// Return a list of socket IDs given a list of hwthread IDs. Even if just one -// hwthread is in that socket, add it to the list. If no hwthreads other than -// those in the argument list are assigned to one of the sockets in the first -// return value, return true as the second value. TODO: Optimize this, there -// must be a more efficient way/algorithm. +// InitTopologyMaps initializes the topology mapping caches +func (topo *Topology) InitTopologyMaps() { + // Initialize maps + topo.hwthreadToSocket = make(map[int][]int) + topo.hwthreadToCore = make(map[int][]int) + topo.hwthreadToMemoryDomain = make(map[int][]int) + topo.coreToSocket = make(map[int][]int) + topo.memoryDomainToSocket = make(map[int]int) + + // Build hwthread to socket mapping + for socketID, hwthreads := range topo.Socket { + for _, hwthread := range hwthreads { + topo.hwthreadToSocket[hwthread] = append(topo.hwthreadToSocket[hwthread], socketID) + } + } + + // Build hwthread to core mapping + for coreID, hwthreads := range topo.Core { + for _, hwthread := range hwthreads { + topo.hwthreadToCore[hwthread] = append(topo.hwthreadToCore[hwthread], coreID) + } + } + + // Build hwthread to memory domain mapping + for memDomID, hwthreads := range topo.MemoryDomain { + for _, hwthread := range hwthreads { + topo.hwthreadToMemoryDomain[hwthread] = append(topo.hwthreadToMemoryDomain[hwthread], memDomID) + } + } + + // Build core to socket mapping + for coreID, hwthreads := range topo.Core { + socketSet := make(map[int]struct{}) + for _, hwthread := range hwthreads { + for socketID := range topo.hwthreadToSocket[hwthread] { + socketSet[socketID] = struct{}{} + } + } + topo.coreToSocket[coreID] = make([]int, 0, len(socketSet)) + for socketID := range socketSet { + topo.coreToSocket[coreID] = append(topo.coreToSocket[coreID], socketID) + } + } + + // Build memory domain to socket mapping + for memDomID, hwthreads := range topo.MemoryDomain { + if len(hwthreads) > 0 { + // Use the first hwthread to determine the socket + if socketIDs, ok := topo.hwthreadToSocket[hwthreads[0]]; ok && len(socketIDs) > 0 { + topo.memoryDomainToSocket[memDomID] = socketIDs[0] + } + } + } +} + +// EnsureTopologyMaps ensures that the topology maps are initialized +func (topo *Topology) EnsureTopologyMaps() { + if topo.hwthreadToSocket == nil { + topo.InitTopologyMaps() + } +} + func (topo *Topology) GetSocketsFromHWThreads( hwthreads []int, ) (sockets []int, exclusive bool) { - socketsMap := map[int]int{} + topo.EnsureTopologyMaps() + + socketsMap := make(map[int]int) for _, hwthread := range hwthreads { - for socket, hwthreadsInSocket := range topo.Socket { - for _, hwthreadInSocket := range hwthreadsInSocket { - if hwthread == hwthreadInSocket { - socketsMap[socket] += 1 - } - } + for _, socketID := range topo.hwthreadToSocket[hwthread] { + socketsMap[socketID]++ } } exclusive = true - hwthreadsPerSocket := len(topo.Node) / len(topo.Socket) sockets = make([]int, 0, len(socketsMap)) for socket, count := range socketsMap { sockets = append(sockets, socket) - exclusive = exclusive && count == hwthreadsPerSocket + // Check if all hwthreads in this socket are in our input list + exclusive = exclusive && count == len(topo.Socket[socket]) } return sockets, exclusive } -// Return a list of socket IDs given a list of core IDs. Even if just one -// core is in that socket, add it to the list. 
If no cores other than -// those in the argument list are assigned to one of the sockets in the first -// return value, return true as the second value. TODO: Optimize this, there -// must be a more efficient way/algorithm. -func (topo *Topology) GetSocketsFromCores ( +func (topo *Topology) GetSocketsFromCores( cores []int, ) (sockets []int, exclusive bool) { - socketsMap := map[int]int{} + topo.EnsureTopologyMaps() + + socketsMap := make(map[int]int) for _, core := range cores { - for _, hwthreadInCore := range topo.Core[core] { - for socket, hwthreadsInSocket := range topo.Socket { - for _, hwthreadInSocket := range hwthreadsInSocket { - if hwthreadInCore == hwthreadInSocket { - socketsMap[socket] += 1 + for _, socketID := range topo.coreToSocket[core] { + socketsMap[socketID]++ + } + } + + exclusive = true + sockets = make([]int, 0, len(socketsMap)) + for socket, count := range socketsMap { + sockets = append(sockets, socket) + // Count total cores in this socket + totalCoresInSocket := 0 + for _, hwthreads := range topo.Core { + for _, hwthread := range hwthreads { + for _, sID := range topo.hwthreadToSocket[hwthread] { + if sID == socket { + totalCoresInSocket++ + break } } } } - } - - exclusive = true - hwthreadsPerSocket := len(topo.Node) / len(topo.Socket) - sockets = make([]int, 0, len(socketsMap)) - for socket, count := range socketsMap { - sockets = append(sockets, socket) - exclusive = exclusive && count == hwthreadsPerSocket + exclusive = exclusive && count == totalCoresInSocket } return sockets, exclusive } -// Return a list of core IDs given a list of hwthread IDs. Even if just one -// hwthread is in that core, add it to the list. If no hwthreads other than -// those in the argument list are assigned to one of the cores in the first -// return value, return true as the second value. TODO: Optimize this, there -// must be a more efficient way/algorithm. func (topo *Topology) GetCoresFromHWThreads( hwthreads []int, ) (cores []int, exclusive bool) { - coresMap := map[int]int{} + topo.EnsureTopologyMaps() + + coresMap := make(map[int]int) for _, hwthread := range hwthreads { - for core, hwthreadsInCore := range topo.Core { - for _, hwthreadInCore := range hwthreadsInCore { - if hwthread == hwthreadInCore { - coresMap[core] += 1 - } - } + for _, coreID := range topo.hwthreadToCore[hwthread] { + coresMap[coreID]++ } } exclusive = true - hwthreadsPerCore := len(topo.Node) / len(topo.Core) cores = make([]int, 0, len(coresMap)) for core, count := range coresMap { cores = append(cores, core) - exclusive = exclusive && count == hwthreadsPerCore + // Check if all hwthreads in this core are in our input list + exclusive = exclusive && count == len(topo.Core[core]) } return cores, exclusive } -// Return a list of memory domain IDs given a list of hwthread IDs. Even if -// just one hwthread is in that memory domain, add it to the list. If no -// hwthreads other than those in the argument list are assigned to one of the -// memory domains in the first return value, return true as the second value. -// TODO: Optimize this, there must be a more efficient way/algorithm. 
func (topo *Topology) GetMemoryDomainsFromHWThreads( hwthreads []int, ) (memDoms []int, exclusive bool) { - memDomsMap := map[int]int{} + topo.EnsureTopologyMaps() + + memDomsMap := make(map[int]int) for _, hwthread := range hwthreads { - for memDom, hwthreadsInmemDom := range topo.MemoryDomain { - for _, hwthreadInmemDom := range hwthreadsInmemDom { - if hwthread == hwthreadInmemDom { - memDomsMap[memDom] += 1 - } - } + for _, memDomID := range topo.hwthreadToMemoryDomain[hwthread] { + memDomsMap[memDomID]++ } } exclusive = true - hwthreadsPermemDom := len(topo.Node) / len(topo.MemoryDomain) memDoms = make([]int, 0, len(memDomsMap)) for memDom, count := range memDomsMap { memDoms = append(memDoms, memDom) - exclusive = exclusive && count == hwthreadsPermemDom + // Check if all hwthreads in this memory domain are in our input list + exclusive = exclusive && count == len(topo.MemoryDomain[memDom]) } return memDoms, exclusive } -// Temporary fix to convert back from int id to string id for accelerators -func (topo *Topology) GetAcceleratorID(id int) (string, error) { - if id < 0 { - fmt.Printf("ID smaller than 0!\n") - return topo.Accelerators[0].ID, nil - } else if id < len(topo.Accelerators) { - return topo.Accelerators[id].ID, nil - } else { - return "", fmt.Errorf("index %d out of range", id) +// GetMemoryDomainsBySocket can now use the direct mapping +func (topo *Topology) GetMemoryDomainsBySocket(domainIDs []int) (map[int][]int, error) { + socketToDomains := make(map[int][]int) + for _, domainID := range domainIDs { + if domainID < 0 || domainID >= len(topo.MemoryDomain) || len(topo.MemoryDomain[domainID]) == 0 { + return nil, fmt.Errorf("MemoryDomain %d is invalid or empty", domainID) + } + + socketID, ok := topo.memoryDomainToSocket[domainID] + if !ok { + return nil, fmt.Errorf("MemoryDomain %d could not be assigned to any socket", domainID) + } + + socketToDomains[socketID] = append(socketToDomains[socketID], domainID) } + + return socketToDomains, nil } -// Return list of hardware (string) accelerator IDs +// GetAcceleratorID converts a numeric ID to the corresponding Accelerator ID as a string. +// This is useful when accelerators are stored in arrays and accessed by index. +func (topo *Topology) GetAcceleratorID(id int) (string, error) { + if id < 0 { + return "", fmt.Errorf("accelerator ID %d is negative", id) + } + + if id >= len(topo.Accelerators) { + return "", fmt.Errorf("accelerator index %d out of valid range (max: %d)", + id, len(topo.Accelerators)-1) + } + + return topo.Accelerators[id].ID, nil +} + +// GetAcceleratorIDs returns a list of all Accelerator IDs (as strings). +// Capacity is pre-allocated to improve efficiency. func (topo *Topology) GetAcceleratorIDs() []string { - accels := make([]string, 0) + if len(topo.Accelerators) == 0 { + return []string{} + } + + accels := make([]string, 0, len(topo.Accelerators)) for _, accel := range topo.Accelerators { accels = append(accels, accel.ID) } return accels } -// Outdated? Or: Return indices of accelerators in parent array? +// GetAcceleratorIDsAsInt converts all Accelerator IDs to integer values. +// This function can fail if the IDs cannot be interpreted as numbers. +// Capacity is pre-allocated to improve efficiency. 
func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) { - accels := make([]int, 0) - for _, accel := range topo.Accelerators { + if len(topo.Accelerators) == 0 { + return []int{}, nil + } + + accels := make([]int, 0, len(topo.Accelerators)) + for i, accel := range topo.Accelerators { id, err := strconv.Atoi(accel.ID) if err != nil { - return nil, err + return nil, fmt.Errorf("accelerator ID at position %d (%s) cannot be converted to a number: %w", + i, accel.ID, err) } accels = append(accels, id) } diff --git a/web/frontend/src/Analysis.root.svelte b/web/frontend/src/Analysis.root.svelte index 40757d3..861c0ec 100644 --- a/web/frontend/src/Analysis.root.svelte +++ b/web/frontend/src/Analysis.root.svelte @@ -20,6 +20,7 @@ Card, Table, Icon, + Tooltip } from "@sveltestrap/sveltestrap"; import { init, @@ -70,6 +71,8 @@ ...new Set([...metricsInHistograms, ...metricsInScatterplots.flat()]), ]; + $: clusterName = cluster?.name ? cluster.name : cluster; + const sortOptions = [ { key: "totalWalltime", label: "Walltime" }, { key: "totalNodeHours", label: "Node Hours" }, @@ -159,6 +162,7 @@ groupBy: $groupBy ) { id + name totalWalltime totalNodeHours totalCoreHours @@ -422,15 +426,22 @@ {#if groupSelection.key == "user"} - {te.id} + {#if te?.name} + {te.name} + {/if} {:else} {te.id} diff --git a/web/frontend/src/Job.root.svelte b/web/frontend/src/Job.root.svelte index a384e32..6980230 100644 --- a/web/frontend/src/Job.root.svelte +++ b/web/frontend/src/Job.root.svelte @@ -58,7 +58,8 @@ let plots = {}, statsTable - let missingMetrics = [], + let availableMetrics = new Set(), + missingMetrics = [], missingHosts = [], somethingMissing = false; @@ -127,10 +128,24 @@ if (!job) return; const pendingMetrics = [ - ...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] || - ccconfig[`job_view_selectedMetrics`] + ...( + ( + ccconfig[`job_view_selectedMetrics:${job.cluster}:${job.subCluster}`] || + ccconfig[`job_view_selectedMetrics:${job.cluster}`] + ) || + $initq.data.globalMetrics + .reduce((names, gm) => { + if (gm.availability.find((av) => av.cluster === job.cluster && av.subClusters.includes(job.subCluster))) { + names.push(gm.name); + } + return names; + }, []) ), - ...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] || + ...( + ( + ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] || + ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] + ) || ccconfig[`job_view_nodestats_selectedMetrics`] ), ]; @@ -293,7 +308,7 @@ {#if $initq.data} {/if} @@ -428,9 +443,11 @@ {#if $initq.data} {/if} diff --git a/web/frontend/src/Jobs.root.svelte b/web/frontend/src/Jobs.root.svelte index df928d0..7faa8b8 100644 --- a/web/frontend/src/Jobs.root.svelte +++ b/web/frontend/src/Jobs.root.svelte @@ -137,5 +137,5 @@ bind:metrics bind:isOpen={isMetricsSelectionOpen} bind:showFootprint - footprintSelect={true} + footprintSelect /> diff --git a/web/frontend/src/Status.root.svelte b/web/frontend/src/Status.root.svelte index a44a962..5494f14 100644 --- a/web/frontend/src/Status.root.svelte +++ b/web/frontend/src/Status.root.svelte @@ -19,6 +19,7 @@ Progress, Icon, Button, + Tooltip } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -75,9 +76,9 @@ ); let isHistogramSelectionOpen = false; - $: metricsInHistograms = cluster - ? ccconfig[`user_view_histogramMetrics:${cluster}`] || [] - : ccconfig.user_view_histogramMetrics || []; + $: selectedHistograms = cluster + ? 
ccconfig[`user_view_histogramMetrics:${cluster}`] || ( ccconfig['user_view_histogramMetrics'] || [] ) + : ccconfig['user_view_histogramMetrics'] || []; const client = getContextClient(); // Note: nodeMetrics are requested on configured $timestep resolution @@ -90,7 +91,7 @@ $metrics: [String!] $from: Time! $to: Time! - $metricsInHistograms: [String!] + $selectedHistograms: [String!] ) { nodeMetrics( cluster: $cluster @@ -116,7 +117,7 @@ } } - stats: jobsStatistics(filter: $filter, metrics: $metricsInHistograms) { + stats: jobsStatistics(filter: $filter, metrics: $selectedHistograms) { histDuration { count value @@ -157,7 +158,7 @@ from: from.toISOString(), to: to.toISOString(), filter: [{ state: ["running"] }, { cluster: { eq: cluster } }], - metricsInHistograms: metricsInHistograms, + selectedHistograms: selectedHistograms, }, }); @@ -177,6 +178,7 @@ groupBy: USER ) { id + name totalJobs totalNodes totalCores @@ -515,12 +517,19 @@ {#each $topUserQuery.data.topUser as tu, i} - {tu.id} + {#if tu?.name} + {tu.name} + {/if} {tu[topUserSelection.key]} {/each} @@ -652,7 +661,7 @@ - {#if metricsInHistograms} + {#if selectedHistograms} {#key $mainQuery.data.stats[0].histMetrics} diff --git a/web/frontend/src/Systems.root.svelte b/web/frontend/src/Systems.root.svelte index 8089bbe..1589cac 100644 --- a/web/frontend/src/Systems.root.svelte +++ b/web/frontend/src/Systems.root.svelte @@ -29,8 +29,8 @@ import Refresher from "./generic/helper/Refresher.svelte"; export let displayType; - export let cluster; - export let subCluster = ""; + export let cluster = null; + export let subCluster = null; export let from = null; export let to = null; @@ -60,7 +60,10 @@ let hostnameFilter = ""; let pendingHostnameFilter = ""; let selectedMetric = ccconfig.system_view_selectedMetric || ""; - let selectedMetrics = ccconfig[`node_list_selectedMetrics:${cluster}`] || [ccconfig.system_view_selectedMetric]; + let selectedMetrics = ( + ccconfig[`node_list_selectedMetrics:${cluster}:${subCluster}`] || + ccconfig[`node_list_selectedMetrics:${cluster}`] + ) || [ccconfig.system_view_selectedMetric]; let isMetricsSelectionOpen = false; /* @@ -191,6 +194,7 @@ filterComponent.updateFilters()); @@ -290,7 +290,7 @@ -{#if metricsInHistograms?.length > 0} +{#if selectedHistograms?.length > 0} {#if $stats.error} @@ -352,11 +352,11 @@ bind:metrics bind:isOpen={isMetricsSelectionOpen} bind:showFootprint - footprintSelect={true} + footprintSelect /> diff --git a/web/frontend/src/generic/Filters.svelte b/web/frontend/src/generic/Filters.svelte index 481211b..4a9be3e 100644 --- a/web/frontend/src/generic/Filters.svelte +++ b/web/frontend/src/generic/Filters.svelte @@ -45,6 +45,14 @@ export let startTimeQuickSelect = false; export let matchedJobs = -2; + const startTimeSelectOptions = [ + { range: "", rangeLabel: "No Selection"}, + { range: "last6h", rangeLabel: "Last 6hrs"}, + { range: "last24h", rangeLabel: "Last 24hrs"}, + { range: "last7d", rangeLabel: "Last 7 days"}, + { range: "last30d", rangeLabel: "Last 30 days"} + ]; + let filters = { projectMatch: filterPresets.projectMatch || "contains", userMatch: filterPresets.userMatch || "contains", @@ -56,7 +64,7 @@ filterPresets.states || filterPresets.state ? 
[filterPresets.state].flat() : allJobStates, - startTime: filterPresets.startTime || { from: null, to: null }, + startTime: filterPresets.startTime || { from: null, to: null, range: ""}, tags: filterPresets.tags || [], duration: filterPresets.duration || { lessThan: null, @@ -268,16 +276,17 @@ {#if startTimeQuickSelect} Start Time Quick Selection - {#each [{ text: "Last 6hrs", range: "last6h" }, { text: "Last 24hrs", range: "last24h" }, { text: "Last 7 days", range: "last7d" }, { text: "Last 30 days", range: "last30d" }] as { text, range }} + {#each startTimeSelectOptions.filter((stso) => stso.range !== "") as { rangeLabel, range }} { + filters.startTime.from = null + filters.startTime.to = null filters.startTime.range = range; - filters.startTime.text = text; updateFilters(); }} > - {text} + {rangeLabel} {/each} {/if} @@ -316,7 +325,7 @@ {#if filters.startTime.range} (isStartTimeOpen = true)}> - {filters?.startTime?.text ? filters.startTime.text : filters.startTime.range } + {startTimeSelectOptions.find((stso) => stso.range === filters.startTime.range).rangeLabel } {/if} @@ -414,11 +423,8 @@ bind:from={filters.startTime.from} bind:to={filters.startTime.to} bind:range={filters.startTime.range} - on:set-filter={() => { - delete filters.startTime["text"]; - delete filters.startTime["range"]; - updateFilters(); - }} + {startTimeSelectOptions} + on:set-filter={() => updateFilters()} /> {#if $initialized}

Cluster

- - ((pendingCluster = null), (pendingPartition = null))} - > - Any Cluster - - {#each clusters as cluster} + {#if disableClusterSelection} + + + {:else} + ( - (pendingCluster = cluster.name), (pendingPartition = null) - )} + active={pendingCluster == null} + on:click={() => ((pendingCluster = null), (pendingPartition = null))} > - {cluster.name} + Any Cluster - {/each} - + {#each clusters as cluster} + ( + (pendingCluster = cluster.name), (pendingPartition = null) + )} + > + {cluster.name} + + {/each} + + {/if} {/if} {#if $initialized && pendingCluster != null}
diff --git a/web/frontend/src/generic/filters/StartTime.svelte b/web/frontend/src/generic/filters/StartTime.svelte index bc842f5..a109fbb 100644 --- a/web/frontend/src/generic/filters/StartTime.svelte +++ b/web/frontend/src/generic/filters/StartTime.svelte @@ -17,7 +17,6 @@ import { parse, format, sub } from "date-fns"; import { Row, - Col, Button, Input, Modal, @@ -34,8 +33,7 @@ export let from = null; export let to = null; export let range = ""; - - let pendingFrom, pendingTo; + export let startTimeSelectOptions; const now = new Date(Date.now()); const ago = sub(now, { months: 1 }); @@ -48,12 +46,24 @@ time: format(now, "HH:mm"), }; - function reset() { - pendingFrom = from == null ? defaultFrom : fromRFC3339(from); - pendingTo = to == null ? defaultTo : fromRFC3339(to); - } + $: pendingFrom = (from == null) ? defaultFrom : fromRFC3339(from) + $: pendingTo = (to == null) ? defaultTo : fromRFC3339(to) + $: pendingRange = range - reset(); + $: isModified = + (from != toRFC3339(pendingFrom) || to != toRFC3339(pendingTo, "59")) && + (range != pendingRange) && + !( + from == null && + pendingFrom.date == "0000-00-00" && + pendingFrom.time == "00:00" + ) && + !( + to == null && + pendingTo.date == "0000-00-00" && + pendingTo.time == "00:00" + ) && + !( range == "" && pendingRange == ""); function toRFC3339({ date, time }, secs = "00") { const parsedDate = parse( @@ -71,19 +81,6 @@ time: format(parsedDate, "HH:mm"), }; } - - $: isModified = - (from != toRFC3339(pendingFrom) || to != toRFC3339(pendingTo, "59")) && - !( - from == null && - pendingFrom.date == "0000-00-00" && - pendingFrom.time == "00:00" - ) && - !( - to == null && - pendingTo.date == "0000-00-00" && - pendingTo.time == "00:00" - ); (isOpen = !isOpen)}> @@ -92,52 +89,82 @@ {#if range !== ""}

Current Range

- - - + + + {#each startTimeSelectOptions as { rangeLabel, range }} + {/if}

From

- + - +

To

- + - + - + {#if pendingRange !== ""} + + + {:else} + + {/if} diff --git a/web/frontend/src/generic/plots/Roofline.svelte b/web/frontend/src/generic/plots/Roofline.svelte index 558d8e8..2941ecb 100644 --- a/web/frontend/src/generic/plots/Roofline.svelte +++ b/web/frontend/src/generic/plots/Roofline.svelte @@ -179,7 +179,7 @@ function render(plotData) { if (plotData) { const opts = { - title: "", + title: "CPU Roofline Diagram", mode: 2, width: width, height: height, diff --git a/web/frontend/src/generic/select/HistogramSelection.svelte b/web/frontend/src/generic/select/HistogramSelection.svelte index 4e38123..604fc95 100644 --- a/web/frontend/src/generic/select/HistogramSelection.svelte +++ b/web/frontend/src/generic/select/HistogramSelection.svelte @@ -3,7 +3,7 @@ Properties: - `cluster String`: Currently selected cluster - - `metricsInHistograms [String]`: The currently selected metrics to display as histogram + - `selectedHistograms [String]`: The currently selected metrics to display as histogram - ìsOpen Bool`: Is selection opened --> @@ -21,22 +21,27 @@ import { gql, getContextClient, mutationStore } from "@urql/svelte"; export let cluster; - export let metricsInHistograms; + export let selectedHistograms; export let isOpen; const client = getContextClient(); const initialized = getContext("initialized"); - let availableMetrics = [] + function loadHistoMetrics(isInitialized, thisCluster) { + if (!isInitialized) return []; - function loadHistoMetrics(isInitialized) { - if (!isInitialized) return; - const rawAvailableMetrics = getContext("globalMetrics").filter((gm) => gm?.footprint).map((fgm) => { return fgm.name }) - availableMetrics = [...rawAvailableMetrics] + if (!thisCluster) { + return getContext("globalMetrics") + .filter((gm) => gm?.footprint) + .map((fgm) => { return fgm.name }) + } else { + return getContext("globalMetrics") + .filter((gm) => gm?.availability.find((av) => av.cluster == thisCluster)) + .filter((agm) => agm?.footprint) + .map((afgm) => { return afgm.name }) + } } - let pendingMetrics = [...metricsInHistograms]; // Copy - const updateConfigurationMutation = ({ name, value }) => { return mutationStore({ client: client, @@ -61,17 +66,16 @@ } function closeAndApply() { - metricsInHistograms = [...pendingMetrics]; // Set for parent isOpen = !isOpen; updateConfiguration({ name: cluster ? 
`user_view_histogramMetrics:${cluster}` : "user_view_histogramMetrics", - value: metricsInHistograms, + value: selectedHistograms, }); } - $: loadHistoMetrics($initialized); + $: availableMetrics = loadHistoMetrics($initialized, cluster); @@ -81,7 +85,7 @@ {#each availableMetrics as metric (metric)} - + {metric} {/each} diff --git a/web/frontend/src/generic/select/MetricSelection.svelte b/web/frontend/src/generic/select/MetricSelection.svelte index 71b42b8..da0b340 100644 --- a/web/frontend/src/generic/select/MetricSelection.svelte +++ b/web/frontend/src/generic/select/MetricSelection.svelte @@ -28,6 +28,7 @@ export let configName; export let allMetrics = null; export let cluster = null; + export let subCluster = null; export let showFootprint = false; export let footprintSelect = false; @@ -46,12 +47,16 @@ $: { if (allMetrics != null) { - if (cluster == null) { + if (!cluster) { for (let metric of globalMetrics) allMetrics.add(metric.name); } else { allMetrics.clear(); for (let gm of globalMetrics) { - if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name); + if (!subCluster) { + if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name); + } else { + if (gm.availability.find((av) => av.cluster === cluster && av.subClusters.includes(subCluster))) allMetrics.add(gm.name); + } } } newMetricsOrder = [...allMetrics].filter((m) => !metrics.includes(m)); @@ -62,7 +67,7 @@ function printAvailability(metric, cluster) { const avail = globalMetrics.find((gm) => gm.name === metric)?.availability - if (cluster == null) { + if (!cluster) { return avail.map((av) => av.cluster).join(',') } else { return avail.find((av) => av.cluster === cluster).subClusters.join(',') @@ -107,10 +112,17 @@ metrics = newMetricsOrder.filter((m) => unorderedMetrics.includes(m)); isOpen = false; - showFootprint = !!pendingShowFootprint; + let configKey; + if (cluster && subCluster) { + configKey = `${configName}:${cluster}:${subCluster}`; + } else if (cluster && !subCluster) { + configKey = `${configName}:${cluster}`; + } else { + configKey = `${configName}`; + } updateConfigurationMutation({ - name: cluster == null ? configName : `${configName}:${cluster}`, + name: configKey, value: JSON.stringify(metrics), }).subscribe((res) => { if (res.fetching === false && res.error) { @@ -118,17 +130,20 @@ } }); - updateConfigurationMutation({ - name: - cluster == null - ? "plot_list_showFootprint" - : `plot_list_showFootprint:${cluster}`, - value: JSON.stringify(showFootprint), - }).subscribe((res) => { - if (res.fetching === false && res.error) { - throw res.error; - } - }); + if (footprintSelect) { + showFootprint = !!pendingShowFootprint; + updateConfigurationMutation({ + name: + !cluster + ? 
"plot_list_showFootprint" + : `plot_list_showFootprint:${cluster}`, + value: JSON.stringify(showFootprint), + }).subscribe((res) => { + if (res.fetching === false && res.error) { + throw res.error; + } + }); + }; dispatch('update-metrics', metrics); } diff --git a/web/frontend/src/job/StatsTable.svelte b/web/frontend/src/job/StatsTable.svelte index d68d237..c8f12f2 100644 --- a/web/frontend/src/job/StatsTable.svelte +++ b/web/frontend/src/job/StatsTable.svelte @@ -18,6 +18,8 @@ InputGroup, InputGroupText, Icon, + Row, + Col } from "@sveltestrap/sveltestrap"; import { maxScope } from "../generic/utils.js"; import StatsTableEntry from "./StatsTableEntry.svelte"; @@ -26,7 +28,7 @@ export let job; export let jobMetrics; - const allMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort() + const sortedJobMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort() const scopesForMetric = (metric) => jobMetrics.filter((jm) => jm.name == metric).map((jm) => jm.scope); @@ -34,11 +36,13 @@ selectedScopes = {}, sorting = {}, isMetricSelectionOpen = false, - selectedMetrics = - getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`] || - getContext("cc-config")["job_view_nodestats_selectedMetrics"]; + availableMetrics = new Set(), + selectedMetrics = ( + getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] || + getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`] + ) || getContext("cc-config")["job_view_nodestats_selectedMetrics"]; - for (let metric of allMetrics) { + for (let metric of sortedJobMetrics) { // Not Exclusive or Multi-Node: get maxScope directly (mostly: node) // -> Else: Load smallest available granularity as default as per availability const availableScopes = scopesForMetric(metric); @@ -95,15 +99,19 @@ }; + + + + + +
- + diff --git a/web/frontend/src/systems/nodelist/NodeInfo.svelte b/web/frontend/src/systems/nodelist/NodeInfo.svelte index ad6c98e..6b14656 100644 --- a/web/frontend/src/systems/nodelist/NodeInfo.svelte +++ b/web/frontend/src/systems/nodelist/NodeInfo.svelte @@ -102,6 +102,19 @@ Shared + + {:else if nodeJobsData.jobs.count >= 1} + + + + + + Status + + + {:else} diff --git a/web/frontend/src/systems/nodelist/NodeListRow.svelte b/web/frontend/src/systems/nodelist/NodeListRow.svelte index a1e4a54..5202573 100644 --- a/web/frontend/src/systems/nodelist/NodeListRow.svelte +++ b/web/frontend/src/systems/nodelist/NodeListRow.svelte @@ -98,12 +98,12 @@ let extendedLegendData = null; $: if ($nodeJobsData?.data) { - // Get Shared State of Node: Only Build extended Legend For Shared Nodes - if ($nodeJobsData.data.jobs.count >= 1 && !$nodeJobsData.data.jobs.items[0].exclusive) { + // Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes] + if ($nodeJobsData.data.jobs.count >= 1) { // "&& !$nodeJobsData.data.jobs.items[0].exclusive)" const accSet = Array.from(new Set($nodeJobsData.data.jobs.items .map((i) => i.resources - .filter((r) => r.hostname === nodeData.host) - .map((r) => r.accelerators) + .filter((r) => (r.hostname === nodeData.host) && r?.accelerators) + .map((r) => r?.accelerators) ) )).flat(2)
- - {#each selectedMetrics as metric} @@ -162,8 +170,9 @@ diff --git a/web/frontend/src/systems/NodeList.svelte b/web/frontend/src/systems/NodeList.svelte index ad64a1f..ca22d57 100644 --- a/web/frontend/src/systems/NodeList.svelte +++ b/web/frontend/src/systems/NodeList.svelte @@ -217,13 +217,15 @@
-

- Loading nodes {nodes.length + 1} to - { matchedNodes - ? `${(nodes.length + paging.itemsPerPage) > matchedNodes ? matchedNodes : (nodes.length + paging.itemsPerPage)} of ${matchedNodes} total` - : (nodes.length + paging.itemsPerPage) - } -

+ {#if !usePaging} +

+ Loading nodes {nodes.length + 1} to + { matchedNodes + ? `${(nodes.length + paging.itemsPerPage) > matchedNodes ? matchedNodes : (nodes.length + paging.itemsPerPage)} of ${matchedNodes} total` + : (nodes.length + paging.itemsPerPage) + } +

+ {/if}
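A closing note on the backend side of this patch: the new topologyCache in cc-metric-store.go memoizes schema.Topology lookups keyed by "cluster:subcluster", so buildQueries and buildNodeQueries no longer call archive.GetSubCluster for every metric/host combination. The sketch below is a minimal, self-contained illustration of that memoization pattern, not the patch code itself; the Topology stand-in and the load callback are placeholders, and the mutex is an addition of this sketch (the patch accesses the map without locking).

```go
package main

import (
    "fmt"
    "sync"
)

// Topology stands in for schema.Topology from pkg/schema.
type Topology struct{ Name string }

// topologyCache memoizes topology lookups per "cluster:subcluster" key,
// mirroring the idea behind CCMetricStore.getTopology above.
type topologyCache struct {
    mu    sync.Mutex
    cache map[string]*Topology
    load  func(cluster, subCluster string) (*Topology, error) // placeholder for the archive lookup
}

func (tc *topologyCache) get(cluster, subCluster string) (*Topology, error) {
    key := fmt.Sprintf("%s:%s", cluster, subCluster)
    tc.mu.Lock()
    defer tc.mu.Unlock()
    if topo, ok := tc.cache[key]; ok {
        return topo, nil // cache hit: no repeated archive lookup
    }
    topo, err := tc.load(cluster, subCluster)
    if err != nil {
        return nil, err
    }
    tc.cache[key] = topo
    return topo, nil
}

func main() {
    tc := &topologyCache{
        cache: make(map[string]*Topology),
        load: func(cluster, subCluster string) (*Topology, error) {
            fmt.Println("loading topology for", cluster, subCluster) // happens once per key
            return &Topology{Name: cluster + ":" + subCluster}, nil
        },
    }
    t1, _ := tc.get("clusterA", "sc0") // loads and caches
    t2, _ := tc.get("clusterA", "sc0") // served from the cache
    fmt.Println(t1.Name, t2.Name)
}
```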