Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev

This commit is contained in:
brinkcoder 2025-03-14 10:52:39 +01:00
commit d009a065f1
23 changed files with 665 additions and 292 deletions

View File

@ -1008,8 +1008,8 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
return
}
if job == nil || job.StartTime.Unix() >= req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
if job == nil || job.StartTime.Unix() > req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
return
}

View File

@ -16,7 +16,7 @@ type DefaultMetricsConfig struct {
}
func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
filePath := "configs/default_metrics.json"
filePath := "default_metrics.json"
if _, err := os.Stat(filePath); os.IsNotExist(err) {
return nil, nil
}

View File

@ -96,27 +96,35 @@ func HandleImportFlag(flag string) error {
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err

View File

@ -93,27 +93,35 @@ func InitDB() error {
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err

View File

@ -40,6 +40,7 @@ type CCMetricStore struct {
jwt string
url string
queryEndpoint string
topologyCache map[string]*schema.Topology // cluster -> topology cache
}
type ApiQueryRequest struct {
@ -92,6 +93,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
ccms.client = http.Client{
Timeout: 10 * time.Second,
}
ccms.topologyCache = make(map[string]*schema.Topology)
if config.Renamings != nil {
ccms.here2there = config.Renamings
@ -181,6 +183,12 @@ func (ccms *CCMetricStore) LoadData(
return nil, err
}
// Verify assignment is correct - log any inconsistencies for debugging
if len(queries) != len(assignedScope) {
log.Errorf("Critical error: queries and assignedScope have different lengths after buildQueries: %d vs %d",
len(queries), len(assignedScope))
}
req := ApiQueryRequest{
Cluster: job.Cluster,
From: job.StartTime.Unix(),
@ -198,11 +206,36 @@ func (ccms *CCMetricStore) LoadData(
var errors []string
jobData := make(schema.JobData)
// Add safety check for potential index out of range errors
if len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {
log.Warnf("Mismatch in query results count: queries=%d, results=%d, assignedScope=%d",
len(req.Queries), len(resBody.Results), len(assignedScope))
if len(resBody.Results) > len(req.Queries) {
resBody.Results = resBody.Results[:len(req.Queries)]
}
if len(assignedScope) > len(req.Queries) {
assignedScope = assignedScope[:len(req.Queries)]
}
}
for i, row := range resBody.Results {
// Safety check to prevent index out of range errors
if i >= len(req.Queries) || i >= len(assignedScope) {
log.Warnf("Index out of range prevented: i=%d, queries=%d, assignedScope=%d",
i, len(req.Queries), len(assignedScope))
continue
}
query := req.Queries[i]
metric := ccms.toLocalName(query.Metric)
scope := assignedScope[i]
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
log.Warnf("Metric config not found for %s on cluster %s", metric, job.Cluster)
continue
}
if _, ok := jobData[metric]; !ok {
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
}
@ -231,8 +264,15 @@ func (ccms *CCMetricStore) LoadData(
id := (*string)(nil)
if query.Type != nil {
// Check if ndx is within the bounds of TypeIds slice
if ndx < len(query.TypeIds) {
id = new(string)
*id = query.TypeIds[ndx]
} else {
// Log the error but continue processing
log.Warnf("TypeIds index out of range: %d with length %d for metric %s on host %s",
ndx, len(query.TypeIds), query.Metric, query.Hostname)
}
}
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
@ -284,20 +324,19 @@ func (ccms *CCMetricStore) buildQueries(
scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {
// Initialize both slices together
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
assignedScope := []schema.MetricScope{}
assignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(job.Resources))
subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)
if scerr != nil {
return nil, nil, scerr
topology, err := ccms.getTopology(job.Cluster, job.SubCluster)
if err != nil {
return nil, nil, err
}
topology := subcluster.Topology
for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
continue
}
@ -329,7 +368,6 @@ func (ccms *CCMetricStore) buildQueries(
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
if scope != schema.MetricScopeAccelerator {
// Skip all other catched cases
continue
}
@ -502,6 +540,31 @@ func (ccms *CCMetricStore) buildQueries(
continue
}
// MemoryDomain -> Socket
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket {
memDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
socketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains)
if err != nil {
log.Errorf("Error mapping memory domains to sockets: %v", err)
continue
}
// Create a query for each socket
for _, domains := range socketToDomains {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &memoryDomainString,
TypeIds: intToStringSlice(domains),
Resolution: resolution,
})
// Add scope for each query, not just once
assignedScope = append(assignedScope, scope)
}
continue
}
// Socket -> Socket
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
@ -772,6 +835,12 @@ func (ccms *CCMetricStore) LoadNodeListData(
return nil, totalNodes, hasNextPage, err
}
// Verify assignment is correct - log any inconsistencies for debugging
if len(queries) != len(assignedScope) {
log.Errorf("Critical error: queries and assignedScope have different lengths after buildNodeQueries: %d vs %d",
len(queries), len(assignedScope))
}
req := ApiQueryRequest{
Cluster: cluster,
Queries: queries,
@ -789,17 +858,48 @@ func (ccms *CCMetricStore) LoadNodeListData(
var errors []string
data := make(map[string]schema.JobData)
// Add safety check for index out of range issues
if len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {
log.Warnf("Mismatch in query results count: queries=%d, results=%d, assignedScope=%d",
len(req.Queries), len(resBody.Results), len(assignedScope))
if len(resBody.Results) > len(req.Queries) {
resBody.Results = resBody.Results[:len(req.Queries)]
}
if len(assignedScope) > len(req.Queries) {
assignedScope = assignedScope[:len(req.Queries)]
}
}
for i, row := range resBody.Results {
// Safety check to prevent index out of range errors
if i >= len(req.Queries) || i >= len(assignedScope) {
log.Warnf("Index out of range prevented: i=%d, queries=%d, assignedScope=%d",
i, len(req.Queries), len(assignedScope))
continue
}
var query ApiQuery
if resBody.Queries != nil {
if i < len(resBody.Queries) {
query = resBody.Queries[i]
} else {
log.Warnf("Index out of range prevented for resBody.Queries: i=%d, len=%d",
i, len(resBody.Queries))
continue
}
} else {
query = req.Queries[i]
}
// qdata := res[0]
metric := ccms.toLocalName(query.Metric)
scope := assignedScope[i]
mc := archive.GetMetricConfig(cluster, metric)
if mc == nil {
log.Warnf("Metric config not found for %s on cluster %s", metric, cluster)
continue
}
res := mc.Timestep
if len(row) > 0 {
@ -838,8 +938,15 @@ func (ccms *CCMetricStore) LoadNodeListData(
id := (*string)(nil)
if query.Type != nil {
// Check if ndx is within the bounds of TypeIds slice
if ndx < len(query.TypeIds) {
id = new(string)
*id = query.TypeIds[ndx]
} else {
// Log the error but continue processing
log.Warnf("TypeIds index out of range: %d with length %d for metric %s on host %s",
ndx, len(query.TypeIds), query.Metric, query.Hostname)
}
}
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
@ -878,26 +985,14 @@ func (ccms *CCMetricStore) buildNodeQueries(
scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {
// Initialize both slices together with the same capacity
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
assignedScope := []schema.MetricScope{}
// Get Topol before loop if subCluster given
var subClusterTopol *schema.SubCluster
var scterr error
if subCluster != "" {
subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)
if scterr != nil {
// TODO: Log
return nil, nil, scterr
}
}
assignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(nodes))
for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
log.Infof("metric '%s' is not specified for cluster '%s'", metric, cluster)
continue
}
@ -918,22 +1013,22 @@ func (ccms *CCMetricStore) buildNodeQueries(
handledScopes = append(handledScopes, scope)
for _, hostname := range nodes {
var topology *schema.Topology
var err error
// If no subCluster given, get it by node
if subCluster == "" {
subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)
if scnerr != nil {
return nil, nil, scnerr
}
subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)
if scterr != nil {
return nil, nil, scterr
topology, err = ccms.getTopologyByNode(cluster, hostname)
} else {
topology, err = ccms.getTopology(cluster, subCluster)
}
if err != nil {
return nil, nil, err
}
// Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable
// Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable
topology := subClusterTopol.Topology
acceleratorIds := topology.GetAcceleratorIDs()
// Moved check here if metric matches hardware specs
@ -944,7 +1039,6 @@ func (ccms *CCMetricStore) buildNodeQueries(
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
if scope != schema.MetricScopeAccelerator {
// Skip all other catched cases
continue
}
@ -1117,6 +1211,31 @@ func (ccms *CCMetricStore) buildNodeQueries(
continue
}
// MemoryDomain -> Socket
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket {
memDomains, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
socketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains)
if err != nil {
log.Errorf("Error mapping memory domains to sockets: %v", err)
continue
}
// Create a query for each socket
for _, domains := range socketToDomains {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: hostname,
Aggregate: true,
Type: &memoryDomainString,
TypeIds: intToStringSlice(domains),
Resolution: resolution,
})
// Add scope for each query, not just once
assignedScope = append(assignedScope, scope)
}
continue
}
// Socket -> Socket
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)
@ -1173,3 +1292,29 @@ func intToStringSlice(is []int) []string {
}
return ss
}
// getTopology returns the topology of the given cluster/subcluster pair.
//
// Results are memoised in ccms.topologyCache under the key
// "<cluster>:<subCluster>". On a cache miss the subcluster is looked up via
// archive.GetSubCluster and a pointer to its Topology field is stored and
// returned; lookup errors are passed through unchanged and nothing is cached.
//
// NOTE(review): the cache map is read and written here without any locking;
// confirm that callers never invoke this concurrently on the same store.
func (ccms *CCMetricStore) getTopology(cluster, subCluster string) (*schema.Topology, error) {
	key := fmt.Sprintf("%s:%s", cluster, subCluster)

	cached, found := ccms.topologyCache[key]
	if found {
		return cached, nil
	}

	sc, err := archive.GetSubCluster(cluster, subCluster)
	if err != nil {
		return nil, err
	}

	topo := &sc.Topology
	ccms.topologyCache[key] = topo
	return topo, nil
}
// getTopologyByNode resolves the subcluster that the given node belongs to
// (via archive.GetSubClusterByNode) and returns that subcluster's topology
// through getTopology. A failed node lookup is returned as-is.
func (ccms *CCMetricStore) getTopologyByNode(cluster, node string) (*schema.Topology, error) {
	name, lookupErr := archive.GetSubClusterByNode(cluster, node)
	if lookupErr == nil {
		return ccms.getTopology(cluster, name)
	}
	return nil, lookupErr
}

View File

@ -590,28 +590,34 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err
}
energyFootprint := make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (( W * s ) / 3600) / 1000 = kWh ; Rounded to 2 nearest digits: (Energy * 100) / 100
// Here: All-Node Metric Average * Number of Nodes * Job Runtime
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
metricNodeSum := LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes) * float64(jobMeta.Duration)
energy = math.Round(((metricNodeSum/3600)/1000)*100) / 100
rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
energyFootprint[fp] = energy
totalEnergy += energy
energyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
// log.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
}
var rawFootprint []byte
@ -620,7 +626,7 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err
}
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
}
func (r *JobRepository) UpdateFootprint(

View File

@ -22,6 +22,13 @@ type Topology struct {
Die [][]*int `json:"die,omitempty"`
Core [][]int `json:"core"`
Accelerators []*Accelerator `json:"accelerators,omitempty"`
// Cache maps for faster lookups
hwthreadToSocket map[int][]int
hwthreadToCore map[int][]int
hwthreadToMemoryDomain map[int][]int
coreToSocket map[int][]int
memoryDomainToSocket map[int]int // New: Direct mapping from memory domain to socket
}
type MetricValue struct {
@ -92,156 +99,233 @@ type GlobalMetricListItem struct {
Availability []ClusterSupport `json:"availability"`
}
// Return a list of socket IDs given a list of hwthread IDs. Even if just one
// hwthread is in that socket, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
// InitTopologyMaps (re)builds the lookup caches that are derived from the
// Socket, Core and MemoryDomain lists of the topology:
//
//   - hwthreadToSocket:       hwthread ID -> IDs of the sockets listing it
//   - hwthreadToCore:         hwthread ID -> IDs of the cores listing it
//   - hwthreadToMemoryDomain: hwthread ID -> IDs of the memory domains listing it
//   - coreToSocket:           core ID -> distinct socket IDs of its hwthreads,
//     in first-seen order
//   - memoryDomainToSocket:   memory domain ID -> socket of its first hwthread
//
// Any previously built caches are discarded. IDs are the indices into the
// respective topology slices.
func (topo *Topology) InitTopologyMaps() {
	topo.hwthreadToSocket = make(map[int][]int)
	topo.hwthreadToCore = make(map[int][]int)
	topo.hwthreadToMemoryDomain = make(map[int][]int)
	topo.coreToSocket = make(map[int][]int, len(topo.Core))
	topo.memoryDomainToSocket = make(map[int]int, len(topo.MemoryDomain))

	// hwthread -> socket. Must be complete before the derived core and
	// memory domain mappings below are computed.
	for socketID, hwthreads := range topo.Socket {
		for _, hwthread := range hwthreads {
			topo.hwthreadToSocket[hwthread] = append(topo.hwthreadToSocket[hwthread], socketID)
		}
	}

	// hwthread -> core
	for coreID, hwthreads := range topo.Core {
		for _, hwthread := range hwthreads {
			topo.hwthreadToCore[hwthread] = append(topo.hwthreadToCore[hwthread], coreID)
		}
	}

	// hwthread -> memory domain
	for memDomID, hwthreads := range topo.MemoryDomain {
		for _, hwthread := range hwthreads {
			topo.hwthreadToMemoryDomain[hwthread] = append(topo.hwthreadToMemoryDomain[hwthread], memDomID)
		}
	}

	// core -> sockets
	for coreID, hwthreads := range topo.Core {
		seen := make(map[int]struct{})
		sockets := make([]int, 0, 1)
		for _, hwthread := range hwthreads {
			// Iterate over the socket IDs (slice values). Ranging with a
			// single variable would yield the slice indices instead and map
			// practically every core to socket 0.
			for _, socketID := range topo.hwthreadToSocket[hwthread] {
				if _, dup := seen[socketID]; dup {
					continue
				}
				seen[socketID] = struct{}{}
				sockets = append(sockets, socketID)
			}
		}
		topo.coreToSocket[coreID] = sockets
	}

	// memory domain -> socket: the domain is attributed to the first socket
	// recorded for its first hwthread. Domains without hwthreads, or whose
	// first hwthread is not part of any socket, get no entry.
	for memDomID, hwthreads := range topo.MemoryDomain {
		if len(hwthreads) == 0 {
			continue
		}
		if socketIDs := topo.hwthreadToSocket[hwthreads[0]]; len(socketIDs) > 0 {
			topo.memoryDomainToSocket[memDomID] = socketIDs[0]
		}
	}
}
// EnsureTopologyMaps lazily builds the lookup caches: it calls
// InitTopologyMaps only while hwthreadToSocket is still nil and is a no-op
// otherwise.
func (topo *Topology) EnsureTopologyMaps() {
	if topo.hwthreadToSocket != nil {
		return
	}
	topo.InitTopologyMaps()
}
func (topo *Topology) GetSocketsFromHWThreads(
hwthreads []int,
) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
topo.EnsureTopologyMaps()
socketsMap := make(map[int]int)
for _, hwthread := range hwthreads {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthread == hwthreadInSocket {
socketsMap[socket] += 1
}
}
for _, socketID := range topo.hwthreadToSocket[hwthread] {
socketsMap[socketID]++
}
}
exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
// Check if all hwthreads in this socket are in our input list
exclusive = exclusive && count == len(topo.Socket[socket])
}
return sockets, exclusive
}
// Return a list of socket IDs given a list of core IDs. Even if just one
// core is in that socket, add it to the list. If no cores other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetSocketsFromCores (
func (topo *Topology) GetSocketsFromCores(
cores []int,
) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
topo.EnsureTopologyMaps()
socketsMap := make(map[int]int)
for _, core := range cores {
for _, hwthreadInCore := range topo.Core[core] {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthreadInCore == hwthreadInSocket {
socketsMap[socket] += 1
}
}
}
for _, socketID := range topo.coreToSocket[core] {
socketsMap[socketID]++
}
}
exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
// Count total cores in this socket
totalCoresInSocket := 0
for _, hwthreads := range topo.Core {
for _, hwthread := range hwthreads {
for _, sID := range topo.hwthreadToSocket[hwthread] {
if sID == socket {
totalCoresInSocket++
break
}
}
}
}
exclusive = exclusive && count == totalCoresInSocket
}
return sockets, exclusive
}
// Return a list of core IDs given a list of hwthread IDs. Even if just one
// hwthread is in that core, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the cores in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetCoresFromHWThreads(
hwthreads []int,
) (cores []int, exclusive bool) {
coresMap := map[int]int{}
topo.EnsureTopologyMaps()
coresMap := make(map[int]int)
for _, hwthread := range hwthreads {
for core, hwthreadsInCore := range topo.Core {
for _, hwthreadInCore := range hwthreadsInCore {
if hwthread == hwthreadInCore {
coresMap[core] += 1
}
}
for _, coreID := range topo.hwthreadToCore[hwthread] {
coresMap[coreID]++
}
}
exclusive = true
hwthreadsPerCore := len(topo.Node) / len(topo.Core)
cores = make([]int, 0, len(coresMap))
for core, count := range coresMap {
cores = append(cores, core)
exclusive = exclusive && count == hwthreadsPerCore
// Check if all hwthreads in this core are in our input list
exclusive = exclusive && count == len(topo.Core[core])
}
return cores, exclusive
}
// Return a list of memory domain IDs given a list of hwthread IDs. Even if
// just one hwthread is in that memory domain, add it to the list. If no
// hwthreads other than those in the argument list are assigned to one of the
// memory domains in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetMemoryDomainsFromHWThreads(
hwthreads []int,
) (memDoms []int, exclusive bool) {
memDomsMap := map[int]int{}
topo.EnsureTopologyMaps()
memDomsMap := make(map[int]int)
for _, hwthread := range hwthreads {
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
for _, hwthreadInmemDom := range hwthreadsInmemDom {
if hwthread == hwthreadInmemDom {
memDomsMap[memDom] += 1
}
}
for _, memDomID := range topo.hwthreadToMemoryDomain[hwthread] {
memDomsMap[memDomID]++
}
}
exclusive = true
hwthreadsPermemDom := len(topo.Node) / len(topo.MemoryDomain)
memDoms = make([]int, 0, len(memDomsMap))
for memDom, count := range memDomsMap {
memDoms = append(memDoms, memDom)
exclusive = exclusive && count == hwthreadsPermemDom
// Check if all hwthreads in this memory domain are in our input list
exclusive = exclusive && count == len(topo.MemoryDomain[memDom])
}
return memDoms, exclusive
}
// Temporary fix to convert back from int id to string id for accelerators
func (topo *Topology) GetAcceleratorID(id int) (string, error) {
if id < 0 {
fmt.Printf("ID smaller than 0!\n")
return topo.Accelerators[0].ID, nil
} else if id < len(topo.Accelerators) {
return topo.Accelerators[id].ID, nil
} else {
return "", fmt.Errorf("index %d out of range", id)
// GetMemoryDomainsBySocket groups the given memory domain IDs by the socket
// they are attributed to in the memoryDomainToSocket cache.
//
// The returned map goes from socket ID to the domain IDs of that socket, in
// the order in which they appear in domainIDs. An error is returned (and no
// partial result) as soon as a domain ID is out of range, refers to a memory
// domain without hwthreads, or has no socket entry in the cache.
func (topo *Topology) GetMemoryDomainsBySocket(domainIDs []int) (map[int][]int, error) {
	// Build the lookup caches on first use, like the other topology lookups
	// do; without this every valid domain would be reported as unassignable
	// on a Topology whose caches have not been initialised yet.
	topo.EnsureTopologyMaps()

	socketToDomains := make(map[int][]int)
	for _, domainID := range domainIDs {
		if domainID < 0 || domainID >= len(topo.MemoryDomain) || len(topo.MemoryDomain[domainID]) == 0 {
			return nil, fmt.Errorf("MemoryDomain %d is invalid or empty", domainID)
		}

		socketID, ok := topo.memoryDomainToSocket[domainID]
		if !ok {
			return nil, fmt.Errorf("MemoryDomain %d could not be assigned to any socket", domainID)
		}
		socketToDomains[socketID] = append(socketToDomains[socketID], domainID)
	}

	return socketToDomains, nil
}
// Return list of hardware (string) accelerator IDs
// GetAcceleratorID returns the string ID of the accelerator stored at index
// id of topo.Accelerators. A negative index or an index at or beyond the end
// of the list yields an empty string and an error.
func (topo *Topology) GetAcceleratorID(id int) (string, error) {
	count := len(topo.Accelerators)

	switch {
	case id < 0:
		return "", fmt.Errorf("accelerator ID %d is negative", id)
	case id >= count:
		return "", fmt.Errorf("accelerator index %d out of valid range (max: %d)",
			id, count-1)
	}

	return topo.Accelerators[id].ID, nil
}
// GetAcceleratorIDs returns a list of all Accelerator IDs (as strings).
// Capacity is pre-allocated to improve efficiency.
func (topo *Topology) GetAcceleratorIDs() []string {
accels := make([]string, 0)
if len(topo.Accelerators) == 0 {
return []string{}
}
accels := make([]string, 0, len(topo.Accelerators))
for _, accel := range topo.Accelerators {
accels = append(accels, accel.ID)
}
return accels
}
// Outdated? Or: Return indices of accelerators in parent array?
// GetAcceleratorIDsAsInt converts all Accelerator IDs to integer values.
// This function can fail if the IDs cannot be interpreted as numbers.
// Capacity is pre-allocated to improve efficiency.
func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) {
accels := make([]int, 0)
for _, accel := range topo.Accelerators {
if len(topo.Accelerators) == 0 {
return []int{}, nil
}
accels := make([]int, 0, len(topo.Accelerators))
for i, accel := range topo.Accelerators {
id, err := strconv.Atoi(accel.ID)
if err != nil {
return nil, err
return nil, fmt.Errorf("accelerator ID at position %d (%s) cannot be converted to a number: %w",
i, accel.ID, err)
}
accels = append(accels, id)
}

View File

@ -20,6 +20,7 @@
Card,
Table,
Icon,
Tooltip
} from "@sveltestrap/sveltestrap";
import {
init,
@ -70,6 +71,8 @@
...new Set([...metricsInHistograms, ...metricsInScatterplots.flat()]),
];
$: clusterName = cluster?.name ? cluster.name : cluster;
const sortOptions = [
{ key: "totalWalltime", label: "Walltime" },
{ key: "totalNodeHours", label: "Node Hours" },
@ -159,6 +162,7 @@
groupBy: $groupBy
) {
id
name
totalWalltime
totalNodeHours
totalCoreHours
@ -422,15 +426,22 @@
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};" /></td>
{#if groupSelection.key == "user"}
<th scope="col"
><a href="/monitoring/user/{te.id}?cluster={cluster}"
<th scope="col" id="topName-{te.id}"
><a href="/monitoring/user/{te.id}?cluster={clusterName}"
>{te.id}</a
></th
>
{#if te?.name}
<Tooltip
target={`topName-${te.id}`}
placement="left"
>{te.name}</Tooltip
>
{/if}
{:else}
<th scope="col"
><a
href="/monitoring/jobs/?cluster={cluster}&project={te.id}&projectMatch=eq"
href="/monitoring/jobs/?cluster={clusterName}&project={te.id}&projectMatch=eq"
>{te.id}</a
></th
>

View File

@ -58,7 +58,8 @@
let plots = {},
statsTable
let missingMetrics = [],
let availableMetrics = new Set(),
missingMetrics = [],
missingHosts = [],
somethingMissing = false;
@ -127,10 +128,24 @@
if (!job) return;
const pendingMetrics = [
...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
ccconfig[`job_view_selectedMetrics`]
...(
(
ccconfig[`job_view_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`job_view_selectedMetrics:${job.cluster}`]
) ||
$initq.data.globalMetrics
.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster && av.subClusters.includes(job.subCluster))) {
names.push(gm.name);
}
return names;
}, [])
),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
...(
(
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`]
) ||
ccconfig[`job_view_nodestats_selectedMetrics`]
),
];
@ -293,7 +308,7 @@
{#if $initq.data}
<Col xs="auto">
<Button outline on:click={() => (isMetricsSelectionOpen = true)} color="primary">
Select Metrics
Select Metrics (Selected {selectedMetrics.length} of {availableMetrics.size} available)
</Button>
</Col>
{/if}
@ -428,9 +443,11 @@
{#if $initq.data}
<MetricSelection
cluster={$initq.data.job.cluster}
subCluster={$initq.data.job.subCluster}
configName="job_view_selectedMetrics"
bind:metrics={selectedMetrics}
bind:isOpen={isMetricsSelectionOpen}
bind:allMetrics={availableMetrics}
/>
{/if}

View File

@ -137,5 +137,5 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
footprintSelect={true}
footprintSelect
/>

View File

@ -19,6 +19,7 @@
Progress,
Icon,
Button,
Tooltip
} from "@sveltestrap/sveltestrap";
import {
queryStore,
@ -75,9 +76,9 @@
);
let isHistogramSelectionOpen = false;
$: metricsInHistograms = cluster
? ccconfig[`user_view_histogramMetrics:${cluster}`] || []
: ccconfig.user_view_histogramMetrics || [];
$: selectedHistograms = cluster
? ccconfig[`user_view_histogramMetrics:${cluster}`] || ( ccconfig['user_view_histogramMetrics'] || [] )
: ccconfig['user_view_histogramMetrics'] || [];
const client = getContextClient();
// Note: nodeMetrics are requested on configured $timestep resolution
@ -90,7 +91,7 @@
$metrics: [String!]
$from: Time!
$to: Time!
$metricsInHistograms: [String!]
$selectedHistograms: [String!]
) {
nodeMetrics(
cluster: $cluster
@ -116,7 +117,7 @@
}
}
stats: jobsStatistics(filter: $filter, metrics: $metricsInHistograms) {
stats: jobsStatistics(filter: $filter, metrics: $selectedHistograms) {
histDuration {
count
value
@ -157,7 +158,7 @@
from: from.toISOString(),
to: to.toISOString(),
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
metricsInHistograms: metricsInHistograms,
selectedHistograms: selectedHistograms,
},
});
@ -177,6 +178,7 @@
groupBy: USER
) {
id
name
totalJobs
totalNodes
totalCores
@ -515,12 +517,19 @@
{#each $topUserQuery.data.topUser as tu, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};" /></td>
<th scope="col"
<th scope="col" id="topName-{tu.id}"
><a
href="/monitoring/user/{tu.id}?cluster={cluster}&state=running"
>{tu.id}</a
></th
>
{#if tu?.name}
<Tooltip
target={`topName-${tu.id}`}
placement="left"
>{tu.name}</Tooltip
>
{/if}
<td>{tu[topUserSelection.key]}</td>
</tr>
{/each}
@ -652,7 +661,7 @@
<!-- Selectable Stats as Histograms : Average Values of Running Jobs -->
{#if metricsInHistograms}
{#if selectedHistograms}
{#key $mainQuery.data.stats[0].histMetrics}
<PlotGrid
let:item
@ -675,6 +684,6 @@
<HistogramSelection
bind:cluster
bind:metricsInHistograms
bind:selectedHistograms
bind:isOpen={isHistogramSelectionOpen}
/>

View File

@ -29,8 +29,8 @@
import Refresher from "./generic/helper/Refresher.svelte";
export let displayType;
export let cluster;
export let subCluster = "";
export let cluster = null;
export let subCluster = null;
export let from = null;
export let to = null;
@ -60,7 +60,10 @@
let hostnameFilter = "";
let pendingHostnameFilter = "";
let selectedMetric = ccconfig.system_view_selectedMetric || "";
let selectedMetrics = ccconfig[`node_list_selectedMetrics:${cluster}`] || [ccconfig.system_view_selectedMetric];
let selectedMetrics = (
ccconfig[`node_list_selectedMetrics:${cluster}:${subCluster}`] ||
ccconfig[`node_list_selectedMetrics:${cluster}`]
) || [ccconfig.system_view_selectedMetric];
let isMetricsSelectionOpen = false;
/*
@ -191,6 +194,7 @@
<MetricSelection
{cluster}
{subCluster}
configName="node_list_selectedMetrics"
metrics={selectedMetrics}
bind:isOpen={isMetricsSelectionOpen}

View File

@ -68,16 +68,16 @@
let durationBinOptions = ["1m","10m","1h","6h","12h"];
let metricBinOptions = [10, 20, 50, 100];
$: metricsInHistograms = selectedCluster
? ccconfig[`user_view_histogramMetrics:${selectedCluster}`] || []
: ccconfig.user_view_histogramMetrics || [];
$: selectedHistograms = selectedCluster
? ccconfig[`user_view_histogramMetrics:${selectedCluster}`] || ( ccconfig['user_view_histogramMetrics'] || [] )
: ccconfig['user_view_histogramMetrics'] || [];
const client = getContextClient();
$: stats = queryStore({
client: client,
query: gql`
query ($jobFilters: [JobFilter!]!, $metricsInHistograms: [String!], $numDurationBins: String, $numMetricBins: Int) {
jobsStatistics(filter: $jobFilters, metrics: $metricsInHistograms, numDurationBins: $numDurationBins , numMetricBins: $numMetricBins ) {
query ($jobFilters: [JobFilter!]!, $selectedHistograms: [String!], $numDurationBins: String, $numMetricBins: Int) {
jobsStatistics(filter: $jobFilters, metrics: $selectedHistograms, numDurationBins: $numDurationBins , numMetricBins: $numMetricBins ) {
totalJobs
shortJobs
totalWalltime
@ -104,7 +104,7 @@
}
}
`,
variables: { jobFilters, metricsInHistograms, numDurationBins, numMetricBins },
variables: { jobFilters, selectedHistograms, numDurationBins, numMetricBins },
});
onMount(() => filterComponent.updateFilters());
@ -290,7 +290,7 @@
</InputGroup>
</Col>
</Row>
{#if metricsInHistograms?.length > 0}
{#if selectedHistograms?.length > 0}
{#if $stats.error}
<Row>
<Col>
@ -352,11 +352,11 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
footprintSelect={true}
footprintSelect
/>
<HistogramSelection
bind:cluster={selectedCluster}
bind:metricsInHistograms
bind:selectedHistograms
bind:isOpen={isHistogramSelectionOpen}
/>

View File

@ -45,6 +45,14 @@
export let startTimeQuickSelect = false;
export let matchedJobs = -2;
const startTimeSelectOptions = [
{ range: "", rangeLabel: "No Selection"},
{ range: "last6h", rangeLabel: "Last 6hrs"},
{ range: "last24h", rangeLabel: "Last 24hrs"},
{ range: "last7d", rangeLabel: "Last 7 days"},
{ range: "last30d", rangeLabel: "Last 30 days"}
];
let filters = {
projectMatch: filterPresets.projectMatch || "contains",
userMatch: filterPresets.userMatch || "contains",
@ -56,7 +64,7 @@
filterPresets.states || filterPresets.state
? [filterPresets.state].flat()
: allJobStates,
startTime: filterPresets.startTime || { from: null, to: null },
startTime: filterPresets.startTime || { from: null, to: null, range: ""},
tags: filterPresets.tags || [],
duration: filterPresets.duration || {
lessThan: null,
@ -268,16 +276,17 @@
{#if startTimeQuickSelect}
<DropdownItem divider />
<DropdownItem disabled>Start Time Quick Selection</DropdownItem>
{#each [{ text: "Last 6hrs", range: "last6h" }, { text: "Last 24hrs", range: "last24h" }, { text: "Last 7 days", range: "last7d" }, { text: "Last 30 days", range: "last30d" }] as { text, range }}
{#each startTimeSelectOptions.filter((stso) => stso.range !== "") as { rangeLabel, range }}
<DropdownItem
on:click={() => {
filters.startTime.from = null
filters.startTime.to = null
filters.startTime.range = range;
filters.startTime.text = text;
updateFilters();
}}
>
<Icon name="calendar-range" />
{text}
{rangeLabel}
</DropdownItem>
{/each}
{/if}
@ -316,7 +325,7 @@
{#if filters.startTime.range}
<Info icon="calendar-range" on:click={() => (isStartTimeOpen = true)}>
{filters?.startTime?.text ? filters.startTime.text : filters.startTime.range }
{startTimeSelectOptions.find((stso) => stso.range === filters.startTime.range).rangeLabel }
</Info>
{/if}
@ -414,11 +423,8 @@
bind:from={filters.startTime.from}
bind:to={filters.startTime.to}
bind:range={filters.startTime.range}
on:set-filter={() => {
delete filters.startTime["text"];
delete filters.startTime["range"];
updateFilters();
}}
{startTimeSelectOptions}
on:set-filter={() => updateFilters()}
/>
<Duration

View File

@ -43,6 +43,10 @@
<ModalBody>
{#if $initialized}
<h4>Cluster</h4>
{#if disableClusterSelection}
<Button color="info" class="w-100 mb-2" disabled><b>Info: Cluster Selection Disabled in This View</b></Button>
<Button outline color="primary" class="w-100 mb-2" disabled><b>Selected Cluster: {cluster}</b></Button>
{:else}
<ListGroup>
<ListGroupItem
disabled={disableClusterSelection}
@ -64,6 +68,7 @@
{/each}
</ListGroup>
{/if}
{/if}
{#if $initialized && pendingCluster != null}
<br />
<h4>Partiton</h4>

View File

@ -17,7 +17,6 @@
import { parse, format, sub } from "date-fns";
import {
Row,
Col,
Button,
Input,
Modal,
@ -34,8 +33,7 @@
export let from = null;
export let to = null;
export let range = "";
let pendingFrom, pendingTo;
export let startTimeSelectOptions;
const now = new Date(Date.now());
const ago = sub(now, { months: 1 });
@ -48,12 +46,24 @@
time: format(now, "HH:mm"),
};
function reset() {
pendingFrom = from == null ? defaultFrom : fromRFC3339(from);
pendingTo = to == null ? defaultTo : fromRFC3339(to);
}
$: pendingFrom = (from == null) ? defaultFrom : fromRFC3339(from)
$: pendingTo = (to == null) ? defaultTo : fromRFC3339(to)
$: pendingRange = range
reset();
$: isModified =
(from != toRFC3339(pendingFrom) || to != toRFC3339(pendingTo, "59")) &&
(range != pendingRange) &&
!(
from == null &&
pendingFrom.date == "0000-00-00" &&
pendingFrom.time == "00:00"
) &&
!(
to == null &&
pendingTo.date == "0000-00-00" &&
pendingTo.time == "00:00"
) &&
!( range == "" && pendingRange == "");
function toRFC3339({ date, time }, secs = "00") {
const parsedDate = parse(
@ -71,19 +81,6 @@
time: format(parsedDate, "HH:mm"),
};
}
$: isModified =
(from != toRFC3339(pendingFrom) || to != toRFC3339(pendingTo, "59")) &&
!(
from == null &&
pendingFrom.date == "0000-00-00" &&
pendingFrom.time == "00:00"
) &&
!(
to == null &&
pendingTo.date == "0000-00-00" &&
pendingTo.time == "00:00"
);
</script>
<Modal {isOpen} toggle={() => (isOpen = !isOpen)}>
@ -92,31 +89,59 @@
{#if range !== ""}
<h4>Current Range</h4>
<Row>
<Col>
<Input type="text" value={range} disabled/>
</Col>
<FormGroup class="col">
<Input type ="select" bind:value={pendingRange} >
{#each startTimeSelectOptions as { rangeLabel, range }}
<option label={rangeLabel} value={range}/>
{/each}
</Input>
</FormGroup>
</Row>
{/if}
<h4>From</h4>
<Row>
<FormGroup class="col">
<Input type="date" bind:value={pendingFrom.date} />
<Input type="date" bind:value={pendingFrom.date} disabled={pendingRange !== ""}/>
</FormGroup>
<FormGroup class="col">
<Input type="time" bind:value={pendingFrom.time} />
<Input type="time" bind:value={pendingFrom.time} disabled={pendingRange !== ""}/>
</FormGroup>
</Row>
<h4>To</h4>
<Row>
<FormGroup class="col">
<Input type="date" bind:value={pendingTo.date} />
<Input type="date" bind:value={pendingTo.date} disabled={pendingRange !== ""}/>
</FormGroup>
<FormGroup class="col">
<Input type="time" bind:value={pendingTo.time} />
<Input type="time" bind:value={pendingTo.time} disabled={pendingRange !== ""}/>
</FormGroup>
</Row>
</ModalBody>
<ModalFooter>
{#if pendingRange !== ""}
<Button
color="warning"
disabled={pendingRange === ""}
on:click={() => {
pendingRange = ""
}}
>
Reset Range
</Button>
<Button
color="primary"
disabled={pendingRange === ""}
on:click={() => {
isOpen = false;
from = null;
to = null;
range = pendingRange;
dispatch("set-filter", { from, to, range });
}}
>
Close & Apply Range
</Button>
{:else}
<Button
color="primary"
disabled={pendingFrom.date == "0000-00-00" ||
@ -125,19 +150,21 @@
isOpen = false;
from = toRFC3339(pendingFrom);
to = toRFC3339(pendingTo, "59");
dispatch("set-filter", { from, to });
range = "";
dispatch("set-filter", { from, to, range });
}}
>
Close & Apply
Close & Apply Dates
</Button>
{/if}
<Button
color="danger"
on:click={() => {
isOpen = false;
from = null;
to = null;
reset();
dispatch("set-filter", { from, to });
range = "";
dispatch("set-filter", { from, to, range });
}}>Reset</Button
>
<Button on:click={() => (isOpen = false)}>Close</Button>

View File

@ -179,7 +179,7 @@
function render(plotData) {
if (plotData) {
const opts = {
title: "",
title: "CPU Roofline Diagram",
mode: 2,
width: width,
height: height,

View File

@ -3,7 +3,7 @@
Properties:
- `cluster String`: Currently selected cluster
- `metricsInHistograms [String]`: The currently selected metrics to display as histogram
- `selectedHistograms [String]`: The currently selected metrics to display as histogram
- `isOpen Bool`: Is selection opened
-->
@ -21,21 +21,26 @@
import { gql, getContextClient, mutationStore } from "@urql/svelte";
export let cluster;
export let metricsInHistograms;
export let selectedHistograms;
export let isOpen;
const client = getContextClient();
const initialized = getContext("initialized");
let availableMetrics = []
function loadHistoMetrics(isInitialized, thisCluster) {
if (!isInitialized) return [];
function loadHistoMetrics(isInitialized) {
if (!isInitialized) return;
const rawAvailableMetrics = getContext("globalMetrics").filter((gm) => gm?.footprint).map((fgm) => { return fgm.name })
availableMetrics = [...rawAvailableMetrics]
if (!thisCluster) {
return getContext("globalMetrics")
.filter((gm) => gm?.footprint)
.map((fgm) => { return fgm.name })
} else {
return getContext("globalMetrics")
.filter((gm) => gm?.availability.find((av) => av.cluster == thisCluster))
.filter((agm) => agm?.footprint)
.map((afgm) => { return afgm.name })
}
}
let pendingMetrics = [...metricsInHistograms]; // Copy
const updateConfigurationMutation = ({ name, value }) => {
return mutationStore({
@ -61,17 +66,16 @@
}
function closeAndApply() {
metricsInHistograms = [...pendingMetrics]; // Set for parent
isOpen = !isOpen;
updateConfiguration({
name: cluster
? `user_view_histogramMetrics:${cluster}`
: "user_view_histogramMetrics",
value: metricsInHistograms,
value: selectedHistograms,
});
}
$: loadHistoMetrics($initialized);
$: availableMetrics = loadHistoMetrics($initialized, cluster);
</script>
@ -81,7 +85,7 @@
<ListGroup>
{#each availableMetrics as metric (metric)}
<ListGroupItem>
<input type="checkbox" bind:group={pendingMetrics} value={metric} />
<input type="checkbox" bind:group={selectedHistograms} value={metric} />
{metric}
</ListGroupItem>
{/each}

View File

@ -28,6 +28,7 @@
export let configName;
export let allMetrics = null;
export let cluster = null;
export let subCluster = null;
export let showFootprint = false;
export let footprintSelect = false;
@ -46,12 +47,16 @@
$: {
if (allMetrics != null) {
if (cluster == null) {
if (!cluster) {
for (let metric of globalMetrics) allMetrics.add(metric.name);
} else {
allMetrics.clear();
for (let gm of globalMetrics) {
if (!subCluster) {
if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name);
} else {
if (gm.availability.find((av) => av.cluster === cluster && av.subClusters.includes(subCluster))) allMetrics.add(gm.name);
}
}
}
newMetricsOrder = [...allMetrics].filter((m) => !metrics.includes(m));
@ -62,7 +67,7 @@
function printAvailability(metric, cluster) {
const avail = globalMetrics.find((gm) => gm.name === metric)?.availability
if (cluster == null) {
if (!cluster) {
return avail.map((av) => av.cluster).join(',')
} else {
return avail.find((av) => av.cluster === cluster).subClusters.join(',')
@ -107,10 +112,17 @@
metrics = newMetricsOrder.filter((m) => unorderedMetrics.includes(m));
isOpen = false;
showFootprint = !!pendingShowFootprint;
let configKey;
if (cluster && subCluster) {
configKey = `${configName}:${cluster}:${subCluster}`;
} else if (cluster && !subCluster) {
configKey = `${configName}:${cluster}`;
} else {
configKey = `${configName}`;
}
updateConfigurationMutation({
name: cluster == null ? configName : `${configName}:${cluster}`,
name: configKey,
value: JSON.stringify(metrics),
}).subscribe((res) => {
if (res.fetching === false && res.error) {
@ -118,9 +130,11 @@
}
});
if (footprintSelect) {
showFootprint = !!pendingShowFootprint;
updateConfigurationMutation({
name:
cluster == null
!cluster
? "plot_list_showFootprint"
: `plot_list_showFootprint:${cluster}`,
value: JSON.stringify(showFootprint),
@ -129,6 +143,7 @@
throw res.error;
}
});
};
dispatch('update-metrics', metrics);
}

View File

@ -18,6 +18,8 @@
InputGroup,
InputGroupText,
Icon,
Row,
Col
} from "@sveltestrap/sveltestrap";
import { maxScope } from "../generic/utils.js";
import StatsTableEntry from "./StatsTableEntry.svelte";
@ -26,7 +28,7 @@
export let job;
export let jobMetrics;
const allMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort()
const sortedJobMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort()
const scopesForMetric = (metric) =>
jobMetrics.filter((jm) => jm.name == metric).map((jm) => jm.scope);
@ -34,11 +36,13 @@
selectedScopes = {},
sorting = {},
isMetricSelectionOpen = false,
selectedMetrics =
getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
getContext("cc-config")["job_view_nodestats_selectedMetrics"];
availableMetrics = new Set(),
selectedMetrics = (
getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`]
) || getContext("cc-config")["job_view_nodestats_selectedMetrics"];
for (let metric of allMetrics) {
for (let metric of sortedJobMetrics) {
// Not Exclusive or Multi-Node: get maxScope directly (mostly: node)
// -> Else: Load smallest available granularity as default as per availability
const availableScopes = scopesForMetric(metric);
@ -95,15 +99,19 @@
};
</script>
<Row>
<Col class="m-2">
<Button outline on:click={() => (isMetricSelectionOpen = true)} class="w-auto px-2" color="primary">
Select Metrics (Selected {selectedMetrics.length} of {availableMetrics.size} available)
</Button>
</Col>
</Row>
<hr class="mb-1 mt-1"/>
<Table class="mb-0">
<thead>
<!-- Header Row 1: Selectors -->
<tr>
<th>
<Button outline on:click={() => (isMetricSelectionOpen = true)} class="w-100 px-2" color="primary">
Select Metrics
</Button>
</th>
<th/>
{#each selectedMetrics as metric}
<!-- To Match Row-2 Header Field Count-->
<th colspan={selectedScopes[metric] == "node" ? 3 : 4}>
@ -162,8 +170,9 @@
<MetricSelection
cluster={job.cluster}
subCluster={job.subCluster}
configName="job_view_nodestats_selectedMetrics"
allMetrics={new Set(allMetrics)}
bind:allMetrics={availableMetrics}
bind:metrics={selectedMetrics}
bind:isOpen={isMetricSelectionOpen}
/>

View File

@ -217,6 +217,7 @@
<tr>
<td colspan={selectedMetrics.length + 1}>
<div style="text-align:center;">
{#if !usePaging}
<p><b>
Loading nodes {nodes.length + 1} to
{ matchedNodes
@ -224,6 +225,7 @@
: (nodes.length + paging.itemsPerPage)
}
</b></p>
{/if}
<Spinner secondary />
</div>
</td>

View File

@ -102,6 +102,19 @@
Shared
</Button>
</InputGroup>
<!-- Fallback -->
{:else if nodeJobsData.jobs.count >= 1}
<InputGroup>
<InputGroupText>
<Icon name="circle-fill"/>
</InputGroupText>
<InputGroupText>
Status
</InputGroupText>
<Button color="success" disabled>
Allocated Jobs
</Button>
</InputGroup>
{:else}
<InputGroup>
<InputGroupText>

View File

@ -98,12 +98,12 @@
let extendedLegendData = null;
$: if ($nodeJobsData?.data) {
// Get Shared State of Node: Only Build extended Legend For Shared Nodes
if ($nodeJobsData.data.jobs.count >= 1 && !$nodeJobsData.data.jobs.items[0].exclusive) {
// Build the extended legend for every node with at least one allocated job (the former restriction to shared nodes is left commented out in the condition below)
if ($nodeJobsData.data.jobs.count >= 1) { // "&& !$nodeJobsData.data.jobs.items[0].exclusive)"
const accSet = Array.from(new Set($nodeJobsData.data.jobs.items
.map((i) => i.resources
.filter((r) => r.hostname === nodeData.host)
.map((r) => r.accelerators)
.filter((r) => (r.hostname === nodeData.host) && r?.accelerators)
.map((r) => r?.accelerators)
)
)).flat(2)