Mirror of https://github.com/ClusterCockpit/cc-backend, synced 2025-03-31 01:25:55 +02:00

Commit d009a065f1: Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev
@@ -1008,8 +1008,8 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
return
}

if job == nil || job.StartTime.Unix() >= req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
if job == nil || job.StartTime.Unix() > req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
return
}

@@ -16,7 +16,7 @@ type DefaultMetricsConfig struct {
}

func LoadDefaultMetricsConfig() (*DefaultMetricsConfig, error) {
filePath := "configs/default_metrics.json"
filePath := "default_metrics.json"
if _, err := os.Stat(filePath); os.IsNotExist(err) {
return nil, nil
}

@@ -96,27 +96,35 @@ func HandleImportFlag(flag string) error {
}

job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64

// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}

job.EnergyFootprint[fp] = energy
totalEnergy += energy
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}

job.Energy = (math.Round(totalEnergy*100) / 100)
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err
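The kWh conversion introduced above can be sanity-checked in isolation. The following standalone sketch (hypothetical values; computeKWh is a stand-in for the inline arithmetic, not a function from the repository) reproduces the same rounding arithmetic:

package main

import (
	"fmt"
	"math"
)

// computeKWh mirrors the conversion used in the diff:
// (node-average power [W] * number of nodes) * (duration [s] / 3600) / 1000,
// rounded to two decimal places.
func computeKWh(avgPowerWatts float64, numNodes int, durationSeconds int64) float64 {
	rawEnergy := (avgPowerWatts * float64(numNodes)) * (float64(durationSeconds) / 3600.0) / 1000.0
	return math.Round(rawEnergy*100.0) / 100.0
}

func main() {
	// Example: 250 W average per node, 4 nodes, 2 h runtime
	// -> (250 * 4) * (7200 / 3600) / 1000 = 2.00 kWh
	fmt.Println(computeKWh(250, 4, 7200)) // 2
}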
@@ -93,27 +93,35 @@ func InitDB() error {
}

job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64

// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}

job.EnergyFootprint[fp] = energy
totalEnergy += energy
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}

job.Energy = (math.Round(totalEnergy*100) / 100)
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err

@@ -40,6 +40,7 @@ type CCMetricStore struct {
jwt string
url string
queryEndpoint string
topologyCache map[string]*schema.Topology // cluster -> topology cache
}

type ApiQueryRequest struct {

@@ -92,6 +93,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
ccms.client = http.Client{
Timeout: 10 * time.Second,
}
ccms.topologyCache = make(map[string]*schema.Topology)

if config.Renamings != nil {
ccms.here2there = config.Renamings

@@ -181,6 +183,12 @@ func (ccms *CCMetricStore) LoadData(
return nil, err
}

// Verify assignment is correct - log any inconsistencies for debugging
if len(queries) != len(assignedScope) {
log.Errorf("Critical error: queries and assignedScope have different lengths after buildQueries: %d vs %d",
len(queries), len(assignedScope))
}

req := ApiQueryRequest{
Cluster: job.Cluster,
From: job.StartTime.Unix(),

@@ -198,11 +206,36 @@ func (ccms *CCMetricStore) LoadData(

var errors []string
jobData := make(schema.JobData)

// Add safety check for potential index out of range errors
if len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {
log.Warnf("Mismatch in query results count: queries=%d, results=%d, assignedScope=%d",
len(req.Queries), len(resBody.Results), len(assignedScope))
if len(resBody.Results) > len(req.Queries) {
resBody.Results = resBody.Results[:len(req.Queries)]
}
if len(assignedScope) > len(req.Queries) {
assignedScope = assignedScope[:len(req.Queries)]
}
}

for i, row := range resBody.Results {
// Safety check to prevent index out of range errors
if i >= len(req.Queries) || i >= len(assignedScope) {
log.Warnf("Index out of range prevented: i=%d, queries=%d, assignedScope=%d",
i, len(req.Queries), len(assignedScope))
continue
}

query := req.Queries[i]
metric := ccms.toLocalName(query.Metric)
scope := assignedScope[i]
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
log.Warnf("Metric config not found for %s on cluster %s", metric, job.Cluster)
continue
}

if _, ok := jobData[metric]; !ok {
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
}

@@ -231,8 +264,15 @@ func (ccms *CCMetricStore) LoadData(

id := (*string)(nil)
if query.Type != nil {
id = new(string)
*id = query.TypeIds[ndx]
// Check if ndx is within the bounds of TypeIds slice
if ndx < len(query.TypeIds) {
id = new(string)
*id = query.TypeIds[ndx]
} else {
// Log the error but continue processing
log.Warnf("TypeIds index out of range: %d with length %d for metric %s on host %s",
ndx, len(query.TypeIds), query.Metric, query.Hostname)
}
}

if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
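The guards added above all protect the same invariant: req.Queries, resBody.Results, and assignedScope must advance in lockstep. A minimal sketch of that defensive pattern (placeholder types, not the store's real API):

package main

import "log"

// processInLockstep iterates parallel slices defensively: it truncates the
// results to the query count and skips any index the scopes slice cannot
// cover, instead of panicking with an index-out-of-range error.
func processInLockstep(queries []string, results []int, scopes []string) {
	if len(results) > len(queries) {
		log.Printf("truncating %d extra results", len(results)-len(queries))
		results = results[:len(queries)]
	}
	for i, res := range results {
		if i >= len(scopes) {
			log.Printf("no scope for result %d, skipping", i)
			continue
		}
		log.Printf("query=%s scope=%s result=%d", queries[i], scopes[i], res)
	}
}

func main() {
	processInLockstep([]string{"flops", "mem_bw"}, []int{1, 2, 3}, []string{"node"})
}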
@@ -284,20 +324,19 @@ func (ccms *CCMetricStore) buildQueries(
scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {
// Initialize both slices together
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
assignedScope := []schema.MetricScope{}
assignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(job.Resources))

subcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)
if scerr != nil {
return nil, nil, scerr
topology, err := ccms.getTopology(job.Cluster, job.SubCluster)
if err != nil {
return nil, nil, err
}
topology := subcluster.Topology

for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(job.Cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
log.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
continue
}

@@ -329,7 +368,6 @@ func (ccms *CCMetricStore) buildQueries(
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
if scope != schema.MetricScopeAccelerator {
// Skip all other catched cases
continue
}

@@ -502,6 +540,31 @@ func (ccms *CCMetricStore) buildQueries(
continue
}

// MemoryDomain -> Socket
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket {
memDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)
socketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains)
if err != nil {
log.Errorf("Error mapping memory domains to sockets: %v", err)
continue
}

// Create a query for each socket
for _, domains := range socketToDomains {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Aggregate: true,
Type: &memoryDomainString,
TypeIds: intToStringSlice(domains),
Resolution: resolution,
})
// Add scope for each query, not just once
assignedScope = append(assignedScope, scope)
}
continue
}

// Socket -> Socket
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(hwthreads)
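Note the pattern in the added MemoryDomain -> Socket block: assignedScope is appended once per emitted query, inside the same loop iteration, so the two slices can never drift apart. A reduced sketch of that invariant (hypothetical element types):

package main

import "fmt"

// appendQuery keeps the two parallel slices in sync by construction:
// every code path that appends a query appends exactly one scope.
func appendQuery(queries, scopes []string, q, scope string) ([]string, []string) {
	queries = append(queries, q)
	scopes = append(scopes, scope) // one scope per emitted query, not one per metric
	return queries, scopes
}

func main() {
	var queries, scopes []string
	// One query per socket, as in the block above:
	for _, socket := range []string{"0", "1"} {
		queries, scopes = appendQuery(queries, scopes, "mem_bw@socket"+socket, "socket")
	}
	fmt.Println(len(queries) == len(scopes)) // true
}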
@@ -772,6 +835,12 @@ func (ccms *CCMetricStore) LoadNodeListData(
return nil, totalNodes, hasNextPage, err
}

// Verify assignment is correct - log any inconsistencies for debugging
if len(queries) != len(assignedScope) {
log.Errorf("Critical error: queries and assignedScope have different lengths after buildNodeQueries: %d vs %d",
len(queries), len(assignedScope))
}

req := ApiQueryRequest{
Cluster: cluster,
Queries: queries,

@@ -789,17 +858,48 @@ func (ccms *CCMetricStore) LoadNodeListData(

var errors []string
data := make(map[string]schema.JobData)

// Add safety check for index out of range issues
if len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {
log.Warnf("Mismatch in query results count: queries=%d, results=%d, assignedScope=%d",
len(req.Queries), len(resBody.Results), len(assignedScope))
if len(resBody.Results) > len(req.Queries) {
resBody.Results = resBody.Results[:len(req.Queries)]
}
if len(assignedScope) > len(req.Queries) {
assignedScope = assignedScope[:len(req.Queries)]
}
}

for i, row := range resBody.Results {
// Safety check to prevent index out of range errors
if i >= len(req.Queries) || i >= len(assignedScope) {
log.Warnf("Index out of range prevented: i=%d, queries=%d, assignedScope=%d",
i, len(req.Queries), len(assignedScope))
continue
}

var query ApiQuery
if resBody.Queries != nil {
query = resBody.Queries[i]
if i < len(resBody.Queries) {
query = resBody.Queries[i]
} else {
log.Warnf("Index out of range prevented for resBody.Queries: i=%d, len=%d",
i, len(resBody.Queries))
continue
}
} else {
query = req.Queries[i]
}

// qdata := res[0]
metric := ccms.toLocalName(query.Metric)
scope := assignedScope[i]
mc := archive.GetMetricConfig(cluster, metric)
if mc == nil {
log.Warnf("Metric config not found for %s on cluster %s", metric, cluster)
continue
}

res := mc.Timestep
if len(row) > 0 {

@@ -838,8 +938,15 @@ func (ccms *CCMetricStore) LoadNodeListData(

id := (*string)(nil)
if query.Type != nil {
id = new(string)
*id = query.TypeIds[ndx]
// Check if ndx is within the bounds of TypeIds slice
if ndx < len(query.TypeIds) {
id = new(string)
*id = query.TypeIds[ndx]
} else {
// Log the error but continue processing
log.Warnf("TypeIds index out of range: %d with length %d for metric %s on host %s",
ndx, len(query.TypeIds), query.Metric, query.Hostname)
}
}

if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {

@@ -878,26 +985,14 @@ func (ccms *CCMetricStore) buildNodeQueries(
scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {

// Initialize both slices together with the same capacity
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
assignedScope := []schema.MetricScope{}

// Get Topol before loop if subCluster given
var subClusterTopol *schema.SubCluster
var scterr error
if subCluster != "" {
subClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)
if scterr != nil {
// TODO: Log
return nil, nil, scterr
}
}
assignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(nodes))

for _, metric := range metrics {
remoteName := ccms.toRemoteName(metric)
mc := archive.GetMetricConfig(cluster, metric)
if mc == nil {
// return nil, fmt.Errorf("METRICDATA/CCMS > metric '%s' is not specified for cluster '%s'", metric, cluster)
log.Infof("metric '%s' is not specified for cluster '%s'", metric, cluster)
continue
}

@@ -918,22 +1013,22 @@ func (ccms *CCMetricStore) buildNodeQueries(
handledScopes = append(handledScopes, scope)

for _, hostname := range nodes {
var topology *schema.Topology
var err error

// If no subCluster given, get it by node
if subCluster == "" {
subClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)
if scnerr != nil {
return nil, nil, scnerr
}
subClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)
if scterr != nil {
return nil, nil, scterr
}
topology, err = ccms.getTopologyByNode(cluster, hostname)
} else {
topology, err = ccms.getTopology(cluster, subCluster)
}

if err != nil {
return nil, nil, err
}

// Always full node hwthread id list, no partial queries expected -> Use "topology.Node" directly where applicable
// Always full accelerator id list, no partial queries expected -> Use "acceleratorIds" directly where applicable
topology := subClusterTopol.Topology
acceleratorIds := topology.GetAcceleratorIDs()

// Moved check here if metric matches hardware specs

@@ -944,7 +1039,6 @@ func (ccms *CCMetricStore) buildNodeQueries(
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
if scope != schema.MetricScopeAccelerator {
// Skip all other catched cases
continue
}

@@ -1117,6 +1211,31 @@ func (ccms *CCMetricStore) buildNodeQueries(
continue
}

// MemoryDomain -> Socket
if nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket {
memDomains, _ := topology.GetMemoryDomainsFromHWThreads(topology.Node)
socketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains)
if err != nil {
log.Errorf("Error mapping memory domains to sockets: %v", err)
continue
}

// Create a query for each socket
for _, domains := range socketToDomains {
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: hostname,
Aggregate: true,
Type: &memoryDomainString,
TypeIds: intToStringSlice(domains),
Resolution: resolution,
})
// Add scope for each query, not just once
assignedScope = append(assignedScope, scope)
}
continue
}

// Socket -> Socket
if nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {
sockets, _ := topology.GetSocketsFromHWThreads(topology.Node)

@@ -1173,3 +1292,29 @@ func intToStringSlice(is []int) []string {
}
return ss
}

// getTopology returns the topology for a given cluster and subcluster, caching it if not already present
func (ccms *CCMetricStore) getTopology(cluster, subCluster string) (*schema.Topology, error) {
cacheKey := fmt.Sprintf("%s:%s", cluster, subCluster)
if topology, ok := ccms.topologyCache[cacheKey]; ok {
return topology, nil
}

subcluster, err := archive.GetSubCluster(cluster, subCluster)
if err != nil {
return nil, err
}

ccms.topologyCache[cacheKey] = &subcluster.Topology
return &subcluster.Topology, nil
}

// getTopologyByNode returns the topology for a given cluster and node, caching it if not already present
func (ccms *CCMetricStore) getTopologyByNode(cluster, node string) (*schema.Topology, error) {
subCluster, err := archive.GetSubClusterByNode(cluster, node)
if err != nil {
return nil, err
}

return ccms.getTopology(cluster, subCluster)
}
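getTopology keeps one Topology per cluster:subcluster pair, so the repeated per-host lookups inside the query-building loops become map hits instead of archive reads. A generic sketch of the same memoization shape (simplified stand-in types, not the store's actual API):

package main

import "fmt"

// topoCache memoizes an expensive lookup under a composite "cluster:subcluster" key.
type topoCache struct {
	cache map[string]string
	load  func(cluster, subCluster string) string // stands in for archive.GetSubCluster
}

func (tc *topoCache) get(cluster, subCluster string) string {
	key := fmt.Sprintf("%s:%s", cluster, subCluster)
	if topo, ok := tc.cache[key]; ok {
		return topo // cache hit: no archive access
	}
	topo := tc.load(cluster, subCluster)
	tc.cache[key] = topo
	return topo
}

func main() {
	loads := 0
	tc := &topoCache{
		cache: map[string]string{},
		load: func(c, s string) string {
			loads++
			return c + "/" + s + "-topology"
		},
	}
	tc.get("fritz", "main")
	tc.get("fritz", "main") // second call is served from the cache
	fmt.Println(loads)      // 1
}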
@@ -590,28 +590,34 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err
}
energyFootprint := make(map[string]float64)
var totalEnergy float64
var energy float64

// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (( W * s ) / 3600) / 1000 = kWh ; Rounded to 2 nearest digits: (Energy * 100) / 100
// Here: All-Node Metric Average * Number of Nodes * Job Runtime
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
metricNodeSum := LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes) * float64(jobMeta.Duration)
energy = math.Round(((metricNodeSum/3600)/1000)*100) / 100
rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}

energyFootprint[fp] = energy
totalEnergy += energy
energyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy

// log.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
}

var rawFootprint []byte

@@ -620,7 +626,7 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err
}

return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
}

func (r *JobRepository) UpdateFootprint(

@@ -22,6 +22,13 @@ type Topology struct {
Die [][]*int `json:"die,omitempty"`
Core [][]int `json:"core"`
Accelerators []*Accelerator `json:"accelerators,omitempty"`

// Cache maps for faster lookups
hwthreadToSocket map[int][]int
hwthreadToCore map[int][]int
hwthreadToMemoryDomain map[int][]int
coreToSocket map[int][]int
memoryDomainToSocket map[int]int // New: Direct mapping from memory domain to socket
}

type MetricValue struct {

@@ -92,156 +99,233 @@ type GlobalMetricListItem struct {
Availability []ClusterSupport `json:"availability"`
}

// Return a list of socket IDs given a list of hwthread IDs. Even if just one
// hwthread is in that socket, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
// InitTopologyMaps initializes the topology mapping caches
func (topo *Topology) InitTopologyMaps() {
// Initialize maps
topo.hwthreadToSocket = make(map[int][]int)
topo.hwthreadToCore = make(map[int][]int)
topo.hwthreadToMemoryDomain = make(map[int][]int)
topo.coreToSocket = make(map[int][]int)
topo.memoryDomainToSocket = make(map[int]int)

// Build hwthread to socket mapping
for socketID, hwthreads := range topo.Socket {
for _, hwthread := range hwthreads {
topo.hwthreadToSocket[hwthread] = append(topo.hwthreadToSocket[hwthread], socketID)
}
}

// Build hwthread to core mapping
for coreID, hwthreads := range topo.Core {
for _, hwthread := range hwthreads {
topo.hwthreadToCore[hwthread] = append(topo.hwthreadToCore[hwthread], coreID)
}
}

// Build hwthread to memory domain mapping
for memDomID, hwthreads := range topo.MemoryDomain {
for _, hwthread := range hwthreads {
topo.hwthreadToMemoryDomain[hwthread] = append(topo.hwthreadToMemoryDomain[hwthread], memDomID)
}
}

// Build core to socket mapping
for coreID, hwthreads := range topo.Core {
socketSet := make(map[int]struct{})
for _, hwthread := range hwthreads {
for socketID := range topo.hwthreadToSocket[hwthread] {
socketSet[socketID] = struct{}{}
}
}
topo.coreToSocket[coreID] = make([]int, 0, len(socketSet))
for socketID := range socketSet {
topo.coreToSocket[coreID] = append(topo.coreToSocket[coreID], socketID)
}
}

// Build memory domain to socket mapping
for memDomID, hwthreads := range topo.MemoryDomain {
if len(hwthreads) > 0 {
// Use the first hwthread to determine the socket
if socketIDs, ok := topo.hwthreadToSocket[hwthreads[0]]; ok && len(socketIDs) > 0 {
topo.memoryDomainToSocket[memDomID] = socketIDs[0]
}
}
}
}

// EnsureTopologyMaps ensures that the topology maps are initialized
func (topo *Topology) EnsureTopologyMaps() {
if topo.hwthreadToSocket == nil {
topo.InitTopologyMaps()
}
}

func (topo *Topology) GetSocketsFromHWThreads(
hwthreads []int,
) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
topo.EnsureTopologyMaps()

socketsMap := make(map[int]int)
for _, hwthread := range hwthreads {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthread == hwthreadInSocket {
socketsMap[socket] += 1
}
}
for _, socketID := range topo.hwthreadToSocket[hwthread] {
socketsMap[socketID]++
}
}

exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
// Check if all hwthreads in this socket are in our input list
exclusive = exclusive && count == len(topo.Socket[socket])
}

return sockets, exclusive
}
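The rewritten GetSocketsFromHWThreads keeps the original contract: it returns every socket touched by the given hwthreads, plus an exclusivity flag that is only true when the hwthreads cover each touched socket completely. A small illustration of that contract, assuming a hypothetical two-socket topology:

// Hypothetical 2-socket node, hwthreads 0-3 on socket 0 and 4-7 on socket 1:
//
//   topo.Socket = [][]int{{0, 1, 2, 3}, {4, 5, 6, 7}}
//
// Expected results under the contract implemented above:
//
//   GetSocketsFromHWThreads([]int{0, 1, 2, 3}) -> sockets [0], exclusive true
//   GetSocketsFromHWThreads([]int{0, 4})       -> sockets [0, 1], exclusive false
//   GetSocketsFromHWThreads([]int{4, 5, 6, 7}) -> sockets [1], exclusive true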
// Return a list of socket IDs given a list of core IDs. Even if just one
// core is in that socket, add it to the list. If no cores other than
// those in the argument list are assigned to one of the sockets in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetSocketsFromCores (
func (topo *Topology) GetSocketsFromCores(
cores []int,
) (sockets []int, exclusive bool) {
socketsMap := map[int]int{}
topo.EnsureTopologyMaps()

socketsMap := make(map[int]int)
for _, core := range cores {
for _, hwthreadInCore := range topo.Core[core] {
for socket, hwthreadsInSocket := range topo.Socket {
for _, hwthreadInSocket := range hwthreadsInSocket {
if hwthreadInCore == hwthreadInSocket {
socketsMap[socket] += 1
for _, socketID := range topo.coreToSocket[core] {
socketsMap[socketID]++
}
}

exclusive = true
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
// Count total cores in this socket
totalCoresInSocket := 0
for _, hwthreads := range topo.Core {
for _, hwthread := range hwthreads {
for _, sID := range topo.hwthreadToSocket[hwthread] {
if sID == socket {
totalCoresInSocket++
break
}
}
}
}
}

exclusive = true
hwthreadsPerSocket := len(topo.Node) / len(topo.Socket)
sockets = make([]int, 0, len(socketsMap))
for socket, count := range socketsMap {
sockets = append(sockets, socket)
exclusive = exclusive && count == hwthreadsPerSocket
exclusive = exclusive && count == totalCoresInSocket
}

return sockets, exclusive
}

// Return a list of core IDs given a list of hwthread IDs. Even if just one
// hwthread is in that core, add it to the list. If no hwthreads other than
// those in the argument list are assigned to one of the cores in the first
// return value, return true as the second value. TODO: Optimize this, there
// must be a more efficient way/algorithm.
func (topo *Topology) GetCoresFromHWThreads(
hwthreads []int,
) (cores []int, exclusive bool) {
coresMap := map[int]int{}
topo.EnsureTopologyMaps()

coresMap := make(map[int]int)
for _, hwthread := range hwthreads {
for core, hwthreadsInCore := range topo.Core {
for _, hwthreadInCore := range hwthreadsInCore {
if hwthread == hwthreadInCore {
coresMap[core] += 1
}
}
for _, coreID := range topo.hwthreadToCore[hwthread] {
coresMap[coreID]++
}
}

exclusive = true
hwthreadsPerCore := len(topo.Node) / len(topo.Core)
cores = make([]int, 0, len(coresMap))
for core, count := range coresMap {
cores = append(cores, core)
exclusive = exclusive && count == hwthreadsPerCore
// Check if all hwthreads in this core are in our input list
exclusive = exclusive && count == len(topo.Core[core])
}

return cores, exclusive
}

// Return a list of memory domain IDs given a list of hwthread IDs. Even if
// just one hwthread is in that memory domain, add it to the list. If no
// hwthreads other than those in the argument list are assigned to one of the
// memory domains in the first return value, return true as the second value.
// TODO: Optimize this, there must be a more efficient way/algorithm.
func (topo *Topology) GetMemoryDomainsFromHWThreads(
hwthreads []int,
) (memDoms []int, exclusive bool) {
memDomsMap := map[int]int{}
topo.EnsureTopologyMaps()

memDomsMap := make(map[int]int)
for _, hwthread := range hwthreads {
for memDom, hwthreadsInmemDom := range topo.MemoryDomain {
for _, hwthreadInmemDom := range hwthreadsInmemDom {
if hwthread == hwthreadInmemDom {
memDomsMap[memDom] += 1
}
}
for _, memDomID := range topo.hwthreadToMemoryDomain[hwthread] {
memDomsMap[memDomID]++
}
}

exclusive = true
hwthreadsPermemDom := len(topo.Node) / len(topo.MemoryDomain)
memDoms = make([]int, 0, len(memDomsMap))
for memDom, count := range memDomsMap {
memDoms = append(memDoms, memDom)
exclusive = exclusive && count == hwthreadsPermemDom
// Check if all hwthreads in this memory domain are in our input list
exclusive = exclusive && count == len(topo.MemoryDomain[memDom])
}

return memDoms, exclusive
}

// Temporary fix to convert back from int id to string id for accelerators
func (topo *Topology) GetAcceleratorID(id int) (string, error) {
if id < 0 {
fmt.Printf("ID smaller than 0!\n")
return topo.Accelerators[0].ID, nil
} else if id < len(topo.Accelerators) {
return topo.Accelerators[id].ID, nil
} else {
return "", fmt.Errorf("index %d out of range", id)
// GetMemoryDomainsBySocket can now use the direct mapping
func (topo *Topology) GetMemoryDomainsBySocket(domainIDs []int) (map[int][]int, error) {
socketToDomains := make(map[int][]int)
for _, domainID := range domainIDs {
if domainID < 0 || domainID >= len(topo.MemoryDomain) || len(topo.MemoryDomain[domainID]) == 0 {
return nil, fmt.Errorf("MemoryDomain %d is invalid or empty", domainID)
}

socketID, ok := topo.memoryDomainToSocket[domainID]
if !ok {
return nil, fmt.Errorf("MemoryDomain %d could not be assigned to any socket", domainID)
}

socketToDomains[socketID] = append(socketToDomains[socketID], domainID)
}

return socketToDomains, nil
}

// Return list of hardware (string) accelerator IDs
// GetAcceleratorID converts a numeric ID to the corresponding Accelerator ID as a string.
// This is useful when accelerators are stored in arrays and accessed by index.
func (topo *Topology) GetAcceleratorID(id int) (string, error) {
if id < 0 {
return "", fmt.Errorf("accelerator ID %d is negative", id)
}

if id >= len(topo.Accelerators) {
return "", fmt.Errorf("accelerator index %d out of valid range (max: %d)",
id, len(topo.Accelerators)-1)
}

return topo.Accelerators[id].ID, nil
}

// GetAcceleratorIDs returns a list of all Accelerator IDs (as strings).
// Capacity is pre-allocated to improve efficiency.
func (topo *Topology) GetAcceleratorIDs() []string {
accels := make([]string, 0)
if len(topo.Accelerators) == 0 {
return []string{}
}

accels := make([]string, 0, len(topo.Accelerators))
for _, accel := range topo.Accelerators {
accels = append(accels, accel.ID)
}
return accels
}

// Outdated? Or: Return indices of accelerators in parent array?
// GetAcceleratorIDsAsInt converts all Accelerator IDs to integer values.
// This function can fail if the IDs cannot be interpreted as numbers.
// Capacity is pre-allocated to improve efficiency.
func (topo *Topology) GetAcceleratorIDsAsInt() ([]int, error) {
accels := make([]int, 0)
for _, accel := range topo.Accelerators {
if len(topo.Accelerators) == 0 {
return []int{}, nil
}

accels := make([]int, 0, len(topo.Accelerators))
for i, accel := range topo.Accelerators {
id, err := strconv.Atoi(accel.ID)
if err != nil {
return nil, err
return nil, fmt.Errorf("accelerator ID at position %d (%s) cannot be converted to a number: %w",
i, accel.ID, err)
}
accels = append(accels, id)
}

@@ -20,6 +20,7 @@
Card,
Table,
Icon,
Tooltip
} from "@sveltestrap/sveltestrap";
import {
init,

@@ -70,6 +71,8 @@
...new Set([...metricsInHistograms, ...metricsInScatterplots.flat()]),
];

$: clusterName = cluster?.name ? cluster.name : cluster;

const sortOptions = [
{ key: "totalWalltime", label: "Walltime" },
{ key: "totalNodeHours", label: "Node Hours" },

@@ -159,6 +162,7 @@
groupBy: $groupBy
) {
id
name
totalWalltime
totalNodeHours
totalCoreHours

@@ -422,15 +426,22 @@
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};" /></td>
{#if groupSelection.key == "user"}
<th scope="col"
><a href="/monitoring/user/{te.id}?cluster={cluster}"
<th scope="col" id="topName-{te.id}"
><a href="/monitoring/user/{te.id}?cluster={clusterName}"
>{te.id}</a
></th
>
{#if te?.name}
<Tooltip
target={`topName-${te.id}`}
placement="left"
>{te.name}</Tooltip
>
{/if}
{:else}
<th scope="col"
><a
href="/monitoring/jobs/?cluster={cluster}&project={te.id}&projectMatch=eq"
href="/monitoring/jobs/?cluster={clusterName}&project={te.id}&projectMatch=eq"
>{te.id}</a
></th
>

@@ -58,7 +58,8 @@
let plots = {},
statsTable

let missingMetrics = [],
let availableMetrics = new Set(),
missingMetrics = [],
missingHosts = [],
somethingMissing = false;

@@ -127,10 +128,24 @@
if (!job) return;

const pendingMetrics = [
...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
ccconfig[`job_view_selectedMetrics`]
...(
(
ccconfig[`job_view_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`job_view_selectedMetrics:${job.cluster}`]
) ||
$initq.data.globalMetrics
.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster && av.subClusters.includes(job.subCluster))) {
names.push(gm.name);
}
return names;
}, [])
),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
...(
(
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`]
) ||
ccconfig[`job_view_nodestats_selectedMetrics`]
),
];
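Several of these frontend changes introduce the same lookup order: a subcluster-scoped config key first, then the cluster-scoped key, then a global default. A compact sketch of that resolution chain (illustrative key names; written in Go for consistency with the backend code above, not the frontend's actual helper):

package main

import "fmt"

// resolveConfig returns the first configured value in the order
// "<name>:<cluster>:<subCluster>" -> "<name>:<cluster>" -> "<name>".
func resolveConfig(cfg map[string][]string, name, cluster, subCluster string) []string {
	for _, key := range []string{
		fmt.Sprintf("%s:%s:%s", name, cluster, subCluster),
		fmt.Sprintf("%s:%s", name, cluster),
		name,
	} {
		if v, ok := cfg[key]; ok {
			return v
		}
	}
	return nil
}

func main() {
	cfg := map[string][]string{
		"job_view_selectedMetrics":       {"flops_any"},
		"job_view_selectedMetrics:fritz": {"flops_any", "mem_bw"},
	}
	// No subcluster-specific entry exists, so the cluster entry wins.
	fmt.Println(resolveConfig(cfg, "job_view_selectedMetrics", "fritz", "main"))
}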
@@ -293,7 +308,7 @@
{#if $initq.data}
<Col xs="auto">
<Button outline on:click={() => (isMetricsSelectionOpen = true)} color="primary">
Select Metrics
Select Metrics (Selected {selectedMetrics.length} of {availableMetrics.size} available)
</Button>
</Col>
{/if}

@@ -428,9 +443,11 @@
{#if $initq.data}
<MetricSelection
cluster={$initq.data.job.cluster}
subCluster={$initq.data.job.subCluster}
configName="job_view_selectedMetrics"
bind:metrics={selectedMetrics}
bind:isOpen={isMetricsSelectionOpen}
bind:allMetrics={availableMetrics}
/>
{/if}

@@ -137,5 +137,5 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
footprintSelect={true}
footprintSelect
/>

@@ -19,6 +19,7 @@
Progress,
Icon,
Button,
Tooltip
} from "@sveltestrap/sveltestrap";
import {
queryStore,

@@ -75,9 +76,9 @@
);

let isHistogramSelectionOpen = false;
$: metricsInHistograms = cluster
? ccconfig[`user_view_histogramMetrics:${cluster}`] || []
: ccconfig.user_view_histogramMetrics || [];
$: selectedHistograms = cluster
? ccconfig[`user_view_histogramMetrics:${cluster}`] || ( ccconfig['user_view_histogramMetrics'] || [] )
: ccconfig['user_view_histogramMetrics'] || [];

const client = getContextClient();
// Note: nodeMetrics are requested on configured $timestep resolution

@@ -90,7 +91,7 @@
$metrics: [String!]
$from: Time!
$to: Time!
$metricsInHistograms: [String!]
$selectedHistograms: [String!]
) {
nodeMetrics(
cluster: $cluster

@@ -116,7 +117,7 @@
}
}

stats: jobsStatistics(filter: $filter, metrics: $metricsInHistograms) {
stats: jobsStatistics(filter: $filter, metrics: $selectedHistograms) {
histDuration {
count
value

@@ -157,7 +158,7 @@
from: from.toISOString(),
to: to.toISOString(),
filter: [{ state: ["running"] }, { cluster: { eq: cluster } }],
metricsInHistograms: metricsInHistograms,
selectedHistograms: selectedHistograms,
},
});

@@ -177,6 +178,7 @@
groupBy: USER
) {
id
name
totalJobs
totalNodes
totalCores

@@ -515,12 +517,19 @@
{#each $topUserQuery.data.topUser as tu, i}
<tr>
<td><Icon name="circle-fill" style="color: {colors[i]};" /></td>
<th scope="col"
<th scope="col" id="topName-{tu.id}"
><a
href="/monitoring/user/{tu.id}?cluster={cluster}&state=running"
>{tu.id}</a
></th
>
{#if tu?.name}
<Tooltip
target={`topName-${tu.id}`}
placement="left"
>{tu.name}</Tooltip
>
{/if}
<td>{tu[topUserSelection.key]}</td>
</tr>
{/each}

@@ -652,7 +661,7 @@

<!-- Selectable Stats as Histograms : Average Values of Running Jobs -->

{#if metricsInHistograms}
{#if selectedHistograms}
{#key $mainQuery.data.stats[0].histMetrics}
<PlotGrid
let:item

@@ -675,6 +684,6 @@

<HistogramSelection
bind:cluster
bind:metricsInHistograms
bind:selectedHistograms
bind:isOpen={isHistogramSelectionOpen}
/>

@@ -29,8 +29,8 @@
import Refresher from "./generic/helper/Refresher.svelte";

export let displayType;
export let cluster;
export let subCluster = "";
export let cluster = null;
export let subCluster = null;
export let from = null;
export let to = null;

@@ -60,7 +60,10 @@
let hostnameFilter = "";
let pendingHostnameFilter = "";
let selectedMetric = ccconfig.system_view_selectedMetric || "";
let selectedMetrics = ccconfig[`node_list_selectedMetrics:${cluster}`] || [ccconfig.system_view_selectedMetric];
let selectedMetrics = (
ccconfig[`node_list_selectedMetrics:${cluster}:${subCluster}`] ||
ccconfig[`node_list_selectedMetrics:${cluster}`]
) || [ccconfig.system_view_selectedMetric];
let isMetricsSelectionOpen = false;

/*

@@ -191,6 +194,7 @@

<MetricSelection
{cluster}
{subCluster}
configName="node_list_selectedMetrics"
metrics={selectedMetrics}
bind:isOpen={isMetricsSelectionOpen}

@@ -68,16 +68,16 @@
let durationBinOptions = ["1m","10m","1h","6h","12h"];
let metricBinOptions = [10, 20, 50, 100];

$: metricsInHistograms = selectedCluster
? ccconfig[`user_view_histogramMetrics:${selectedCluster}`] || []
: ccconfig.user_view_histogramMetrics || [];
$: selectedHistograms = selectedCluster
? ccconfig[`user_view_histogramMetrics:${selectedCluster}`] || ( ccconfig['user_view_histogramMetrics'] || [] )
: ccconfig['user_view_histogramMetrics'] || [];

const client = getContextClient();
$: stats = queryStore({
client: client,
query: gql`
query ($jobFilters: [JobFilter!]!, $metricsInHistograms: [String!], $numDurationBins: String, $numMetricBins: Int) {
jobsStatistics(filter: $jobFilters, metrics: $metricsInHistograms, numDurationBins: $numDurationBins , numMetricBins: $numMetricBins ) {
query ($jobFilters: [JobFilter!]!, $selectedHistograms: [String!], $numDurationBins: String, $numMetricBins: Int) {
jobsStatistics(filter: $jobFilters, metrics: $selectedHistograms, numDurationBins: $numDurationBins , numMetricBins: $numMetricBins ) {
totalJobs
shortJobs
totalWalltime

@@ -104,7 +104,7 @@
}
}
`,
variables: { jobFilters, metricsInHistograms, numDurationBins, numMetricBins },
variables: { jobFilters, selectedHistograms, numDurationBins, numMetricBins },
});

onMount(() => filterComponent.updateFilters());

@@ -290,7 +290,7 @@
</InputGroup>
</Col>
</Row>
{#if metricsInHistograms?.length > 0}
{#if selectedHistograms?.length > 0}
{#if $stats.error}
<Row>
<Col>

@@ -352,11 +352,11 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
footprintSelect={true}
footprintSelect
/>

<HistogramSelection
bind:cluster={selectedCluster}
bind:metricsInHistograms
bind:selectedHistograms
bind:isOpen={isHistogramSelectionOpen}
/>

@@ -45,6 +45,14 @@
export let startTimeQuickSelect = false;
export let matchedJobs = -2;

const startTimeSelectOptions = [
{ range: "", rangeLabel: "No Selection"},
{ range: "last6h", rangeLabel: "Last 6hrs"},
{ range: "last24h", rangeLabel: "Last 24hrs"},
{ range: "last7d", rangeLabel: "Last 7 days"},
{ range: "last30d", rangeLabel: "Last 30 days"}
];

let filters = {
projectMatch: filterPresets.projectMatch || "contains",
userMatch: filterPresets.userMatch || "contains",

@@ -56,7 +64,7 @@
filterPresets.states || filterPresets.state
? [filterPresets.state].flat()
: allJobStates,
startTime: filterPresets.startTime || { from: null, to: null },
startTime: filterPresets.startTime || { from: null, to: null, range: ""},
tags: filterPresets.tags || [],
duration: filterPresets.duration || {
lessThan: null,

@@ -268,16 +276,17 @@
{#if startTimeQuickSelect}
<DropdownItem divider />
<DropdownItem disabled>Start Time Quick Selection</DropdownItem>
{#each [{ text: "Last 6hrs", range: "last6h" }, { text: "Last 24hrs", range: "last24h" }, { text: "Last 7 days", range: "last7d" }, { text: "Last 30 days", range: "last30d" }] as { text, range }}
{#each startTimeSelectOptions.filter((stso) => stso.range !== "") as { rangeLabel, range }}
<DropdownItem
on:click={() => {
filters.startTime.from = null
filters.startTime.to = null
filters.startTime.range = range;
filters.startTime.text = text;
updateFilters();
}}
>
<Icon name="calendar-range" />
{text}
{rangeLabel}
</DropdownItem>
{/each}
{/if}

@@ -316,7 +325,7 @@

{#if filters.startTime.range}
<Info icon="calendar-range" on:click={() => (isStartTimeOpen = true)}>
{filters?.startTime?.text ? filters.startTime.text : filters.startTime.range }
{startTimeSelectOptions.find((stso) => stso.range === filters.startTime.range).rangeLabel }
</Info>
{/if}

@@ -414,11 +423,8 @@
bind:from={filters.startTime.from}
bind:to={filters.startTime.to}
bind:range={filters.startTime.range}
on:set-filter={() => {
delete filters.startTime["text"];
delete filters.startTime["range"];
updateFilters();
}}
{startTimeSelectOptions}
on:set-filter={() => updateFilters()}
/>

<Duration

@@ -43,26 +43,31 @@
<ModalBody>
{#if $initialized}
<h4>Cluster</h4>
<ListGroup>
<ListGroupItem
disabled={disableClusterSelection}
active={pendingCluster == null}
on:click={() => ((pendingCluster = null), (pendingPartition = null))}
>
Any Cluster
</ListGroupItem>
{#each clusters as cluster}
{#if disableClusterSelection}
<Button color="info" class="w-100 mb-2" disabled><b>Info: Cluster Selection Disabled in This View</b></Button>
<Button outline color="primary" class="w-100 mb-2" disabled><b>Selected Cluster: {cluster}</b></Button>
{:else}
<ListGroup>
<ListGroupItem
disabled={disableClusterSelection}
active={pendingCluster == cluster.name}
on:click={() => (
(pendingCluster = cluster.name), (pendingPartition = null)
)}
active={pendingCluster == null}
on:click={() => ((pendingCluster = null), (pendingPartition = null))}
>
{cluster.name}
Any Cluster
</ListGroupItem>
{/each}
</ListGroup>
{#each clusters as cluster}
<ListGroupItem
disabled={disableClusterSelection}
active={pendingCluster == cluster.name}
on:click={() => (
(pendingCluster = cluster.name), (pendingPartition = null)
)}
>
{cluster.name}
</ListGroupItem>
{/each}
</ListGroup>
{/if}
{/if}
{#if $initialized && pendingCluster != null}
<br />

@@ -17,7 +17,6 @@
import { parse, format, sub } from "date-fns";
import {
Row,
Col,
Button,
Input,
Modal,

@@ -34,8 +33,7 @@
export let from = null;
export let to = null;
export let range = "";

let pendingFrom, pendingTo;
export let startTimeSelectOptions;

const now = new Date(Date.now());
const ago = sub(now, { months: 1 });

@@ -48,12 +46,24 @@
time: format(now, "HH:mm"),
};

function reset() {
pendingFrom = from == null ? defaultFrom : fromRFC3339(from);
pendingTo = to == null ? defaultTo : fromRFC3339(to);
}
$: pendingFrom = (from == null) ? defaultFrom : fromRFC3339(from)
$: pendingTo = (to == null) ? defaultTo : fromRFC3339(to)
$: pendingRange = range

reset();
$: isModified =
(from != toRFC3339(pendingFrom) || to != toRFC3339(pendingTo, "59")) &&
(range != pendingRange) &&
!(
from == null &&
pendingFrom.date == "0000-00-00" &&
pendingFrom.time == "00:00"
) &&
!(
to == null &&
pendingTo.date == "0000-00-00" &&
pendingTo.time == "00:00"
) &&
!( range == "" && pendingRange == "");

function toRFC3339({ date, time }, secs = "00") {
const parsedDate = parse(

@@ -71,19 +81,6 @@
time: format(parsedDate, "HH:mm"),
};
}

$: isModified =
(from != toRFC3339(pendingFrom) || to != toRFC3339(pendingTo, "59")) &&
!(
from == null &&
pendingFrom.date == "0000-00-00" &&
pendingFrom.time == "00:00"
) &&
!(
to == null &&
pendingTo.date == "0000-00-00" &&
pendingTo.time == "00:00"
);
</script>

<Modal {isOpen} toggle={() => (isOpen = !isOpen)}>

@@ -92,52 +89,82 @@
{#if range !== ""}
<h4>Current Range</h4>
<Row>
<Col>
<Input type="text" value={range} disabled/>
</Col>
<FormGroup class="col">
<Input type ="select" bind:value={pendingRange} >
{#each startTimeSelectOptions as { rangeLabel, range }}
<option label={rangeLabel} value={range}/>
{/each}
</Input>
</FormGroup>
</Row>
{/if}
<h4>From</h4>
<Row>
<FormGroup class="col">
<Input type="date" bind:value={pendingFrom.date} />
<Input type="date" bind:value={pendingFrom.date} disabled={pendingRange !== ""}/>
</FormGroup>
<FormGroup class="col">
<Input type="time" bind:value={pendingFrom.time} />
<Input type="time" bind:value={pendingFrom.time} disabled={pendingRange !== ""}/>
</FormGroup>
</Row>
<h4>To</h4>
<Row>
<FormGroup class="col">
<Input type="date" bind:value={pendingTo.date} />
<Input type="date" bind:value={pendingTo.date} disabled={pendingRange !== ""}/>
</FormGroup>
<FormGroup class="col">
<Input type="time" bind:value={pendingTo.time} />
<Input type="time" bind:value={pendingTo.time} disabled={pendingRange !== ""}/>
</FormGroup>
</Row>
</ModalBody>
<ModalFooter>
<Button
color="primary"
disabled={pendingFrom.date == "0000-00-00" ||
pendingTo.date == "0000-00-00"}
on:click={() => {
isOpen = false;
from = toRFC3339(pendingFrom);
to = toRFC3339(pendingTo, "59");
dispatch("set-filter", { from, to });
}}
>
Close & Apply
</Button>
{#if pendingRange !== ""}
<Button
color="warning"
disabled={pendingRange === ""}
on:click={() => {
pendingRange = ""
}}
>
Reset Range
</Button>
<Button
color="primary"
disabled={pendingRange === ""}
on:click={() => {
isOpen = false;
from = null;
to = null;
range = pendingRange;
dispatch("set-filter", { from, to, range });
}}
>
Close & Apply Range
</Button>
{:else}
<Button
color="primary"
disabled={pendingFrom.date == "0000-00-00" ||
pendingTo.date == "0000-00-00"}
on:click={() => {
isOpen = false;
from = toRFC3339(pendingFrom);
to = toRFC3339(pendingTo, "59");
range = "";
dispatch("set-filter", { from, to, range });
}}
>
Close & Apply Dates
</Button>
{/if}
<Button
color="danger"
on:click={() => {
isOpen = false;
from = null;
to = null;
reset();
dispatch("set-filter", { from, to });
range = "";
dispatch("set-filter", { from, to, range });
}}>Reset</Button
>
<Button on:click={() => (isOpen = false)}>Close</Button>

@@ -179,7 +179,7 @@
function render(plotData) {
if (plotData) {
const opts = {
title: "",
title: "CPU Roofline Diagram",
mode: 2,
width: width,
height: height,

@@ -3,7 +3,7 @@

Properties:
- `cluster String`: Currently selected cluster
- `metricsInHistograms [String]`: The currently selected metrics to display as histogram
- `selectedHistograms [String]`: The currently selected metrics to display as histogram
- ìsOpen Bool`: Is selection opened
-->

@@ -21,22 +21,27 @@
import { gql, getContextClient, mutationStore } from "@urql/svelte";

export let cluster;
export let metricsInHistograms;
export let selectedHistograms;
export let isOpen;

const client = getContextClient();
const initialized = getContext("initialized");

let availableMetrics = []
function loadHistoMetrics(isInitialized, thisCluster) {
if (!isInitialized) return [];

function loadHistoMetrics(isInitialized) {
if (!isInitialized) return;
const rawAvailableMetrics = getContext("globalMetrics").filter((gm) => gm?.footprint).map((fgm) => { return fgm.name })
availableMetrics = [...rawAvailableMetrics]
if (!thisCluster) {
return getContext("globalMetrics")
.filter((gm) => gm?.footprint)
.map((fgm) => { return fgm.name })
} else {
return getContext("globalMetrics")
.filter((gm) => gm?.availability.find((av) => av.cluster == thisCluster))
.filter((agm) => agm?.footprint)
.map((afgm) => { return afgm.name })
}
}

let pendingMetrics = [...metricsInHistograms]; // Copy

const updateConfigurationMutation = ({ name, value }) => {
return mutationStore({
client: client,

@@ -61,17 +66,16 @@
}

function closeAndApply() {
metricsInHistograms = [...pendingMetrics]; // Set for parent
isOpen = !isOpen;
updateConfiguration({
name: cluster
? `user_view_histogramMetrics:${cluster}`
: "user_view_histogramMetrics",
value: metricsInHistograms,
value: selectedHistograms,
});
}

$: loadHistoMetrics($initialized);
$: availableMetrics = loadHistoMetrics($initialized, cluster);

</script>
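loadHistoMetrics now narrows the selectable histogram metrics to footprint metrics that the chosen cluster actually provides. The same filter expressed as a small Go sketch (hypothetical GlobalMetric shape, simplified from the schema types; example metric and cluster names are illustrative):

package main

import "fmt"

type availability struct{ Cluster string }

type globalMetric struct {
	Name      string
	Footprint bool
	Avail     []availability
}

// footprintMetricsFor keeps only footprint metrics available on the given
// cluster; with an empty cluster, all footprint metrics qualify.
func footprintMetricsFor(metrics []globalMetric, cluster string) []string {
	names := make([]string, 0, len(metrics))
	for _, gm := range metrics {
		if !gm.Footprint {
			continue
		}
		if cluster != "" {
			found := false
			for _, av := range gm.Avail {
				if av.Cluster == cluster {
					found = true
					break
				}
			}
			if !found {
				continue
			}
		}
		names = append(names, gm.Name)
	}
	return names
}

func main() {
	metrics := []globalMetric{
		{Name: "flops_any", Footprint: true, Avail: []availability{{"fritz"}}},
		{Name: "mem_bw", Footprint: true, Avail: []availability{{"alex"}}},
		{Name: "clock", Footprint: false, Avail: []availability{{"fritz"}}},
	}
	fmt.Println(footprintMetricsFor(metrics, "fritz")) // [flops_any]
}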
@ -81,7 +85,7 @@
|
||||
<ListGroup>
|
||||
{#each availableMetrics as metric (metric)}
|
||||
<ListGroupItem>
|
||||
<input type="checkbox" bind:group={pendingMetrics} value={metric} />
|
||||
<input type="checkbox" bind:group={selectedHistograms} value={metric} />
|
||||
{metric}
|
||||
</ListGroupItem>
|
||||
{/each}
|
||||
|
@ -28,6 +28,7 @@
export let configName;
export let allMetrics = null;
export let cluster = null;
export let subCluster = null;
export let showFootprint = false;
export let footprintSelect = false;

@ -46,12 +47,16 @@

$: {
  if (allMetrics != null) {
    if (cluster == null) {
    if (!cluster) {
      for (let metric of globalMetrics) allMetrics.add(metric.name);
    } else {
      allMetrics.clear();
      for (let gm of globalMetrics) {
        if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name);
        if (!subCluster) {
          if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name);
        } else {
          if (gm.availability.find((av) => av.cluster === cluster && av.subClusters.includes(subCluster))) allMetrics.add(gm.name);
        }
      }
    }
    newMetricsOrder = [...allMetrics].filter((m) => !metrics.includes(m));
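The widened reactive block now narrows the metric set one level further when a subCluster is set. A sketch of the same narrowing as a plain function, under the same assumed globalMetrics shape as in the earlier example:

// Rebuilds the metric set for a cluster and, optionally, one subcluster,
// mirroring the reactive block above: no cluster means "everything",
// a cluster filters by availability, and a subcluster additionally
// requires membership in that availability entry's subClusters list.
function rebuildMetricSet(globalMetrics, cluster = null, subCluster = null) {
  const metricSet = new Set();
  for (const gm of globalMetrics) {
    if (!cluster) {
      metricSet.add(gm.name);
    } else if (gm.availability.find((av) =>
        av.cluster === cluster && (!subCluster || av.subClusters.includes(subCluster)))) {
      metricSet.add(gm.name);
    }
  }
  return metricSet;
}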
@ -62,7 +67,7 @@

function printAvailability(metric, cluster) {
  const avail = globalMetrics.find((gm) => gm.name === metric)?.availability
  if (cluster == null) {
  if (!cluster) {
    return avail.map((av) => av.cluster).join(',')
  } else {
    return avail.find((av) => av.cluster === cluster).subClusters.join(',')
@ -107,10 +112,17 @@
metrics = newMetricsOrder.filter((m) => unorderedMetrics.includes(m));
isOpen = false;

showFootprint = !!pendingShowFootprint;
let configKey;
if (cluster && subCluster) {
  configKey = `${configName}:${cluster}:${subCluster}`;
} else if (cluster && !subCluster) {
  configKey = `${configName}:${cluster}`;
} else {
  configKey = `${configName}`;
}

updateConfigurationMutation({
  name: cluster == null ? configName : `${configName}:${cluster}`,
  name: configKey,
  value: JSON.stringify(metrics),
}).subscribe((res) => {
  if (res.fetching === false && res.error) {
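The new configKey ladder simply appends each narrower scope when it is present. As a compact restatement (a sketch equivalent to the if/else above, not the committed code):

// Append ":cluster" and ":subCluster" only when set; "cluster && subCluster"
// ensures a subcluster alone (without a cluster) is ignored, matching the
// ladder above.
const configKeyFor = (configName, cluster = null, subCluster = null) =>
  [configName, cluster, cluster && subCluster].filter(Boolean).join(":");

configKeyFor("job_view_nodestats_selectedMetrics");                     // "job_view_nodestats_selectedMetrics"
configKeyFor("job_view_nodestats_selectedMetrics", "clusterA");         // "job_view_nodestats_selectedMetrics:clusterA"
configKeyFor("job_view_nodestats_selectedMetrics", "clusterA", "main"); // "job_view_nodestats_selectedMetrics:clusterA:main"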
@ -118,17 +130,20 @@
  }
});

updateConfigurationMutation({
  name:
    cluster == null
      ? "plot_list_showFootprint"
      : `plot_list_showFootprint:${cluster}`,
  value: JSON.stringify(showFootprint),
}).subscribe((res) => {
  if (res.fetching === false && res.error) {
    throw res.error;
  }
});
if (footprintSelect) {
  showFootprint = !!pendingShowFootprint;
  updateConfigurationMutation({
    name:
      !cluster
        ? "plot_list_showFootprint"
        : `plot_list_showFootprint:${cluster}`,
    value: JSON.stringify(showFootprint),
  }).subscribe((res) => {
    if (res.fetching === false && res.error) {
      throw res.error;
    }
  });
};

dispatch('update-metrics', metrics);
}

@ -18,6 +18,8 @@
  InputGroup,
  InputGroupText,
  Icon,
  Row,
  Col
} from "@sveltestrap/sveltestrap";
import { maxScope } from "../generic/utils.js";
import StatsTableEntry from "./StatsTableEntry.svelte";
@ -26,7 +28,7 @@
export let job;
export let jobMetrics;

const allMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort()
const sortedJobMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort()
const scopesForMetric = (metric) =>
  jobMetrics.filter((jm) => jm.name == metric).map((jm) => jm.scope);

@ -34,11 +36,13 @@
  selectedScopes = {},
  sorting = {},
  isMetricSelectionOpen = false,
  selectedMetrics =
    getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
    getContext("cc-config")["job_view_nodestats_selectedMetrics"];
  availableMetrics = new Set(),
  selectedMetrics = (
    getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
    getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`]
  ) || getContext("cc-config")["job_view_nodestats_selectedMetrics"];

for (let metric of allMetrics) {
for (let metric of sortedJobMetrics) {
  // Not Exclusive or Multi-Node: get maxScope directly (mostly: node)
  // -> Else: Load smallest available granularity as default as per availability
  const availableScopes = scopesForMetric(metric);
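On the read side, the widened keys need the matching fallback chain shown above: the subcluster-scoped key first, then the cluster-scoped key, then the global default. A standalone sketch with hypothetical cc-config contents:

// Hypothetical cc-config contents for illustration only.
const ccConfig = {
  "job_view_nodestats_selectedMetrics": ["flops_any", "mem_used"],
  "job_view_nodestats_selectedMetrics:clusterA": ["flops_any"],
};

// Mirrors the lookup above: most specific key wins, falling back one
// scope at a time.
function selectedMetricsFor(ccConfig, cluster, subCluster) {
  return (
    ccConfig[`job_view_nodestats_selectedMetrics:${cluster}:${subCluster}`] ||
    ccConfig[`job_view_nodestats_selectedMetrics:${cluster}`] ||
    ccConfig["job_view_nodestats_selectedMetrics"]
  );
}

console.log(selectedMetricsFor(ccConfig, "clusterA", "main")); // ["flops_any"] (cluster key)
console.log(selectedMetricsFor(ccConfig, "clusterB", "main")); // ["flops_any", "mem_used"] (global default)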
@ -95,15 +99,19 @@
};
</script>

<Row>
  <Col class="m-2">
    <Button outline on:click={() => (isMetricSelectionOpen = true)} class="w-auto px-2" color="primary">
      Select Metrics (Selected {selectedMetrics.length} of {availableMetrics.size} available)
    </Button>
  </Col>
</Row>
<hr class="mb-1 mt-1"/>
<Table class="mb-0">
  <thead>
    <!-- Header Row 1: Selectors -->
    <tr>
      <th>
        <Button outline on:click={() => (isMetricSelectionOpen = true)} class="w-100 px-2" color="primary">
          Select Metrics
        </Button>
      </th>
      <th/>
      {#each selectedMetrics as metric}
        <!-- To Match Row-2 Header Field Count -->
        <th colspan={selectedScopes[metric] == "node" ? 3 : 4}>
@ -162,8 +170,9 @@

<MetricSelection
  cluster={job.cluster}
  subCluster={job.subCluster}
  configName="job_view_nodestats_selectedMetrics"
  allMetrics={new Set(allMetrics)}
  bind:allMetrics={availableMetrics}
  bind:metrics={selectedMetrics}
  bind:isOpen={isMetricSelectionOpen}
/>

@ -217,13 +217,15 @@
<tr>
  <td colspan={selectedMetrics.length + 1}>
    <div style="text-align:center;">
      <p><b>
        Loading nodes {nodes.length + 1} to
        { matchedNodes
          ? `${(nodes.length + paging.itemsPerPage) > matchedNodes ? matchedNodes : (nodes.length + paging.itemsPerPage)} of ${matchedNodes} total`
          : (nodes.length + paging.itemsPerPage)
        }
      </b></p>
      {#if !usePaging}
        <p><b>
          Loading nodes {nodes.length + 1} to
          { matchedNodes
            ? `${(nodes.length + paging.itemsPerPage) > matchedNodes ? matchedNodes : (nodes.length + paging.itemsPerPage)} of ${matchedNodes} total`
            : (nodes.length + paging.itemsPerPage)
          }
        </b></p>
      {/if}
      <Spinner secondary />
    </div>
  </td>
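The nested ternary in that loading message just clamps the upper bound of the counter at the total match count when it is known. The same arithmetic as a small helper:

// Clamp the "loading up to" counter at the number of matched nodes,
// when that total is available (falsy means unknown).
const upperBound = (loaded, pageSize, matched) =>
  matched ? Math.min(loaded + pageSize, matched) : loaded + pageSize;

upperBound(50, 25, 60); // 60 ("of 60 total")
upperBound(50, 25, 0);  // 75 (total unknown)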
@ -102,6 +102,19 @@
      Shared
    </Button>
  </InputGroup>
<!-- Fallback -->
{:else if nodeJobsData.jobs.count >= 1}
  <InputGroup>
    <InputGroupText>
      <Icon name="circle-fill"/>
    </InputGroupText>
    <InputGroupText>
      Status
    </InputGroupText>
    <Button color="success" disabled>
      Allocated Jobs
    </Button>
  </InputGroup>
{:else}
  <InputGroup>
    <InputGroupText>
@ -98,12 +98,12 @@

let extendedLegendData = null;
$: if ($nodeJobsData?.data) {
  // Get Shared State of Node: Only Build extended Legend For Shared Nodes
  if ($nodeJobsData.data.jobs.count >= 1 && !$nodeJobsData.data.jobs.items[0].exclusive) {
  // Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes]
  if ($nodeJobsData.data.jobs.count >= 1) { // "&& !$nodeJobsData.data.jobs.items[0].exclusive)"
    const accSet = Array.from(new Set($nodeJobsData.data.jobs.items
      .map((i) => i.resources
        .filter((r) => r.hostname === nodeData.host)
        .map((r) => r.accelerators)
        .filter((r) => (r.hostname === nodeData.host) && r?.accelerators)
        .map((r) => r?.accelerators)
      )
    )).flat(2)
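The reworked accSet pipeline above collects, per running job, the accelerator IDs attached to the current host and flattens them into one list. A sketch of the core of that pipeline with invented job/resource data:

// Invented sample: two jobs sharing one node, each with accelerator IDs.
const jobs = [
  { resources: [{ hostname: "node01", accelerators: ["GPU-0", "GPU-1"] }] },
  { resources: [{ hostname: "node01", accelerators: ["GPU-2"] },
                { hostname: "node02", accelerators: ["GPU-0"] }] },
];
const host = "node01";

// Mirrors the pipeline above: keep only resources on this host that carry
// accelerators, collect their ID lists, and flatten two levels deep.
const accSet = jobs
  .map((j) => j.resources
    .filter((r) => r.hostname === host && r?.accelerators)
    .map((r) => r.accelerators))
  .flat(2);

console.log(accSet); // ["GPU-0", "GPU-1", "GPU-2"]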