mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-20 23:07:29 +01:00
@@ -23,6 +23,7 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
|
ccunit "github.com/ClusterCockpit/cc-lib/v2/ccUnits"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -938,15 +939,21 @@ func (r *queryResolver) ClusterMetrics(ctx context.Context, cluster string, metr
|
|||||||
}
|
}
|
||||||
|
|
||||||
for metricName, data := range collectorData {
|
for metricName, data := range collectorData {
|
||||||
cu := collectorUnit[metricName]
|
// use ccUnits for backend normalization to "Tera"
|
||||||
|
p_old := ccunit.NewPrefix(collectorUnit[metricName].Prefix)
|
||||||
|
p_new := ccunit.NewPrefix("T")
|
||||||
|
convFunc := ccunit.GetPrefixPrefixFactor(p_old, p_new)
|
||||||
|
u_new := schema.Unit{Prefix: p_new.Prefix(), Base: collectorUnit[metricName].Base}
|
||||||
|
|
||||||
roundedData := make([]schema.Float, 0)
|
roundedData := make([]schema.Float, 0)
|
||||||
for _, val := range data {
|
for _, v_old := range data {
|
||||||
roundedData = append(roundedData, schema.Float((math.Round(float64(val)*100.0) / 100.0)))
|
v_new := math.Round(convFunc(float64(v_old)).(float64)*100.0) / 100.0
|
||||||
|
roundedData = append(roundedData, schema.Float(v_new))
|
||||||
}
|
}
|
||||||
|
|
||||||
cm := model.ClusterMetricWithName{
|
cm := model.ClusterMetricWithName{
|
||||||
Name: metricName,
|
Name: metricName,
|
||||||
Unit: &cu,
|
Unit: &u_new,
|
||||||
Timestep: collectorTimestep[metricName],
|
Timestep: collectorTimestep[metricName],
|
||||||
Data: roundedData,
|
Data: roundedData,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -57,13 +57,13 @@ func (r *queryResolver) rooflineHeatmap(
|
|||||||
|
|
||||||
jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while loading roofline metrics for job %d", job.ID)
|
cclog.Warnf("Error while loading roofline metrics for job %d", job.ID)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
|
flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"]
|
||||||
if flops_ == nil && membw_ == nil {
|
if flops_ == nil && membw_ == nil {
|
||||||
cclog.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
cclog.Warnf("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||||
continue
|
continue
|
||||||
// return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
// return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,8 +97,8 @@ func LoadData(job *schema.Job,
|
|||||||
|
|
||||||
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.Cluster, job.SubCluster, err.Error())
|
||||||
return err, 0, 0
|
return err, 0, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -116,11 +116,11 @@ func LoadData(job *schema.Job,
|
|||||||
jd, err = ms.LoadData(job, metrics, scopes, ctx, resolution)
|
jd, err = ms.LoadData(job, metrics, scopes, ctx, resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if len(jd) != 0 {
|
if len(jd) != 0 {
|
||||||
cclog.Warnf("partial error loading metrics from store for job %d (user: %s, project: %s): %s",
|
cclog.Warnf("partial error loading metrics from store for job %d (user: %s, project: %s, cluster: %s-%s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error())
|
||||||
} else {
|
} else {
|
||||||
cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Warnf("failed to load job data from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error())
|
||||||
return err, 0, 0
|
return err, 0, 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -129,8 +129,8 @@ func LoadData(job *schema.Job,
|
|||||||
var jdTemp schema.JobData
|
var jdTemp schema.JobData
|
||||||
jdTemp, err = archive.GetHandle().LoadJobData(job)
|
jdTemp, err = archive.GetHandle().LoadJobData(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load job data from archive for job %d (user: %s, project: %s): %s",
|
cclog.Warnf("failed to load job data from archive for job %d (user: %s, project: %s, cluster: %s-%s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error())
|
||||||
return err, 0, 0
|
return err, 0, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -244,15 +244,15 @@ func LoadAverages(
|
|||||||
|
|
||||||
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.Cluster, job.SubCluster, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
stats, err := ms.LoadStats(job, metrics, ctx)
|
stats, err := ms.LoadStats(job, metrics, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Warnf("failed to load statistics from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error())
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -288,15 +288,15 @@ func LoadScopedJobStats(
|
|||||||
|
|
||||||
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.Cluster, job.SubCluster, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
scopedStats, err := ms.LoadScopedStats(job, metrics, scopes, ctx)
|
scopedStats, err := ms.LoadScopedStats(job, metrics, scopes, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load scoped statistics from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Warnf("failed to load scoped statistics from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -320,8 +320,8 @@ func LoadJobStats(
|
|||||||
|
|
||||||
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.Cluster, job.SubCluster, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -329,8 +329,8 @@ func LoadJobStats(
|
|||||||
|
|
||||||
stats, err := ms.LoadStats(job, metrics, ctx)
|
stats, err := ms.LoadStats(job, metrics, ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Warnf("failed to load statistics from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error())
|
||||||
return data, err
|
return data, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -379,8 +379,8 @@ func LoadNodeData(
|
|||||||
|
|
||||||
ms, err := GetMetricDataRepo(cluster, "")
|
ms, err := GetMetricDataRepo(cluster, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load node data from metric store: %s",
|
cclog.Errorf("failed to access metricDataRepo for cluster %s: %s",
|
||||||
err.Error())
|
cluster, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -389,7 +389,7 @@ func LoadNodeData(
|
|||||||
if len(data) != 0 {
|
if len(data) != 0 {
|
||||||
cclog.Warnf("partial error loading node data from metric store for cluster %s: %s", cluster, err.Error())
|
cclog.Warnf("partial error loading node data from metric store for cluster %s: %s", cluster, err.Error())
|
||||||
} else {
|
} else {
|
||||||
cclog.Errorf("failed to load node data from metric store for cluster %s: %s", cluster, err.Error())
|
cclog.Warnf("failed to load node data from metric store for cluster %s: %s", cluster, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -423,8 +423,8 @@ func LoadNodeListData(
|
|||||||
|
|
||||||
ms, err := GetMetricDataRepo(cluster, subCluster)
|
ms, err := GetMetricDataRepo(cluster, subCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load node data from metric store: %s",
|
cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s",
|
||||||
err.Error())
|
cluster, subCluster, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -434,7 +434,7 @@ func LoadNodeListData(
|
|||||||
cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s",
|
cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s",
|
||||||
cluster, subCluster, err.Error())
|
cluster, subCluster, err.Error())
|
||||||
} else {
|
} else {
|
||||||
cclog.Errorf("failed to load node list data from metric store for cluster %s, subcluster %s: %s",
|
cclog.Warnf("failed to load node list data from metric store for cluster %s, subcluster %s: %s",
|
||||||
cluster, subCluster, err.Error())
|
cluster, subCluster, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -329,7 +329,7 @@ func (ccms *CCMetricStore) LoadStats(
|
|||||||
metric := query.Metric
|
metric := query.Metric
|
||||||
data := res[0]
|
data := res[0]
|
||||||
if data.Error != nil {
|
if data.Error != nil {
|
||||||
cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
cclog.Warnf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -556,7 +556,7 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
|||||||
) (map[string]schema.JobData, error) {
|
) (map[string]schema.JobData, error) {
|
||||||
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error())
|
cclog.Errorf("Error while building node queries for Cluster %s, SubCluster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error())
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@
|
|||||||
// - All queries use prepared statements via stmtCache
|
// - All queries use prepared statements via stmtCache
|
||||||
// - Complex aggregations use SQL for efficiency
|
// - Complex aggregations use SQL for efficiency
|
||||||
// - Histogram pre-initialization ensures consistent bin ranges
|
// - Histogram pre-initialization ensures consistent bin ranges
|
||||||
// - Metric histogram queries limited to 500 jobs for running job analysis
|
// - Metric histogram queries limited to 5000 jobs for running job analysis
|
||||||
|
|
||||||
package repository
|
package repository
|
||||||
|
|
||||||
@@ -686,7 +686,7 @@ func (r *JobRepository) AddHistograms(
|
|||||||
// - Pre-initialized with zeros for consistent visualization
|
// - Pre-initialized with zeros for consistent visualization
|
||||||
//
|
//
|
||||||
// Limitations:
|
// Limitations:
|
||||||
// - Running jobs: Limited to 500 jobs for performance
|
// - Running jobs: Limited to 5000 jobs for performance
|
||||||
// - Requires valid cluster configuration with metric peak values
|
// - Requires valid cluster configuration with metric peak values
|
||||||
// - Uses footprint statistic (avg/max/min) configured per metric
|
// - Uses footprint statistic (avg/max/min) configured per metric
|
||||||
func (r *JobRepository) AddMetricHistograms(
|
func (r *JobRepository) AddMetricHistograms(
|
||||||
@@ -995,12 +995,12 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
|
|||||||
// Returns slice of MetricHistoPoints, one per metric.
|
// Returns slice of MetricHistoPoints, one per metric.
|
||||||
//
|
//
|
||||||
// Limitations:
|
// Limitations:
|
||||||
// - Maximum 500 jobs (returns nil if more jobs match)
|
// - Maximum 5000 jobs (returns nil if more jobs match)
|
||||||
// - Requires metric backend availability
|
// - Requires metric backend availability
|
||||||
// - Bins based on metric peak values from cluster configuration
|
// - Bins based on metric peak values from cluster configuration
|
||||||
//
|
//
|
||||||
// Algorithm:
|
// Algorithm:
|
||||||
// 1. Query first 501 jobs to check count limit
|
// 1. Query first 5001 jobs to check count limit
|
||||||
// 2. Load metric averages for all jobs via metricdispatch
|
// 2. Load metric averages for all jobs via metricdispatch
|
||||||
// 3. For each metric, create bins based on peak value
|
// 3. For each metric, create bins based on peak value
|
||||||
// 4. Iterate averages and count jobs per bin
|
// 4. Iterate averages and count jobs per bin
|
||||||
@@ -1011,13 +1011,13 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
|
|||||||
bins *int,
|
bins *int,
|
||||||
) []*model.MetricHistoPoints {
|
) []*model.MetricHistoPoints {
|
||||||
// Get Jobs
|
// Get Jobs
|
||||||
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
|
jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 5000 + 1}, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("Error while querying jobs for footprint: %s", err)
|
cclog.Errorf("Error while querying jobs for footprint: %s", err)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if len(jobs) > 500 {
|
if len(jobs) > 5000 {
|
||||||
cclog.Errorf("too many jobs matched (max: %d)", 500)
|
cclog.Errorf("too many jobs matched (max: %d)", 5000)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -68,8 +68,8 @@ func RegisterFootprintWorker() {
|
|||||||
|
|
||||||
ms, err := metricdispatch.GetMetricDataRepo(job.Cluster, job.SubCluster)
|
ms, err := metricdispatch.GetMetricDataRepo(job.Cluster, job.SubCluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s",
|
cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s",
|
||||||
job.JobID, job.User, job.Project, err.Error())
|
job.Cluster, job.SubCluster, err.Error())
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,13 +13,14 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/util"
|
"github.com/ClusterCockpit/cc-lib/v2/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// ErrNoHostOrMetric is returned when the metric store does not find the host or the metric
|
// ErrNoHostOrMetric is returned when the metric store does not find the host or the metric
|
||||||
ErrNoHostOrMetric error = errors.New("[METRICSTORE]> [METRICSTORE]> metric or host not found")
|
ErrNoHostOrMetric error = errors.New("[METRICSTORE]> metric or host not found")
|
||||||
// ErrInvalidTimeRange is returned when a query has 'from' >= 'to'
|
// ErrInvalidTimeRange is returned when a query has 'from' >= 'to'
|
||||||
ErrInvalidTimeRange = errors.New("[METRICSTORE]> invalid time range: 'from' must be before 'to'")
|
ErrInvalidTimeRange = errors.New("[METRICSTORE]> invalid time range: 'from' must be before 'to'")
|
||||||
// ErrEmptyCluster is returned when a query with ForAllNodes has no cluster specified
|
// ErrEmptyCluster is returned when a query with ForAllNodes has no cluster specified
|
||||||
@@ -280,20 +281,16 @@ func FetchData(req APIQueryRequest) (*APIQueryResponse, error) {
|
|||||||
|
|
||||||
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
|
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Check a special case where only the metric or host.
|
// Skip Error If Just Missing Host or Metric, Continue
|
||||||
// Dont send errors, instead just send empty array
|
// Empty Return For Metric Handled Gracefully By Frontend
|
||||||
// where frontend already renders error for empty array.
|
if err != ErrNoHostOrMetric {
|
||||||
if err == ErrNoHostOrMetric {
|
|
||||||
data.Data = make([]schema.Float, 0)
|
|
||||||
data.From = req.From
|
|
||||||
data.To = req.To
|
|
||||||
data.Resolution = query.Resolution
|
|
||||||
} else {
|
|
||||||
msg := err.Error()
|
msg := err.Error()
|
||||||
data.Error = &msg
|
data.Error = &msg
|
||||||
res = append(res, data)
|
res = append(res, data)
|
||||||
continue
|
} else {
|
||||||
|
cclog.Warnf("failed to fetch '%s' from host '%s' (cluster: %s): %s", query.Metric, query.Hostname, req.Cluster, err.Error())
|
||||||
}
|
}
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if req.WithStats {
|
if req.WithStats {
|
||||||
|
|||||||
@@ -104,6 +104,11 @@ func (ccms *InternalMetricStore) LoadData(
|
|||||||
var errors []string
|
var errors []string
|
||||||
jobData := make(schema.JobData)
|
jobData := make(schema.JobData)
|
||||||
for i, row := range resBody.Results {
|
for i, row := range resBody.Results {
|
||||||
|
if len(row) == 0 {
|
||||||
|
// No Data Found For Metric, Logged in FetchData to Warn
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
query := req.Queries[i]
|
query := req.Queries[i]
|
||||||
metric := query.Metric
|
metric := query.Metric
|
||||||
scope := assignedScope[i]
|
scope := assignedScope[i]
|
||||||
@@ -229,7 +234,7 @@ func buildQueries(
|
|||||||
for _, metric := range metrics {
|
for _, metric := range metrics {
|
||||||
mc := archive.GetMetricConfig(job.Cluster, metric)
|
mc := archive.GetMetricConfig(job.Cluster, metric)
|
||||||
if mc == nil {
|
if mc == nil {
|
||||||
cclog.Infof("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
cclog.Warnf("metric '%s' is not specified for cluster '%s'", metric, job.Cluster)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -535,11 +540,15 @@ func (ccms *InternalMetricStore) LoadStats(
|
|||||||
|
|
||||||
stats := make(map[string]map[string]schema.MetricStatistics, len(metrics))
|
stats := make(map[string]map[string]schema.MetricStatistics, len(metrics))
|
||||||
for i, res := range resBody.Results {
|
for i, res := range resBody.Results {
|
||||||
|
if len(res) == 0 {
|
||||||
|
// No Data Found For Metric, Logged in FetchData to Warn
|
||||||
|
continue
|
||||||
|
}
|
||||||
query := req.Queries[i]
|
query := req.Queries[i]
|
||||||
metric := query.Metric
|
metric := query.Metric
|
||||||
data := res[0]
|
data := res[0]
|
||||||
if data.Error != nil {
|
if data.Error != nil {
|
||||||
cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
cclog.Warnf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -609,6 +618,10 @@ func (ccms *InternalMetricStore) LoadScopedStats(
|
|||||||
scopedJobStats := make(schema.ScopedJobStats)
|
scopedJobStats := make(schema.ScopedJobStats)
|
||||||
|
|
||||||
for i, row := range resBody.Results {
|
for i, row := range resBody.Results {
|
||||||
|
if len(row) == 0 {
|
||||||
|
// No Data Found For Metric, Logged in FetchData to Warn
|
||||||
|
continue
|
||||||
|
}
|
||||||
query := req.Queries[i]
|
query := req.Queries[i]
|
||||||
metric := query.Metric
|
metric := query.Metric
|
||||||
scope := assignedScope[i]
|
scope := assignedScope[i]
|
||||||
@@ -717,6 +730,11 @@ func (ccms *InternalMetricStore) LoadNodeData(
|
|||||||
var errors []string
|
var errors []string
|
||||||
data := make(map[string]map[string][]*schema.JobMetric)
|
data := make(map[string]map[string][]*schema.JobMetric)
|
||||||
for i, res := range resBody.Results {
|
for i, res := range resBody.Results {
|
||||||
|
if len(res) == 0 {
|
||||||
|
// No Data Found For Metric, Logged in FetchData to Warn
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
var query APIQuery
|
var query APIQuery
|
||||||
if resBody.Queries != nil {
|
if resBody.Queries != nil {
|
||||||
query = resBody.Queries[i]
|
query = resBody.Queries[i]
|
||||||
@@ -816,6 +834,10 @@ func (ccms *InternalMetricStore) LoadNodeListData(
|
|||||||
var errors []string
|
var errors []string
|
||||||
data := make(map[string]schema.JobData)
|
data := make(map[string]schema.JobData)
|
||||||
for i, row := range resBody.Results {
|
for i, row := range resBody.Results {
|
||||||
|
if len(row) == 0 {
|
||||||
|
// No Data Found For Metric, Logged in FetchData to Warn
|
||||||
|
continue
|
||||||
|
}
|
||||||
var query APIQuery
|
var query APIQuery
|
||||||
if resBody.Queries != nil {
|
if resBody.Queries != nil {
|
||||||
query = resBody.Queries[i]
|
query = resBody.Queries[i]
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
} from "./generic/utils.js";
|
} from "./generic/utils.js";
|
||||||
import {
|
import {
|
||||||
formatNumber,
|
formatNumber,
|
||||||
|
scaleNumber
|
||||||
} from "./generic/units.js";
|
} from "./generic/units.js";
|
||||||
import {
|
import {
|
||||||
Row,
|
Row,
|
||||||
@@ -222,8 +223,10 @@
|
|||||||
else rawInfos['totalAccs'] += (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;
|
else rawInfos['totalAccs'] += (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;
|
||||||
|
|
||||||
// Units (Set Once)
|
// Units (Set Once)
|
||||||
if (!rawInfos['flopRateUnit']) rawInfos['flopRateUnit'] = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base
|
if (!rawInfos['flopRateUnitBase']) rawInfos['flopRateUnitBase'] = subCluster.flopRateSimd.unit.base
|
||||||
if (!rawInfos['memBwRateUnit']) rawInfos['memBwRateUnit'] = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base
|
if (!rawInfos['memBwRateUnitBase']) rawInfos['memBwRateUnitBase'] = subCluster.memoryBandwidth.unit.base
|
||||||
|
if (!rawInfos['flopRateUnitPrefix']) rawInfos['flopRateUnitPrefix'] = subCluster.flopRateSimd.unit.prefix
|
||||||
|
if (!rawInfos['memBwRateUnitPrefix']) rawInfos['memBwRateUnitPrefix'] = subCluster.memoryBandwidth.unit.prefix
|
||||||
|
|
||||||
// Get Maxima For Roofline Knee Render
|
// Get Maxima For Roofline Knee Render
|
||||||
if (!rawInfos['roofData']) {
|
if (!rawInfos['roofData']) {
|
||||||
@@ -239,10 +242,14 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get Idle Infos after Sums
|
// Get Simple Idle Infos after Sums by Diff
|
||||||
if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes'];
|
if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes'];
|
||||||
if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores'];
|
if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores'];
|
||||||
if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs'];
|
if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs'];
|
||||||
|
// Cap at 0 (Negative hints towards Config <> Reality Mismatch!)
|
||||||
|
if (rawInfos['idleNodes'] < 0) rawInfos['idleNodes'] = 0;
|
||||||
|
if (rawInfos['idleCores'] < 0) rawInfos['idleCores'] = 0;
|
||||||
|
if (rawInfos['idleAccs'] < 0) rawInfos['idleAccs'] = 0;
|
||||||
|
|
||||||
// Keymetrics (Data on Cluster-Scope)
|
// Keymetrics (Data on Cluster-Scope)
|
||||||
let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>
|
let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>
|
||||||
@@ -262,20 +269,20 @@
|
|||||||
0, // Initial Value
|
0, // Initial Value
|
||||||
) || 0;
|
) || 0;
|
||||||
rawInfos['cpuPwr'] = Math.floor((rawCpuPwr * 100) / 100)
|
rawInfos['cpuPwr'] = Math.floor((rawCpuPwr * 100) / 100)
|
||||||
if (!rawInfos['cpuPwrUnit']) {
|
|
||||||
let rawCpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'cpu_power')?.metric?.unit || null
|
let rawCpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'cpu_power')?.metric?.unit || null
|
||||||
rawInfos['cpuPwrUnit'] = rawCpuUnit ? rawCpuUnit.prefix + rawCpuUnit.base : ''
|
if (!rawInfos['cpuPwrUnitBase']) rawInfos['cpuPwrUnitBase'] = rawCpuUnit ? rawCpuUnit.base : ''
|
||||||
}
|
if (!rawInfos['cpuPwrUnitPrefix']) rawInfos['cpuPwrUnitPrefix'] = rawCpuUnit ? rawCpuUnit.prefix : ''
|
||||||
|
|
||||||
let rawGpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>
|
let rawGpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>
|
||||||
sum + (node.metrics.find((m) => m.name == 'acc_power')?.metric?.series[0]?.statistics?.avg || 0),
|
sum + (node.metrics.find((m) => m.name == 'acc_power')?.metric?.series[0]?.statistics?.avg || 0),
|
||||||
0, // Initial Value
|
0, // Initial Value
|
||||||
) || 0;
|
) || 0;
|
||||||
rawInfos['gpuPwr'] = Math.floor((rawGpuPwr * 100) / 100)
|
rawInfos['gpuPwr'] = Math.floor((rawGpuPwr * 100) / 100)
|
||||||
if (!rawInfos['gpuPwrUnit']) {
|
|
||||||
let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null
|
let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null
|
||||||
rawInfos['gpuPwrUnit'] = rawGpuUnit ? rawGpuUnit.prefix + rawGpuUnit.base : ''
|
if (!rawInfos['gpuPwrUnitBase']) rawInfos['gpuPwrUnitBase'] = rawGpuUnit ? rawGpuUnit.base : ''
|
||||||
}
|
if (!rawInfos['gpuPwrUnitPrefix']) rawInfos['gpuPwrUnitPrefix'] = rawGpuUnit ? rawGpuUnit.prefix : ''
|
||||||
}
|
}
|
||||||
return rawInfos;
|
return rawInfos;
|
||||||
});
|
});
|
||||||
@@ -443,7 +450,7 @@
|
|||||||
<Row class="mt-1 mb-2">
|
<Row class="mt-1 mb-2">
|
||||||
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
||||||
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
||||||
{clusterInfo?.flopRate} {clusterInfo?.flopRateUnit}
|
{scaleNumber(clusterInfo?.flopRate, clusterInfo?.flopRateUnitPrefix)}{clusterInfo?.flopRateUnitBase}
|
||||||
</Badge>
|
</Badge>
|
||||||
<div style="font-size:large;">
|
<div style="font-size:large;">
|
||||||
Total Flop Rate
|
Total Flop Rate
|
||||||
@@ -451,7 +458,7 @@
|
|||||||
</Col>
|
</Col>
|
||||||
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
||||||
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
||||||
{clusterInfo?.memBwRate} {clusterInfo?.memBwRateUnit}
|
{scaleNumber(clusterInfo?.memBwRate, clusterInfo?.memBwRateUnitPrefix)}{clusterInfo?.memBwRateUnitBase}
|
||||||
</Badge>
|
</Badge>
|
||||||
<div style="font-size:large;">
|
<div style="font-size:large;">
|
||||||
Total Memory Bandwidth
|
Total Memory Bandwidth
|
||||||
@@ -460,7 +467,7 @@
|
|||||||
{#if clusterInfo?.totalAccs !== 0}
|
{#if clusterInfo?.totalAccs !== 0}
|
||||||
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
||||||
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
||||||
{clusterInfo?.gpuPwr} {clusterInfo?.gpuPwrUnit}
|
{scaleNumber(clusterInfo?.gpuPwr, clusterInfo?.gpuPwrUnitPrefix)}{clusterInfo?.gpuPwrUnitBase}
|
||||||
</Badge>
|
</Badge>
|
||||||
<div style="font-size:large;">
|
<div style="font-size:large;">
|
||||||
Total GPU Power
|
Total GPU Power
|
||||||
@@ -469,7 +476,7 @@
|
|||||||
{:else}
|
{:else}
|
||||||
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
<Col xs={4} class="d-inline-flex align-items-center justify-content-center">
|
||||||
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
<Badge color="secondary" style="font-size:x-large;margin-right:0.25rem;">
|
||||||
{clusterInfo?.cpuPwr} {clusterInfo?.cpuPwrUnit}
|
{scaleNumber(clusterInfo?.cpuPwr, clusterInfo?.cpuPwrUnitPrefix)}{clusterInfo?.cpuPwrUnitBase}
|
||||||
</Badge>
|
</Badge>
|
||||||
<div style="font-size:large;">
|
<div style="font-size:large;">
|
||||||
Total CPU Power
|
Total CPU Power
|
||||||
|
|||||||
@@ -342,7 +342,7 @@
|
|||||||
<b>Disabled Metric</b>
|
<b>Disabled Metric</b>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<p>Metric <b>{item.metric}</b> is disabled for subcluster <b>{$initq.data.job.subCluster}</b>.</p>
|
<p>Metric <b>{item.metric}</b> is disabled for cluster <b>{$initq.data.job.cluster}:{$initq.data.job.subCluster}</b>.</p>
|
||||||
<p class="mb-1">To remove this card, open metric selection and press "Close and Apply".</p>
|
<p class="mb-1">To remove this card, open metric selection and press "Close and Apply".</p>
|
||||||
</CardBody>
|
</CardBody>
|
||||||
</Card>
|
</Card>
|
||||||
@@ -352,7 +352,8 @@
|
|||||||
<b>Missing Metric</b>
|
<b>Missing Metric</b>
|
||||||
</CardHeader>
|
</CardHeader>
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<p class="mb-1">No dataset returned for <b>{item.metric}</b>.</p>
|
<p>No dataset(s) returned for <b>{item.metric}</b>.</p>
|
||||||
|
<p class="mb-1">Metric was not found in metric store for cluster <b>{$initq.data.job.cluster}</b>.</p>
|
||||||
</CardBody>
|
</CardBody>
|
||||||
</Card>
|
</Card>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -386,17 +387,17 @@
|
|||||||
<CardBody>
|
<CardBody>
|
||||||
{#if missingMetrics.length > 0}
|
{#if missingMetrics.length > 0}
|
||||||
<p>
|
<p>
|
||||||
No data at all is available for the metrics: {missingMetrics.join(
|
No datasets were returned for the metrics: <b>{missingMetrics.join(
|
||||||
", ",
|
", ",
|
||||||
)}
|
)}</b>
|
||||||
</p>
|
</p>
|
||||||
{/if}
|
{/if}
|
||||||
{#if missingHosts.length > 0}
|
{#if missingHosts.length > 0}
|
||||||
<p>Some metrics are missing for the following hosts:</p>
|
<p>Metrics are missing for the following hosts:</p>
|
||||||
<ul>
|
<ul>
|
||||||
{#each missingHosts as missing}
|
{#each missingHosts as missing}
|
||||||
<li>
|
<li>
|
||||||
{missing.hostname}: {missing.metrics.join(", ")}
|
<b>{missing.hostname}</b>: {missing.metrics.join(", ")}
|
||||||
</li>
|
</li>
|
||||||
{/each}
|
{/each}
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
@@ -37,6 +37,7 @@
|
|||||||
/* Const Init */
|
/* Const Init */
|
||||||
const { query: initq } = init();
|
const { query: initq } = init();
|
||||||
const ccconfig = getContext("cc-config");
|
const ccconfig = getContext("cc-config");
|
||||||
|
const matchedJobCompareLimit = 500;
|
||||||
|
|
||||||
/* State Init */
|
/* State Init */
|
||||||
let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
let filterComponent = $state(); // see why here: https://stackoverflow.com/questions/58287729/how-can-i-export-a-function-from-a-svelte-component-that-changes-a-value-in-the
|
||||||
@@ -154,8 +155,9 @@
|
|||||||
{#if !showCompare}
|
{#if !showCompare}
|
||||||
<TextFilter
|
<TextFilter
|
||||||
{presetProject}
|
{presetProject}
|
||||||
bind:authlevel
|
{authlevel}
|
||||||
bind:roles
|
{roles}
|
||||||
|
{filterBuffer}
|
||||||
setFilter={(filter) => filterComponent.updateFilters(filter)}
|
setFilter={(filter) => filterComponent.updateFilters(filter)}
|
||||||
/>
|
/>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -169,12 +171,12 @@
|
|||||||
{/if}
|
{/if}
|
||||||
<div class="mx-1"></div>
|
<div class="mx-1"></div>
|
||||||
<ButtonGroup class="w-50">
|
<ButtonGroup class="w-50">
|
||||||
<Button color="primary" disabled={(matchedListJobs >= 500 && !(selectedJobs.length != 0)) || $initq.fetching} onclick={() => {
|
<Button color="primary" disabled={(matchedListJobs >= matchedJobCompareLimit && !(selectedJobs.length != 0)) || $initq.fetching} onclick={() => {
|
||||||
if (selectedJobs.length != 0) filterComponent.updateFilters({dbId: selectedJobs}, true)
|
if (selectedJobs.length != 0) filterComponent.updateFilters({dbId: selectedJobs})
|
||||||
showCompare = !showCompare
|
showCompare = !showCompare
|
||||||
}} >
|
}} >
|
||||||
{showCompare ? 'Return to List' :
|
{showCompare ? 'Return to List' :
|
||||||
matchedListJobs >= 500 && selectedJobs.length == 0
|
matchedListJobs >= matchedJobCompareLimit && selectedJobs.length == 0
|
||||||
? 'Compare Disabled'
|
? 'Compare Disabled'
|
||||||
: 'Compare' + (selectedJobs.length != 0 ? ` ${selectedJobs.length} ` : ' ') + 'Jobs'
|
: 'Compare' + (selectedJobs.length != 0 ? ` ${selectedJobs.length} ` : ' ') + 'Jobs'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,6 +22,8 @@
|
|||||||
Icon,
|
Icon,
|
||||||
Spinner,
|
Spinner,
|
||||||
Card,
|
Card,
|
||||||
|
CardHeader,
|
||||||
|
CardBody
|
||||||
} from "@sveltestrap/sveltestrap";
|
} from "@sveltestrap/sveltestrap";
|
||||||
import {
|
import {
|
||||||
queryStore,
|
queryStore,
|
||||||
@@ -174,7 +176,11 @@
|
|||||||
<InputGroupText><Icon name="clipboard2-pulse" /></InputGroupText>
|
<InputGroupText><Icon name="clipboard2-pulse" /></InputGroupText>
|
||||||
<InputGroupText>Node State</InputGroupText>
|
<InputGroupText>Node State</InputGroupText>
|
||||||
<Button class="flex-grow-1 text-center" color={stateColors[thisNodeState]} disabled>
|
<Button class="flex-grow-1 text-center" color={stateColors[thisNodeState]} disabled>
|
||||||
{thisNodeState}
|
{#if $nodeMetricsData?.data}
|
||||||
|
{thisNodeState}
|
||||||
|
{:else}
|
||||||
|
<span><Spinner size="sm" secondary/></span>
|
||||||
|
{/if}
|
||||||
</Button>
|
</Button>
|
||||||
</InputGroup>
|
</InputGroup>
|
||||||
</Col>
|
</Col>
|
||||||
@@ -254,12 +260,15 @@
|
|||||||
></Card
|
></Card
|
||||||
>
|
>
|
||||||
{:else}
|
{:else}
|
||||||
<Card
|
<Card color="warning" class="mx-2">
|
||||||
style="margin-left: 2rem;margin-right: 2rem;"
|
<CardHeader class="mb-0">
|
||||||
body
|
<b>Missing Metric</b>
|
||||||
color="warning"
|
</CardHeader>
|
||||||
>No dataset returned for <code>{item.name}</code></Card
|
<CardBody>
|
||||||
>
|
<p>No dataset returned for <b>{item.name}</b>.</p>
|
||||||
|
<p class="mb-1">Metric was not found in metric store for cluster <b>{cluster}</b>.</p>
|
||||||
|
</CardBody>
|
||||||
|
</Card>
|
||||||
{/if}
|
{/if}
|
||||||
{/snippet}
|
{/snippet}
|
||||||
|
|
||||||
|
|||||||
@@ -65,10 +65,6 @@
|
|||||||
let timeoutId = null;
|
let timeoutId = null;
|
||||||
|
|
||||||
/* State Init */
|
/* State Init */
|
||||||
// svelte-ignore state_referenced_locally
|
|
||||||
let to = $state(presetTo || new Date(Date.now()));
|
|
||||||
// svelte-ignore state_referenced_locally
|
|
||||||
let from = $state(presetFrom || new Date(nowDate.setHours(nowDate.getHours() - 4)));
|
|
||||||
let selectedResolution = $state(resampleConfig ? resampleDefault : 0);
|
let selectedResolution = $state(resampleConfig ? resampleDefault : 0);
|
||||||
let hostnameFilter = $state("");
|
let hostnameFilter = $state("");
|
||||||
let hoststateFilter = $state("all");
|
let hoststateFilter = $state("all");
|
||||||
@@ -76,6 +72,8 @@
|
|||||||
let isMetricsSelectionOpen = $state(false);
|
let isMetricsSelectionOpen = $state(false);
|
||||||
|
|
||||||
/* Derived States */
|
/* Derived States */
|
||||||
|
let to = $derived(presetTo ? presetTo : new Date(Date.now()));
|
||||||
|
let from = $derived(presetFrom ? presetFrom : new Date(nowDate.setHours(nowDate.getHours() - 4)));
|
||||||
const displayNodeOverview = $derived((displayType === 'OVERVIEW'));
|
const displayNodeOverview = $derived((displayType === 'OVERVIEW'));
|
||||||
const systemMetrics = $derived($initialized ? [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))] : []);
|
const systemMetrics = $derived($initialized ? [...globalMetrics.filter((gm) => gm?.availability.find((av) => av.cluster == cluster))] : []);
|
||||||
const presetSystemUnits = $derived(loadUnits(systemMetrics));
|
const presetSystemUnits = $derived(loadUnits(systemMetrics));
|
||||||
|
|||||||
@@ -241,12 +241,15 @@
|
|||||||
if (filters.project) opts.push(`project=${filters.project}`);
|
if (filters.project) opts.push(`project=${filters.project}`);
|
||||||
if (filters.project && filters.projectMatch != "contains") // "contains" is default-case
|
if (filters.project && filters.projectMatch != "contains") // "contains" is default-case
|
||||||
opts.push(`projectMatch=${filters.projectMatch}`);
|
opts.push(`projectMatch=${filters.projectMatch}`);
|
||||||
if (filters.user.length != 0)
|
if (filters.user) {
|
||||||
if (filters.userMatch != "in") {
|
if (filters.user.length != 0) {
|
||||||
opts.push(`user=${filters.user}`);
|
if (filters.userMatch != "in") {
|
||||||
} else {
|
opts.push(`user=${filters.user}`);
|
||||||
for (let singleUser of filters.user) opts.push(`user=${singleUser}`);
|
} else {
|
||||||
|
for (let singleUser of filters.user) opts.push(`user=${singleUser}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (filters.userMatch != "contains") // "contains" is default-case
|
if (filters.userMatch != "contains") // "contains" is default-case
|
||||||
opts.push(`userMatch=${filters.userMatch}`);
|
opts.push(`userMatch=${filters.userMatch}`);
|
||||||
// Filter Modals
|
// Filter Modals
|
||||||
|
|||||||
@@ -2,9 +2,10 @@
|
|||||||
@component Search Field for Job-Lists with separate mode if project filter is active
|
@component Search Field for Job-Lists with separate mode if project filter is active
|
||||||
|
|
||||||
Properties:
|
Properties:
|
||||||
- `presetProject String?`: Currently active project filter [Default: '']
|
- `presetProject String?`: Currently active project filter preset [Default: '']
|
||||||
- `authlevel Number?`: The current users authentication level [Default: null]
|
- `authlevel Number?`: The current user's authentication level [Default: null]
|
||||||
- `roles [Number]?`: Enum containing available roles [Default: null]
|
- `roles [Number]?`: Enum containing available roles [Default: null]
|
||||||
|
- `filterBuffer [Obj]?`: Currently active filters, if any [Default: []]
|
||||||
- `setFilter Func`: The callback function to apply current filter selection
|
- `setFilter Func`: The callback function to apply current filter selection
|
||||||
-->
|
-->
|
||||||
|
|
||||||
@@ -18,78 +19,69 @@
|
|||||||
presetProject = "",
|
presetProject = "",
|
||||||
authlevel = null,
|
authlevel = null,
|
||||||
roles = null,
|
roles = null,
|
||||||
|
filterBuffer = [],
|
||||||
setFilter
|
setFilter
|
||||||
} = $props();
|
} = $props();
|
||||||
|
|
||||||
/* Const Init*/
|
/* Const Init*/
|
||||||
const throttle = 500;
|
const throttle = 300;
|
||||||
|
|
||||||
/* Var Init */
|
/* Var Init */
|
||||||
let user = "";
|
|
||||||
let jobName = "";
|
|
||||||
let timeoutId = null;
|
let timeoutId = null;
|
||||||
|
|
||||||
/* State Init */
|
/* Derived */
|
||||||
let term = $state("");
|
const bufferProject = $derived.by(() => {
|
||||||
|
let bp = filterBuffer.find((fb) =>
|
||||||
|
Object.keys(fb).includes("project")
|
||||||
|
)
|
||||||
|
return bp?.project?.contains || null
|
||||||
|
});
|
||||||
|
|
||||||
/* Derived */
|
const bufferUser = $derived.by(() => {
|
||||||
let project = $derived(presetProject ? presetProject : "");
|
let bu = filterBuffer.find((fb) =>
|
||||||
let mode = $derived(presetProject ? "jobName" : "project");
|
Object.keys(fb).includes("user")
|
||||||
|
)
|
||||||
|
return bu?.user?.contains || null
|
||||||
|
});
|
||||||
|
|
||||||
|
const bufferJobName = $derived.by(() => {
|
||||||
|
let bjn = filterBuffer.find((fb) =>
|
||||||
|
Object.keys(fb).includes("jobName")
|
||||||
|
)
|
||||||
|
return bjn?.jobName?.contains || null
|
||||||
|
});
|
||||||
|
|
||||||
|
let mode = $derived.by(() => {
|
||||||
|
if (presetProject) return "jobName" // Search by jobName if presetProject set
|
||||||
|
else if (bufferUser) return "user"
|
||||||
|
else if (bufferJobName) return "jobName"
|
||||||
|
else return "project"
|
||||||
|
});
|
||||||
|
|
||||||
|
let term = $derived(bufferUser || bufferJobName || bufferProject || "");
|
||||||
|
|
||||||
/* Functions */
|
/* Functions */
|
||||||
function modeChanged() {
|
function inputChanged(sleep = throttle) {
|
||||||
|
if (timeoutId != null) clearTimeout(timeoutId);
|
||||||
if (mode == "user") {
|
if (mode == "user") {
|
||||||
project = presetProject ? presetProject : "";
|
timeoutId = setTimeout(() => {
|
||||||
jobName = "";
|
setFilter({ user: term, project: (presetProject ? presetProject : null), jobName: null });
|
||||||
|
}, sleep);
|
||||||
} else if (mode == "project") {
|
} else if (mode == "project") {
|
||||||
user = "";
|
|
||||||
jobName = "";
|
|
||||||
} else {
|
|
||||||
project = presetProject ? presetProject : "";
|
|
||||||
user = "";
|
|
||||||
}
|
|
||||||
termChanged(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compatibility: Handle "user role" and "no role" identically
|
|
||||||
function termChanged(sleep = throttle) {
|
|
||||||
if (roles && authlevel >= roles.manager) {
|
|
||||||
if (mode == "user") user = term;
|
|
||||||
else if (mode == "project") project = term;
|
|
||||||
else jobName = term;
|
|
||||||
|
|
||||||
if (timeoutId != null) clearTimeout(timeoutId);
|
|
||||||
|
|
||||||
timeoutId = setTimeout(() => {
|
timeoutId = setTimeout(() => {
|
||||||
setFilter({
|
setFilter({ project: term, user: null, jobName: null });
|
||||||
user,
|
}, sleep);
|
||||||
project,
|
} else if (mode == "jobName") {
|
||||||
jobName
|
|
||||||
});
|
|
||||||
}, sleep);
|
|
||||||
} else {
|
|
||||||
if (mode == "project") project = term;
|
|
||||||
else jobName = term;
|
|
||||||
|
|
||||||
if (timeoutId != null) clearTimeout(timeoutId);
|
|
||||||
|
|
||||||
timeoutId = setTimeout(() => {
|
timeoutId = setTimeout(() => {
|
||||||
setFilter({
|
setFilter({ jobName: term, user: null, project: (presetProject ? presetProject : null) });
|
||||||
project,
|
}, sleep);
|
||||||
jobName
|
|
||||||
});
|
|
||||||
}, sleep);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function resetProject () {
|
function resetProject () {
|
||||||
mode = "project"
|
presetProject = "";
|
||||||
term = ""
|
term = "";
|
||||||
presetProject = ""
|
inputChanged(0);
|
||||||
project = ""
|
|
||||||
jobName = ""
|
|
||||||
user = ""
|
|
||||||
termChanged(0);
|
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@@ -100,12 +92,12 @@
|
|||||||
class="form-select w-auto"
|
class="form-select w-auto"
|
||||||
title="Search Mode"
|
title="Search Mode"
|
||||||
bind:value={mode}
|
bind:value={mode}
|
||||||
onchange={modeChanged}
|
onchange={() => inputChanged()}
|
||||||
>
|
>
|
||||||
{#if !presetProject}
|
{#if !presetProject}
|
||||||
<option value={"project"}>Project</option>
|
<option value={"project"}>Project</option>
|
||||||
{/if}
|
{/if}
|
||||||
{#if roles && authlevel >= roles.manager}
|
{#if roles && authlevel >= roles?.manager}
|
||||||
<option value={"user"}>User</option>
|
<option value={"user"}>User</option>
|
||||||
{/if}
|
{/if}
|
||||||
<option value={"jobName"}>Jobname</option>
|
<option value={"jobName"}>Jobname</option>
|
||||||
@@ -113,8 +105,8 @@
|
|||||||
<Input
|
<Input
|
||||||
type="text"
|
type="text"
|
||||||
bind:value={term}
|
bind:value={term}
|
||||||
onchange={() => termChanged()}
|
onchange={() => inputChanged()}
|
||||||
onkeyup={(event) => termChanged(event.key == "Enter" ? 0 : throttle)}
|
onkeyup={(event) => inputChanged(event.key == "Enter" ? 0 : throttle)}
|
||||||
placeholder={presetProject ? `Find in ${scrambleNames ? scramble(presetProject) : presetProject} ...` : `Find ${mode} ...`}
|
placeholder={presetProject ? `Find in ${scrambleNames ? scramble(presetProject) : presetProject} ...` : `Find ${mode} ...`}
|
||||||
/>
|
/>
|
||||||
{#if presetProject}
|
{#if presetProject}
|
||||||
|
|||||||
@@ -229,7 +229,12 @@
|
|||||||
></Card
|
></Card
|
||||||
>
|
>
|
||||||
{:else}
|
{:else}
|
||||||
<Card body color="warning">No dataset returned</Card>
|
<Card body class="mx-2" color="warning">
|
||||||
|
<p>No dataset(s) returned for <b>{metrics[i]}</b></p>
|
||||||
|
<p class="mb-1">Metric or host was not found in metric store for cluster <b>{job.cluster}</b>:</p>
|
||||||
|
<p class="mb-1">Identical messages in <i>{metrics[i]} column</i>: Metric not found.</p>
|
||||||
|
<p class="mb-1">Identical messages in <i>job {job.jobId} row</i>: Host not found.</p>
|
||||||
|
</Card>
|
||||||
{/if}
|
{/if}
|
||||||
</td>
|
</td>
|
||||||
{/each}
|
{/each}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@
|
|||||||
metricData,
|
metricData,
|
||||||
timestep,
|
timestep,
|
||||||
numNodes,
|
numNodes,
|
||||||
cluster,
|
cluster = "",
|
||||||
forNode = true,
|
forNode = true,
|
||||||
enableFlip = false,
|
enableFlip = false,
|
||||||
publicMode = false,
|
publicMode = false,
|
||||||
@@ -316,12 +316,14 @@
|
|||||||
<div bind:this={plotWrapper} bind:clientWidth={width}
|
<div bind:this={plotWrapper} bind:clientWidth={width}
|
||||||
class={forNode ? 'py-2 rounded' : 'rounded'}
|
class={forNode ? 'py-2 rounded' : 'rounded'}
|
||||||
></div>
|
></div>
|
||||||
{:else if cluster}
|
|
||||||
<Card body color="warning" class="mx-4"
|
|
||||||
>Cannot render plot: No series data returned for <code>{cluster}</code>.</Card
|
|
||||||
>
|
|
||||||
{:else}
|
{:else}
|
||||||
<Card body color="warning" class="mx-4"
|
<Card color="warning" class="mx-2 mt-2">
|
||||||
>Cannot render plot: No series data returned.</Card
|
<CardHeader class="mb-0">
|
||||||
>
|
<b>Empty Metrics</b>
|
||||||
|
</CardHeader>
|
||||||
|
<CardBody>
|
||||||
|
<p>Cannot render plot for cluster <b>{cluster}</b>.</p>
|
||||||
|
<p class="mb-1">Metrics found but returned without timeseries data.</p>
|
||||||
|
</CardBody>
|
||||||
|
</Card>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -27,7 +27,7 @@
|
|||||||
import uPlot from "uplot";
|
import uPlot from "uplot";
|
||||||
import { formatNumber, formatDurationTime } from "../units.js";
|
import { formatNumber, formatDurationTime } from "../units.js";
|
||||||
import { getContext, onMount, onDestroy } from "svelte";
|
import { getContext, onMount, onDestroy } from "svelte";
|
||||||
import { Card } from "@sveltestrap/sveltestrap";
|
import { Card, CardBody, CardHeader } from "@sveltestrap/sveltestrap";
|
||||||
|
|
||||||
/* Svelte 5 Props */
|
/* Svelte 5 Props */
|
||||||
let {
|
let {
|
||||||
@@ -633,7 +633,13 @@
|
|||||||
style="background-color: {backgroundColor()};" class={forNode ? 'py-2 rounded' : 'rounded'}
|
style="background-color: {backgroundColor()};" class={forNode ? 'py-2 rounded' : 'rounded'}
|
||||||
></div>
|
></div>
|
||||||
{:else}
|
{:else}
|
||||||
<Card body color="warning" class="mx-4"
|
<Card color="warning" class={forNode ? 'mx-2' : 'mt-2'}>
|
||||||
>Cannot render plot: No series data returned for <code>{metric}</code></Card
|
<CardHeader class="mb-0">
|
||||||
>
|
<b>Empty Metric</b>
|
||||||
|
</CardHeader>
|
||||||
|
<CardBody>
|
||||||
|
<p>Cannot render plot for <b>{metric}</b>.</p>
|
||||||
|
<p class="mb-1">Metric found but returned without timeseries data.</p>
|
||||||
|
</CardBody>
|
||||||
|
</Card>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
@@ -67,6 +67,11 @@
|
|||||||
reserved: "rgba(255, 0, 255, 0.75)",
|
reserved: "rgba(255, 0, 255, 0.75)",
|
||||||
mixed: "rgba(255, 215, 0, 0.75)",
|
mixed: "rgba(255, 215, 0, 0.75)",
|
||||||
unknown: "rgba(0, 0, 0, 0.75)"
|
unknown: "rgba(0, 0, 0, 0.75)"
|
||||||
|
},
|
||||||
|
healthStates: {
|
||||||
|
full: "rgba(0, 128, 0, 0.75)",
|
||||||
|
failed: "rgba(255, 0, 0, 0.75)",
|
||||||
|
partial: "rgba(255, 215, 0, 0.75)",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
</script>
|
</script>
|
||||||
|
|||||||
@@ -46,6 +46,7 @@
|
|||||||
/* Const Init */
|
/* Const Init */
|
||||||
const lineWidth = 2 // clusterCockpitConfig.plotConfiguration_lineWidth;
|
const lineWidth = 2 // clusterCockpitConfig.plotConfiguration_lineWidth;
|
||||||
const cbmode = clusterCockpitConfig?.plotConfiguration_colorblindMode || false;
|
const cbmode = clusterCockpitConfig?.plotConfiguration_colorblindMode || false;
|
||||||
|
const bubbleSizeMax = 50;
|
||||||
|
|
||||||
/* Var Init */
|
/* Var Init */
|
||||||
let timeoutId = null;
|
let timeoutId = null;
|
||||||
@@ -317,8 +318,13 @@
|
|||||||
size = sizeBase + scaling
|
size = sizeBase + scaling
|
||||||
// Nodes: Size based on Jobcount
|
// Nodes: Size based on Jobcount
|
||||||
} else if (nodesData) {
|
} else if (nodesData) {
|
||||||
size = sizeBase + (nodesData[i]?.numJobs * 1.5) // Max Jobs Scale: 8 * 1.5 = 12
|
size = sizeBase + (nodesData[i]?.numJobs * 1.5)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Apply Size Capping
|
||||||
|
if (size >= bubbleSizeMax) {
|
||||||
|
size = bubbleSizeMax;
|
||||||
|
}
|
||||||
|
|
||||||
if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) {
|
if (xVal >= filtLft && xVal <= filtRgt && yVal >= filtBtm && yVal <= filtTop) {
|
||||||
let cx = valToPosX(xVal, scaleX, xDim, xOff);
|
let cx = valToPosX(xVal, scaleX, xDim, xOff);
|
||||||
|
|||||||
@@ -46,13 +46,13 @@
|
|||||||
/* Derived */
|
/* Derived */
|
||||||
let timeRange = $derived.by(() => {
|
let timeRange = $derived.by(() => {
|
||||||
if (presetTo && presetFrom) {
|
if (presetTo && presetFrom) {
|
||||||
return ((presetTo.getTime() - presetFrom.getTime()) / 1000)
|
return Math.floor(((presetTo.getTime() - presetFrom.getTime()) / 1000))
|
||||||
} else {
|
} else {
|
||||||
return ((defaultTo.getTime() - defaultFrom.getTime()) / 1000)
|
return Math.floor(((defaultTo.getTime() - defaultFrom.getTime()) / 1000))
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
let unknownRange = $derived(!Object.values(options).includes(timeRange));
|
let unknownRange = $derived(!Object.values(options).includes(timeRange));
|
||||||
|
|
||||||
/* Functions */
|
/* Functions */
|
||||||
function updateTimeRange() {
|
function updateTimeRange() {
|
||||||
let now = Date.now();
|
let now = Date.now();
|
||||||
|
|||||||
@@ -17,11 +17,26 @@ export function formatNumber(x) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Re-scale a single prefixed number to the best fitting prefix.
 *
 * The value is first converted to its raw magnitude using the factor that
 * belongs to prefix `p`, then expressed with the prefix whose factor range
 * contains that magnitude, rounded to two decimals.
 *
 * Relies on the module-level `prefix` and `power` lookup tables.
 *
 * @param {number|string|null} x Value expressed in units of prefix `p`.
 * @param {string} [p=''] Prefix the value is currently expressed in.
 * @returns {string} `"<value> <prefix>"`; the unit base is NOT appended.
 *   Inputs that cannot be scaled are echoed back as `"<x> <p>"`.
 */
export function scaleNumber(x, p = '') {
  // Non-numeric strings, NaN, undefined and null are echoed back unchanged.
  if (isNaN(x) || x == null) {
    return `${x} ${p}`
  }

  // Unknown prefix: no conversion factor exists, so keep the input as is
  // instead of doing arithmetic on an undefined factor.
  const oldIndex = prefix.indexOf(p)
  if (oldIndex < 0) {
    return `${x} ${p}`
  }

  const rawValue = x * power[oldIndex]
  // Select the bucket by magnitude so negative values are scaled as well.
  const magnitude = Math.abs(rawValue)

  for (let i = 0; i < prefix.length; i++) {
    // A missing upper bound means this is the largest known prefix;
    // treat its range as open-ended rather than never matching.
    const upperBound = power[i + 1] ?? Infinity
    if (power[i] <= magnitude && magnitude < upperBound) {
      return `${Math.round((rawValue / power[i]) * 100) / 100} ${prefix[i]}`
    }
  }

  // Zero and magnitudes outside every factor range fall through unscaled.
  return `${x} ${p}`
}
|
||||||
|
|
||||||
export function roundTwoDigits(x) {
|
export function roundTwoDigits(x) {
|
||||||
return Math.round(x * 100) / 100
|
return Math.round(x * 100) / 100
|
||||||
}
|
}
|
||||||
|
|
||||||
export function scaleNumbers(x, y , p = '') {
|
export function scaleNumbers(x, y, p = '') {
|
||||||
const oldPower = power[prefix.indexOf(p)]
|
const oldPower = power[prefix.indexOf(p)]
|
||||||
const rawXValue = x * oldPower
|
const rawXValue = x * oldPower
|
||||||
const rawYValue = y * oldPower
|
const rawYValue = y * oldPower
|
||||||
|
|||||||
@@ -55,6 +55,7 @@
|
|||||||
function setupAvailable(data) {
|
function setupAvailable(data) {
|
||||||
let pendingAvailable = {};
|
let pendingAvailable = {};
|
||||||
if (data) {
|
if (data) {
|
||||||
|
// Returns Only For Available Metrics
|
||||||
for (let d of data) {
|
for (let d of data) {
|
||||||
if (!pendingAvailable[d.name]) {
|
if (!pendingAvailable[d.name]) {
|
||||||
pendingAvailable[d.name] = [d.scope]
|
pendingAvailable[d.name] = [d.scope]
|
||||||
@@ -90,13 +91,16 @@
|
|||||||
pendingTableData[host] = {};
|
pendingTableData[host] = {};
|
||||||
};
|
};
|
||||||
for (const metric of sm) {
|
for (const metric of sm) {
|
||||||
if (!pendingTableData[host][metric]) {
|
// Only Returned, Available Metrics
|
||||||
pendingTableData[host][metric] = {};
|
if (as[metric]) {
|
||||||
};
|
if (!pendingTableData[host][metric]) {
|
||||||
for (const scope of as[metric]) {
|
pendingTableData[host][metric] = {};
|
||||||
pendingTableData[host][metric][scope] = js.find((d) => d.name == metric && d.scope == scope)
|
};
|
||||||
?.stats.filter((st) => st.hostname == host && st.data != null)
|
for (const scope of as[metric]) {
|
||||||
?.sort((a, b) => a.id - b.id) || []
|
pendingTableData[host][metric][scope] = js.find((d) => d.name == metric && d.scope == scope)
|
||||||
|
?.stats.filter((st) => st.hostname == host && st.data != null)
|
||||||
|
?.sort((a, b) => a.id - b.id) || []
|
||||||
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@@ -136,40 +140,56 @@
|
|||||||
<th></th>
|
<th></th>
|
||||||
{#each selectedMetrics as metric}
|
{#each selectedMetrics as metric}
|
||||||
<!-- To Match Row-2 Header Field Count-->
|
<!-- To Match Row-2 Header Field Count-->
|
||||||
<th colspan={selectedScopes[metric] == "node" ? 3 : 4}>
|
{#if availableScopes[metric]}
|
||||||
<InputGroup>
|
<th colspan={selectedScopes[metric] == "node" ? 3 : 4}>
|
||||||
<InputGroupText>
|
<InputGroup>
|
||||||
{metric}
|
<InputGroupText>
|
||||||
</InputGroupText>
|
{metric}
|
||||||
<Input type="select" bind:value={selectedScopes[metric]} disabled={availableScopes[metric]?.length === 1}>
|
</InputGroupText>
|
||||||
{#each (availableScopes[metric] || []) as scope}
|
<Input type="select" bind:value={selectedScopes[metric]} disabled={availableScopes[metric]?.length === 1}>
|
||||||
<option value={scope}>{scope}</option>
|
{#each (availableScopes[metric] || []) as scope}
|
||||||
{/each}
|
<option value={scope}>{scope}</option>
|
||||||
</Input>
|
{/each}
|
||||||
</InputGroup>
|
</Input>
|
||||||
</th>
|
</InputGroup>
|
||||||
|
</th>
|
||||||
|
{:else}
|
||||||
|
<th>
|
||||||
|
<InputGroup>
|
||||||
|
<InputGroupText>
|
||||||
|
{metric}
|
||||||
|
</InputGroupText>
|
||||||
|
</InputGroup>
|
||||||
|
</th>
|
||||||
|
{/if}
|
||||||
{/each}
|
{/each}
|
||||||
</tr>
|
</tr>
|
||||||
<!-- Header Row 2: Fields -->
|
<!-- Header Row 2: Fields -->
|
||||||
<tr>
|
<tr>
|
||||||
<th>Node</th>
|
<th>Node</th>
|
||||||
{#each selectedMetrics as metric}
|
{#each selectedMetrics as metric}
|
||||||
{#if selectedScopes[metric] != "node"}
|
{#if availableScopes[metric]}
|
||||||
<th>Id</th>
|
{#if selectedScopes[metric] != "node"}
|
||||||
{/if}
|
<th>Id</th>
|
||||||
{#each ["min", "avg", "max"] as stat}
|
{/if}
|
||||||
<th onclick={() => sortBy(metric, stat)}>
|
{#each ["min", "avg", "max"] as stat}
|
||||||
{stat}
|
<th onclick={() => sortBy(metric, stat)}>
|
||||||
{#if selectedScopes[metric] == "node"}
|
{stat}
|
||||||
<Icon
|
{#if selectedScopes[metric] == "node"}
|
||||||
name="caret-{sorting[metric][stat].dir}{sorting[metric][stat]
|
<Icon
|
||||||
.active
|
name="caret-{sorting[metric][stat].dir}{sorting[metric][stat]
|
||||||
? '-fill'
|
.active
|
||||||
: ''}"
|
? '-fill'
|
||||||
/>
|
: ''}"
|
||||||
{/if}
|
/>
|
||||||
|
{/if}
|
||||||
|
</th>
|
||||||
|
{/each}
|
||||||
|
{:else}
|
||||||
|
<th class="table-warning">
|
||||||
|
Missing Metric
|
||||||
</th>
|
</th>
|
||||||
{/each}
|
{/if}
|
||||||
{/each}
|
{/each}
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
@@ -178,10 +198,17 @@
|
|||||||
<tr>
|
<tr>
|
||||||
<th scope="col">{host}</th>
|
<th scope="col">{host}</th>
|
||||||
{#each selectedMetrics as metric (metric)}
|
{#each selectedMetrics as metric (metric)}
|
||||||
<StatsTableEntry
|
{#if tableData[host][metric]}
|
||||||
data={tableData[host][metric][selectedScopes[metric]]}
|
<StatsTableEntry
|
||||||
scope={selectedScopes[metric]}
|
data={tableData[host][metric][selectedScopes[metric]]}
|
||||||
/>
|
scope={selectedScopes[metric]}
|
||||||
|
/>
|
||||||
|
{:else}
|
||||||
|
<td class="table-warning" style="max-width:10rem;">
|
||||||
|
<p>No dataset(s) returned for <b>{metric}</b>.</p>
|
||||||
|
<p>Metric was not found in metric store for host <b>{host}</b>.</p>
|
||||||
|
</td>
|
||||||
|
{/if}
|
||||||
{/each}
|
{/each}
|
||||||
</tr>
|
</tr>
|
||||||
{/each}
|
{/each}
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
import {
|
import {
|
||||||
formatDurationTime,
|
formatDurationTime,
|
||||||
formatNumber,
|
formatNumber,
|
||||||
|
scaleNumber
|
||||||
} from "../generic/units.js";
|
} from "../generic/units.js";
|
||||||
import {
|
import {
|
||||||
Row,
|
Row,
|
||||||
@@ -250,9 +251,11 @@
|
|||||||
if (!rawInfos['totalAccs']) rawInfos['totalAccs'] = (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;
|
if (!rawInfos['totalAccs']) rawInfos['totalAccs'] = (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;
|
||||||
else rawInfos['totalAccs'] += (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;
|
else rawInfos['totalAccs'] += (subCluster?.numberOfNodes * subCluster?.topology?.accelerators?.length) || 0;
|
||||||
|
|
||||||
// Units (Set Once)
|
// Unit Parts (Set Once)
|
||||||
if (!rawInfos['flopRateUnit']) rawInfos['flopRateUnit'] = subCluster.flopRateSimd.unit.prefix + subCluster.flopRateSimd.unit.base
|
if (!rawInfos['flopRateUnitBase']) rawInfos['flopRateUnitBase'] = subCluster.flopRateSimd.unit.base
|
||||||
if (!rawInfos['memBwRateUnit']) rawInfos['memBwRateUnit'] = subCluster.memoryBandwidth.unit.prefix + subCluster.memoryBandwidth.unit.base
|
if (!rawInfos['memBwRateUnitBase']) rawInfos['memBwRateUnitBase'] = subCluster.memoryBandwidth.unit.base
|
||||||
|
if (!rawInfos['flopRateUnitPrefix']) rawInfos['flopRateUnitPrefix'] = subCluster.flopRateSimd.unit.prefix
|
||||||
|
if (!rawInfos['memBwRateUnitPrefix']) rawInfos['memBwRateUnitPrefix'] = subCluster.memoryBandwidth.unit.prefix
|
||||||
|
|
||||||
// Get Maxima For Roofline Knee Render
|
// Get Maxima For Roofline Knee Render
|
||||||
if (!rawInfos['roofData']) {
|
if (!rawInfos['roofData']) {
|
||||||
@@ -268,10 +271,14 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get Idle Infos after Sums
|
// Get Simple Idle Infos after Sums by Diff
|
||||||
if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes'];
|
if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes'];
|
||||||
if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores'];
|
if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores'];
|
||||||
if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs'];
|
if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs'];
|
||||||
|
// Cap at 0 (Negative hints towards Config <> Reality Mismatch!)
|
||||||
|
if (rawInfos['idleNodes'] < 0) rawInfos['idleNodes'] = 0;
|
||||||
|
if (rawInfos['idleCores'] < 0) rawInfos['idleCores'] = 0;
|
||||||
|
if (rawInfos['idleAccs'] < 0) rawInfos['idleAccs'] = 0;
|
||||||
|
|
||||||
// Keymetrics (Data on Cluster-Scope)
|
// Keymetrics (Data on Cluster-Scope)
|
||||||
let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>
|
let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) =>
|
||||||
@@ -418,12 +425,10 @@
|
|||||||
</tr>
|
</tr>
|
||||||
<tr class="pb-2">
|
<tr class="pb-2">
|
||||||
<td style="font-size:x-large;">
|
<td style="font-size:x-large;">
|
||||||
{clusterInfo?.flopRate}
|
{scaleNumber(clusterInfo?.flopRate, clusterInfo?.flopRateUnitPrefix)}{clusterInfo?.flopRateUnitBase}
|
||||||
{clusterInfo?.flopRateUnit}
|
|
||||||
</td>
|
</td>
|
||||||
<td style="font-size:x-large;">
|
<td style="font-size:x-large;">
|
||||||
{clusterInfo?.memBwRate}
|
{scaleNumber(clusterInfo?.memBwRate, clusterInfo?.memBwRateUnitPrefix)}{clusterInfo?.memBwRateUnitBase}
|
||||||
{clusterInfo?.memBwRateUnit}
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<hr class="my-1"/>
|
<hr class="my-1"/>
|
||||||
|
|||||||
@@ -23,7 +23,7 @@
|
|||||||
gql,
|
gql,
|
||||||
getContextClient,
|
getContextClient,
|
||||||
} from "@urql/svelte";
|
} from "@urql/svelte";
|
||||||
import { formatDurationTime } from "../../generic/units.js";
|
import { formatDurationTime, scaleNumber } from "../../generic/units.js";
|
||||||
import Refresher from "../../generic/helper/Refresher.svelte";
|
import Refresher from "../../generic/helper/Refresher.svelte";
|
||||||
import TimeSelection from "../../generic/select/TimeSelection.svelte";
|
import TimeSelection from "../../generic/select/TimeSelection.svelte";
|
||||||
import Roofline from "../../generic/plots/Roofline.svelte";
|
import Roofline from "../../generic/plots/Roofline.svelte";
|
||||||
@@ -418,7 +418,7 @@
|
|||||||
{:else if $statesTimed.error}
|
{:else if $statesTimed.error}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">{$statesTimed.error.message}</Card>
|
<Card body color="danger">States Timed: {$statesTimed.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statesTimed.data}
|
{:else if $statesTimed.data}
|
||||||
@@ -472,7 +472,7 @@
|
|||||||
{:else if $statusQuery.error}
|
{:else if $statusQuery.error}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">{$statesTimed.error.message}</Card>
|
<Card body color="danger">Status Query (States): {$statesTimed.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery?.data?.nodeStates}
|
{:else if $statusQuery?.data?.nodeStates}
|
||||||
@@ -484,7 +484,6 @@
|
|||||||
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States
|
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node States
|
||||||
</h4>
|
</h4>
|
||||||
<Pie
|
<Pie
|
||||||
{useAltColors}
|
|
||||||
canvasId="hpcpie-slurm"
|
canvasId="hpcpie-slurm"
|
||||||
size={pieWidth * 0.55}
|
size={pieWidth * 0.55}
|
||||||
sliceLabel="Nodes"
|
sliceLabel="Nodes"
|
||||||
@@ -494,6 +493,9 @@
|
|||||||
entities={refinedStateData.map(
|
entities={refinedStateData.map(
|
||||||
(sd) => sd.state,
|
(sd) => sd.state,
|
||||||
)}
|
)}
|
||||||
|
fixColors={refinedStateData.map(
|
||||||
|
(sd) => colors['nodeStates'][sd.state],
|
||||||
|
)}
|
||||||
/>
|
/>
|
||||||
{/key}
|
{/key}
|
||||||
</div>
|
</div>
|
||||||
@@ -508,7 +510,7 @@
|
|||||||
</tr>
|
</tr>
|
||||||
{#each refinedStateData as sd, i}
|
{#each refinedStateData as sd, i}
|
||||||
<tr>
|
<tr>
|
||||||
<td><Icon name="circle-fill" style="color: {legendColors(i)};"/></td>
|
<td><Icon name="circle-fill" style="color: {colors['nodeStates'][sd.state]};"/></td>
|
||||||
<td>{sd.state}</td>
|
<td>{sd.state}</td>
|
||||||
<td>{sd.count}</td>
|
<td>{sd.count}</td>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -524,15 +526,17 @@
|
|||||||
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
|
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
|
||||||
</h4>
|
</h4>
|
||||||
<Pie
|
<Pie
|
||||||
{useAltColors}
|
|
||||||
canvasId="hpcpie-health"
|
canvasId="hpcpie-health"
|
||||||
size={pieWidth * 0.55}
|
size={pieWidth * 0.55}
|
||||||
sliceLabel="Nodes"
|
sliceLabel="Nodes"
|
||||||
quantities={refinedHealthData.map(
|
quantities={refinedHealthData.map(
|
||||||
(sd) => sd.count,
|
(hd) => hd.count,
|
||||||
)}
|
)}
|
||||||
entities={refinedHealthData.map(
|
entities={refinedHealthData.map(
|
||||||
(sd) => sd.state,
|
(hd) => hd.state,
|
||||||
|
)}
|
||||||
|
fixColors={refinedHealthData.map(
|
||||||
|
(hd) => colors['healthStates'][hd.state],
|
||||||
)}
|
)}
|
||||||
/>
|
/>
|
||||||
{/key}
|
{/key}
|
||||||
@@ -548,7 +552,7 @@
|
|||||||
</tr>
|
</tr>
|
||||||
{#each refinedHealthData as hd, i}
|
{#each refinedHealthData as hd, i}
|
||||||
<tr>
|
<tr>
|
||||||
<td><Icon name="circle-fill" style="color: {legendColors(i)};" /></td>
|
<td><Icon name="circle-fill"style="color: {colors['healthStates'][hd.state]};" /></td>
|
||||||
<td>{hd.state}</td>
|
<td>{hd.state}</td>
|
||||||
<td>{hd.count}</td>
|
<td>{hd.count}</td>
|
||||||
</tr>
|
</tr>
|
||||||
@@ -570,7 +574,7 @@
|
|||||||
{:else if $statusQuery.error}
|
{:else if $statusQuery.error}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">{$statusQuery.error.message}</Card>
|
<Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery.data}
|
{:else if $statusQuery.data}
|
||||||
@@ -599,12 +603,10 @@
|
|||||||
</tr>
|
</tr>
|
||||||
<tr class="pb-2">
|
<tr class="pb-2">
|
||||||
<td style="font-size:x-large;">
|
<td style="font-size:x-large;">
|
||||||
{flopRate[subCluster.name]}
|
{scaleNumber(flopRate[subCluster.name], flopRateUnitPrefix[subCluster.name])}{flopRateUnitBase[subCluster.name]}
|
||||||
{flopRateUnitPrefix[subCluster.name]}{flopRateUnitBase[subCluster.name]}
|
|
||||||
</td>
|
</td>
|
||||||
<td colspan="2" style="font-size:x-large;">
|
<td colspan="2" style="font-size:x-large;">
|
||||||
{memBwRate[subCluster.name]}
|
{scaleNumber(memBwRate[subCluster.name], memBwRateUnitPrefix[subCluster.name])}{memBwRateUnitBase[subCluster.name]}
|
||||||
{memBwRateUnitPrefix[subCluster.name]}{memBwRateUnitBase[subCluster.name]}
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<hr class="my-1"/>
|
<hr class="my-1"/>
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
<script>
|
<script>
|
||||||
import { getContext } from "svelte";
|
import { getContext } from "svelte";
|
||||||
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
||||||
import { Row, Col, Card, Spinner, Badge } from "@sveltestrap/sveltestrap";
|
import { Row, Col, Card, CardHeader, CardBody, Spinner, Badge } from "@sveltestrap/sveltestrap";
|
||||||
import { checkMetricDisabled } from "../generic/utils.js";
|
import { checkMetricDisabled } from "../generic/utils.js";
|
||||||
import MetricPlot from "../generic/plots/MetricPlot.svelte";
|
import MetricPlot from "../generic/plots/MetricPlot.svelte";
|
||||||
|
|
||||||
@@ -156,37 +156,63 @@
|
|||||||
>
|
>
|
||||||
</h4>
|
</h4>
|
||||||
<span style="margin-right: 0.5rem;">
|
<span style="margin-right: 0.5rem;">
|
||||||
<Badge color={stateColors[item?.state? item.state : 'notindb']}>{item?.state? item.state : 'notindb'}</Badge>
|
<Badge color={stateColors[item?.state? item.state : 'notindb']}>
|
||||||
|
State: {item?.state? item.state.charAt(0).toUpperCase() + item.state.slice(1) : 'Not in DB'}
|
||||||
|
</Badge>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
{#if item.disabled === true}
|
{#if item?.data}
|
||||||
<Card body class="mx-3" color="info"
|
{#if item.disabled === true}
|
||||||
>Metric disabled for subcluster <code
|
<Card body class="mx-3" color="info"
|
||||||
>{selectedMetric}:{item.subCluster}</code
|
>Metric disabled for subcluster <code
|
||||||
></Card
|
>{selectedMetric}:{item.subCluster}</code
|
||||||
>
|
></Card
|
||||||
{:else if item.disabled === false}
|
>
|
||||||
<!-- "No Data"-Warning included in MetricPlot-Component -->
|
{:else if item.disabled === false}
|
||||||
<!-- #key: X-axis keeps last selected timerange otherwise -->
|
<!-- "No Data"-Warning included in MetricPlot-Component -->
|
||||||
{#key item.data[0].metric.series[0].data.length}
|
<!-- #key: X-axis keeps last selected timerange otherwise -->
|
||||||
<MetricPlot
|
{#key item.data[0].metric.series[0].data.length}
|
||||||
timestep={item.data[0].metric.timestep}
|
<MetricPlot
|
||||||
series={item.data[0].metric.series}
|
timestep={item.data[0].metric.timestep}
|
||||||
metric={item.data[0].name}
|
series={item.data[0].metric.series}
|
||||||
{cluster}
|
metric={item.data[0].name}
|
||||||
subCluster={item.subCluster}
|
{cluster}
|
||||||
forNode
|
subCluster={item.subCluster}
|
||||||
enableFlip
|
forNode
|
||||||
/>
|
enableFlip
|
||||||
{/key}
|
/>
|
||||||
{:else if item.disabled === null}
|
{/key}
|
||||||
<Card body class="mx-3" color="info">
|
{:else if item.disabled === null}
|
||||||
Global Metric List Not Initialized
|
<Card body class="mx-3" color="info">
|
||||||
Can not determine {selectedMetric} availability: Please Reload Page
|
Global Metric List Not Initialized
|
||||||
|
Can not determine {selectedMetric} availability: Please Reload Page
|
||||||
|
</Card>
|
||||||
|
{/if}
|
||||||
|
{:else}
|
||||||
|
<Card color="warning">
|
||||||
|
<CardHeader class="mb-0">
|
||||||
|
<b>Missing Metric</b>
|
||||||
|
</CardHeader>
|
||||||
|
<CardBody>
|
||||||
|
<p>No dataset(s) returned for <b>{selectedMetric}</b>.</p>
|
||||||
|
<p class="mb-1">Metric was not found in metric store for host <b>{item.host}</b>.</p>
|
||||||
|
</CardBody>
|
||||||
</Card>
|
</Card>
|
||||||
{/if}
|
{/if}
|
||||||
</Col>
|
</Col>
|
||||||
{/each}
|
{/each}
|
||||||
{/key}
|
{/key}
|
||||||
</Row>
|
</Row>
|
||||||
|
{:else}
|
||||||
|
<Row>
|
||||||
|
<Card color="warning">
|
||||||
|
<CardHeader class="mb-0">
|
||||||
|
<b>Missing Metric</b>
|
||||||
|
</CardHeader>
|
||||||
|
<CardBody>
|
||||||
|
<p>No datasets returned for <b>{selectedMetric}</b>.</p>
|
||||||
|
<p class="mb-1">Metric was not found in metric store for cluster <b>{cluster}</b>.</p>
|
||||||
|
</CardBody>
|
||||||
|
</Card>
|
||||||
|
</Row>
|
||||||
{/if}
|
{/if}
|
||||||
@@ -171,13 +171,18 @@
|
|||||||
{#key metricData}
|
{#key metricData}
|
||||||
<td>
|
<td>
|
||||||
{#if metricData?.disabled}
|
{#if metricData?.disabled}
|
||||||
<Card body class="mx-3" color="info"
|
<Card body class="mx-2" color="info"
|
||||||
>Metric disabled for subcluster <code
|
>Metric <b>{selectedMetrics[i]}</b> disabled for subcluster <code
|
||||||
>{metricData?.data?.name ? metricData.data.name : `Metric Index ${i}`}:{nodeData.subCluster}</code
|
>{nodeData.subCluster}</code
|
||||||
></Card
|
></Card
|
||||||
>
|
>
|
||||||
|
{:else if !metricData?.data}
|
||||||
|
<Card body class="mx-2" color="warning">
|
||||||
|
<p>No dataset(s) returned for <b>{selectedMetrics[i]}</b></p>
|
||||||
|
<p class="mb-1">Metric was not found in metric store for cluster <b>{cluster}</b>.</p>
|
||||||
|
</Card>
|
||||||
{:else if !metricData?.data?.name}
|
{:else if !metricData?.data?.name}
|
||||||
<Card body class="mx-3" color="warning"
|
<Card body class="mx-2" color="warning"
|
||||||
>Metric without name for subcluster <code
|
>Metric without name for subcluster <code
|
||||||
>{`Metric Index ${i}`}:{nodeData.subCluster}</code
|
>{`Metric Index ${i}`}:{nodeData.subCluster}</code
|
||||||
></Card
|
></Card
|
||||||
|
|||||||
Reference in New Issue
Block a user