diff --git a/internal/graph/util.go b/internal/graph/util.go index 4135ca72..dd5e388f 100644 --- a/internal/graph/util.go +++ b/internal/graph/util.go @@ -57,13 +57,13 @@ func (r *queryResolver) rooflineHeatmap( jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0) if err != nil { - cclog.Errorf("Error while loading roofline metrics for job %d", job.ID) + cclog.Warnf("Error while loading roofline metrics for job %d", job.ID) return nil, err } flops_, membw_ := jobdata["flops_any"], jobdata["mem_bw"] if flops_ == nil && membw_ == nil { - cclog.Infof("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID) + cclog.Warnf("rooflineHeatmap(): 'flops_any' or 'mem_bw' missing for job %d", job.ID) continue // return nil, fmt.Errorf("GRAPH/UTIL > 'flops_any' or 'mem_bw' missing for job %d", job.ID) } diff --git a/internal/metricdispatch/dataLoader.go b/internal/metricdispatch/dataLoader.go index 43a6d92b..09a8ac09 100644 --- a/internal/metricdispatch/dataLoader.go +++ b/internal/metricdispatch/dataLoader.go @@ -97,8 +97,8 @@ func LoadData(job *schema.Job, ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster) if err != nil { - cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s", + job.Cluster, job.SubCluster, err.Error()) return err, 0, 0 } @@ -116,11 +116,11 @@ func LoadData(job *schema.Job, jd, err = ms.LoadData(job, metrics, scopes, ctx, resolution) if err != nil { if len(jd) != 0 { - cclog.Warnf("partial error loading metrics from store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Warnf("partial error loading metrics from store for job %d (user: %s, project: %s, cluster: %s-%s): %s", + job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error()) } else { - cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Warnf("failed to load job data from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s", + job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error()) return err, 0, 0 } } @@ -129,8 +129,8 @@ func LoadData(job *schema.Job, var jdTemp schema.JobData jdTemp, err = archive.GetHandle().LoadJobData(job) if err != nil { - cclog.Errorf("failed to load job data from archive for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Warnf("failed to load job data from archive for job %d (user: %s, project: %s, cluster: %s-%s): %s", + job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error()) return err, 0, 0 } @@ -244,15 +244,15 @@ func LoadAverages( ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster) if err != nil { - cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s", + job.Cluster, job.SubCluster, err.Error()) return err } stats, err := ms.LoadStats(job, metrics, ctx) if err != nil { - cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Warnf("failed to load statistics from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s", + job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error()) return err } @@ -288,15 +288,15 @@ func LoadScopedJobStats( ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster) if err != nil { - cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s", + job.Cluster, job.SubCluster, err.Error()) return nil, err } scopedStats, err := ms.LoadScopedStats(job, metrics, scopes, ctx) if err != nil { - cclog.Errorf("failed to load scoped statistics from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Warnf("failed to load scoped statistics from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s", + job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error()) return nil, err } @@ -320,8 +320,8 @@ func LoadJobStats( ms, err := GetMetricDataRepo(job.Cluster, job.SubCluster) if err != nil { - cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s", + job.Cluster, job.SubCluster, err.Error()) return nil, err } @@ -329,8 +329,8 @@ func LoadJobStats( stats, err := ms.LoadStats(job, metrics, ctx) if err != nil { - cclog.Errorf("failed to load statistics from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Warnf("failed to load statistics from metric store for job %d (user: %s, project: %s, cluster: %s-%s): %s", + job.JobID, job.User, job.Project, job.Cluster, job.SubCluster, err.Error()) return data, err } @@ -379,8 +379,8 @@ func LoadNodeData( ms, err := GetMetricDataRepo(cluster, "") if err != nil { - cclog.Errorf("failed to load node data from metric store: %s", - err.Error()) + cclog.Errorf("failed to access metricDataRepo for cluster %s: %s", + cluster, err.Error()) return nil, err } @@ -389,7 +389,7 @@ func LoadNodeData( if len(data) != 0 { cclog.Warnf("partial error loading node data from metric store for cluster %s: %s", cluster, err.Error()) } else { - cclog.Errorf("failed to load node data from metric store for cluster %s: %s", cluster, err.Error()) + cclog.Warnf("failed to load node data from metric store for cluster %s: %s", cluster, err.Error()) return nil, err } } @@ -423,8 +423,8 @@ func LoadNodeListData( ms, err := GetMetricDataRepo(cluster, subCluster) if err != nil { - cclog.Errorf("failed to load node data from metric store: %s", - err.Error()) + cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s", + cluster, subCluster, err.Error()) return nil, err } @@ -434,7 +434,7 @@ func LoadNodeListData( cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s", cluster, subCluster, err.Error()) } else { - cclog.Errorf("failed to load node list data from metric store for cluster %s, subcluster %s: %s", + cclog.Warnf("failed to load node list data from metric store for cluster %s, subcluster %s: %s", cluster, subCluster, err.Error()) return nil, err } diff --git a/internal/metricstoreclient/cc-metric-store.go b/internal/metricstoreclient/cc-metric-store.go index ded644ea..aadbe1b1 100644 --- a/internal/metricstoreclient/cc-metric-store.go +++ b/internal/metricstoreclient/cc-metric-store.go @@ -329,7 +329,7 @@ func (ccms *CCMetricStore) LoadStats( metric := query.Metric data := res[0] if data.Error != nil { - cclog.Errorf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) + cclog.Warnf("fetching %s for node %s failed: %s", metric, query.Hostname, *data.Error) continue } @@ -556,7 +556,7 @@ func (ccms *CCMetricStore) LoadNodeListData( ) (map[string]schema.JobData, error) { queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution) if err != nil { - cclog.Errorf("Error while building node queries for Cluster %s, SubCLuster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) + cclog.Errorf("Error while building node queries for Cluster %s, SubCluster %s, Metrics %v, Scopes %v: %s", cluster, subCluster, metrics, scopes, err.Error()) return nil, err } diff --git a/internal/taskmanager/updateFootprintService.go b/internal/taskmanager/updateFootprintService.go index 71bf4089..65f4c229 100644 --- a/internal/taskmanager/updateFootprintService.go +++ b/internal/taskmanager/updateFootprintService.go @@ -68,8 +68,8 @@ func RegisterFootprintWorker() { ms, err := metricdispatch.GetMetricDataRepo(job.Cluster, job.SubCluster) if err != nil { - cclog.Errorf("failed to load job data from metric store for job %d (user: %s, project: %s): %s", - job.JobID, job.User, job.Project, err.Error()) + cclog.Errorf("failed to access metricDataRepo for cluster %s-%s: %s", + job.Cluster, job.SubCluster, err.Error()) continue } diff --git a/web/frontend/src/Job.root.svelte b/web/frontend/src/Job.root.svelte index 8cfceb96..3cd65f9a 100644 --- a/web/frontend/src/Job.root.svelte +++ b/web/frontend/src/Job.root.svelte @@ -342,7 +342,7 @@ Disabled Metric -

Metric {item.metric} is disabled for subcluster {$initq.data.job.subCluster}.

+

Metric {item.metric} is disabled for cluster {$initq.data.job.cluster}:{$initq.data.job.subCluster}.

To remove this card, open metric selection and press "Close and Apply".

@@ -352,7 +352,8 @@ Missing Metric -

No dataset returned for {item.metric}.

+

No dataset returned for {item.metric}.

+

Metric was not found in metric store for cluster {$initq.data.job.cluster}.

{/if} diff --git a/web/frontend/src/Node.root.svelte b/web/frontend/src/Node.root.svelte index c875f2b2..099233f4 100644 --- a/web/frontend/src/Node.root.svelte +++ b/web/frontend/src/Node.root.svelte @@ -22,6 +22,8 @@ Icon, Spinner, Card, + CardHeader, + CardBody } from "@sveltestrap/sveltestrap"; import { queryStore, @@ -254,12 +256,15 @@ > {:else} - No dataset returned for {item.name} + + + Missing Metric + + +

No dataset returned for {item.name}.

+

Metric was not found in metric store for cluster {cluster}.

+
+
{/if} {/snippet} diff --git a/web/frontend/src/generic/joblist/JobListRow.svelte b/web/frontend/src/generic/joblist/JobListRow.svelte index 4604883a..4b71991a 100644 --- a/web/frontend/src/generic/joblist/JobListRow.svelte +++ b/web/frontend/src/generic/joblist/JobListRow.svelte @@ -229,7 +229,10 @@ > {:else} - No dataset returned + +

No dataset returned for {metrics[i]}

+

Metric was not found in metric store for cluster {job.cluster}.

+
{/if} {/each} diff --git a/web/frontend/src/generic/plots/MetricPlot.svelte b/web/frontend/src/generic/plots/MetricPlot.svelte index 7e48e8e1..063b43fb 100644 --- a/web/frontend/src/generic/plots/MetricPlot.svelte +++ b/web/frontend/src/generic/plots/MetricPlot.svelte @@ -27,7 +27,7 @@ import uPlot from "uplot"; import { formatNumber, formatDurationTime } from "../units.js"; import { getContext, onMount, onDestroy } from "svelte"; - import { Card } from "@sveltestrap/sveltestrap"; + import { Card, CardBody, CardHeader } from "@sveltestrap/sveltestrap"; /* Svelte 5 Props */ let { @@ -633,7 +633,13 @@ style="background-color: {backgroundColor()};" class={forNode ? 'py-2 rounded' : 'rounded'} > {:else} - Cannot render plot: No series data returned for {metric} + + + Empty Metric + + +

Cannot render plot for {metric}.

+

Metric found but returned without timeseries data.

+
+
{/if} diff --git a/web/frontend/src/job/statstab/StatsTable.svelte b/web/frontend/src/job/statstab/StatsTable.svelte index 06b2d105..e7ec216a 100644 --- a/web/frontend/src/job/statstab/StatsTable.svelte +++ b/web/frontend/src/job/statstab/StatsTable.svelte @@ -55,6 +55,7 @@ function setupAvailable(data) { let pendingAvailable = {}; if (data) { + // Returns Only For Available Metrics for (let d of data) { if (!pendingAvailable[d.name]) { pendingAvailable[d.name] = [d.scope] @@ -90,13 +91,16 @@ pendingTableData[host] = {}; }; for (const metric of sm) { - if (!pendingTableData[host][metric]) { - pendingTableData[host][metric] = {}; - }; - for (const scope of as[metric]) { - pendingTableData[host][metric][scope] = js.find((d) => d.name == metric && d.scope == scope) - ?.stats.filter((st) => st.hostname == host && st.data != null) - ?.sort((a, b) => a.id - b.id) || [] + // Only Returned, Available Metrics + if (as[metric]) { + if (!pendingTableData[host][metric]) { + pendingTableData[host][metric] = {}; + }; + for (const scope of as[metric]) { + pendingTableData[host][metric][scope] = js.find((d) => d.name == metric && d.scope == scope) + ?.stats.filter((st) => st.hostname == host && st.data != null) + ?.sort((a, b) => a.id - b.id) || [] + }; }; }; }; @@ -136,40 +140,56 @@ {#each selectedMetrics as metric} - - - - {metric} - - - {#each (availableScopes[metric] || []) as scope} - - {/each} - - - + {#if availableScopes[metric]} + + + + {metric} + + + {#each (availableScopes[metric] || []) as scope} + + {/each} + + + + {:else} + + + + {metric} + + + + {/if} {/each} Node {#each selectedMetrics as metric} - {#if selectedScopes[metric] != "node"} - Id - {/if} - {#each ["min", "avg", "max"] as stat} - sortBy(metric, stat)}> - {stat} - {#if selectedScopes[metric] == "node"} - - {/if} + {#if availableScopes[metric]} + {#if selectedScopes[metric] != "node"} + Id + {/if} + {#each ["min", "avg", "max"] as stat} + sortBy(metric, stat)}> + {stat} + {#if selectedScopes[metric] == "node"} + + {/if} + + {/each} + {:else} + + Missing Metric - {/each} + {/if} {/each} @@ -178,10 +198,17 @@ {host} {#each selectedMetrics as metric (metric)} - + {#if tableData[host][metric]} + + {:else} + +

No dataset returned for {metric}.

+

Metric was not found in metric store for cluster.

+ + {/if} {/each} {/each} diff --git a/web/frontend/src/systems/NodeOverview.svelte b/web/frontend/src/systems/NodeOverview.svelte index 6e893989..68c4feb9 100644 --- a/web/frontend/src/systems/NodeOverview.svelte +++ b/web/frontend/src/systems/NodeOverview.svelte @@ -14,7 +14,7 @@