diff --git a/internal/metricdata/cc-metric-store.go b/internal/metricdata/cc-metric-store.go index 6d446d1..be2e956 100644 --- a/internal/metricdata/cc-metric-store.go +++ b/internal/metricdata/cc-metric-store.go @@ -770,21 +770,25 @@ func (ccms *CCMetricStore) LoadNodeData( } mc := archive.GetMetricConfig(cluster, metric) - hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ - Unit: mc.Unit, - Timestep: mc.Timestep, - Series: []schema.Series{ - { - Hostname: query.Hostname, - Data: qdata.Data, - Statistics: schema.MetricStatistics{ - Avg: float64(qdata.Avg), - Min: float64(qdata.Min), - Max: float64(qdata.Max), + if mc != nil { + hostdata[metric] = append(hostdata[metric], &schema.JobMetric{ + Unit: mc.Unit, + Timestep: mc.Timestep, + Series: []schema.Series{ + { + Hostname: query.Hostname, + Data: qdata.Data, + Statistics: schema.MetricStatistics{ + Avg: float64(qdata.Avg), + Min: float64(qdata.Min), + Max: float64(qdata.Max), + }, }, }, - }, - }) + }) + } else { + cclog.Warnf("Metric '%s' not configured for cluster '%s': Skipped in LoadNodeData() Return!", metric, cluster) + } } if len(errors) != 0 { diff --git a/web/frontend/src/DashPublic.root.svelte b/web/frontend/src/DashPublic.root.svelte index c69b28f..fbbf486 100644 --- a/web/frontend/src/DashPublic.root.svelte +++ b/web/frontend/src/DashPublic.root.svelte @@ -30,7 +30,8 @@ Table, Progress, Icon, - Button + Button, + Badge } from "@sveltestrap/sveltestrap"; import Roofline from "./generic/plots/Roofline.svelte"; import Pie, { colors } from "./generic/plots/Pie.svelte"; @@ -85,7 +86,8 @@ query: gql` query ( $cluster: String! - $metrics: [String!] + $nmetrics: [String!] + $cmetrics: [String!] $from: Time! $to: Time! $clusterFrom: Time! @@ -97,7 +99,7 @@ # Node 5 Minute Averages for Roofline nodeMetrics( cluster: $cluster - metrics: $metrics + metrics: $nmetrics from: $from to: $to ) { @@ -106,6 +108,10 @@ metrics { name metric { + unit { + base + prefix + } series { statistics { avg @@ -114,21 +120,6 @@ } } } - # Running Job Metric Average for Rooflines - jobsMetricStats(filter: $jobFilter, metrics: $metrics) { - id - jobId - duration - numNodes - numAccelerators - subCluster - stats { - name - data { - avg - } - } - } # Get Jobs for Per-Node Counts jobs(filter: $jobFilter, order: $sorting, page: $paging) { items { @@ -175,7 +166,7 @@ # ClusterMetrics for doubleMetricPlot clusterMetrics( cluster: $cluster - metrics: $metrics + metrics: $cmetrics from: $clusterFrom to: $to ) { @@ -194,7 +185,8 @@ `, variables: { cluster: presetCluster, - metrics: ["flops_any", "mem_bw"], // Metrics For Cluster Plot and Roofline + nmetrics: ["flops_any", "mem_bw", "cpu_power", "acc_power"], // Metrics For Roofline and Stats + cmetrics: ["flops_any", "mem_bw"], // Metrics For Cluster Plot from: from.toISOString(), clusterFrom: clusterFrom.toISOString(), to: to.toISOString(), @@ -258,6 +250,11 @@ } } + // Get Idle Infos after Sums + if (!rawInfos['idleNodes']) rawInfos['idleNodes'] = rawInfos['totalNodes'] - rawInfos['allocatedNodes']; + if (!rawInfos['idleCores']) rawInfos['idleCores'] = rawInfos['totalCores'] - rawInfos['allocatedCores']; + if (!rawInfos['idleAccs']) rawInfos['idleAccs'] = rawInfos['totalAccs'] - rawInfos['allocatedAccs']; + // Keymetrics (Data on Cluster-Scope) let rawFlops = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => sum + (node.metrics.find((m) => m.name == 'flops_any')?.metric?.series[0]?.statistics?.avg || 0), @@ -271,6 +268,26 @@ ) || 0; rawInfos['memBwRate'] = Math.floor((rawMemBw * 100) / 100) + let rawCpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => + sum + (node.metrics.find((m) => m.name == 'cpu_power')?.metric?.series[0]?.statistics?.avg || 0), + 0, // Initial Value + ) || 0; + rawInfos['cpuPwr'] = Math.floor((rawCpuPwr * 100) / 100) + if (!rawInfos['cpuPwrUnit']) { + let rawCpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'cpu_power')?.metric?.unit || null + rawInfos['cpuPwrUnit'] = rawCpuUnit ? rawCpuUnit.prefix + rawCpuUnit.base : '' + } + + let rawGpuPwr = $statusQuery?.data?.nodeMetrics?.reduce((sum, node) => + sum + (node.metrics.find((m) => m.name == 'acc_power')?.metric?.series[0]?.statistics?.avg || 0), + 0, // Initial Value + ) || 0; + rawInfos['gpuPwr'] = Math.floor((rawGpuPwr * 100) / 100) + if (!rawInfos['gpuPwrUnit']) { + let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null + rawInfos['gpuPwrUnit'] = rawGpuUnit ? rawGpuUnit.prefix + rawGpuUnit.base : '' + } + return rawInfos } else { return {}; @@ -408,79 +425,99 @@
| {clusterInfo?.runningJobs} Running Jobs | -{clusterInfo?.activeUsers} Active Users | -|
| - Flop Rate (Any) - | -- Memory BW Rate - | -|
| - {clusterInfo?.flopRate} - {clusterInfo?.flopRateUnit} - | -- {clusterInfo?.memBwRate} - {clusterInfo?.memBwRateUnit} - | -|
| Allocated Nodes | -
-
- |
- {clusterInfo?.allocatedNodes} / {clusterInfo?.totalNodes} - Nodes | -
|---|---|---|
| Allocated Cores | -
-
- |
- {formatNumber(clusterInfo?.allocatedCores)} / {formatNumber(clusterInfo?.totalCores)} - Cores | -
| Allocated Accelerators | -
-
- |
- {clusterInfo?.allocatedAccs} / {clusterInfo?.totalAccs} - Accelerators | -