Mirror of https://github.com/ClusterCockpit/cc-backend (synced 2024-12-26 13:29:05 +01:00)
Merge pull request #251 from ClusterCockpit/hotfix
Accelerator ID Display Bugs and Footprint
This commit is contained in: commit ddd3fad1c6
@@ -205,7 +205,7 @@ func (ccms *CCMetricStore) LoadData(
 			jobData[metric][scope] = jobMetric
 		}

-		for _, res := range row {
+		for ndx, res := range row {
 			if res.Error != nil {
 				/* Build list for "partial errors", if any */
 				errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
@@ -215,7 +215,7 @@ func (ccms *CCMetricStore) LoadData(
 			id := (*string)(nil)
 			if query.Type != nil {
 				id = new(string)
-				*id = query.TypeIds[0]
+				*id = query.TypeIds[ndx]
 			}

 			if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
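The two LoadData hunks above address the accelerator ID display bug named in the commit message: the result loop previously discarded its index and always read query.TypeIds[0], so every per-accelerator series was labeled with the first accelerator's ID. Indexing with ndx pairs each result row with its own type ID. A minimal, self-contained Go sketch of that pairing (simplified types, not the actual CCMetricStore structures):

package main

import "fmt"

// apiQuery is a simplified stand-in for the real ApiQuery (fields assumed for illustration).
type apiQuery struct {
	Metric  string
	TypeIds []string // one entry per requested accelerator
}

// labelSeries pairs every result row with the type ID at the same index.
// Before the fix, the equivalent loop always used TypeIds[0], so all GPU
// series carried the first accelerator's ID.
func labelSeries(q apiQuery, rows []float64) {
	for ndx, val := range rows {
		id := q.TypeIds[ndx]
		fmt.Printf("%s[%s] = %.1f\n", q.Metric, id, val)
	}
}

func main() {
	q := apiQuery{Metric: "acc_utilization", TypeIds: []string{"0", "1", "2", "3"}}
	labelSeries(q, []float64{90.0, 10.0, 55.5, 0.0})
}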
@@ -312,6 +312,11 @@ func (ccms *CCMetricStore) buildQueries(

 		// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
 		if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
+			if scope != schema.MetricScopeAccelerator {
+				// Skip all other catched cases
+				continue
+			}
+
 			queries = append(queries, ApiQuery{
 				Metric:   remoteName,
 				Hostname: host.Hostname,
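The buildQueries hunk adds a guard for metrics whose native scope is the accelerator: only a request for the accelerator scope itself is served, and any other sub-node scope is now skipped with continue instead of producing a query at the wrong scope. A hedged sketch of the guard's effect, with scope levels modeled as plain integers rather than the real schema.MetricScope type:

package main

import "fmt"

// Scope levels modeled as ints for illustration only; the real code uses
// schema.MetricScope and its LT() ordering.
const (
	scopeAccelerator = iota
	scopeCore
	scopeSocket
	scopeNode
)

// serveScope reports whether a query should be built: a metric native to
// the accelerator scope answers only the accelerator scope itself; all
// other sub-node scopes are skipped (the new "continue" in buildQueries).
func serveScope(nativeScope, requestedScope int) bool {
	if nativeScope == scopeAccelerator && requestedScope < scopeNode {
		return requestedScope == scopeAccelerator
	}
	return true
}

func main() {
	fmt.Println(serveScope(scopeAccelerator, scopeAccelerator)) // true: build query
	fmt.Println(serveScope(scopeAccelerator, scopeCore))        // false: skipped
}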
@@ -23,38 +23,25 @@
       : metricConfig.alert,
   };

-  if (job.exclusive === 1) {
-    // Exclusive: Use as defined
+  // Job_Exclusivity does not matter, only aggregation
+  if (metricConfig.aggregation === "avg") {
     return defaultThresholds;
+  } else if (metricConfig.aggregation === "sum") {
+    const jobFraction =
+      job.numHWThreads / subClusterConfig.topology.node.length;
+    return {
+      peak: round(defaultThresholds.peak * jobFraction, 0),
+      normal: round(defaultThresholds.normal * jobFraction, 0),
+      caution: round(defaultThresholds.caution * jobFraction, 0),
+      alert: round(defaultThresholds.alert * jobFraction, 0),
+    };
   } else {
-    // Shared: Handle specifically
-    if (metricConfig.name === "cpu_load") {
-      // Special: Avg Aggregation BUT scaled based on #hwthreads
-      return {
-        peak: job.numHWThreads,
-        normal: job.numHWThreads,
-        caution: defaultThresholds.caution,
-        alert: defaultThresholds.alert,
-      };
-    } else if (metricConfig.aggregation === "avg") {
-      return defaultThresholds;
-    } else if (metricConfig.aggregation === "sum") {
-      const jobFraction =
-        job.numHWThreads / subClusterConfig.topology.node.length;
-      return {
-        peak: round(defaultThresholds.peak * jobFraction, 0),
-        normal: round(defaultThresholds.normal * jobFraction, 0),
-        caution: round(defaultThresholds.caution * jobFraction, 0),
-        alert: round(defaultThresholds.alert * jobFraction, 0),
-      };
-    } else {
-      console.warn(
-        "Missing or unkown aggregation mode (sum/avg) for metric:",
-        metricConfig,
-      );
-      return null;
-    }
-  } // Other job.exclusive cases?
+    console.warn(
+      "Missing or unkown aggregation mode (sum/avg) for metric:",
+      metricConfig,
+    );
+    return defaultThresholds;
+  }
 }
 </script>

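The rewritten findJobThresholds drops the branching on job.exclusive: thresholds now depend only on the metric's aggregation mode. For "avg"-aggregated metrics the node thresholds apply unchanged; for "sum"-aggregated metrics they are scaled by the fraction of the node's hardware threads the job occupies. A small Go sketch of that scaling (field names are illustrative assumptions; the shipped code is the frontend snippet above):

package main

import (
	"fmt"
	"math"
)

// thresholds mirrors the peak/normal/caution/alert levels (names assumed).
type thresholds struct{ Peak, Normal, Caution, Alert float64 }

// scaleThresholds follows the new rule: "avg"-aggregated metrics keep the
// node thresholds, "sum"-aggregated metrics scale them by the share of the
// node's hardware threads the job uses.
func scaleThresholds(t thresholds, aggregation string, jobHWThreads, nodeHWThreads int) thresholds {
	if aggregation != "sum" {
		return t
	}
	frac := float64(jobHWThreads) / float64(nodeHWThreads)
	return thresholds{
		Peak:    math.Round(t.Peak * frac),
		Normal:  math.Round(t.Normal * frac),
		Caution: math.Round(t.Caution * frac),
		Alert:   math.Round(t.Alert * frac),
	}
}

func main() {
	node := thresholds{Peak: 350, Normal: 300, Caution: 200, Alert: 100}
	// A job on 18 of 72 hardware threads gets a quarter of the node thresholds.
	fmt.Println(scaleThresholds(node, "sum", 18, 72))
}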
@@ -91,29 +78,6 @@
     : ["cpu_load", "flops_any", "mem_used", "mem_bw"]; // Exclusive

   const footprintData = footprintMetrics.map((fm) => {
-    // Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
-    let mv = null;
-    if (fm === "cpu_load" && job.loadAvg !== 0) {
-      mv = round(job.loadAvg, 2);
-    } else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
-      mv = round(job.flopsAnyAvg, 2);
-    } else if (fm === "mem_bw" && job.memBwAvg !== 0) {
-      mv = round(job.memBwAvg, 2);
-    } else {
-      // Calculate from jobMetrics
-      const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
-      if (jm?.metric?.statisticsSeries) {
-        mv = round(mean(jm.metric.statisticsSeries.mean), 2);
-      } else if (jm?.metric?.series?.length > 1) {
-        const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
-        mv = round(mean(avgs), 2);
-      } else if (jm?.metric?.series) {
-        mv = round(jm.metric.series[0].statistics.avg, 2);
-      } else {
-        mv = 0.0;
-      }
-    }
-
     // Unit
     const fmc = getContext("metrics")(job.cluster, fm);
     let unit = "";
@@ -123,6 +87,44 @@
     const fmt = findJobThresholds(job, fmc, subclusterConfig);
     if (fm === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0);

+    // Value: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
+    // Exclusivity does not matter
+    let mv = 0.0;
+    if (fmc.aggregation === "avg") {
+      if (fm === "cpu_load" && job.loadAvg !== 0) {
+        mv = round(job.loadAvg, 2);
+      } else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
+        mv = round(job.flopsAnyAvg, 2);
+      } else if (fm === "mem_bw" && job.memBwAvg !== 0) {
+        mv = round(job.memBwAvg, 2);
+      } else {
+        // Calculate Avg from jobMetrics
+        const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
+        if (jm?.metric?.statisticsSeries) {
+          mv = round(mean(jm.metric.statisticsSeries.mean), 2);
+        } else if (jm?.metric?.series?.length > 1) {
+          const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
+          mv = round(mean(avgs), 2);
+        } else if (jm?.metric?.series) {
+          mv = round(jm.metric.series[0].statistics.avg, 2);
+        }
+      }
+    } else if (fmc.aggregation === "sum") {
+      // Calculate Sum from jobMetrics: Sum all node averages
+      const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
+      if (jm?.metric?.series?.length > 1) { // More than 1 node
+        const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
+        mv = round(avgs.reduce((a, b) => a + b, 0));
+      } else if (jm?.metric?.series) {
+        mv = round(jm.metric.series[0].statistics.avg, 2);
+      }
+    } else {
+      console.warn(
+        "Missing or unkown aggregation mode (sum/avg) for metric:",
+        metricConfig,
+      );
+    }
+
     // Define basic data
     const fmBase = {
       name: fm,
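The last hunk moves the footprint value calculation behind the same aggregation switch: "avg" metrics prefer the backend-provided job averages (job.loadAvg, job.flopsAnyAvg, job.memBwAvg) and otherwise average the per-node series, while "sum" metrics add the per-node averages up. A compact Go sketch of the fallback path that derives the value from per-node averages (illustrative only; the shipped code is the frontend snippet above):

package main

import "fmt"

// footprintValue derives the footprint value from per-node averages when no
// precomputed job average is available: "avg" metrics take the mean of the
// node averages, "sum" metrics add them up (names and rounding simplified).
func footprintValue(aggregation string, nodeAvgs []float64) float64 {
	if len(nodeAvgs) == 0 {
		return 0.0
	}
	sum := 0.0
	for _, v := range nodeAvgs {
		sum += v
	}
	if aggregation == "sum" {
		return sum
	}
	return sum / float64(len(nodeAvgs))
}

func main() {
	nodeAvgs := []float64{42.1, 38.7, 40.0}
	fmt.Println(footprintValue("avg", nodeAvgs)) // mean of the node averages
	fmt.Println(footprintValue("sum", nodeAvgs)) // total across the job's nodes
}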