mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-01-26 03:19:06 +01:00
Merge pull request #251 from ClusterCockpit/hotfix
Accelerator ID Display Bugs and Footprint
This commit is contained in:
commit
ddd3fad1c6
@ -205,7 +205,7 @@ func (ccms *CCMetricStore) LoadData(
|
||||
jobData[metric][scope] = jobMetric
|
||||
}
|
||||
|
||||
for _, res := range row {
|
||||
for ndx, res := range row {
|
||||
if res.Error != nil {
|
||||
/* Build list for "partial errors", if any */
|
||||
errors = append(errors, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
|
||||
@ -215,7 +215,7 @@ func (ccms *CCMetricStore) LoadData(
|
||||
id := (*string)(nil)
|
||||
if query.Type != nil {
|
||||
id = new(string)
|
||||
*id = query.TypeIds[0]
|
||||
*id = query.TypeIds[ndx]
|
||||
}
|
||||
|
||||
if res.Avg.IsNaN() || res.Min.IsNaN() || res.Max.IsNaN() {
|
||||
@ -312,6 +312,11 @@ func (ccms *CCMetricStore) buildQueries(
|
||||
|
||||
// Accelerator -> Accelerator (Use "accelerator" scope if requested scope is lower than node)
|
||||
if nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {
|
||||
if scope != schema.MetricScopeAccelerator {
|
||||
// Skip all other caught cases
|
||||
continue
|
||||
}
|
||||
|
||||
queries = append(queries, ApiQuery{
|
||||
Metric: remoteName,
|
||||
Hostname: host.Hostname,
|
||||
|
@ -23,38 +23,25 @@
|
||||
: metricConfig.alert,
|
||||
};
|
||||
|
||||
if (job.exclusive === 1) {
|
||||
// Exclusive: Use as defined
|
||||
// Job_Exclusivity does not matter, only aggregation
|
||||
if (metricConfig.aggregation === "avg") {
|
||||
return defaultThresholds;
|
||||
} else if (metricConfig.aggregation === "sum") {
|
||||
const jobFraction =
|
||||
job.numHWThreads / subClusterConfig.topology.node.length;
|
||||
return {
|
||||
peak: round(defaultThresholds.peak * jobFraction, 0),
|
||||
normal: round(defaultThresholds.normal * jobFraction, 0),
|
||||
caution: round(defaultThresholds.caution * jobFraction, 0),
|
||||
alert: round(defaultThresholds.alert * jobFraction, 0),
|
||||
};
|
||||
} else {
|
||||
// Shared: Handle specifically
|
||||
if (metricConfig.name === "cpu_load") {
|
||||
// Special: Avg Aggregation BUT scaled based on #hwthreads
|
||||
return {
|
||||
peak: job.numHWThreads,
|
||||
normal: job.numHWThreads,
|
||||
caution: defaultThresholds.caution,
|
||||
alert: defaultThresholds.alert,
|
||||
};
|
||||
} else if (metricConfig.aggregation === "avg") {
|
||||
return defaultThresholds;
|
||||
} else if (metricConfig.aggregation === "sum") {
|
||||
const jobFraction =
|
||||
job.numHWThreads / subClusterConfig.topology.node.length;
|
||||
return {
|
||||
peak: round(defaultThresholds.peak * jobFraction, 0),
|
||||
normal: round(defaultThresholds.normal * jobFraction, 0),
|
||||
caution: round(defaultThresholds.caution * jobFraction, 0),
|
||||
alert: round(defaultThresholds.alert * jobFraction, 0),
|
||||
};
|
||||
} else {
|
||||
console.warn(
|
||||
"Missing or unkown aggregation mode (sum/avg) for metric:",
|
||||
metricConfig,
|
||||
);
|
||||
return null;
|
||||
}
|
||||
} // Other job.exclusive cases?
|
||||
console.warn(
|
||||
"Missing or unkown aggregation mode (sum/avg) for metric:",
|
||||
metricConfig,
|
||||
);
|
||||
return defaultThresholds;
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
@ -91,29 +78,6 @@
|
||||
: ["cpu_load", "flops_any", "mem_used", "mem_bw"]; // Exclusive
|
||||
|
||||
const footprintData = footprintMetrics.map((fm) => {
|
||||
// Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
|
||||
let mv = null;
|
||||
if (fm === "cpu_load" && job.loadAvg !== 0) {
|
||||
mv = round(job.loadAvg, 2);
|
||||
} else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
|
||||
mv = round(job.flopsAnyAvg, 2);
|
||||
} else if (fm === "mem_bw" && job.memBwAvg !== 0) {
|
||||
mv = round(job.memBwAvg, 2);
|
||||
} else {
|
||||
// Calculate from jobMetrics
|
||||
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
|
||||
if (jm?.metric?.statisticsSeries) {
|
||||
mv = round(mean(jm.metric.statisticsSeries.mean), 2);
|
||||
} else if (jm?.metric?.series?.length > 1) {
|
||||
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
|
||||
mv = round(mean(avgs), 2);
|
||||
} else if (jm?.metric?.series) {
|
||||
mv = round(jm.metric.series[0].statistics.avg, 2);
|
||||
} else {
|
||||
mv = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
// Unit
|
||||
const fmc = getContext("metrics")(job.cluster, fm);
|
||||
let unit = "";
|
||||
@ -123,6 +87,44 @@
|
||||
const fmt = findJobThresholds(job, fmc, subclusterConfig);
|
||||
if (fm === "flops_any") fmt.peak = round(fmt.peak * 0.85, 0);
|
||||
|
||||
// Value: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
|
||||
// Exclusivity does not matter
|
||||
let mv = 0.0;
|
||||
if (fmc.aggregation === "avg") {
|
||||
if (fm === "cpu_load" && job.loadAvg !== 0) {
|
||||
mv = round(job.loadAvg, 2);
|
||||
} else if (fm === "flops_any" && job.flopsAnyAvg !== 0) {
|
||||
mv = round(job.flopsAnyAvg, 2);
|
||||
} else if (fm === "mem_bw" && job.memBwAvg !== 0) {
|
||||
mv = round(job.memBwAvg, 2);
|
||||
} else {
|
||||
// Calculate Avg from jobMetrics
|
||||
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
|
||||
if (jm?.metric?.statisticsSeries) {
|
||||
mv = round(mean(jm.metric.statisticsSeries.mean), 2);
|
||||
} else if (jm?.metric?.series?.length > 1) {
|
||||
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
|
||||
mv = round(mean(avgs), 2);
|
||||
} else if (jm?.metric?.series) {
|
||||
mv = round(jm.metric.series[0].statistics.avg, 2);
|
||||
}
|
||||
}
|
||||
} else if (fmc.aggregation === "sum") {
|
||||
// Calculate Sum from jobMetrics: Sum all node averages
|
||||
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
|
||||
if (jm?.metric?.series?.length > 1) { // More than 1 node
|
||||
const avgs = jm.metric.series.map((jms) => jms.statistics.avg);
|
||||
mv = round(avgs.reduce((a, b) => a + b, 0));
|
||||
} else if (jm?.metric?.series) {
|
||||
mv = round(jm.metric.series[0].statistics.avg, 2);
|
||||
}
|
||||
} else {
|
||||
console.warn(
|
||||
"Missing or unkown aggregation mode (sum/avg) for metric:",
|
||||
metricConfig,
|
||||
);
|
||||
}
|
||||
|
||||
// Define basic data
|
||||
const fmBase = {
|
||||
name: fm,
|
||||
|
Loading…
Reference in New Issue
Block a user