diff --git a/api/schema.graphqls b/api/schema.graphqls
index f1592b1..017fd70 100644
--- a/api/schema.graphqls
+++ b/api/schema.graphqls
@@ -147,9 +147,10 @@ type MetricStatistics {
 }
 
 type StatsSeries {
-  mean: [NullableFloat!]!
-  min: [NullableFloat!]!
-  max: [NullableFloat!]!
+  mean:   [NullableFloat!]!
+  median: [NullableFloat!]!
+  min:    [NullableFloat!]!
+  max:    [NullableFloat!]!
 }
 
 type MetricFootprints {
diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go
index 910dbaa..cb688f2 100644
--- a/internal/graph/generated/generated.go
+++ b/internal/graph/generated/generated.go
@@ -249,9 +249,10 @@ type ComplexityRoot struct {
     }
 
     StatsSeries struct {
-        Max  func(childComplexity int) int
-        Mean func(childComplexity int) int
-        Min  func(childComplexity int) int
+        Max    func(childComplexity int) int
+        Mean   func(childComplexity int) int
+        Median func(childComplexity int) int
+        Min    func(childComplexity int) int
     }
 
     SubCluster struct {
@@ -1326,6 +1327,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
 
         return e.complexity.StatsSeries.Mean(childComplexity), true
 
+    case "StatsSeries.median":
+        if e.complexity.StatsSeries.Median == nil {
+            break
+        }
+
+        return e.complexity.StatsSeries.Median(childComplexity), true
+
     case "StatsSeries.min":
         if e.complexity.StatsSeries.Min == nil {
             break
@@ -1860,9 +1868,10 @@ type MetricStatistics {
 }
 
 type StatsSeries {
-  mean: [NullableFloat!]!
-  min: [NullableFloat!]!
-  max: [NullableFloat!]!
+  mean:   [NullableFloat!]!
+  median: [NullableFloat!]!
+  min:    [NullableFloat!]!
+  max:    [NullableFloat!]!
 }
 
 type MetricFootprints {
@@ -4662,6 +4671,8 @@ func (ec *executionContext) fieldContext_JobMetric_statisticsSeries(ctx context.
             switch field.Name {
             case "mean":
                 return ec.fieldContext_StatsSeries_mean(ctx, field)
+            case "median":
+                return ec.fieldContext_StatsSeries_median(ctx, field)
             case "min":
                 return ec.fieldContext_StatsSeries_min(ctx, field)
             case "max":
@@ -8649,6 +8660,50 @@ func (ec *executionContext) fieldContext_StatsSeries_mean(ctx context.Context, f
     return fc, nil
 }
 
+func (ec *executionContext) _StatsSeries_median(ctx context.Context, field graphql.CollectedField, obj *schema.StatsSeries) (ret graphql.Marshaler) {
+    fc, err := ec.fieldContext_StatsSeries_median(ctx, field)
+    if err != nil {
+        return graphql.Null
+    }
+    ctx = graphql.WithFieldContext(ctx, fc)
+    defer func() {
+        if r := recover(); r != nil {
+            ec.Error(ctx, ec.Recover(ctx, r))
+            ret = graphql.Null
+        }
+    }()
+    resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
+        ctx = rctx // use context from middleware stack in children
+        return obj.Median, nil
+    })
+    if err != nil {
+        ec.Error(ctx, err)
+        return graphql.Null
+    }
+    if resTmp == nil {
+        if !graphql.HasFieldError(ctx, fc) {
+            ec.Errorf(ctx, "must not be null")
+        }
+        return graphql.Null
+    }
+    res := resTmp.([]schema.Float)
+    fc.Result = res
+    return ec.marshalNNullableFloat2ᚕgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋpkgᚋschemaᚐFloatᚄ(ctx, field.Selections, res)
+}
+
+func (ec *executionContext) fieldContext_StatsSeries_median(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
+    fc = &graphql.FieldContext{
+        Object:     "StatsSeries",
+        Field:      field,
+        IsMethod:   false,
+        IsResolver: false,
+        Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
+            return nil, errors.New("field of type NullableFloat does not have child fields")
+        },
+    }
+    return fc, nil
+}
+
 func (ec *executionContext) _StatsSeries_min(ctx context.Context, field graphql.CollectedField, obj *schema.StatsSeries) (ret graphql.Marshaler) {
     fc, err := ec.fieldContext_StatsSeries_min(ctx, field)
     if err != nil {
@@ -14431,6 +14486,11 @@ func (ec *executionContext) _StatsSeries(ctx context.Context, sel ast.SelectionS
             if out.Values[i] == graphql.Null {
                 out.Invalids++
             }
+        case "median":
+            out.Values[i] = ec._StatsSeries_median(ctx, field, obj)
+            if out.Values[i] == graphql.Null {
+                out.Invalids++
+            }
         case "min":
             out.Values[i] = ec._StatsSeries_min(ctx, field, obj)
             if out.Values[i] == graphql.Null {
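Side note on the generated code above: _StatsSeries_median is plain field access (it returns obj.Median from schema.StatsSeries), so the real contract change is on the producer side, which now has to fill the Median slice alongside Min and Max. A minimal sketch of that contract, not taken from the patch (the surrounding job-loading code is assumed, not shown):

    package main

    import (
        "math"

        "github.com/ClusterCockpit/cc-backend/pkg/schema"
    )

    func main() {
        // Illustrative only: one entry per timestep in every slice; NaN entries
        // are the way to express gaps and should serialize as GraphQL null.
        ss := &schema.StatsSeries{
            Min:    []schema.Float{1.0, 2.0, 1.5},
            Median: []schema.Float{2.0, 2.5, schema.Float(math.NaN())},
            Max:    []schema.Float{4.0, 4.5, 3.0},
        }
        _ = ss
    }

Since the schema keeps mean: [NullableFloat!]! but AddStatisticsSeries (see pkg/schema/metrics.go below) no longer fills Mean, clients that still select mean have to rely on it being populated elsewhere.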
diff --git a/internal/metricdata/metricdata.go b/internal/metricdata/metricdata.go
index f54ae41..74d4347 100644
--- a/internal/metricdata/metricdata.go
+++ b/internal/metricdata/metricdata.go
@@ -263,7 +263,7 @@ func cacheKey(
 
 // For /monitoring/job/ and some other places, flops_any and mem_bw need
 // to be available at the scope 'node'. If a job has a lot of nodes,
-// statisticsSeries should be available so that a min/mean/max Graph can be
+// statisticsSeries should be available so that a min/median/max Graph can be
 // used instead of a lot of single lines.
 func prepareJobData(
     job *schema.Job,
diff --git a/internal/util/statistics.go b/internal/util/statistics.go
index 9d971dc..9e23b15 100644
--- a/internal/util/statistics.go
+++ b/internal/util/statistics.go
@@ -7,6 +7,10 @@ package util
 import (
     "github.com/ClusterCockpit/cc-backend/pkg/schema"
     "golang.org/x/exp/constraints"
+
+    "fmt"
+    "math"
+    "sort"
 )
 
 func Min[T constraints.Ordered](a, b T) T {
@@ -34,3 +38,36 @@ func LoadJobStat(job *schema.JobMeta, metric string) float64 {
 
     return 0.0
 }
+
+func sortedCopy(input []float64) []float64 {
+    sorted := make([]float64, len(input))
+    copy(sorted, input)
+    sort.Float64s(sorted)
+    return sorted
+}
+
+func Mean(input []float64) (float64, error) {
+    if len(input) == 0 {
+        return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
+    }
+    sum := 0.0
+    for _, n := range input {
+        sum += n
+    }
+    return sum / float64(len(input)), nil
+}
+
+func Median(input []float64) (median float64, err error) {
+    c := sortedCopy(input)
+    // Even numbers: add the two middle numbers, divide by two (use mean function)
+    // Odd numbers: Use the middle number
+    l := len(c)
+    if l == 0 {
+        return math.NaN(), fmt.Errorf("input array is empty: %#v", input)
+    } else if l%2 == 0 {
+        median, _ = Mean(c[l/2-1 : l/2+1])
+    } else {
+        median = c[l/2]
+    }
+    return median, nil
+}
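The helpers added to internal/util follow the textbook definitions: Median sorts a copy (the caller's slice is left untouched), takes the middle element for odd lengths, the mean of the two middle elements for even lengths, and returns an error together with NaN for empty input, just like Mean. A small usage sketch, assuming it lives in a test file next to the package:

    package util_test

    import (
        "fmt"

        "github.com/ClusterCockpit/cc-backend/internal/util"
    )

    func ExampleMedian() {
        odd, _ := util.Median([]float64{5, 1, 3})     // sorted copy: 1 3 5 -> middle element
        even, _ := util.Median([]float64{4, 1, 3, 2}) // sorted copy: 1 2 3 4 -> (2+3)/2 via Mean
        _, err := util.Median(nil)                    // empty input: error, value is NaN

        fmt.Println(odd, even, err != nil)
        // Output: 3 2.5 true
    }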
diff --git a/pkg/schema/metrics.go b/pkg/schema/metrics.go
index e340747..08636f1 100644
--- a/pkg/schema/metrics.go
+++ b/pkg/schema/metrics.go
@@ -10,6 +10,8 @@ import (
     "math"
     "sort"
     "unsafe"
+
+    "github.com/ClusterCockpit/cc-backend/internal/util"
 )
 
 type JobData map[string]map[MetricScope]*JobMetric
@@ -36,6 +38,7 @@ type MetricStatistics struct {
 
 type StatsSeries struct {
     Mean        []Float         `json:"mean"`
+    Median      []Float         `json:"median"`
     Min         []Float         `json:"min"`
     Max         []Float         `json:"max"`
     Percentiles map[int][]Float `json:"percentiles,omitempty"`
@@ -120,7 +123,7 @@ func (jd *JobData) Size() int {
         for _, metric := range scopes {
             if metric.StatisticsSeries != nil {
                 n += len(metric.StatisticsSeries.Max)
-                n += len(metric.StatisticsSeries.Mean)
+                n += len(metric.StatisticsSeries.Median)
                 n += len(metric.StatisticsSeries.Min)
             }
 
@@ -149,53 +152,74 @@ func (jm *JobMetric) AddStatisticsSeries() {
         }
     }
 
-    min, mean, max := make([]Float, n), make([]Float, n), make([]Float, n)
+    // mean := make([]Float, n)
+    min, median, max := make([]Float, n), make([]Float, n), make([]Float, n)
     i := 0
     for ; i < m; i++ {
-        smin, ssum, smax := math.MaxFloat32, 0.0, -math.MaxFloat32
+        seriesCount := len(jm.Series)
+        // ssum := 0.0
+        smin, smed, smax := math.MaxFloat32, make([]float64, seriesCount), -math.MaxFloat32
         notnan := 0
-        for j := 0; j < len(jm.Series); j++ {
+        for j := 0; j < seriesCount; j++ {
             x := float64(jm.Series[j].Data[i])
             if math.IsNaN(x) {
                 continue
             }
 
             notnan += 1
-            ssum += x
+            // ssum += x
+            smed[j] = x
             smin = math.Min(smin, x)
             smax = math.Max(smax, x)
         }
 
         if notnan < 3 {
             min[i] = NaN
-            mean[i] = NaN
+            // mean[i] = NaN
+            median[i] = NaN
             max[i] = NaN
         } else {
             min[i] = Float(smin)
-            mean[i] = Float(ssum / float64(notnan))
+            // mean[i] = Float(ssum / float64(notnan))
             max[i] = Float(smax)
+
+            medianRaw, err := util.Median(smed)
+            if err != nil {
+                median[i] = NaN
+            } else {
+                median[i] = Float(medianRaw)
+            }
         }
     }
 
     for ; i < n; i++ {
         min[i] = NaN
-        mean[i] = NaN
+        // mean[i] = NaN
+        median[i] = NaN
         max[i] = NaN
     }
 
     if smooth {
-        for i := 2; i < len(mean)-2; i++ {
+        for i := 2; i < len(median)-2; i++ {
             if min[i].IsNaN() {
                 continue
             }
 
             min[i] = (min[i-2] + min[i-1] + min[i] + min[i+1] + min[i+2]) / 5
             max[i] = (max[i-2] + max[i-1] + max[i] + max[i+1] + max[i+2]) / 5
-            mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
+            // mean[i] = (mean[i-2] + mean[i-1] + mean[i] + mean[i+1] + mean[i+2]) / 5
+            // Reduce Median further
+            smoothRaw := []float64{float64(median[i-2]), float64(median[i-1]), float64(median[i]), float64(median[i+1]), float64(median[i+2])}
+            smoothMedian, err := util.Median(smoothRaw)
+            if err != nil {
+                median[i] = NaN
+            } else {
+                median[i] = Float(smoothMedian)
+            }
         }
     }
 
-    jm.StatisticsSeries = &StatsSeries{Mean: mean, Min: min, Max: max}
+    jm.StatisticsSeries = &StatsSeries{Median: median, Min: min, Max: max} // Mean: mean
 }
 
 func (jd *JobData) AddNodeScope(metric string) bool {
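In AddStatisticsSeries above, each timestep now collects the per-node values into smed and stores their median (still requiring at least three non-NaN samples); note that smed has seriesCount entries and is only overwritten for non-NaN samples, so a series that is NaN at that timestep contributes its zero value to the median. In the smoothing branch, min and max keep the 5-point moving average while the median series gets a 5-point moving median. A small illustration of that design choice, with made-up numbers, reusing the util helpers:

    package main

    import (
        "fmt"

        "github.com/ClusterCockpit/cc-backend/internal/util"
    )

    func main() {
        // One outlier inside a 5-point smoothing window.
        window := []float64{3.9, 4.1, 40.0, 4.0, 4.2}

        avg, _ := util.Mean(window)   // 11.24: a moving average is pulled towards the spike
        med, _ := util.Median(window) // 4.1:   a moving median stays at the typical level
        fmt.Println(avg, med)
    }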
diff --git a/web/frontend/src/JobFootprint.svelte b/web/frontend/src/JobFootprint.svelte
index 84d6efe..c6e178c 100644
--- a/web/frontend/src/JobFootprint.svelte
+++ b/web/frontend/src/JobFootprint.svelte
@@ -101,7 +101,7 @@
     // Calculate Avg from jobMetrics
     const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === "node");
     if (jm?.metric?.statisticsSeries) {
-      const noNan = jm.metric.statisticsSeries.mean.filter(function (val) {
+      const noNan = jm.metric.statisticsSeries.median.filter(function (val) {
        return val != null;
      });
      mv = round(mean(noNan), 2);
diff --git a/web/frontend/src/Metric.svelte b/web/frontend/src/Metric.svelte
index a3bedaa..279df13 100644
--- a/web/frontend/src/Metric.svelte
+++ b/web/frontend/src/Metric.svelte
@@ -33,7 +33,7 @@
     error = null;
 
   let selectedScope = minScope(scopes);
-  let statsPattern = /(.*)-stats$/
+  let statsPattern = /(.*)-stat$/
   let statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
   let selectedScopeIndex
 
@@ -92,7 +92,7 @@
   {#each availableScopes as scope, index}
     {#if statsSeries[index]}
-
+
     {/if}
   {/each}
   {#if availableScopes.length == 1 && metricConfig?.scope != "node"}
diff --git a/web/frontend/src/joblist/Row.svelte b/web/frontend/src/joblist/Row.svelte
index 98d3190..dd92ec4 100644
--- a/web/frontend/src/joblist/Row.svelte
+++ b/web/frontend/src/joblist/Row.svelte
@@ -50,7 +50,7 @@
           timestep
           statisticsSeries {
             min
-            mean
+            median
             max
           }
           series {
diff --git a/web/frontend/src/plots/MetricPlot.svelte b/web/frontend/src/plots/MetricPlot.svelte
index ba3c294..db6f4fd 100644
--- a/web/frontend/src/plots/MetricPlot.svelte
+++ b/web/frontend/src/plots/MetricPlot.svelte
@@ -216,7 +216,7 @@
 
     // conditional hide series color markers:
     if (
-      useStatsSeries === true || // Min/Max/Avg Self-Explanatory
+      useStatsSeries === true || // Min/Max/Median Self-Explanatory
       dataSize === 1 || // Only one Y-Dataseries
       dataSize > 6
     ) {
@@ -296,7 +296,7 @@
   }
 
   const longestSeries = useStatsSeries
-    ? statisticsSeries.mean.length
+    ? statisticsSeries.median.length
     : series.reduce((n, series) => Math.max(n, series.data.length), 0);
   const maxX = longestSeries * timestep;
   let maxY = null;
@@ -346,13 +346,15 @@
   if (useStatsSeries) {
     plotData.push(statisticsSeries.min);
     plotData.push(statisticsSeries.max);
-    plotData.push(statisticsSeries.mean);
+    plotData.push(statisticsSeries.median);
+    // plotData.push(statisticsSeries.mean);
 
     if (forNode === true) {
       // timestamp 0 with null value for reversed time axis
       if (plotData[1].length != 0) plotData[1].push(null);
       if (plotData[2].length != 0) plotData[2].push(null);
       if (plotData[3].length != 0) plotData[3].push(null);
+      // if (plotData[4].length != 0) plotData[4].push(null);
     }
 
     plotSeries.push({
@@ -368,11 +370,17 @@
       stroke: "green",
     });
     plotSeries.push({
-      label: "mean",
+      label: "median",
       scale: "y",
       width: lineWidth,
       stroke: "black",
     });
+    // plotSeries.push({
+    //   label: "mean",
+    //   scale: "y",
+    //   width: lineWidth,
+    //   stroke: "blue",
+    // });
 
     plotBands = [
       { series: [2, 3], fill: "rgba(0,255,0,0.1)" },
@@ -422,7 +430,7 @@
   // Draw plot type label:
   let textl = `${scope}${plotSeries.length > 2 ? "s" : ""}${
     useStatsSeries
-      ? ": min/avg/max"
+      ? ": min/median/max"
      : metricConfig != null && scope != metricConfig.scope
        ? ` (${metricConfig.aggregation})`
        : ""