diff --git a/api/schema.graphqls b/api/schema.graphqls index 21a9ad2..8a43a54 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -293,10 +293,10 @@ type MetricHistoPoints { } type MetricHistoPoint { - min: Int! - max: Int! + bin: Int count: Int! - bin: Int! + min: Int + max: Int } type JobsStatistics { diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index f3d4f8a..12d829a 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -1969,10 +1969,10 @@ type MetricHistoPoints { } type MetricHistoPoint { - min: Int! - max: Int! + bin: Int count: Int! - bin: Int! + min: Int + max: Int } type JobsStatistics { @@ -6336,8 +6336,8 @@ func (ec *executionContext) fieldContext_MetricFootprints_data(ctx context.Conte return fc, nil } -func (ec *executionContext) _MetricHistoPoint_min(ctx context.Context, field graphql.CollectedField, obj *model.MetricHistoPoint) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_MetricHistoPoint_min(ctx, field) +func (ec *executionContext) _MetricHistoPoint_bin(ctx context.Context, field graphql.CollectedField, obj *model.MetricHistoPoint) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_MetricHistoPoint_bin(ctx, field) if err != nil { return graphql.Null } @@ -6350,68 +6350,21 @@ func (ec *executionContext) _MetricHistoPoint_min(ctx context.Context, field gra }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.Min, nil + return obj.Bin, nil }) if err != nil { ec.Error(ctx, err) return graphql.Null } if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } return graphql.Null } - res := resTmp.(int) + res := resTmp.(*int) fc.Result = res - return ec.marshalNInt2int(ctx, field.Selections, res) + return ec.marshalOInt2ᚖint(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_MetricHistoPoint_min(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { - fc = &graphql.FieldContext{ - Object: "MetricHistoPoint", - Field: field, - IsMethod: false, - IsResolver: false, - Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { - return nil, errors.New("field of type Int does not have child fields") - }, - } - return fc, nil -} - -func (ec *executionContext) _MetricHistoPoint_max(ctx context.Context, field graphql.CollectedField, obj *model.MetricHistoPoint) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_MetricHistoPoint_max(ctx, field) - if err != nil { - return graphql.Null - } - ctx = graphql.WithFieldContext(ctx, fc) - defer func() { - if r := recover(); r != nil { - ec.Error(ctx, ec.Recover(ctx, r)) - ret = graphql.Null - } - }() - resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { - ctx = rctx // use context from middleware stack in children - return obj.Max, nil - }) - if err != nil { - ec.Error(ctx, err) - return graphql.Null - } - if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } - return graphql.Null - } - res := resTmp.(int) - fc.Result = res - return ec.marshalNInt2int(ctx, field.Selections, res) -} - -func (ec *executionContext) fieldContext_MetricHistoPoint_max(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_MetricHistoPoint_bin(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "MetricHistoPoint", Field: field, @@ -6468,8 +6421,8 @@ func (ec *executionContext) fieldContext_MetricHistoPoint_count(ctx context.Cont return fc, nil } -func (ec *executionContext) _MetricHistoPoint_bin(ctx context.Context, field graphql.CollectedField, obj *model.MetricHistoPoint) (ret graphql.Marshaler) { - fc, err := ec.fieldContext_MetricHistoPoint_bin(ctx, field) +func (ec *executionContext) _MetricHistoPoint_min(ctx context.Context, field graphql.CollectedField, obj *model.MetricHistoPoint) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_MetricHistoPoint_min(ctx, field) if err != nil { return graphql.Null } @@ -6482,24 +6435,62 @@ func (ec *executionContext) _MetricHistoPoint_bin(ctx context.Context, field gra }() resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { ctx = rctx // use context from middleware stack in children - return obj.Bin, nil + return obj.Min, nil }) if err != nil { ec.Error(ctx, err) return graphql.Null } if resTmp == nil { - if !graphql.HasFieldError(ctx, fc) { - ec.Errorf(ctx, "must not be null") - } return graphql.Null } - res := resTmp.(int) + res := resTmp.(*int) fc.Result = res - return ec.marshalNInt2int(ctx, field.Selections, res) + return ec.marshalOInt2ᚖint(ctx, field.Selections, res) } -func (ec *executionContext) fieldContext_MetricHistoPoint_bin(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { +func (ec *executionContext) fieldContext_MetricHistoPoint_min(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "MetricHistoPoint", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Int does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _MetricHistoPoint_max(ctx context.Context, field graphql.CollectedField, obj *model.MetricHistoPoint) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_MetricHistoPoint_max(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.Max, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(*int) + fc.Result = res + return ec.marshalOInt2ᚖint(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_MetricHistoPoint_max(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { fc = &graphql.FieldContext{ Object: "MetricHistoPoint", Field: field, @@ -6636,14 +6627,14 @@ func (ec *executionContext) fieldContext_MetricHistoPoints_data(ctx context.Cont IsResolver: false, Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { switch field.Name { + case "bin": + return ec.fieldContext_MetricHistoPoint_bin(ctx, field) + case "count": + return ec.fieldContext_MetricHistoPoint_count(ctx, field) case "min": return ec.fieldContext_MetricHistoPoint_min(ctx, field) case "max": return ec.fieldContext_MetricHistoPoint_max(ctx, field) - case "count": - return ec.fieldContext_MetricHistoPoint_count(ctx, field) - case "bin": - return ec.fieldContext_MetricHistoPoint_bin(ctx, field) } return nil, fmt.Errorf("no field named %q was found under type MetricHistoPoint", field.Name) }, @@ -13542,26 +13533,17 @@ func (ec *executionContext) _MetricHistoPoint(ctx context.Context, sel ast.Selec switch field.Name { case "__typename": out.Values[i] = graphql.MarshalString("MetricHistoPoint") - case "min": - out.Values[i] = ec._MetricHistoPoint_min(ctx, field, obj) - if out.Values[i] == graphql.Null { - out.Invalids++ - } - case "max": - out.Values[i] = ec._MetricHistoPoint_max(ctx, field, obj) - if out.Values[i] == graphql.Null { - out.Invalids++ - } + case "bin": + out.Values[i] = ec._MetricHistoPoint_bin(ctx, field, obj) case "count": out.Values[i] = ec._MetricHistoPoint_count(ctx, field, obj) if out.Values[i] == graphql.Null { out.Invalids++ } - case "bin": - out.Values[i] = ec._MetricHistoPoint_bin(ctx, field, obj) - if out.Values[i] == graphql.Null { - out.Invalids++ - } + case "min": + out.Values[i] = ec._MetricHistoPoint_min(ctx, field, obj) + case "max": + out.Values[i] = ec._MetricHistoPoint_max(ctx, field, obj) default: panic("unknown field " + strconv.Quote(field.Name)) } diff --git a/internal/graph/model/models_gen.go b/internal/graph/model/models_gen.go index eb35bda..7b8ebd2 100644 --- a/internal/graph/model/models_gen.go +++ b/internal/graph/model/models_gen.go @@ -110,10 +110,10 @@ type MetricFootprints struct { } type MetricHistoPoint struct { - Min int `json:"min"` - Max int `json:"max"` - Count int `json:"count"` - Bin int `json:"bin"` + Bin *int `json:"bin,omitempty"` + Count int `json:"count"` + Min *int `json:"min,omitempty"` + Max *int `json:"max,omitempty"` } type MetricHistoPoints struct { diff --git a/internal/repository/query.go b/internal/repository/query.go index 84b8048..317302b 100644 --- a/internal/repository/query.go +++ b/internal/repository/query.go @@ -96,7 +96,7 @@ func SecurityCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilde user := GetUserFromContext(ctx) if user == nil { var qnil sq.SelectBuilder - return qnil, fmt.Errorf("user context is nil!") + return qnil, fmt.Errorf("user context is nil") } else if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleApi}) { // Admin & Co. : All jobs return query, nil } else if user.HasRole(schema.RoleManager) { // Manager : Add filter for managed projects' jobs only + personal jobs diff --git a/internal/repository/stats.go b/internal/repository/stats.go index bd870a4..ab70427 100644 --- a/internal/repository/stats.go +++ b/internal/repository/stats.go @@ -460,13 +460,8 @@ func (r *JobRepository) AddMetricHistograms( stat *model.JobsStatistics) (*model.JobsStatistics, error) { start := time.Now() - for i, m := range metrics { - // DEBUG - fmt.Println(i, m) - var err error - var metricHisto *model.MetricHistoPoints - - metricHisto, err = r.jobsMetricStatisticsHistogram(ctx, m, filter) + for _, m := range metrics { + metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter) if err != nil { log.Warnf("Error while loading job metric statistics histogram: %s", m) continue @@ -529,6 +524,12 @@ func (r *JobRepository) jobsMetricStatisticsHistogram( dbMetric = "flops_any_avg" case "mem_bw": dbMetric = "mem_bw_avg" + case "mem_used": + dbMetric = "mem_used_max" + case "net_bw": + dbMetric = "net_bw_avg" + case "file_bw": + dbMetric = "file_bw_avg" default: return nil, fmt.Errorf("%s not implemented", metric) } @@ -562,46 +563,67 @@ func (r *JobRepository) jobsMetricStatisticsHistogram( } } + // log.Debugf("Metric %s: DB %s, Peak %f, Unit %s", metric, dbMetric, peak, unit) // Make bins, see https://jereze.com/code/sql-histogram/ - // Diffs: - // CAST(X AS INTEGER) instead of floor(X), used also for for Min , Max selection - // renamed to bin for simplicity and model struct - // Ditched rename from job to data, as it conflicts with security check afterwards - start := time.Now() - prepQuery := sq.Select( - fmt.Sprintf(`CAST(min(job.%s) as INTEGER) as min`, dbMetric), - fmt.Sprintf(`CAST(max(job.%s) as INTEGER) as max`, dbMetric), - fmt.Sprintf(`count(job.%s) as count`, dbMetric), - fmt.Sprintf(`CAST((case when job.%s = value.max then value.max*0.999999999 else job.%s end - value.min) / (value.max - value.min) * 10 as INTEGER) +1 as bin`, dbMetric, dbMetric)) - prepQuery = prepQuery.From("job") - prepQuery = prepQuery.CrossJoin(fmt.Sprintf(`(select max(%s) as max, min(%s) as min from job where %s is not null and %s < %f) as value`, dbMetric, dbMetric, dbMetric, dbMetric, peak)) - prepQuery = prepQuery.Where(fmt.Sprintf(`job.%s is not null and job.%s < %f`, dbMetric, dbMetric, peak)) - query, qerr := SecurityCheck(ctx, prepQuery) + start := time.Now() + + crossJoinQuery := sq.Select( + fmt.Sprintf(`max(%s) as max`, dbMetric), + fmt.Sprintf(`min(%s) as min`, dbMetric), + ).From("job").Where( + fmt.Sprintf(`%s is not null`, dbMetric), + ).Where( + fmt.Sprintf(`%s <= %f`, dbMetric, peak), + ) + + crossJoinQuery, cjqerr := SecurityCheck(ctx, crossJoinQuery) + if cjqerr != nil { + return nil, cjqerr + } + + crossJoinQuerySql, _, sqlerr := crossJoinQuery.ToSql() + if sqlerr != nil { + return nil, sqlerr + } + + bins := 10 + binQuery := fmt.Sprintf(`CAST( (case when job.%s = value.max then value.max*0.999999999 else job.%s end - value.min) / (value.max - value.min) * %d as INTEGER )`, dbMetric, dbMetric, bins) + + mainQuery := sq.Select( + fmt.Sprintf(`%s + 1 as bin`, binQuery), + fmt.Sprintf(`count(job.%s) as count`, dbMetric), + fmt.Sprintf(`CAST(((value.max / %d) * (%s )) as INTEGER ) as min`, bins, binQuery), + fmt.Sprintf(`CAST(((value.max / %d) * (%s + 1 )) as INTEGER ) as max`, bins, binQuery), + ).From("job").CrossJoin( + fmt.Sprintf(`(%s) as value`, crossJoinQuerySql), + ).Where(fmt.Sprintf(`job.%s is not null and job.%s <= %f`, dbMetric, dbMetric, peak)) + + mainQuery, qerr := SecurityCheck(ctx, mainQuery) if qerr != nil { return nil, qerr } for _, f := range filters { - query = BuildWhereClause(f, query) + mainQuery = BuildWhereClause(f, mainQuery) } // Finalize query with Grouping and Ordering - query = query.GroupBy("bin").OrderBy("bin") + mainQuery = mainQuery.GroupBy("bin").OrderBy("bin") - rows, err := query.RunWith(r.DB).Query() + rows, err := mainQuery.RunWith(r.DB).Query() if err != nil { - log.Errorf("Error while running query: %s", err) + log.Errorf("Error while running mainQuery: %s", err) return nil, err } points := make([]*model.MetricHistoPoint, 0) for rows.Next() { point := model.MetricHistoPoint{} - if err := rows.Scan(&point.Min, &point.Max, &point.Count, &point.Bin); err != nil { - log.Warn("Error while scanning rows") - return nil, err + if err := rows.Scan(&point.Bin, &point.Count, &point.Min, &point.Max); err != nil { + log.Warnf("Error while scanning rows for %s", metric) + return nil, err // Totally bricks cc-backend if returned and if all metrics requested? } points = append(points, &point) diff --git a/web/frontend/src/HistogramSelection.svelte b/web/frontend/src/HistogramSelection.svelte index afef8c7..142f678 100644 --- a/web/frontend/src/HistogramSelection.svelte +++ b/web/frontend/src/HistogramSelection.svelte @@ -4,10 +4,10 @@ import { gql, getContextClient , mutationStore } from '@urql/svelte' export let cluster - export let availableMetrics = ['cpu_load', 'flops_any', 'mem_bw'] export let metricsInHistograms export let isOpen + let availableMetrics = ['cpu_load', 'flops_any', 'mem_used', 'mem_bw', 'net_bw', 'file_bw'] let pendingMetrics = [...metricsInHistograms] // Copy const client = getContextClient() diff --git a/web/frontend/src/User.root.svelte b/web/frontend/src/User.root.svelte index e216aa6..a26c1aa 100644 --- a/web/frontend/src/User.root.svelte +++ b/web/frontend/src/User.root.svelte @@ -44,7 +44,7 @@ histNumNodes { count, value } histMetrics { metric, unit, data { min, max, count, bin } } }}`, - variables: { jobFilters, metricsInHistograms} + variables: { jobFilters, metricsInHistograms } }) onMount(() => filterComponent.update()) diff --git a/web/frontend/src/utils.js b/web/frontend/src/utils.js index 537ad3f..794a23a 100644 --- a/web/frontend/src/utils.js +++ b/web/frontend/src/utils.js @@ -316,16 +316,18 @@ export function checkMetricDisabled(m, c, s) { //[m]etric, [c]luster, [s]ubclust } export function convert2uplot(canvasData) { - // initial use: Canvas Histogram Data to Uplot + // Prep: Uplot Data Structure let uplotData = [[],[]] // [X, Y1, Y2, ...] + // MetricHisto Only: Check if 1st bin not-null -> Set 0-Value bin for scaling + // Else: Only Single 0-Value bin returned -> No reset required + if (canvasData[0]?.bin) { + uplotData[0].push(0) + uplotData[1].push(0) + } + // Iterate canvasData.forEach( cd => { - if (cd.bin) { // MetricHisto Datafromat - // Force Zero Entry for scaling - if (uplotData[0].length == 0) { - uplotData[0].push(0) - uplotData[1].push(0) - } - uplotData[0].push(cd.max) + if (Object.keys(cd).length == 4) { // MetricHisto Datafromat + uplotData[0].push(cd?.max ? cd.max : 0) uplotData[1].push(cd.count) } else { // Default uplotData[0].push(cd.value)