mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-06-17 08:57:30 +02:00
Compare commits
4 Commits
main
...
feature/52
| Author | SHA1 | Date | |
|---|---|---|---|
|
5d8d4e228e
|
|||
| 0c56591e4b | |||
|
0069c86e81
|
|||
|
c0d2d65f96
|
@@ -250,6 +250,12 @@ type TimeWeights {
|
||||
coreHours: [NullableFloat!]!
|
||||
}
|
||||
|
||||
enum ResampleAlgo {
|
||||
LTTB
|
||||
AVERAGE
|
||||
SIMPLE
|
||||
}
|
||||
|
||||
enum Aggregate {
|
||||
USER
|
||||
PROJECT
|
||||
@@ -340,6 +346,7 @@ type Query {
|
||||
metrics: [String!]
|
||||
scopes: [MetricScope!]
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): [JobMetricWithName!]!
|
||||
|
||||
jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
|
||||
@@ -399,6 +406,7 @@ type Query {
|
||||
to: Time!
|
||||
page: PageRequest
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): NodesResultList!
|
||||
|
||||
clusterMetrics(
|
||||
|
||||
@@ -34,12 +34,8 @@ const configString = `
|
||||
"addr": "127.0.0.1:8080",
|
||||
"short-running-jobs-duration": 300,
|
||||
"resampling": {
|
||||
"minimum-points": 600,
|
||||
"trigger": 300,
|
||||
"resolutions": [
|
||||
240,
|
||||
60
|
||||
]
|
||||
"default-policy": "medium",
|
||||
"default-algo": "lttb"
|
||||
},
|
||||
"api-allowed-ips": [
|
||||
"*"
|
||||
|
||||
@@ -14,9 +14,8 @@
|
||||
"target-path": "./var/nodestate-archive"
|
||||
},
|
||||
"resampling": {
|
||||
"minimum-points": 600,
|
||||
"trigger": 180,
|
||||
"resolutions": [240, 60]
|
||||
"default-policy": "medium",
|
||||
"default-algo": "lttb"
|
||||
},
|
||||
"api-subjects": {
|
||||
"subject-job-event": "cc.job.event",
|
||||
|
||||
@@ -356,7 +356,7 @@ func TestRestApi(t *testing.T) {
|
||||
}
|
||||
|
||||
t.Run("CheckArchive", func(t *testing.T) {
|
||||
data, err := metricdispatch.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
|
||||
data, err := metricdispatch.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60, "")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -309,7 +309,7 @@ func (api *RestAPI) getCompleteJobByID(rw http.ResponseWriter, r *http.Request)
|
||||
}
|
||||
|
||||
if r.URL.Query().Get("all-metrics") == "true" {
|
||||
data, err = metricdispatch.LoadData(job, nil, scopes, r.Context(), resolution)
|
||||
data, err = metricdispatch.LoadData(job, nil, scopes, r.Context(), resolution, "")
|
||||
if err != nil {
|
||||
cclog.Warnf("REST: error while loading all-metrics job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
@@ -405,7 +405,7 @@ func (api *RestAPI) getJobByID(rw http.ResponseWriter, r *http.Request) {
|
||||
resolution = max(resolution, mc.Timestep)
|
||||
}
|
||||
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, r.Context(), resolution)
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, r.Context(), resolution, "")
|
||||
if err != nil {
|
||||
cclog.Warnf("REST: error while loading job data for JobID %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
@@ -1086,7 +1086,7 @@ func (api *RestAPI) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
resolver := graph.GetResolverInstance()
|
||||
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
|
||||
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil, nil)
|
||||
if err != nil {
|
||||
if err := json.NewEncoder(rw).Encode(Response{
|
||||
Error: &struct {
|
||||
|
||||
@@ -59,7 +59,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.Job, error) {
|
||||
scopes = append(scopes, schema.MetricScopeAccelerator)
|
||||
}
|
||||
|
||||
jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resulotion-Value retrieves highest res (60s)
|
||||
jobData, err := metricdispatch.LoadData(job, allMetrics, scopes, ctx, 0, "") // 0 Resolution-Value retrieves highest res (60s)
|
||||
if err != nil {
|
||||
cclog.Error("Error wile loading job data for archiving")
|
||||
return nil, err
|
||||
|
||||
@@ -106,12 +106,12 @@ type NodeStateRetention struct {
|
||||
}
|
||||
|
||||
type ResampleConfig struct {
|
||||
// Minimum number of points to trigger resampling of data
|
||||
MinimumPoints int `json:"minimum-points"`
|
||||
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
|
||||
Resolutions []int `json:"resolutions"`
|
||||
// Trigger next zoom level at less than this many visible datapoints
|
||||
Trigger int `json:"trigger"`
|
||||
// Default resample policy when no user preference is set ("low", "medium", "high")
|
||||
DefaultPolicy string `json:"default-policy"`
|
||||
// Default resample algorithm when no user preference is set ("lttb", "average", "simple")
|
||||
DefaultAlgo string `json:"default-algo"`
|
||||
// Policy-derived target point count (set dynamically from user preference, not from config.json)
|
||||
TargetPoints int `json:"targetPoints,omitempty"`
|
||||
}
|
||||
|
||||
type NATSConfig struct {
|
||||
@@ -155,7 +155,24 @@ func Init(mainConfig json.RawMessage) {
|
||||
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
|
||||
}
|
||||
|
||||
if Keys.EnableResampling != nil && Keys.EnableResampling.MinimumPoints > 0 {
|
||||
resampler.SetMinimumRequiredPoints(Keys.EnableResampling.MinimumPoints)
|
||||
if Keys.EnableResampling != nil {
|
||||
policy := Keys.EnableResampling.DefaultPolicy
|
||||
if policy == "" {
|
||||
policy = "medium"
|
||||
}
|
||||
resampler.SetMinimumRequiredPoints(targetPointsForPolicy(policy))
|
||||
}
|
||||
}
|
||||
|
||||
// targetPointsForPolicy maps a resample policy name to its target number of
// data points: "low" yields 200 and "high" yields 1000. Every other name,
// including "medium", yields 500.
func targetPointsForPolicy(policy string) int {
	if policy == "low" {
		return 200
	}
	if policy == "high" {
		return 1000
	}
	// "medium" and any unrecognized policy share the same point count.
	return 500
}
|
||||
|
||||
@@ -92,24 +92,18 @@ var configSchema = `
|
||||
"description": "Enable dynamic zoom in frontend metric plots.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"minimum-points": {
|
||||
"description": "Minimum points to trigger resampling of time-series data.",
|
||||
"type": "integer"
|
||||
"default-policy": {
|
||||
"description": "Default resample policy when no user preference is set.",
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"]
|
||||
},
|
||||
"trigger": {
|
||||
"description": "Trigger next zoom level at less than this many visible datapoints.",
|
||||
"type": "integer"
|
||||
},
|
||||
"resolutions": {
|
||||
"description": "Array of resampling target resolutions, in seconds.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "integer"
|
||||
"default-algo": {
|
||||
"description": "Default resample algorithm when no user preference is set.",
|
||||
"type": "string",
|
||||
"enum": ["lttb", "average", "simple"]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["trigger", "resolutions"]
|
||||
},
|
||||
"api-subjects": {
|
||||
"description": "NATS subjects configuration for subscribing to job and node events.",
|
||||
"type": "object",
|
||||
|
||||
@@ -327,7 +327,7 @@ type ComplexityRoot struct {
|
||||
Clusters func(childComplexity int) int
|
||||
GlobalMetrics func(childComplexity int) int
|
||||
Job func(childComplexity int, id string) int
|
||||
JobMetrics func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope, resolution *int) int
|
||||
JobMetrics func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope, resolution *int, resampleAlgo *model.ResampleAlgo) int
|
||||
JobStats func(childComplexity int, id string, metrics []string) int
|
||||
Jobs func(childComplexity int, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) int
|
||||
JobsFootprints func(childComplexity int, filter []*model.JobFilter, metrics []string) int
|
||||
@@ -335,7 +335,7 @@ type ComplexityRoot struct {
|
||||
JobsStatistics func(childComplexity int, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *string, numMetricBins *int) int
|
||||
Node func(childComplexity int, id string) int
|
||||
NodeMetrics func(childComplexity int, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) int
|
||||
NodeMetricsList func(childComplexity int, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) int
|
||||
NodeMetricsList func(childComplexity int, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int, resampleAlgo *model.ResampleAlgo) int
|
||||
NodeStates func(childComplexity int, filter []*model.NodeFilter) int
|
||||
NodeStatesTimed func(childComplexity int, filter []*model.NodeFilter, typeArg string) int
|
||||
Nodes func(childComplexity int, filter []*model.NodeFilter, order *model.OrderByInput) int
|
||||
@@ -483,7 +483,7 @@ type QueryResolver interface {
|
||||
NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error)
|
||||
NodeStatesTimed(ctx context.Context, filter []*model.NodeFilter, typeArg string) ([]*model.NodeStatesTimed, error)
|
||||
Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error)
|
||||
JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int, resampleAlgo *model.ResampleAlgo) ([]*model.JobMetricWithName, error)
|
||||
JobStats(ctx context.Context, id string, metrics []string) ([]*model.NamedStats, error)
|
||||
ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.NamedStatsWithScope, error)
|
||||
Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error)
|
||||
@@ -492,7 +492,7 @@ type QueryResolver interface {
|
||||
JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error)
|
||||
RooflineHeatmap(ctx context.Context, filter []*model.JobFilter, rows int, cols int, minX float64, minY float64, maxX float64, maxY float64) ([][]float64, error)
|
||||
NodeMetrics(ctx context.Context, cluster string, nodes []string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time) ([]*model.NodeMetrics, error)
|
||||
NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error)
|
||||
NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int, resampleAlgo *model.ResampleAlgo) (*model.NodesResultList, error)
|
||||
ClusterMetrics(ctx context.Context, cluster string, metrics []string, from time.Time, to time.Time) (*model.ClusterMetrics, error)
|
||||
}
|
||||
type SubClusterResolver interface {
|
||||
@@ -1666,7 +1666,7 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return e.ComplexityRoot.Query.JobMetrics(childComplexity, args["id"].(string), args["metrics"].([]string), args["scopes"].([]schema.MetricScope), args["resolution"].(*int)), true
|
||||
return e.ComplexityRoot.Query.JobMetrics(childComplexity, args["id"].(string), args["metrics"].([]string), args["scopes"].([]schema.MetricScope), args["resolution"].(*int), args["resampleAlgo"].(*model.ResampleAlgo)), true
|
||||
case "Query.jobStats":
|
||||
if e.ComplexityRoot.Query.JobStats == nil {
|
||||
break
|
||||
@@ -1754,7 +1754,7 @@ func (e *executableSchema) Complexity(ctx context.Context, typeName, field strin
|
||||
return 0, false
|
||||
}
|
||||
|
||||
return e.ComplexityRoot.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["stateFilter"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int)), true
|
||||
return e.ComplexityRoot.Query.NodeMetricsList(childComplexity, args["cluster"].(string), args["subCluster"].(string), args["stateFilter"].(string), args["nodeFilter"].(string), args["scopes"].([]schema.MetricScope), args["metrics"].([]string), args["from"].(time.Time), args["to"].(time.Time), args["page"].(*model.PageRequest), args["resolution"].(*int), args["resampleAlgo"].(*model.ResampleAlgo)), true
|
||||
case "Query.nodeStates":
|
||||
if e.ComplexityRoot.Query.NodeStates == nil {
|
||||
break
|
||||
@@ -2525,6 +2525,12 @@ type TimeWeights {
|
||||
coreHours: [NullableFloat!]!
|
||||
}
|
||||
|
||||
enum ResampleAlgo {
|
||||
LTTB
|
||||
AVERAGE
|
||||
SIMPLE
|
||||
}
|
||||
|
||||
enum Aggregate {
|
||||
USER
|
||||
PROJECT
|
||||
@@ -2615,6 +2621,7 @@ type Query {
|
||||
metrics: [String!]
|
||||
scopes: [MetricScope!]
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): [JobMetricWithName!]!
|
||||
|
||||
jobStats(id: ID!, metrics: [String!]): [NamedStats!]!
|
||||
@@ -2674,6 +2681,7 @@ type Query {
|
||||
to: Time!
|
||||
page: PageRequest
|
||||
resolution: Int
|
||||
resampleAlgo: ResampleAlgo
|
||||
): NodesResultList!
|
||||
|
||||
clusterMetrics(
|
||||
@@ -3882,6 +3890,11 @@ func (ec *executionContext) field_Query_jobMetrics_args(ctx context.Context, raw
|
||||
return nil, err
|
||||
}
|
||||
args["resolution"] = arg3
|
||||
arg4, err := graphql.ProcessArgField(ctx, rawArgs, "resampleAlgo", ec.unmarshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
args["resampleAlgo"] = arg4
|
||||
return args, nil
|
||||
}
|
||||
|
||||
@@ -4140,6 +4153,11 @@ func (ec *executionContext) field_Query_nodeMetricsList_args(ctx context.Context
|
||||
return nil, err
|
||||
}
|
||||
args["resolution"] = arg9
|
||||
arg10, err := graphql.ProcessArgField(ctx, rawArgs, "resampleAlgo", ec.unmarshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
args["resampleAlgo"] = arg10
|
||||
return args, nil
|
||||
}
|
||||
|
||||
@@ -9307,7 +9325,7 @@ func (ec *executionContext) _Query_jobMetrics(ctx context.Context, field graphql
|
||||
},
|
||||
func(ctx context.Context) (any, error) {
|
||||
fc := graphql.GetFieldContext(ctx)
|
||||
return ec.Resolvers.Query().JobMetrics(ctx, fc.Args["id"].(string), fc.Args["metrics"].([]string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["resolution"].(*int))
|
||||
return ec.Resolvers.Query().JobMetrics(ctx, fc.Args["id"].(string), fc.Args["metrics"].([]string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["resolution"].(*int), fc.Args["resampleAlgo"].(*model.ResampleAlgo))
|
||||
},
|
||||
nil,
|
||||
func(ctx context.Context, selections ast.SelectionSet, v []*model.JobMetricWithName) graphql.Marshaler {
|
||||
@@ -9703,7 +9721,7 @@ func (ec *executionContext) _Query_nodeMetricsList(ctx context.Context, field gr
|
||||
},
|
||||
func(ctx context.Context) (any, error) {
|
||||
fc := graphql.GetFieldContext(ctx)
|
||||
return ec.Resolvers.Query().NodeMetricsList(ctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["stateFilter"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int))
|
||||
return ec.Resolvers.Query().NodeMetricsList(ctx, fc.Args["cluster"].(string), fc.Args["subCluster"].(string), fc.Args["stateFilter"].(string), fc.Args["nodeFilter"].(string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["metrics"].([]string), fc.Args["from"].(time.Time), fc.Args["to"].(time.Time), fc.Args["page"].(*model.PageRequest), fc.Args["resolution"].(*int), fc.Args["resampleAlgo"].(*model.ResampleAlgo))
|
||||
},
|
||||
nil,
|
||||
func(ctx context.Context, selections ast.SelectionSet, v *model.NodesResultList) graphql.Marshaler {
|
||||
@@ -18679,6 +18697,22 @@ func (ec *executionContext) unmarshalOPageRequest2ᚖgithubᚗcomᚋClusterCockp
|
||||
return &res, graphql.ErrorOnPath(ctx, err)
|
||||
}
|
||||
|
||||
func (ec *executionContext) unmarshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo(ctx context.Context, v any) (*model.ResampleAlgo, error) {
|
||||
if v == nil {
|
||||
return nil, nil
|
||||
}
|
||||
var res = new(model.ResampleAlgo)
|
||||
err := res.UnmarshalGQL(v)
|
||||
return res, graphql.ErrorOnPath(ctx, err)
|
||||
}
|
||||
|
||||
func (ec *executionContext) marshalOResampleAlgo2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐResampleAlgo(ctx context.Context, sel ast.SelectionSet, v *model.ResampleAlgo) graphql.Marshaler {
|
||||
if v == nil {
|
||||
return graphql.Null
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
func (ec *executionContext) unmarshalOSchedulerState2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑlibᚋv2ᚋschemaᚐSchedulerState(ctx context.Context, v any) (*schema.SchedulerState, error) {
|
||||
if v == nil {
|
||||
return nil, nil
|
||||
|
||||
@@ -328,6 +328,63 @@ func (e Aggregate) MarshalJSON() ([]byte, error) {
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// ResampleAlgo is the Go representation of the GraphQL ResampleAlgo enum.
type ResampleAlgo string

// The valid ResampleAlgo values.
const (
	ResampleAlgoLttb    ResampleAlgo = "LTTB"
	ResampleAlgoAverage ResampleAlgo = "AVERAGE"
	ResampleAlgoSimple  ResampleAlgo = "SIMPLE"
)

// AllResampleAlgo lists every valid ResampleAlgo value.
var AllResampleAlgo = []ResampleAlgo{
	ResampleAlgoLttb,
	ResampleAlgoAverage,
	ResampleAlgoSimple,
}

// IsValid reports whether e equals one of the declared ResampleAlgo constants.
func (e ResampleAlgo) IsValid() bool {
	return e == ResampleAlgoLttb || e == ResampleAlgoAverage || e == ResampleAlgoSimple
}

// String returns the enum value as a plain string.
func (e ResampleAlgo) String() string {
	return string(e)
}

// UnmarshalGQL sets e from a GraphQL input value. It fails when v is not a
// string or does not name a valid ResampleAlgo. Note that e is assigned before
// validation, so it holds the rejected value when the second error is returned.
func (e *ResampleAlgo) UnmarshalGQL(v any) error {
	raw, isString := v.(string)
	if !isString {
		return fmt.Errorf("enums must be strings")
	}

	*e = ResampleAlgo(raw)
	if e.IsValid() {
		return nil
	}
	return fmt.Errorf("%s is not a valid ResampleAlgo", raw)
}

// MarshalGQL writes the enum value to w as a quoted string.
func (e ResampleAlgo) MarshalGQL(w io.Writer) {
	quoted := strconv.Quote(string(e))
	fmt.Fprint(w, quoted)
}

// UnmarshalJSON unquotes b and decodes the result through UnmarshalGQL.
func (e *ResampleAlgo) UnmarshalJSON(b []byte) error {
	unquoted, err := strconv.Unquote(string(b))
	if err != nil {
		return err
	}
	return e.UnmarshalGQL(unquoted)
}

// MarshalJSON returns the same quoted form that MarshalGQL writes.
func (e ResampleAlgo) MarshalJSON() ([]byte, error) {
	var out bytes.Buffer
	e.MarshalGQL(&out)
	return out.Bytes(), nil
}
|
||||
|
||||
type SortByAggregate string
|
||||
|
||||
const (
|
||||
|
||||
145
internal/graph/resample.go
Normal file
145
internal/graph/resample.go
Normal file
@@ -0,0 +1,145 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package graph
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
||||
)
|
||||
|
||||
// resolveResolutionFromPolicy reads the user's resample policy preference and
|
||||
// computes a resolution based on job duration and metric frequency. Returns nil
|
||||
// if the user has no policy set.
|
||||
func resolveResolutionFromPolicy(ctx context.Context, duration int64, cluster string, metrics []string) *int {
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
conf, err := repository.GetUserCfgRepo().GetUIConfig(user)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
policyVal, ok := conf["plotConfiguration_resamplePolicy"]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
policyStr, ok := policyVal.(string)
|
||||
if !ok || policyStr == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
policy := metricdispatch.ResamplePolicy(policyStr)
|
||||
targetPoints := metricdispatch.TargetPointsForPolicy(policy)
|
||||
if targetPoints == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Find the smallest metric frequency across the requested metrics
|
||||
frequency := smallestFrequency(cluster, metrics)
|
||||
if frequency <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
res := metricdispatch.ComputeResolution(duration, int64(frequency), targetPoints)
|
||||
return &res
|
||||
}
|
||||
|
||||
// resolveResampleAlgo returns the resampling algorithm name to use, checking
|
||||
// the explicit GraphQL parameter first, then the user's preference.
|
||||
func resolveResampleAlgo(ctx context.Context, resampleAlgo *model.ResampleAlgo) string {
|
||||
if resampleAlgo != nil {
|
||||
return strings.ToLower(resampleAlgo.String())
|
||||
}
|
||||
|
||||
user := repository.GetUserFromContext(ctx)
|
||||
if user == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
conf, err := repository.GetUserCfgRepo().GetUIConfig(user)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
algoVal, ok := conf["plotConfiguration_resampleAlgo"]
|
||||
if ok {
|
||||
if algoStr, ok := algoVal.(string); ok && algoStr != "" {
|
||||
return algoStr
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to global default algo
|
||||
if config.Keys.EnableResampling != nil && config.Keys.EnableResampling.DefaultAlgo != "" {
|
||||
return config.Keys.EnableResampling.DefaultAlgo
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// resolveResolutionFromDefaultPolicy computes a resolution using the global
|
||||
// default policy from config. Returns nil if no policy is configured.
|
||||
func resolveResolutionFromDefaultPolicy(duration int64, cluster string, metrics []string) *int {
|
||||
cfg := config.Keys.EnableResampling
|
||||
if cfg == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
policyStr := cfg.DefaultPolicy
|
||||
if policyStr == "" {
|
||||
policyStr = "medium"
|
||||
}
|
||||
|
||||
policy := metricdispatch.ResamplePolicy(policyStr)
|
||||
targetPoints := metricdispatch.TargetPointsForPolicy(policy)
|
||||
if targetPoints == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
frequency := smallestFrequency(cluster, metrics)
|
||||
if frequency <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
res := metricdispatch.ComputeResolution(duration, int64(frequency), targetPoints)
|
||||
return &res
|
||||
}
|
||||
|
||||
// smallestFrequency returns the smallest metric timestep (in seconds) among the
|
||||
// requested metrics for the given cluster. Falls back to 0 if nothing is found.
|
||||
func smallestFrequency(cluster string, metrics []string) int {
|
||||
cl := archive.GetCluster(cluster)
|
||||
if cl == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
minFreq := 0
|
||||
for _, mc := range cl.MetricConfig {
|
||||
if len(metrics) > 0 {
|
||||
found := false
|
||||
for _, m := range metrics {
|
||||
if mc.Name == m {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if minFreq == 0 || mc.Timestep < minFreq {
|
||||
minFreq = mc.Timestep
|
||||
}
|
||||
}
|
||||
|
||||
return minFreq
|
||||
}
|
||||
@@ -498,24 +498,30 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
|
||||
}
|
||||
|
||||
// JobMetrics is the resolver for the jobMetrics field.
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
|
||||
if resolution == nil { // Load from Config
|
||||
if config.Keys.EnableResampling != nil {
|
||||
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||
resolution = &defaultRes
|
||||
} else { // Set 0 (Loads configured metric timestep)
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int, resampleAlgo *model.ResampleAlgo) ([]*model.JobMetricWithName, error) {
|
||||
job, err := r.Query().Job(ctx, id)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while querying job for metrics")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, ctx, *resolution)
|
||||
// Resolve resolution: explicit param > user policy > global config > 0
|
||||
if resolution == nil {
|
||||
resolution = resolveResolutionFromPolicy(ctx, int64(job.Duration), job.Cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
if config.Keys.EnableResampling != nil {
|
||||
resolution = resolveResolutionFromDefaultPolicy(int64(job.Duration), job.Cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
}
|
||||
|
||||
algoName := resolveResampleAlgo(ctx, resampleAlgo)
|
||||
|
||||
data, err := metricdispatch.LoadData(job, metrics, scopes, ctx, *resolution, algoName)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while loading job data")
|
||||
return nil, err
|
||||
@@ -877,12 +883,17 @@ func (r *queryResolver) NodeMetrics(ctx context.Context, cluster string, nodes [
|
||||
}
|
||||
|
||||
// NodeMetricsList is the resolver for the nodeMetricsList field.
|
||||
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int) (*model.NodesResultList, error) {
|
||||
if resolution == nil { // Load from Config
|
||||
func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, subCluster string, stateFilter string, nodeFilter string, scopes []schema.MetricScope, metrics []string, from time.Time, to time.Time, page *model.PageRequest, resolution *int, resampleAlgo *model.ResampleAlgo) (*model.NodesResultList, error) {
|
||||
// Resolve resolution: explicit param > user policy > global config > 0
|
||||
duration := int64(to.Sub(from).Seconds())
|
||||
if resolution == nil {
|
||||
resolution = resolveResolutionFromPolicy(ctx, duration, cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
if config.Keys.EnableResampling != nil {
|
||||
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
|
||||
resolution = &defaultRes
|
||||
} else { // Set 0 (Loads configured metric timestep)
|
||||
resolution = resolveResolutionFromDefaultPolicy(duration, cluster, metrics)
|
||||
}
|
||||
if resolution == nil {
|
||||
defaultRes := 0
|
||||
resolution = &defaultRes
|
||||
}
|
||||
@@ -906,8 +917,10 @@ func (r *queryResolver) NodeMetricsList(ctx context.Context, cluster string, sub
|
||||
}
|
||||
}
|
||||
|
||||
algoName := resolveResampleAlgo(ctx, resampleAlgo)
|
||||
|
||||
// data -> map hostname:jobdata
|
||||
data, err := metricdispatch.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx)
|
||||
data, err := metricdispatch.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, *resolution, from, to, ctx, algoName)
|
||||
if err != nil {
|
||||
cclog.Warn("error while loading node data (Resolver.NodeMetricsList")
|
||||
return nil, err
|
||||
|
||||
@@ -55,7 +55,7 @@ func (r *queryResolver) rooflineHeatmap(
|
||||
// resolution = max(resolution, mc.Timestep)
|
||||
// }
|
||||
|
||||
jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
|
||||
jobdata, err := metricdispatch.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0, "")
|
||||
if err != nil {
|
||||
cclog.Warnf("Error while loading roofline metrics for job %d", *job.ID)
|
||||
return nil, err
|
||||
|
||||
@@ -62,9 +62,10 @@ func cacheKey(
|
||||
metrics []string,
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
resampleAlgo string,
|
||||
) string {
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
|
||||
*job.ID, job.State, metrics, scopes, resolution)
|
||||
return fmt.Sprintf("%d(%s):[%v],[%v]-%d-%s",
|
||||
*job.ID, job.State, metrics, scopes, resolution, resampleAlgo)
|
||||
}
|
||||
|
||||
// LoadData retrieves metric data for a job from the appropriate backend (memory store for running jobs,
|
||||
@@ -87,8 +88,9 @@ func LoadData(job *schema.Job,
|
||||
scopes []schema.MetricScope,
|
||||
ctx context.Context,
|
||||
resolution int,
|
||||
resampleAlgo string,
|
||||
) (schema.JobData, error) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ any, ttl time.Duration, size int) {
|
||||
data := cache.Get(cacheKey(job, metrics, scopes, resolution, resampleAlgo), func() (_ any, ttl time.Duration, size int) {
|
||||
var jd schema.JobData
|
||||
var err error
|
||||
|
||||
@@ -136,13 +138,17 @@ func LoadData(job *schema.Job,
|
||||
|
||||
jd = deepCopy(jdTemp)
|
||||
|
||||
// Resample archived data using Largest Triangle Three Bucket algorithm to reduce data points
|
||||
// to the requested resolution, improving transfer performance and client-side rendering.
|
||||
// Resample archived data to reduce data points to the requested resolution,
|
||||
// improving transfer performance and client-side rendering.
|
||||
resampleFn, rfErr := resampler.GetResampler(resampleAlgo)
|
||||
if rfErr != nil {
|
||||
return rfErr, 0, 0
|
||||
}
|
||||
for _, v := range jd {
|
||||
for _, v_ := range v {
|
||||
timestep := int64(0)
|
||||
for i := 0; i < len(v_.Series); i += 1 {
|
||||
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
|
||||
v_.Series[i].Data, timestep, err = resampleFn(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
|
||||
if err != nil {
|
||||
return err, 0, 0
|
||||
}
|
||||
@@ -414,6 +420,7 @@ func LoadNodeListData(
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
resampleAlgo string,
|
||||
) (map[string]schema.JobData, error) {
|
||||
if metrics == nil {
|
||||
for _, m := range archive.GetCluster(cluster).MetricConfig {
|
||||
@@ -428,7 +435,7 @@ func LoadNodeListData(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := ms.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx)
|
||||
data, err := ms.LoadNodeListData(cluster, subCluster, nodes, metrics, scopes, resolution, from, to, ctx, resampleAlgo)
|
||||
if err != nil {
|
||||
if len(data) != 0 {
|
||||
cclog.Warnf("partial error loading node list data from metric store for cluster %s, subcluster %s: %s",
|
||||
|
||||
@@ -51,7 +51,8 @@ type MetricDataRepository interface {
|
||||
scopes []schema.MetricScope,
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context) (map[string]schema.JobData, error)
|
||||
ctx context.Context,
|
||||
resampleAlgo string) (map[string]schema.JobData, error)
|
||||
|
||||
// HealthCheck evaluates the monitoring state for a set of nodes against expected metrics.
|
||||
HealthCheck(cluster string,
|
||||
|
||||
49
internal/metricdispatch/resamplepolicy.go
Normal file
49
internal/metricdispatch/resamplepolicy.go
Normal file
@@ -0,0 +1,49 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricdispatch
|
||||
|
||||
import "math"
|
||||
|
||||
type ResamplePolicy string
|
||||
|
||||
const (
|
||||
ResamplePolicyLow ResamplePolicy = "low"
|
||||
ResamplePolicyMedium ResamplePolicy = "medium"
|
||||
ResamplePolicyHigh ResamplePolicy = "high"
|
||||
)
|
||||
|
||||
// TargetPointsForPolicy returns the target number of data points for a given policy.
|
||||
func TargetPointsForPolicy(policy ResamplePolicy) int {
|
||||
switch policy {
|
||||
case ResamplePolicyLow:
|
||||
return 200
|
||||
case ResamplePolicyMedium:
|
||||
return 500
|
||||
case ResamplePolicyHigh:
|
||||
return 1000
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// ComputeResolution computes the resampling resolution in seconds for a given
// job duration, metric frequency, and target point count.
//
// It returns 0 when any input is zero or negative, and also when the total
// number of data points (duration / frequency, integer division) is already at
// or below targetPoints, i.e. when no resampling is needed. Otherwise the
// result is the smallest multiple of frequency that is greater than or equal
// to ceil(duration / targetPoints).
func ComputeResolution(duration int64, frequency int64, targetPoints int) int {
	if frequency <= 0 || targetPoints <= 0 || duration <= 0 {
		return 0
	}

	totalPoints := duration / frequency
	if totalPoints <= int64(targetPoints) {
		return 0
	}

	// Ideal spacing between points if exactly targetPoints were kept.
	targetRes := math.Ceil(float64(duration) / float64(targetPoints))

	// Round up to the nearest multiple of frequency.
	steps := int(math.Ceil(targetRes / float64(frequency)))

	return steps * int(frequency)
}
|
||||
68
internal/metricdispatch/resamplepolicy_test.go
Normal file
68
internal/metricdispatch/resamplepolicy_test.go
Normal file
@@ -0,0 +1,68 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
package metricdispatch
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTargetPointsForPolicy(t *testing.T) {
|
||||
tests := []struct {
|
||||
policy ResamplePolicy
|
||||
want int
|
||||
}{
|
||||
{ResamplePolicyLow, 200},
|
||||
{ResamplePolicyMedium, 500},
|
||||
{ResamplePolicyHigh, 1000},
|
||||
{ResamplePolicy("unknown"), 0},
|
||||
{ResamplePolicy(""), 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := TargetPointsForPolicy(tt.policy); got != tt.want {
|
||||
t.Errorf("TargetPointsForPolicy(%q) = %d, want %d", tt.policy, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeResolution(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
duration int64
|
||||
frequency int64
|
||||
targetPoints int
|
||||
want int
|
||||
}{
|
||||
// 24h job, 60s frequency, 1440 total points
|
||||
{"low_24h_60s", 86400, 60, 200, 480},
|
||||
{"medium_24h_60s", 86400, 60, 500, 180},
|
||||
{"high_24h_60s", 86400, 60, 1000, 120},
|
||||
|
||||
// 2h job, 60s frequency, 120 total points — no resampling needed
|
||||
{"low_2h_60s", 7200, 60, 200, 0},
|
||||
{"medium_2h_60s", 7200, 60, 500, 0},
|
||||
{"high_2h_60s", 7200, 60, 1000, 0},
|
||||
|
||||
// Edge: zero/negative inputs
|
||||
{"zero_duration", 0, 60, 200, 0},
|
||||
{"zero_frequency", 86400, 0, 200, 0},
|
||||
{"zero_target", 86400, 60, 0, 0},
|
||||
{"negative_duration", -100, 60, 200, 0},
|
||||
|
||||
// 12h job, 30s frequency, 1440 total points
|
||||
{"medium_12h_30s", 43200, 30, 500, 90},
|
||||
|
||||
// Exact fit: total points == target points
|
||||
{"exact_fit", 12000, 60, 200, 0},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := ComputeResolution(tt.duration, tt.frequency, tt.targetPoints)
|
||||
if got != tt.want {
|
||||
t.Errorf("ComputeResolution(%d, %d, %d) = %d, want %d",
|
||||
tt.duration, tt.frequency, tt.targetPoints, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -617,6 +617,7 @@ func (ccms *CCMetricStore) LoadNodeListData(
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
resampleAlgo string,
|
||||
) (map[string]schema.JobData, error) {
|
||||
queries, assignedScope, err := ccms.buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, resolution)
|
||||
if err != nil {
|
||||
|
||||
BIN
internal/repository/testdata/job.db
vendored
BIN
internal/repository/testdata/job.db
vendored
Binary file not shown.
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"github.com/ClusterCockpit/cc-backend/internal/config"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/metricdispatch"
|
||||
"github.com/ClusterCockpit/cc-backend/internal/repository"
|
||||
"github.com/ClusterCockpit/cc-backend/web"
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
@@ -496,13 +497,15 @@ func SetupRoutes(router chi.Router, buildInfo web.Build) {
|
||||
// Get Roles
|
||||
availableRoles, _ := schema.GetValidRolesMap(user)
|
||||
|
||||
resampling := resamplingForUser(conf)
|
||||
|
||||
page := web.Page{
|
||||
Title: title,
|
||||
User: *user,
|
||||
Roles: availableRoles,
|
||||
Build: buildInfo,
|
||||
Config: conf,
|
||||
Resampling: config.Keys.EnableResampling,
|
||||
Resampling: resampling,
|
||||
Infos: infos,
|
||||
}
|
||||
|
||||
@@ -589,3 +592,36 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
|
||||
web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Warning", MsgType: "alert-warning", Message: "Empty search", User: *user, Roles: availableRoles, Build: buildInfo})
|
||||
}
|
||||
}
|
||||
|
||||
// resamplingForUser returns a ResampleConfig that incorporates the user's
|
||||
// resample policy preference. If the user has a policy set, it creates a
|
||||
// policy-derived config with targetPoints and trigger. Otherwise falls back
|
||||
// to the global config.
|
||||
func resamplingForUser(conf map[string]any) *config.ResampleConfig {
|
||||
globalCfg := config.Keys.EnableResampling
|
||||
|
||||
policyStr := ""
|
||||
if policyVal, ok := conf["plotConfiguration_resamplePolicy"]; ok {
|
||||
if s, ok := policyVal.(string); ok {
|
||||
policyStr = s
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to global default policy, then to "medium"
|
||||
if policyStr == "" && globalCfg != nil {
|
||||
policyStr = globalCfg.DefaultPolicy
|
||||
}
|
||||
if policyStr == "" {
|
||||
policyStr = "medium"
|
||||
}
|
||||
|
||||
policy := metricdispatch.ResamplePolicy(policyStr)
|
||||
targetPoints := metricdispatch.TargetPointsForPolicy(policy)
|
||||
if targetPoints == 0 {
|
||||
return globalCfg
|
||||
}
|
||||
|
||||
return &config.ResampleConfig{
|
||||
TargetPoints: targetPoints,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,6 +59,7 @@ type APIQueryRequest struct {
|
||||
WithStats bool `json:"with-stats"`
|
||||
WithData bool `json:"with-data"`
|
||||
WithPadding bool `json:"with-padding"`
|
||||
ResampleAlgo string `json:"resample-algo,omitempty"`
|
||||
}
|
||||
|
||||
// APIQueryResponse represents the response to an APIQueryRequest.
|
||||
@@ -279,7 +280,7 @@ func FetchData(req APIQueryRequest) (*APIQueryResponse, error) {
|
||||
for _, sel := range sels {
|
||||
data := APIMetricData{}
|
||||
|
||||
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
|
||||
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution, req.ResampleAlgo)
|
||||
if err != nil {
|
||||
// Skip Error If Just Missing Host or Metric, Continue
|
||||
// Empty Return For Metric Handled Gracefully By Frontend
|
||||
|
||||
@@ -701,7 +701,7 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric
|
||||
// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
|
||||
// The second and third return value are the actual from/to for the data. Those can be different from
|
||||
// the range asked for if no data was available.
|
||||
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
|
||||
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64, resampleAlgo string) ([]schema.Float, int64, int64, int64, error) {
|
||||
if from > to {
|
||||
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range")
|
||||
}
|
||||
@@ -759,7 +759,11 @@ func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, reso
|
||||
}
|
||||
}
|
||||
|
||||
data, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)
|
||||
resampleFn, rfErr := resampler.GetResampler(resampleAlgo)
|
||||
if rfErr != nil {
|
||||
return nil, 0, 0, 0, rfErr
|
||||
}
|
||||
data, resolution, err = resampleFn(data, minfo.Frequency, resolution)
|
||||
if err != nil {
|
||||
return nil, 0, 0, 0, err
|
||||
}
|
||||
|
||||
@@ -621,6 +621,7 @@ func (ccms *InternalMetricStore) LoadNodeListData(
|
||||
resolution int,
|
||||
from, to time.Time,
|
||||
ctx context.Context,
|
||||
resampleAlgo string,
|
||||
) (map[string]schema.JobData, error) {
|
||||
// Note: Order of node data is not guaranteed after this point
|
||||
queries, assignedScope, err := buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution))
|
||||
@@ -642,6 +643,7 @@ func (ccms *InternalMetricStore) LoadNodeListData(
|
||||
To: to.Unix(),
|
||||
WithStats: true,
|
||||
WithData: true,
|
||||
ResampleAlgo: resampleAlgo,
|
||||
}
|
||||
|
||||
resBody, err := FetchData(req)
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
import Options from "./admin/Options.svelte";
|
||||
import NoticeEdit from "./admin/NoticeEdit.svelte";
|
||||
import RunTaggers from "./admin/RunTaggers.svelte";
|
||||
import PlotRenderOptions from "./user/PlotRenderOptions.svelte";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
let {
|
||||
@@ -29,6 +30,8 @@
|
||||
/* State Init */
|
||||
let users = $state([]);
|
||||
let roles = $state([]);
|
||||
let message = $state({ msg: "", target: "", color: "#d63384" });
|
||||
let displayMessage = $state(false);
|
||||
|
||||
/* Functions */
|
||||
function getUserList() {
|
||||
@@ -52,6 +55,37 @@
|
||||
getValidRoles();
|
||||
}
|
||||
|
||||
/**
 * Submits a settings form via fetch instead of a native page submit and
 * reports the outcome through popMessage.
 *
 * @param {Event} event - The submit event; its default action is prevented.
 * @param {{selector: string, target: string}} setting - `selector` is the CSS
 *   selector of the form to submit, `target` is the message target passed on
 *   to popMessage.
 * @returns {Promise<boolean>} Always resolves to false.
 */
async function handleSettingSubmit(event, setting) {
  event.preventDefault();

  const { selector, target } = setting;
  const form = document.querySelector(selector);
  const formData = new FormData(form);

  try {
    const res = await fetch(form.action, { method: "POST", body: formData });
    const text = await res.text();
    if (!res.ok) {
      // Non-2xx responses are reported through the catch branch below.
      throw new Error("Response Code " + res.status + "-> " + text);
    }
    popMessage(text, target, "#048109");
  } catch (err) {
    popMessage(err, target, "#d63384");
  }

  return false;
}
|
||||
|
||||
/**
 * Stores a status message and shows it for 3.5 seconds.
 *
 * @param {*} response - Message content, stored as `message.msg`.
 * @param {string} restarget - Message target, stored as `message.target`.
 * @param {string} rescolor - Color, stored as `message.color`.
 */
function popMessage(response, restarget, rescolor) {
  message = { msg: response, target: restarget, color: rescolor };
  displayMessage = true;
  // Hide the message again after 3500 ms.
  setTimeout(() => {
    displayMessage = false;
  }, 3500);
}
|
||||
|
||||
/* on Mount */
|
||||
onMount(() => initAdmin());
|
||||
</script>
|
||||
@@ -73,3 +107,4 @@
|
||||
<NoticeEdit {ncontent}/>
|
||||
<RunTaggers />
|
||||
</Row>
|
||||
<PlotRenderOptions config={ccconfig} bind:message bind:displayMessage updateSetting={(e, newSetting) => handleSettingSubmit(e, newSetting)}/>
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
Card,
|
||||
CardTitle,
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import { getContext } from "svelte";
|
||||
import { fade } from "svelte/transition";
|
||||
|
||||
/* Svelte 5 Props */
|
||||
@@ -25,6 +26,8 @@
|
||||
displayMessage = $bindable(),
|
||||
updateSetting
|
||||
} = $props();
|
||||
|
||||
const resampleConfig = getContext("resampling");
|
||||
</script>
|
||||
|
||||
<Row cols={3} class="p-2 g-2">
|
||||
@@ -64,7 +67,7 @@
|
||||
id="lwvalue"
|
||||
name="value"
|
||||
aria-describedby="lineWidthHelp"
|
||||
value={config.plotConfiguration_lineWidth}
|
||||
value={config?.plotConfiguration_lineWidth}
|
||||
min="1"
|
||||
/>
|
||||
<div id="lineWidthHelp" class="form-text">
|
||||
@@ -111,7 +114,7 @@
|
||||
id="pprvalue"
|
||||
name="value"
|
||||
aria-describedby="plotsperrowHelp"
|
||||
value={config.plotConfiguration_plotsPerRow}
|
||||
value={config?.plotConfiguration_plotsPerRow}
|
||||
min="1"
|
||||
/>
|
||||
<div id="plotsperrowHelp" class="form-text">
|
||||
@@ -153,7 +156,7 @@
|
||||
<input type="hidden" name="key" value="plotConfiguration_colorBackground" />
|
||||
<div class="mb-3">
|
||||
<div>
|
||||
{#if config.plotConfiguration_colorBackground}
|
||||
{#if config?.plotConfiguration_colorBackground}
|
||||
<input type="radio" id="colb-true-checked" name="value" value="true" checked />
|
||||
{:else}
|
||||
<input type="radio" id="colb-true" name="value" value="true" />
|
||||
@@ -161,7 +164,7 @@
|
||||
<label for="true">Yes</label>
|
||||
</div>
|
||||
<div>
|
||||
{#if config.plotConfiguration_colorBackground}
|
||||
{#if config?.plotConfiguration_colorBackground}
|
||||
<input type="radio" id="colb-false" name="value" value="false" />
|
||||
{:else}
|
||||
<input type="radio" id="colb-false-checked" name="value" value="false" checked />
|
||||
@@ -219,4 +222,90 @@
|
||||
</form>
|
||||
</Card>
|
||||
</Col>
|
||||
|
||||
<!-- RESAMPLE POLICY -->
|
||||
<Col>
|
||||
<Card class="h-100">
|
||||
<form
|
||||
id="resample-policy-form"
|
||||
method="post"
|
||||
action="/frontend/configuration/"
|
||||
class="card-body"
|
||||
onsubmit={(e) => updateSetting(e, {
|
||||
selector: "#resample-policy-form",
|
||||
target: "rsp",
|
||||
})}
|
||||
>
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Resample Policy</div>
|
||||
{#if displayMessage && message.target == "rsp"}
|
||||
<div style="margin-left: auto; font-size: 0.9em;">
|
||||
<code style="color: {message.color};" out:fade>
|
||||
Update: {message.msg}
|
||||
</code>
|
||||
</div>
|
||||
{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plotConfiguration_resamplePolicy" />
|
||||
<div class="mb-3">
|
||||
{#each [["", "Default"], ["low", "Low"], ["medium", "Medium"], ["high", "High"]] as [val, label]}
|
||||
<div>
|
||||
<input type="radio" id="rsp-{val || 'default'}" name="value" value={JSON.stringify(val)}
|
||||
checked={(!config?.plotConfiguration_resamplePolicy && val === "") || config?.plotConfiguration_resamplePolicy === val} />
|
||||
<label for="rsp-{val || 'default'}">{label}</label>
|
||||
</div>
|
||||
{/each}
|
||||
<div id="resamplePolicyHelp" class="form-text">
|
||||
Controls how many data points are shown in metric plots. Low = fast overview (~200 points), Medium = balanced (~500), High = maximum detail (~1000).
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card>
|
||||
</Col>
|
||||
|
||||
<!-- RESAMPLE ALGORITHM -->
|
||||
<Col>
|
||||
<Card class="h-100">
|
||||
<form
|
||||
id="resample-algo-form"
|
||||
method="post"
|
||||
action="/frontend/configuration/"
|
||||
class="card-body"
|
||||
onsubmit={(e) => updateSetting(e, {
|
||||
selector: "#resample-algo-form",
|
||||
target: "rsa",
|
||||
})}
|
||||
>
|
||||
<CardTitle
|
||||
style="margin-bottom: 1em; display: flex; align-items: center;"
|
||||
>
|
||||
<div>Resample Algorithm</div>
|
||||
{#if displayMessage && message.target == "rsa"}
|
||||
<div style="margin-left: auto; font-size: 0.9em;">
|
||||
<code style="color: {message.color};" out:fade>
|
||||
Update: {message.msg}
|
||||
</code>
|
||||
</div>
|
||||
{/if}
|
||||
</CardTitle>
|
||||
<input type="hidden" name="key" value="plotConfiguration_resampleAlgo" />
|
||||
<div class="mb-3">
|
||||
{#each [["", "Default"], ["lttb", "LTTB"], ["average", "Average"], ["simple", "Simple"]] as [val, label]}
|
||||
<div>
|
||||
<input type="radio" id="rsa-{val || 'default'}" name="value" value={JSON.stringify(val)}
|
||||
checked={(!config?.plotConfiguration_resampleAlgo && val === "") || config?.plotConfiguration_resampleAlgo === val} />
|
||||
<label for="rsa-{val || 'default'}">{label}</label>
|
||||
</div>
|
||||
{/each}
|
||||
<div id="resampleAlgoHelp" class="form-text">
|
||||
Algorithm used when downsampling time-series data. LTTB preserves visual shape, Average smooths data, Simple picks every Nth point.
|
||||
</div>
|
||||
</div>
|
||||
<Button color="primary" type="submit">Submit</Button>
|
||||
</form>
|
||||
</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
@@ -73,9 +73,10 @@
|
||||
const subClusterTopology = $derived(getContext("getHardwareTopology")(cluster, subCluster));
|
||||
const metricConfig = $derived(getContext("getMetricConfig")(cluster, subCluster, metric));
|
||||
const usesMeanStatsSeries = $derived((statisticsSeries?.mean && statisticsSeries.mean.length != 0));
|
||||
const resampleTrigger = $derived(resampleConfig?.trigger ? Number(resampleConfig.trigger) : null);
|
||||
const resampleTrigger = $derived(resampleConfig?.trigger ? Number(resampleConfig.trigger) : (resampleConfig?.targetPoints ? Math.floor(resampleConfig.targetPoints / 4) : null));
|
||||
const resampleResolutions = $derived(resampleConfig?.resolutions ? [...resampleConfig.resolutions] : null);
|
||||
const resampleMinimum = $derived(resampleConfig?.resolutions ? Math.min(...resampleConfig.resolutions) : null);
|
||||
const resampleTargetPoints = $derived(resampleConfig?.targetPoints ? Number(resampleConfig.targetPoints) : null);
|
||||
const useStatsSeries = $derived(!!statisticsSeries); // Display Stats Series By Default if Exists
|
||||
const thresholds = $derived(findJobAggregationThresholds(
|
||||
subClusterTopology,
|
||||
@@ -515,24 +516,29 @@
|
||||
if (resampleConfig && !forNode && key === 'x') {
|
||||
const numX = (u.series[0].idxs[1] - u.series[0].idxs[0])
|
||||
if (numX <= resampleTrigger && timestep !== resampleMinimum) {
|
||||
/* Get closest zoom level; prevents multiple iterative zoom requests for big zoom-steps (e.g. 600 -> 300 -> 120 -> 60) */
|
||||
// Which resolution to theoretically request to achieve 30 or more visible data points:
|
||||
const target = (numX * timestep) / resampleTrigger
|
||||
// Which configured resolution actually matches the closest to theoretical target:
|
||||
const closest = resampleResolutions.reduce(function(prev, curr) {
|
||||
let newRes;
|
||||
if (resampleTargetPoints && !resampleResolutions) {
|
||||
// Policy-based: compute resolution dynamically from visible window
|
||||
const visibleDuration = (u.scales.x.max - u.scales.x.min);
|
||||
const nativeTimestep = metricConfig?.timestep || timestep;
|
||||
newRes = Math.ceil(visibleDuration / resampleTargetPoints / nativeTimestep) * nativeTimestep;
|
||||
if (newRes < nativeTimestep) newRes = nativeTimestep;
|
||||
} else if (resampleResolutions) {
|
||||
// Array-based: find closest configured resolution
|
||||
const target = (numX * timestep) / resampleTrigger;
|
||||
newRes = resampleResolutions.reduce(function(prev, curr) {
|
||||
return (Math.abs(curr - target) < Math.abs(prev - target) ? curr : prev);
|
||||
});
|
||||
}
|
||||
// Prevents non-required dispatches
|
||||
if (timestep !== closest) {
|
||||
// console.log('Dispatch: Zoom with Res from / to', timestep, closest)
|
||||
if (newRes && timestep !== newRes) {
|
||||
onZoom({
|
||||
newRes: closest,
|
||||
newRes: newRes,
|
||||
lastZoomState: u?.scales,
|
||||
lastThreshold: thresholds?.normal
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// console.log('Dispatch: Zoom Update States')
|
||||
onZoom({
|
||||
lastZoomState: u?.scales,
|
||||
lastThreshold: thresholds?.normal
|
||||
|
||||
@@ -72,6 +72,8 @@ type PlotConfiguration struct {
|
||||
PlotsPerRow int `json:"plots-per-row"`
|
||||
LineWidth int `json:"line-width"`
|
||||
ColorScheme []string `json:"color-scheme"`
|
||||
ResampleAlgo string `json:"resample-algo"`
|
||||
ResamplePolicy string `json:"resample-policy"`
|
||||
}
|
||||
|
||||
var UIDefaults = WebConfig{
|
||||
@@ -144,6 +146,8 @@ func Init(rawConfig json.RawMessage) error {
|
||||
UIDefaultsMap["plotConfiguration_plotsPerRow"] = UIDefaults.PlotConfiguration.PlotsPerRow
|
||||
UIDefaultsMap["plotConfiguration_lineWidth"] = UIDefaults.PlotConfiguration.LineWidth
|
||||
UIDefaultsMap["plotConfiguration_colorScheme"] = UIDefaults.PlotConfiguration.ColorScheme
|
||||
UIDefaultsMap["plotConfiguration_resampleAlgo"] = UIDefaults.PlotConfiguration.ResampleAlgo
|
||||
UIDefaultsMap["plotConfiguration_resamplePolicy"] = UIDefaults.PlotConfiguration.ResamplePolicy
|
||||
|
||||
for _, c := range UIDefaults.MetricConfig.Clusters {
|
||||
if c.JobListMetrics != nil {
|
||||
|
||||
Reference in New Issue
Block a user