Merge branch 'sample_resolution_select' into dev

This commit is contained in:
Christoph Kluge 2024-09-24 17:43:15 +02:00
commit 8e3327ef6a
42 changed files with 826 additions and 242 deletions

View File

@ -224,7 +224,7 @@ type Query {
allocatedNodes(cluster: String!): [Count!]!
job(id: ID!): Job
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!

View File

@ -175,7 +175,6 @@ func cleanup() {
func TestRestApi(t *testing.T) {
restapi := setup(t)
t.Cleanup(cleanup)
testData := schema.JobData{
"load_one": map[schema.MetricScope]*schema.JobMetric{
schema.MetricScopeNode: {
@ -192,7 +191,7 @@ func TestRestApi(t *testing.T) {
},
}
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
metricdata.TestLoadDataCallback = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
return testData, nil
}
@ -344,7 +343,7 @@ func TestRestApi(t *testing.T) {
}
t.Run("CheckArchive", func(t *testing.T) {
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background())
data, err := metricDataDispatcher.LoadData(stoppedJob, []string{"load_one"}, []schema.MetricScope{schema.MetricScopeNode}, context.Background(), 60)
if err != nil {
t.Fatal(err)
}

View File

@ -516,8 +516,15 @@ func (api *RestApi) getCompleteJobById(rw http.ResponseWriter, r *http.Request)
var data schema.JobData
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
resolution := 0
for _, mc := range metricConfigs {
resolution = max(resolution, mc.Timestep)
}
if r.URL.Query().Get("all-metrics") == "true" {
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context())
data, err = metricDataDispatcher.LoadData(job, nil, scopes, r.Context(), resolution)
if err != nil {
log.Warn("Error while loading job data")
return
@ -606,7 +613,14 @@ func (api *RestApi) getJobById(rw http.ResponseWriter, r *http.Request) {
scopes = []schema.MetricScope{"node"}
}
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context())
metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
resolution := 0
for _, mc := range metricConfigs {
resolution = max(resolution, mc.Timestep)
}
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, r.Context(), resolution)
if err != nil {
log.Warn("Error while loading job data")
return
@ -1114,7 +1128,7 @@ func (api *RestApi) getJobMetrics(rw http.ResponseWriter, r *http.Request) {
}
resolver := graph.GetResolverInstance()
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes)
data, err := resolver.Query().JobMetrics(r.Context(), id, metrics, scopes, nil)
if err != nil {
json.NewEncoder(rw).Encode(Respone{
Error: &struct {

View File

@ -34,7 +34,7 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
scopes = append(scopes, schema.MetricScopeAccelerator)
}
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx)
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, ctx, 0) // 0 Resolution-Value retrieves highest res (60s)
if err != nil {
log.Error("Error wile loading job data for archiving")
return nil, err

View File

@ -246,7 +246,7 @@ type ComplexityRoot struct {
Clusters func(childComplexity int) int
GlobalMetrics func(childComplexity int) int
Job func(childComplexity int, id string) int
JobMetrics func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope) int
JobMetrics func(childComplexity int, id string, metrics []string, scopes []schema.MetricScope, resolution *int) int
Jobs func(childComplexity int, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) int
JobsFootprints func(childComplexity int, filter []*model.JobFilter, metrics []string) int
JobsStatistics func(childComplexity int, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) int
@ -369,7 +369,7 @@ type QueryResolver interface {
User(ctx context.Context, username string) (*model.User, error)
AllocatedNodes(ctx context.Context, cluster string) ([]*model.Count, error)
Job(ctx context.Context, id string) (*schema.Job, error)
JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error)
JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error)
JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error)
Jobs(ctx context.Context, filter []*model.JobFilter, page *model.PageRequest, order *model.OrderByInput) (*model.JobResultList, error)
JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error)
@ -1291,7 +1291,7 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return 0, false
}
return e.complexity.Query.JobMetrics(childComplexity, args["id"].(string), args["metrics"].([]string), args["scopes"].([]schema.MetricScope)), true
return e.complexity.Query.JobMetrics(childComplexity, args["id"].(string), args["metrics"].([]string), args["scopes"].([]schema.MetricScope), args["resolution"].(*int)), true
case "Query.jobs":
if e.complexity.Query.Jobs == nil {
@ -2068,7 +2068,7 @@ type Query {
allocatedNodes(cluster: String!): [Count!]!
job(id: ID!): Job
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!]): [JobMetricWithName!]!
jobMetrics(id: ID!, metrics: [String!], scopes: [MetricScope!], resolution: Int): [JobMetricWithName!]!
jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
@ -2388,6 +2388,15 @@ func (ec *executionContext) field_Query_jobMetrics_args(ctx context.Context, raw
}
}
args["scopes"] = arg2
var arg3 *int
if tmp, ok := rawArgs["resolution"]; ok {
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("resolution"))
arg3, err = ec.unmarshalOInt2ᚖint(ctx, tmp)
if err != nil {
return nil, err
}
}
args["resolution"] = arg3
return args, nil
}
@ -8527,7 +8536,7 @@ func (ec *executionContext) _Query_jobMetrics(ctx context.Context, field graphql
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return ec.resolvers.Query().JobMetrics(rctx, fc.Args["id"].(string), fc.Args["metrics"].([]string), fc.Args["scopes"].([]schema.MetricScope))
return ec.resolvers.Query().JobMetrics(rctx, fc.Args["id"].(string), fc.Args["metrics"].([]string), fc.Args["scopes"].([]schema.MetricScope), fc.Args["resolution"].(*int))
})
if err != nil {
ec.Error(ctx, err)

View File

@ -8,6 +8,7 @@ import (
"context"
"errors"
"fmt"
"slices"
"strconv"
"strings"
"time"
@ -226,14 +227,19 @@ func (r *queryResolver) Job(ctx context.Context, id string) (*schema.Job, error)
}
// JobMetrics is the resolver for the jobMetrics field.
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobMetricWithName, error) {
func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope, resolution *int) ([]*model.JobMetricWithName, error) {
if resolution == nil && config.Keys.EnableResampling != nil {
defaultRes := slices.Max(config.Keys.EnableResampling.Resolutions)
resolution = &defaultRes
}
job, err := r.Query().Job(ctx, id)
if err != nil {
log.Warn("Error while querying job for metrics")
return nil, err
}
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx)
data, err := metricDataDispatcher.LoadData(job, metrics, scopes, ctx, *resolution)
if err != nil {
log.Warn("Error while loading job data")
return nil, err
@ -442,11 +448,9 @@ func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
// SubCluster returns generated.SubClusterResolver implementation.
func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }
type (
clusterResolver struct{ *Resolver }
jobResolver struct{ *Resolver }
metricValueResolver struct{ *Resolver }
mutationResolver struct{ *Resolver }
queryResolver struct{ *Resolver }
subClusterResolver struct{ *Resolver }
)
type clusterResolver struct{ *Resolver }
type jobResolver struct{ *Resolver }
type metricValueResolver struct{ *Resolver }
type mutationResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }

View File

@ -47,7 +47,14 @@ func (r *queryResolver) rooflineHeatmap(
continue
}
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx)
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
// resolution := 0
// for _, mc := range metricConfigs {
// resolution = max(resolution, mc.Timestep)
// }
jobdata, err := metricDataDispatcher.LoadData(job, []string{"flops_any", "mem_bw"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0)
if err != nil {
log.Errorf("Error while loading roofline metrics for job %d", job.ID)
return nil, err

View File

@ -14,6 +14,7 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
"github.com/ClusterCockpit/cc-backend/pkg/resampler"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
@ -23,11 +24,12 @@ func cacheKey(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
resolution int,
) string {
// Duration and StartTime do not need to be in the cache key as StartTime is less unique than
// job.ID and the TTL of the cache entry makes sure it does not stay there forever.
return fmt.Sprintf("%d(%s):[%v],[%v]",
job.ID, job.State, metrics, scopes)
return fmt.Sprintf("%d(%s):[%v],[%v]-%d",
job.ID, job.State, metrics, scopes, resolution)
}
// Fetches the metric data for a job.
@ -35,8 +37,9 @@ func LoadData(job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
data := cache.Get(cacheKey(job, metrics, scopes), func() (_ interface{}, ttl time.Duration, size int) {
data := cache.Get(cacheKey(job, metrics, scopes, resolution), func() (_ interface{}, ttl time.Duration, size int) {
var jd schema.JobData
var err error
@ -60,7 +63,7 @@ func LoadData(job *schema.Job,
}
}
jd, err = repo.LoadData(job, metrics, scopes, ctx)
jd, err = repo.LoadData(job, metrics, scopes, ctx, resolution)
if err != nil {
if len(jd) != 0 {
log.Warnf("partial error: %s", err.Error())
@ -72,12 +75,31 @@ func LoadData(job *schema.Job,
}
size = jd.Size()
} else {
jd, err = archive.GetHandle().LoadJobData(job)
var jd_temp schema.JobData
jd_temp, err = archive.GetHandle().LoadJobData(job)
if err != nil {
log.Error("Error while loading job data from archive")
return err, 0, 0
}
//Deep copy the cached archive hashmap
jd = metricdata.DeepCopy(jd_temp)
//Resampling for archived data.
//Pass the resolution from frontend here.
for _, v := range jd {
for _, v_ := range v {
timestep := 0
for i := 0; i < len(v_.Series); i += 1 {
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
if err != nil {
return err, 0, 0
}
}
v_.Timestep = timestep
}
}
// Avoid sending unrequested data to the client:
if metrics != nil || scopes != nil {
if metrics == nil {
@ -117,6 +139,7 @@ func LoadData(job *schema.Job,
}
// FIXME: Review: Is this really necessary or correct.
// Note: Lines 142-170 formerly known as prepareJobData(jobData, scopes)
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/median/max Graph can be

View File

@ -55,6 +55,7 @@ type ApiQuery struct {
SubType *string `json:"subtype,omitempty"`
Metric string `json:"metric"`
Hostname string `json:"host"`
Resolution int `json:"resolution"`
TypeIds []string `json:"type-ids,omitempty"`
SubTypeIds []string `json:"subtype-ids,omitempty"`
Aggregate bool `json:"aggreg"`
@ -70,6 +71,7 @@ type ApiMetricData struct {
Data []schema.Float `json:"data"`
From int64 `json:"from"`
To int64 `json:"to"`
Resolution int `json:"resolution"`
Avg schema.Float `json:"avg"`
Min schema.Float `json:"min"`
Max schema.Float `json:"max"`
@ -83,7 +85,7 @@ func (ccms *CCMetricStore) Init(rawConfig json.RawMessage) error {
}
ccms.url = config.Url
ccms.queryEndpoint = fmt.Sprintf("%s/api/query", config.Url)
ccms.queryEndpoint = fmt.Sprintf("%s/api/query/", config.Url)
ccms.jwt = config.Token
ccms.client = http.Client{
Timeout: 10 * time.Second,
@ -129,7 +131,7 @@ func (ccms *CCMetricStore) doRequest(
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, ccms.queryEndpoint, buf)
req, err := http.NewRequestWithContext(ctx, http.MethodGet, ccms.queryEndpoint, buf)
if err != nil {
log.Warn("Error while building request body")
return nil, err
@ -138,6 +140,13 @@ func (ccms *CCMetricStore) doRequest(
req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", ccms.jwt))
}
// versioning the cc-metric-store query API.
// v2 = data with resampling
// v1 = data without resampling
q := req.URL.Query()
q.Add("version", "v2")
req.URL.RawQuery = q.Encode()
res, err := ccms.client.Do(req)
if err != nil {
log.Error("Error while performing request")
@ -162,8 +171,9 @@ func (ccms *CCMetricStore) LoadData(
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes)
queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, resolution)
if err != nil {
log.Warn("Error while building queries")
return nil, err
@ -195,11 +205,17 @@ func (ccms *CCMetricStore) LoadData(
jobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)
}
res := row[0].Resolution
if res == 0 {
res = mc.Timestep
}
jobMetric, ok := jobData[metric][scope]
if !ok {
jobMetric = &schema.JobMetric{
Unit: mc.Unit,
Timestep: mc.Timestep,
Timestep: res,
Series: make([]schema.Series, 0),
}
jobData[metric][scope] = jobMetric
@ -251,7 +267,6 @@ func (ccms *CCMetricStore) LoadData(
/* Returns list for "partial errors" */
return jobData, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(errors, ", "))
}
return jobData, nil
}
@ -267,6 +282,7 @@ func (ccms *CCMetricStore) buildQueries(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(job.Resources))
assignedScope := []schema.MetricScope{}
@ -323,6 +339,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: false,
Type: &acceleratorString,
TypeIds: host.Accelerators,
Resolution: resolution,
})
assignedScope = append(assignedScope, schema.MetricScopeAccelerator)
continue
@ -340,6 +357,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: true,
Type: &acceleratorString,
TypeIds: host.Accelerators,
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -353,6 +371,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: false,
Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -368,6 +387,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Core[core]),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
}
@ -384,6 +404,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(topology.Socket[socket]),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
}
@ -398,6 +419,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: true,
Type: &hwthreadString,
TypeIds: intToStringSlice(hwthreads),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -412,6 +434,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: false,
Type: &coreString,
TypeIds: intToStringSlice(cores),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -426,6 +449,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: true,
Type: &coreString,
TypeIds: intToStringSlice(cores),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -440,6 +464,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: false,
Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -454,6 +479,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: true,
Type: &memoryDomainString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -468,6 +494,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: false,
Type: &socketString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -482,6 +509,7 @@ func (ccms *CCMetricStore) buildQueries(
Aggregate: true,
Type: &socketString,
TypeIds: intToStringSlice(sockets),
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -492,6 +520,7 @@ func (ccms *CCMetricStore) buildQueries(
queries = append(queries, ApiQuery{
Metric: remoteName,
Hostname: host.Hostname,
Resolution: resolution,
})
assignedScope = append(assignedScope, scope)
continue
@ -510,7 +539,15 @@ func (ccms *CCMetricStore) LoadStats(
metrics []string,
ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}) // #166 Add scope shere for analysis view accelerator normalization?
// metricConfigs := archive.GetCluster(job.Cluster).MetricConfig
// resolution := 9000
// for _, mc := range metricConfigs {
// resolution = min(resolution, mc.Timestep)
// }
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope shere for analysis view accelerator normalization?
if err != nil {
log.Warn("Error while building query")
return nil, err
@ -590,6 +627,7 @@ func (ccms *CCMetricStore) LoadNodeData(
req.Queries = append(req.Queries, ApiQuery{
Hostname: node,
Metric: ccms.toRemoteName(metric),
Resolution: 60, // Default for Node Queries
})
}
}
@ -597,7 +635,7 @@ func (ccms *CCMetricStore) LoadNodeData(
resBody, err := ccms.doRequest(ctx, &req)
if err != nil {
log.Error("Error while performing request")
log.Error(fmt.Sprintf("Error while performing request %#v\n", err))
return nil, err
}

View File

@ -60,7 +60,8 @@ func (idb *InfluxDBv2DataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
ctx context.Context,
resolution int) (schema.JobData, error) {
measurementsConds := make([]string, 0, len(metrics))
for _, m := range metrics {

View File

@ -21,7 +21,7 @@ type MetricDataRepository interface {
Init(rawConfig json.RawMessage) error
// Return the JobData for the given job, only with the requested metrics.
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)

View File

@ -265,6 +265,7 @@ func (pdb *PrometheusDataRepository) LoadData(
metrics []string,
scopes []schema.MetricScope,
ctx context.Context,
resolution int,
) (schema.JobData, error) {
// TODO respect requested scope
if len(scopes) == 0 || !contains(scopes, schema.MetricScopeNode) {
@ -356,7 +357,7 @@ func (pdb *PrometheusDataRepository) LoadStats(
// map of metrics of nodes of stats
stats := map[string]map[string]schema.MetricStatistics{}
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx)
data, err := pdb.LoadData(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, ctx, 0 /*resolution here*/)
if err != nil {
log.Warn("Error while loading job for stats")
return nil, err

View File

@ -12,7 +12,7 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) = func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error) {
panic("TODO")
}
@ -27,9 +27,10 @@ func (tmdr *TestMetricDataRepository) LoadData(
job *schema.Job,
metrics []string,
scopes []schema.MetricScope,
ctx context.Context) (schema.JobData, error) {
ctx context.Context,
resolution int) (schema.JobData, error) {
return TestLoadDataCallback(job, metrics, scopes, ctx)
return TestLoadDataCallback(job, metrics, scopes, ctx, resolution)
}
func (tmdr *TestMetricDataRepository) LoadStats(
@ -48,3 +49,41 @@ func (tmdr *TestMetricDataRepository) LoadNodeData(
panic("TODO")
}
// DeepCopy returns a copy of jd_temp whose metric maps, JobMetric structs,
// series slices and data slices are independent of the input, so the result
// can be modified (for example resampled) without touching jd_temp.
//
// Copied per JobMetric: Timestep, Unit.Base, Unit.Prefix, every Series (Data,
// Hostname, Id, Statistics.Avg/Min/Max) and, when present, the
// StatisticsSeries (Min, Max, Median, Mean and Percentiles).
// NOTE(review): Series[i].Id is assigned as-is; if it is a pointer, source and
// copy share the pointee — confirm that is acceptable.
func DeepCopy(jd_temp schema.JobData) schema.JobData {
	jd := make(schema.JobData, len(jd_temp))
	for metric, scoped := range jd_temp {
		jd[metric] = make(map[schema.MetricScope]*schema.JobMetric, len(scoped))
		for scope, src := range scoped {
			dst := new(schema.JobMetric)
			dst.Timestep = src.Timestep
			dst.Unit.Base = src.Unit.Base
			dst.Unit.Prefix = src.Unit.Prefix

			dst.Series = make([]schema.Series, len(src.Series))
			for i := range src.Series {
				dst.Series[i].Data = make([]schema.Float, len(src.Series[i].Data))
				copy(dst.Series[i].Data, src.Series[i].Data)
				dst.Series[i].Hostname = src.Series[i].Hostname
				dst.Series[i].Id = src.Series[i].Id
				dst.Series[i].Statistics.Avg = src.Series[i].Statistics.Avg
				dst.Series[i].Statistics.Min = src.Series[i].Statistics.Min
				dst.Series[i].Statistics.Max = src.Series[i].Statistics.Max
			}

			if src.StatisticsSeries != nil {
				stats := new(schema.StatsSeries)
				// copy() into a nil destination copies nothing, so each slice is
				// cloned by appending onto a zero-capacity slice instead. This
				// always allocates fresh backing storage and keeps nil as nil.
				stats.Max = append(src.StatisticsSeries.Max[:0:0], src.StatisticsSeries.Max...)
				stats.Min = append(src.StatisticsSeries.Min[:0:0], src.StatisticsSeries.Min...)
				stats.Median = append(src.StatisticsSeries.Median[:0:0], src.StatisticsSeries.Median...)
				stats.Mean = append(src.StatisticsSeries.Mean[:0:0], src.StatisticsSeries.Mean...)
				stats.Percentiles = clonePercentiles(src.StatisticsSeries.Percentiles)
				dst.StatisticsSeries = stats
			}

			jd[metric][scope] = dst
		}
	}
	return jd
}

// clonePercentiles returns a map with the same keys as src whose slice values
// are fresh copies. A nil src yields nil. The destination map is allocated
// here because writing into a nil map panics.
func clonePercentiles[K comparable, E any](src map[K][]E) map[K][]E {
	if src == nil {
		return nil
	}
	dst := make(map[K][]E, len(src))
	for key, values := range src {
		dst[key] = append(values[:0:0], values...)
	}
	return dst
}

View File

@ -77,8 +77,8 @@ func (r *JobRepository) buildStatsQuery(
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
if col != "" {
// Scan columns: id, totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs",
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
@ -86,9 +86,9 @@ func (r *JobRepository) buildStatsQuery(
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_hwthreads) / 3600) as %s) as totalCoreHours`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_acc) as %s) as totalAccs`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
).From("job").GroupBy(col)
).From("job").Join("user ON user.username = job.user").GroupBy(col)
} else {
// Scan columns: totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
@ -107,15 +107,15 @@ func (r *JobRepository) buildStatsQuery(
return query
}
func (r *JobRepository) getUserName(ctx context.Context, id string) string {
user := GetUserFromContext(ctx)
name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
if name != "" {
return name
} else {
return "-"
}
}
// func (r *JobRepository) getUserName(ctx context.Context, id string) string {
// user := GetUserFromContext(ctx)
// name, _ := r.FindColumnValue(user, id, "user", "name", "username", false)
// if name != "" {
// return name
// } else {
// return "-"
// }
// }
func (r *JobRepository) getCastType() string {
var castType string
@ -167,14 +167,20 @@ func (r *JobRepository) JobsStatsGrouped(
for rows.Next() {
var id sql.NullString
var name sql.NullString
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := rows.Scan(&id, &jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
log.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
var personName string
if name.Valid {
personName = name.String
}
if jobs.Valid {
totalJobs = int(jobs.Int64)
@ -205,11 +211,11 @@ func (r *JobRepository) JobsStatsGrouped(
}
if col == "job.user" {
name := r.getUserName(ctx, id.String)
// name := r.getUserName(ctx, id.String)
stats = append(stats,
&model.JobsStatistics{
ID: id.String,
Name: name,
Name: personName,
TotalJobs: totalJobs,
TotalWalltime: totalWalltime,
TotalNodes: totalNodes,

View File

@ -13,6 +13,7 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/internal/util"
@ -278,6 +279,7 @@ func SetupRoutes(router *mux.Router, buildInfo web.Build) {
Roles: availableRoles,
Build: buildInfo,
Config: conf,
Resampling: config.Keys.EnableResampling,
Infos: infos,
}

View File

@ -47,8 +47,8 @@ func RegisterFootprintWorker() {
scopes = append(scopes, schema.MetricScopeAccelerator)
for _, job := range jobs {
log.Debugf("Try job %d", job.JobID)
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background())
// log.Debugf("Try job %d", job.JobID)
jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background(), 0) // 0 Resolution-Value retrieves highest res
if err != nil {
log.Errorf("Error wile loading job data for footprint update: %v", err)
continue

View File

@ -9,8 +9,8 @@ import (
"io"
"time"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
func DecodeJobData(r io.Reader, k string) (schema.JobData, error) {

123
pkg/resampler/resampler.go Normal file
View File

@ -0,0 +1,123 @@
package resampler
import (
"errors"
"fmt"
"math"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// SimpleResampler downsamples data by keeping every step-th value, where
// step = new_frequency / old_frequency.
//
// An error is returned when either frequency is 0 or when new_frequency is not
// a multiple of old_frequency. The input slice itself is returned when it has
// fewer than 100 values, or when the resampled length would be 0 or would not
// be shorter than the input.
func SimpleResampler(data []schema.Float, old_frequency int64, new_frequency int64) ([]schema.Float, error) {
	switch {
	case old_frequency == 0 || new_frequency == 0:
		return nil, errors.New("either old or new frequency is set to 0")
	case new_frequency%old_frequency != 0:
		return nil, errors.New("new sampling frequency should be multiple of the old frequency")
	}

	stride := int(new_frequency / old_frequency)
	total := len(data)
	kept := total / stride

	// Too little data, or nothing to gain from resampling: hand back the input.
	if kept == 0 || total < 100 || kept >= total {
		return data, nil
	}

	sampled := make([]schema.Float, 0, kept)
	for src := 0; len(sampled) < kept; src += stride {
		sampled = append(sampled, data[src])
	}
	return sampled, nil
}
// LargestTriangleThreeBucket downsamples data from a sampling interval of
// old_frequency to new_frequency using the Largest-Triangle-Three-Buckets
// approach and returns the resulting series together with its interval.
//
// Inspired by one of the algorithms from https://skemman.is/bitstream/1946/15343/3/SS_MSthesis.pdf
// Adapted from https://github.com/haoel/downsampling/blob/master/core/lttb.go
//
// The input slice is returned unchanged, together with old_frequency, when
//   - old_frequency is not positive or new_frequency is not larger than
//     old_frequency (nothing to downsample), or
//   - data has fewer than 100 values, or the resampled length would be 0 or
//     not shorter than the input.
//
// An error is returned when new_frequency is larger than old_frequency but
// not a multiple of it.
//
// The first and last value are always kept. For every bucket in between, the
// value forming the largest triangle with the previously selected value and
// the average of the following bucket is kept; if the following bucket
// contains a NaN, schema.NaN is emitted for the current bucket instead.
func LargestTriangleThreeBucket(data []schema.Float, old_frequency int, new_frequency int) ([]schema.Float, int, error) {
	// A requested interval that is not coarser than the stored one cannot be
	// satisfied by downsampling; return the data as-is instead of failing.
	if old_frequency <= 0 || new_frequency <= old_frequency {
		return data, old_frequency, nil
	}

	if new_frequency%old_frequency != 0 {
		return nil, 0, fmt.Errorf("new sampling frequency : %d should be multiple of the old frequency : %d", new_frequency, old_frequency)
	}

	step := new_frequency / old_frequency
	targetLen := len(data) / step

	if targetLen == 0 || len(data) < 100 || targetLen >= len(data) {
		return data, old_frequency, nil
	}

	// With at most two target points there are no inner buckets; only the
	// end points remain. Handling this here avoids a division by zero in the
	// bucket size below.
	if targetLen <= 2 {
		return []schema.Float{data[0], data[len(data)-1]}, new_frequency, nil
	}

	sampled := make([]schema.Float, 0, targetLen)

	// Bucket size. Leave room for start and end data points.
	bucketSize := float64(len(data)-2) / float64(targetLen-2)

	sampled = append(sampled, data[0]) // Always add the first point

	// Three indices describe the sliding window:
	//   bucketLow    - start of the current bucket
	//   bucketMiddle - end of the current bucket, also start of the next bucket
	//   bucketHigh   - end of the next bucket
	bucketLow := 1
	bucketMiddle := int(math.Floor(bucketSize)) + 1

	var prevMaxAreaPoint int

	for i := 0; i < targetLen-2; i++ {
		bucketHigh := int(math.Floor(float64(i+2)*bucketSize)) + 1
		if bucketHigh >= len(data)-1 {
			bucketHigh = len(data) - 2
		}

		// Average point of the next bucket (point c of the triangle).
		avgPointX, avgPointY := calculateAverageDataPoint(data[bucketMiddle:bucketHigh+1], int64(bucketMiddle))
		nextBucketIsNaN := math.IsNaN(float64(avgPointY))

		// Previously selected point (point a of the triangle).
		pointX := schema.Float(prevMaxAreaPoint)
		pointY := data[prevMaxAreaPoint]

		// Default to the first point of the current bucket so that a bucket
		// whose areas are all NaN never falls back to index 0.
		maxArea := -1.0
		maxAreaPoint := bucketLow

		for candidate := bucketLow; candidate < bucketMiddle; candidate++ {
			area := calculateTriangleArea(pointX, pointY, avgPointX, avgPointY, schema.Float(candidate), data[candidate])
			if area > maxArea {
				maxArea = area
				maxAreaPoint = candidate
			}
		}

		if nextBucketIsNaN {
			sampled = append(sampled, schema.NaN)
		} else {
			sampled = append(sampled, data[maxAreaPoint]) // Pick this point from the bucket
		}
		prevMaxAreaPoint = maxAreaPoint // Becomes point a for the next bucket

		// Move to the next window.
		bucketLow = bucketMiddle
		bucketMiddle = bucketHigh
	}

	sampled = append(sampled, data[len(data)-1]) // Always add last

	return sampled, new_frequency, nil
}

35
pkg/resampler/util.go Normal file
View File

@ -0,0 +1,35 @@
package resampler
import (
"math"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
// calculateTriangleArea returns the (non-negative) area of the triangle
// spanned by the three points a = (paX, paY), b = (pbX, pbY) and
// c = (pcX, pcY).
func calculateTriangleArea(paX, paY, pbX, pbY, pcX, pcY schema.Float) float64 {
	// Cross-product term of the triangle; its sign depends on the
	// orientation of the points, hence the absolute value below.
	cross := (paX-pcX)*(pbY-paY) - (paX-pbX)*(pcY-paY)
	return math.Abs(float64(cross * 0.5))
}
// calculateAverageDataPoint returns the average position of a run of samples.
//
// points holds the y-values; their x-coordinates are the consecutive indices
// xStart, xStart+1, ... . avgX is the mean of those indices and avgY the mean
// of the values. If any value in points is NaN, avgY is returned as
// schema.NaN while avgX is still the mean index.
func calculateAverageDataPoint(points []schema.Float, xStart int64) (avgX schema.Float, avgY schema.Float) {
	sawNaN := false
	for offset, value := range points {
		avgX += schema.Float(xStart + int64(offset))
		avgY += value
		if math.IsNaN(float64(value)) {
			sawNaN = true
		}
	}

	count := schema.Float(len(points))
	avgX /= count
	avgY /= count

	if sawNaN {
		return avgX, schema.NaN
	}
	return avgX, avgY
}

View File

@ -76,6 +76,13 @@ type Retention struct {
IncludeDB bool `json:"includeDB"`
}
// ResampleConfig holds the settings for dynamic zoom (resampling) of metric
// plots in the frontend.
type ResampleConfig struct {
// Trigger next zoom level at less than this many visible datapoints
Trigger int `json:"trigger"`
// Array of resampling target resolutions, in seconds; Example: [600,300,60]
Resolutions []int `json:"resolutions"`
}
// Format of the configuration (file). See below for the defaults.
type ProgramConfig struct {
// Address where the http (or https) server will listen on (for example: 'localhost:80').
@ -133,6 +140,9 @@ type ProgramConfig struct {
// be provided! Most options here can be overwritten by the user.
UiDefaults map[string]interface{} `json:"ui-defaults"`
// If exists, will enable dynamic zoom in frontend metric plots using the configured values
EnableResampling *ResampleConfig `json:"enable-resampling"`
// Where to store MachineState files
MachineStateDir string `json:"machine-state-dir"`

View File

@ -424,6 +424,27 @@
"plot_general_colorscheme",
"plot_list_selectedMetrics"
]
},
"enable-resampling": {
"description": "Enable dynamic zoom in frontend metric plots.",
"type": "object",
"properties": {
"trigger": {
"description": "Trigger next zoom level at less than this many visible datapoints.",
"type": "integer"
},
"resolutions": {
"description": "Array of resampling target resolutions, in seconds.",
"type": "array",
"items": {
"type": "integer"
}
}
},
"required": [
"trigger",
"resolutions"
]
}
},
"required": [

View File

@ -48,6 +48,7 @@
href: `/monitoring/user/${username}`,
icon: "bar-chart-line-fill",
perCluster: false,
listOptions: false,
menu: "none",
},
{
@ -56,6 +57,7 @@
href: `/monitoring/jobs/`,
icon: "card-list",
perCluster: false,
listOptions: false,
menu: "none",
},
{
@ -63,7 +65,8 @@
requiredRole: roles.manager,
href: "/monitoring/users/",
icon: "people-fill",
perCluster: false,
perCluster: true,
listOptions: true,
menu: "Groups",
},
{
@ -71,7 +74,8 @@
requiredRole: roles.support,
href: "/monitoring/projects/",
icon: "folder",
perCluster: false,
perCluster: true,
listOptions: true,
menu: "Groups",
},
{
@ -80,6 +84,7 @@
href: "/monitoring/tags/",
icon: "tags",
perCluster: false,
listOptions: false,
menu: "Groups",
},
{
@ -88,6 +93,7 @@
href: "/monitoring/analysis/",
icon: "graph-up",
perCluster: true,
listOptions: false,
menu: "Stats",
},
{
@ -96,6 +102,7 @@
href: "/monitoring/systems/",
icon: "cpu",
perCluster: true,
listOptions: false,
menu: "Groups",
},
{
@ -104,6 +111,7 @@
href: "/monitoring/status/",
icon: "cpu",
perCluster: true,
listOptions: false,
menu: "Stats",
},
];

View File

@ -56,7 +56,8 @@
selectedScopes = [];
let plots = {},
roofWidth
roofWidth,
statsTable
let missingMetrics = [],
missingHosts = [],
@ -119,15 +120,6 @@
variables: { dbid, selectedMetrics, selectedScopes },
});
function loadAllScopes() {
selectedScopes = [...selectedScopes, "socket", "core"]
jobMetrics = queryStore({
client: client,
query: query,
variables: { dbid, selectedMetrics, selectedScopes},
});
}
// Handle Job Query on Init -> is not executed anymore
getContext("on-init")(() => {
let job = $initq.data.job;
@ -352,7 +344,7 @@
{#if item.data}
<Metric
bind:this={plots[item.metric]}
on:load-all={loadAllScopes}
on:more-loaded={({ detail }) => statsTable.moreLoaded(detail)}
job={$initq.data.job}
metricName={item.metric}
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
@ -418,6 +410,7 @@
{#if $jobMetrics?.data?.jobMetrics}
{#key $jobMetrics.data.jobMetrics}
<StatsTable
bind:this={statsTable}
job={$initq.data.job}
jobMetrics={$jobMetrics.data.jobMetrics}
/>

View File

@ -90,11 +90,10 @@
},
});
let itemsPerPage = ccconfig.plot_list_jobsPerPage;
let page = 1;
let paging = { itemsPerPage, page };
let sorting = { field: "startTime", type: "col", order: "DESC" };
$: filter = [
const paging = { itemsPerPage: 50, page: 1 };
const sorting = { field: "startTime", type: "col", order: "DESC" };
const filter = [
{ cluster: { eq: cluster } },
{ node: { contains: hostname } },
{ state: ["running"] },
@ -207,7 +206,6 @@
cluster={clusters.find((c) => c.name == cluster)}
subCluster={$nodeMetricsData.data.nodeMetrics[0].subCluster}
series={item.metric.series}
resources={[{ hostname: hostname }]}
forNode={true}
/>
{:else if item.disabled === true && item.metric}

View File

@ -206,7 +206,6 @@
metric={item.data.name}
cluster={clusters.find((c) => c.name == cluster)}
subCluster={item.subCluster}
resources={[{ hostname: item.host }]}
forNode={true}
/>
{:else if item.disabled === true && item.data}

View File

@ -9,6 +9,7 @@ new Config({
username: username
},
context: new Map([
['cc-config', clusterCockpitConfig]
['cc-config', clusterCockpitConfig],
['resampling', resampleConfig]
])
})

View File

@ -51,7 +51,5 @@
<Col>
<EditProject on:reload={getUserList} />
</Col>
<Col>
<Options />
</Col>
</Row>

View File

@ -3,11 +3,13 @@
-->
<script>
import { onMount } from "svelte";
import { Card, CardBody, CardTitle } from "@sveltestrap/sveltestrap";
import { getContext, onMount } from "svelte";
import { Col, Card, CardBody, CardTitle } from "@sveltestrap/sveltestrap";
let scrambled;
const resampleConfig = getContext("resampling");
onMount(() => {
scrambled = window.localStorage.getItem("cc-scramble-names") != null;
});
@ -23,6 +25,7 @@
}
</script>
<Col>
<Card class="h-100">
<CardBody>
<CardTitle class="mb-3">Scramble Names / Presentation Mode</CardTitle>
@ -36,3 +39,16 @@
Active?
</CardBody>
</Card>
</Col>
{#if resampleConfig}
<Col>
<Card class="h-100">
<CardBody>
<CardTitle class="mb-3">Metric Plot Resampling</CardTitle>
<p>Triggered at {resampleConfig.trigger} datapoints.</p>
<p>Configured resolutions: {resampleConfig.resolutions}</p>
</CardBody>
</Card>
</Col>
{/if}

View File

@ -26,18 +26,23 @@
export let showFootprint;
export let triggerMetricRefresh = false;
const resampleConfig = getContext("resampling") || null;
const resampleDefault = resampleConfig ? Math.max(...resampleConfig.resolutions) : 0;
let { id } = job;
let scopes = job.numNodes == 1
? job.numAcc >= 1
? ["core", "accelerator"]
: ["core"]
: ["node"];
let selectedResolution = resampleDefault;
let zoomStates = {};
const cluster = getContext("clusters").find((c) => c.name == job.cluster);
const client = getContextClient();
const query = gql`
query ($id: ID!, $metrics: [String!]!, $scopes: [MetricScope!]!) {
jobMetrics(id: $id, metrics: $metrics, scopes: $scopes) {
query ($id: ID!, $metrics: [String!]!, $scopes: [MetricScope!]!, $selectedResolution: Int) {
jobMetrics(id: $id, metrics: $metrics, scopes: $scopes, resolution: $selectedResolution) {
name
scope
metric {
@ -66,17 +71,30 @@
}
`;
// Handles the "zoom" event of a metric plot.
// - detail: event payload; may carry `lastZoomState` (the plot's scales) and
//   `newRes` (a newly requested resolution).
// - metric: key under which the zoom state is remembered in `zoomStates`.
function handleZoom(detail, metric) {
if ( // States have to differ, causes deathloop if just set
// NOTE(review): the stored state is only replaced when BOTH x.min and y.max
// differ; a change on a single axis leaves it untouched -- confirm intended.
(zoomStates[metric]?.x?.min !== detail?.lastZoomState?.x?.min) &&
(zoomStates[metric]?.y?.max !== detail?.lastZoomState?.y?.max)
) {
// Shallow copy so the stored state is a separate object from the event payload
zoomStates[metric] = {...detail.lastZoomState}
}
if (detail?.newRes) { // Triggers GQL
// selectedResolution is a variable of the reactive metrics query
selectedResolution = detail.newRes
}
}
$: metricsQuery = queryStore({
client: client,
query: query,
variables: { id, metrics, scopes },
variables: { id, metrics, scopes, selectedResolution },
});
function refreshMetrics() {
metricsQuery = queryStore({
client: client,
query: query,
variables: { id, metrics, scopes },
variables: { id, metrics, scopes, selectedResolution },
// requestPolicy: 'network-only' // use default cache-first for refresh
});
}
@ -159,6 +177,7 @@
<!-- Subcluster Metricconfig remove keyword for jobtables (joblist main, user joblist, project joblist) to be used here as toplevel case-->
{#if metric.disabled == false && metric.data}
<MetricPlot
on:zoom={({detail}) => { handleZoom(detail, metric.data.name) }}
width={plotWidth}
height={plotHeight}
timestep={metric.data.metric.timestep}
@ -169,9 +188,9 @@
{cluster}
subCluster={job.subCluster}
isShared={job.exclusive != 1}
resources={job.resources}
numhwthreads={job.numHWThreads}
numaccs={job.numAcc}
zoomState={zoomStates[metric.data.name] || null}
/>
{:else if metric.disabled == true && metric.data}
<Card body color="info"

View File

@ -6,7 +6,6 @@
Properties:
- `metric String`: The metric name
- `scope String?`: Scope of the displayed data [Default: node]
- `resources [GraphQL.Resource]`: List of resources used for parent job
- `width Number`: The plot width
- `height Number`: The plot height
- `timestep Number`: The timestep used for X-axis rendering
@ -16,9 +15,10 @@
- `cluster GraphQL.Cluster`: Cluster Object of the parent job
- `subCluster String`: Name of the subCluster of the parent job
- `isShared Bool?`: If this job used shared resources; will adapt threshold indicators accordingly [Default: false]
- `forNode Bool?`: If this plot is used for node data display; will render x-axis as negative time with $now as maximum [Default: false]
- `forNode Bool?`: If this plot is used for node data display; will render x-axis as negative time with $now as maximum [Default: false]
- `numhwthreads Number?`: Number of job HWThreads [Default: 0]
- `numaccs Number?`: Number of job Accelerators [Default: 0]
- `zoomState Object?`: The last zoom state to preserve on user zoom [Default: null]
-->
<script context="module">
@ -40,7 +40,7 @@
function timeIncrs(timestep, maxX, forNode) {
if (forNode === true) {
return [60, 300, 900, 1800, 3600, 7200, 14400, 21600]; // forNode fixed increments
return [60, 120, 240, 300, 360, 480, 600, 900, 1800, 3600, 7200, 14400, 21600]; // forNode fixed increments
} else {
let incrs = [];
for (let t = timestep; t < maxX; t *= 10)
@ -113,12 +113,11 @@
<script>
import uPlot from "uplot";
import { formatNumber } from "../units.js";
import { getContext, onMount, onDestroy } from "svelte";
import { getContext, onMount, onDestroy, createEventDispatcher } from "svelte";
import { Card } from "@sveltestrap/sveltestrap";
export let metric;
export let scope = "node";
export let resources = [];
export let width;
export let height;
export let timestep;
@ -131,11 +130,13 @@
export let forNode = false;
export let numhwthreads = 0;
export let numaccs = 0;
export let zoomState = null;
if (useStatsSeries == null) useStatsSeries = statisticsSeries != null;
if (useStatsSeries == false && series == null) useStatsSeries = true;
const dispatch = createEventDispatcher();
const subClusterTopology = getContext("getHardwareTopology")(cluster, subCluster);
const metricConfig = getContext("getMetricConfig")(cluster, subCluster, metric);
const clusterCockpitConfig = getContext("cc-config");
@ -158,6 +159,17 @@
numaccs
);
const resampleConfig = getContext("resampling");
let resampleTrigger;
let resampleResolutions;
let resampleMinimum;
if (resampleConfig) {
resampleTrigger = Number(resampleConfig.trigger)
resampleResolutions = [...resampleConfig.resolutions];
resampleMinimum = Math.min(...resampleConfig.resolutions);
}
// converts the legend into a simple tooltip
function legendAsTooltipPlugin({
className,
@ -296,7 +308,6 @@
},
];
const plotData = [new Array(longestSeries)];
if (forNode === true) {
// Negative Timestamp Buildup
for (let i = 0; i <= longestSeries; i++) {
@ -317,15 +328,15 @@
plotData.push(statisticsSeries.min);
plotData.push(statisticsSeries.max);
plotData.push(statisticsSeries.median);
// plotData.push(statisticsSeries.mean);
if (forNode === true) {
// timestamp 0 with null value for reversed time axis
if (plotData[1].length != 0) plotData[1].push(null);
if (plotData[2].length != 0) plotData[2].push(null);
if (plotData[3].length != 0) plotData[3].push(null);
// if (plotData[4].length != 0) plotData[4].push(null);
}
/* deprecated: sparse data handled by uplot */
// if (forNode === true) {
// if (plotData[1][-1] != null && plotData[2][-1] != null && plotData[3][-1] != null) {
// if (plotData[1].length != 0) plotData[1].push(null);
// if (plotData[2].length != 0) plotData[2].push(null);
// if (plotData[3].length != 0) plotData[3].push(null);
// }
// }
plotSeries.push({
label: "min",
@ -345,12 +356,6 @@
width: lineWidth,
stroke: "black",
});
// plotSeries.push({
// label: "mean",
// scale: "y",
// width: lineWidth,
// stroke: "blue",
// });
plotBands = [
{ series: [2, 3], fill: "rgba(0,255,0,0.1)" },
@ -359,13 +364,18 @@
} else {
for (let i = 0; i < series.length; i++) {
plotData.push(series[i].data);
if (forNode === true && plotData[1].length != 0) plotData[1].push(null); // timestamp 0 with null value for reversed time axis
/* deprecated: sparse data handled by uplot */
// if (forNode === true && plotData[1].length != 0) {
// if (plotData[1][-1] != null) {
// plotData[1].push(null);
// };
// };
plotSeries.push({
label:
scope === "node"
? resources[i].hostname
: // scope === 'accelerator' ? resources[0].accelerators[i] :
scope + " #" + (i + 1),
? series[i].hostname
: scope + " #" + (i + 1),
scale: "y",
width: lineWidth,
stroke: lineColor(i, series.length),
@ -395,6 +405,22 @@
bands: plotBands,
padding: [5, 10, -20, 0],
hooks: {
init: [
(u) => {
/* IF Zoom Enabled */
if (resampleConfig) {
u.over.addEventListener("dblclick", (e) => {
// console.log('Dispatch Reset')
dispatch('zoom', {
lastZoomState: {
x: { time: false },
y: { auto: true }
}
});
});
};
},
],
draw: [
(u) => {
// Draw plot type label:
@ -436,6 +462,34 @@
u.ctx.restore();
},
],
setScale: [
(u, key) => { // If ZoomResample is Configured && Not System/Node View
if (resampleConfig && !forNode && key === 'x') {
const numX = (u.series[0].idxs[1] - u.series[0].idxs[0])
if (numX <= resampleTrigger && timestep !== resampleMinimum) {
/* Get closest zoom level; prevents multiple iterative zoom requests for big zoom-steps (e.g. 600 -> 300 -> 120 -> 60) */
// Which resolution to theoretically request to achieve 30 or more visible data points:
const target = (numX * timestep) / resampleTrigger
// Which configured resolution actually matches the closest to theoretical target:
const closest = resampleResolutions.reduce(function(prev, curr) {
return (Math.abs(curr - target) < Math.abs(prev - target) ? curr : prev);
});
// Prevents non-required dispatches
if (timestep !== closest) {
// console.log('Dispatch Zoom with Res from / to', timestep, closest)
dispatch('zoom', {
newRes: closest,
lastZoomState: u?.scales
});
}
} else {
dispatch('zoom', {
lastZoomState: u?.scales
});
};
};
},
]
},
scales: {
x: { time: false },
@ -466,6 +520,9 @@
if (!uplot) {
opts.width = width;
opts.height = height;
if (zoomState) {
opts.scales = {...zoomState}
}
uplot = new uPlot(opts, plotData, plotWrapper);
} else {
uplot.setSize({ width, height });
@ -474,7 +531,6 @@
function onSizeChange() {
if (!uplot) return;
if (timeoutId != null) clearTimeout(timeoutId);
timeoutId = setTimeout(() => {

View File

@ -69,6 +69,7 @@
<InputGroup class="inline-from">
<InputGroupText><Icon name="clock-history" /></InputGroupText>
<InputGroupText>Range</InputGroupText>
<select
class="form-select"
bind:value={timeRange}

View File

@ -21,7 +21,41 @@
</script>
{#each links as item}
{#if !item.perCluster}
{#if item.listOptions}
<Dropdown nav inNavbar>
<DropdownToggle nav caret>
<Icon name={item.icon} />
{item.title}
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<DropdownItem
href={item.href}
>
All Clusters
</DropdownItem>
<DropdownItem divider />
{#each clusters as cluster}
<Dropdown nav direction="right">
<DropdownToggle nav caret class="dropdown-item py-1 px-2">
{cluster.name}
</DropdownToggle>
<DropdownMenu>
<DropdownItem class="py-1 px-2"
href={item.href + '?cluster=' + cluster.name}
>
All Jobs
</DropdownItem>
<DropdownItem class="py-1 px-2"
href={item.href + '?cluster=' + cluster.name + '&state=running'}
>
Running Jobs
</DropdownItem>
</DropdownMenu>
</Dropdown>
{/each}
</DropdownMenu>
</Dropdown>
{:else if !item.perCluster}
<NavLink href={item.href} active={window.location.pathname == item.href}
><Icon name={item.icon} /> {item.title}</NavLink
>

View File

@ -10,6 +10,7 @@ new Job({
roles: roles
},
context: new Map([
['cc-config', clusterCockpitConfig]
['cc-config', clusterCockpitConfig],
['resampling', resampleConfig]
])
})

View File

@ -13,14 +13,24 @@
-->
<script>
import { createEventDispatcher } from "svelte";
import {
getContext,
createEventDispatcher
} from "svelte";
import {
queryStore,
gql,
getContextClient
} from "@urql/svelte";
import {
InputGroup,
InputGroupText,
Spinner,
Card,
} from "@sveltestrap/sveltestrap";
import { minScope } from "../generic/utils";
import {
minScope,
} from "../generic/utils.js";
import Timeseries from "../generic/plots/MetricPlot.svelte";
export let job;
@ -32,32 +42,132 @@
export let rawData;
export let isShared = false;
const dispatch = createEventDispatcher();
const unit = (metricUnit?.prefix ? metricUnit.prefix : "") + (metricUnit?.base ? metricUnit.base : "")
const resampleConfig = getContext("resampling") || null;
const resampleDefault = resampleConfig ? Math.max(...resampleConfig.resolutions) : 0;
let selectedHost = null,
plot,
fetching = false,
error = null;
let selectedHost = null;
let error = null;
let selectedScope = minScope(scopes);
let selectedResolution = null;
let pendingResolution = resampleDefault;
let selectedScopeIndex = scopes.findIndex((s) => s == minScope(scopes));
let patternMatches = false;
let nodeOnly = false; // If, after load-all, still only node scope returned
let statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null);
let zoomState = null;
let pendingZoomState = null;
let statsPattern = /(.*)-stat$/
let statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
let selectedScopeIndex
const dispatch = createEventDispatcher();
const statsPattern = /(.*)-stat$/;
const unit = (metricUnit?.prefix ? metricUnit.prefix : "") + (metricUnit?.base ? metricUnit.base : "");
const client = getContextClient();
const subQuery = gql`
query ($dbid: ID!, $selectedMetrics: [String!]!, $selectedScopes: [MetricScope!]!, $selectedResolution: Int) {
singleUpdate: jobMetrics(id: $dbid, metrics: $selectedMetrics, scopes: $selectedScopes, resolution: $selectedResolution) {
name
scope
metric {
unit {
prefix
base
}
timestep
statisticsSeries {
min
median
max
}
series {
hostname
id
data
statistics {
min
avg
max
}
}
}
}
}
`;
$: availableScopes = scopes;
$: patternMatches = statsPattern.exec(selectedScope)
$: if (!patternMatches) {
// Handles the "zoom" event of the metric plot.
// - detail: event payload; may carry `lastZoomState` (the plot's scales) and
//   `newRes` (a newly requested resolution).
// The values are only staged in `pendingZoomState` / `pendingResolution` here.
function handleZoom(detail) {
if ( // States have to differ, causes deathloop if just set
// NOTE(review): the pending state is only replaced when BOTH x.min and y.max
// differ; a change on a single axis leaves it untouched -- confirm intended.
(pendingZoomState?.x?.min !== detail?.lastZoomState?.x?.min) &&
(pendingZoomState?.y?.max !== detail?.lastZoomState?.y?.max)
) {
// Shallow copy so the pending state is a separate object from the event payload
pendingZoomState = {...detail.lastZoomState}
}
if (detail?.newRes) { // Triggers GQL
// pendingResolution is watched by the reactive data-load statement
pendingResolution = detail.newRes
}
}
let metricData;
let selectedScopes = [...scopes]
const dbid = job.id;
const selectedMetrics = [metricName]
$: if (selectedScope || pendingResolution) {
if (!selectedResolution) {
// Skips reactive data load on init
selectedResolution = Number(pendingResolution)
} else {
if (selectedScope == "load-all") {
selectedScopes = [...scopes, "socket", "core", "accelerator"]
}
if (pendingResolution) {
selectedResolution = Number(pendingResolution)
}
metricData = queryStore({
client: client,
query: subQuery,
variables: { dbid, selectedMetrics, selectedScopes, selectedResolution },
// Never use network-only: causes reactive load-loop!
});
if ($metricData && !$metricData.fetching) {
rawData = $metricData.data.singleUpdate.map((x) => x.metric)
scopes = $metricData.data.singleUpdate.map((x) => x.scope)
statsSeries = rawData.map((data) => data?.statisticsSeries ? data.statisticsSeries : null)
// Keep Zoomlevel if ResChange By Zoom
if (pendingZoomState) {
zoomState = {...pendingZoomState}
}
// Set selected scope to min of returned scopes
if (selectedScope == "load-all") {
selectedScope = minScope(scopes)
nodeOnly = (selectedScope == "node") // "node" still only scope after load-all
}
const statsTableData = $metricData.data.singleUpdate.filter((x) => x.scope !== "node")
if (statsTableData.length > 0) {
dispatch("more-loaded", statsTableData);
}
patternMatches = statsPattern.exec(selectedScope)
if (!patternMatches) {
selectedScopeIndex = scopes.findIndex((s) => s == selectedScope);
} else {
selectedScopeIndex = scopes.findIndex((s) => s == patternMatches[1]);
}
}
}
}
$: data = rawData[selectedScopeIndex];
$: series = data?.series.filter(
$: series = data?.series?.filter(
(series) => selectedHost == null || series.hostname == selectedHost,
);
$: if (selectedScope == "load-all") dispatch("load-all");
</script>
<InputGroup>
@ -65,13 +175,13 @@
{metricName} ({unit})
</InputGroupText>
<select class="form-select" bind:value={selectedScope}>
{#each availableScopes as scope, index}
{#each scopes as scope, index}
<option value={scope}>{scope}</option>
{#if statsSeries[index]}
<option value={scope + '-stat'}>stats series ({scope})</option>
{/if}
{/each}
{#if availableScopes.length == 1 && nativeScope != "node"}
{#if scopes.length == 1 && nativeScope != "node" && !nodeOnly}
<option value={"load-all"}>Load all...</option>
{/if}
</select>
@ -85,13 +195,13 @@
{/if}
</InputGroup>
{#key series}
{#if fetching == true}
{#if $metricData?.fetching == true}
<Spinner />
{:else if error != null}
<Card body color="danger">{error.message}</Card>
{:else if series != null && !patternMatches}
<Timeseries
bind:this={plot}
on:zoom={({detail}) => { handleZoom(detail) }}
{width}
height={300}
cluster={job.cluster}
@ -101,11 +211,11 @@
metric={metricName}
{series}
{isShared}
resources={job.resources}
{zoomState}
/>
{:else if statsSeries[selectedScopeIndex] != null && patternMatches}
<Timeseries
bind:this={plot}
on:zoom={({detail}) => { handleZoom(detail) }}
{width}
height={300}
cluster={job.cluster}
@ -115,7 +225,7 @@
metric={metricName}
{series}
{isShared}
resources={job.resources}
{zoomState}
statisticsSeries={statsSeries[selectedScopeIndex]}
useStatsSeries={!!statsSeries[selectedScopeIndex]}
/>

View File

@ -4,6 +4,9 @@
Properties:
- `job Object`: The job object
- `jobMetrics [Object]`: The jobs metricdata
Exported:
- `moreLoaded`: Adds additional scopes requested from Metric.svelte in Job-View
-->
<script>
@ -23,8 +26,8 @@
export let job;
export let jobMetrics;
const allMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort(),
scopesForMetric = (metric) =>
const allMetrics = [...new Set(jobMetrics.map((m) => m.name))].sort()
const scopesForMetric = (metric) =>
jobMetrics.filter((jm) => jm.name == metric).map((jm) => jm.scope);
let hosts = job.resources.map((r) => r.hostname).sort(),
@ -83,6 +86,14 @@
return s.dir != "up" ? s1[stat] - s2[stat] : s2[stat] - s1[stat];
});
}
// Merges additionally loaded job metric entries into the table's `jobMetrics`.
// - moreJobMetrics: array of entries carrying at least `name` and `scope`.
// An entry is appended only if no entry with the same metric name AND scope
// is present yet. Comparing the scope alone would drop a newly loaded scope
// for one metric as soon as any other metric already has that scope.
export function moreLoaded(moreJobMetrics) {
  for (const newMetric of moreJobMetrics) {
    const alreadyKnown = jobMetrics.some(
      (m) => m.name == newMetric.name && m.scope == newMetric.scope,
    );
    if (!alreadyKnown) {
      // Reassign (instead of push) so the component reacts to the change
      jobMetrics = [...jobMetrics, newMetric];
    }
  }
}
</script>
<Table class="mb-0">

View File

@ -9,6 +9,7 @@ new Jobs({
roles: roles
},
context: new Map([
['cc-config', clusterCockpitConfig]
['cc-config', clusterCockpitConfig],
['resampling', resampleConfig]
])
})

View File

@ -8,6 +8,7 @@ new User({
user: userInfos
},
context: new Map([
['cc-config', clusterCockpitConfig]
['cc-config', clusterCockpitConfig],
['resampling', resampleConfig]
])
})

View File

@ -12,6 +12,7 @@
const username = {{ .User.Username }};
const filterPresets = {{ .FilterPresets }};
const clusterCockpitConfig = {{ .Config }};
const resampleConfig = {{ .Resampling }};
</script>
<script src='/build/config.js'></script>
{{end}}

View File

@ -14,6 +14,7 @@
const username = {{ .User.Username }};
const authlevel = {{ .User.GetAuthLevel }};
const roles = {{ .Roles }};
const resampleConfig = {{ .Resampling }};
</script>
<script src='/build/job.js'></script>
{{end}}

View File

@ -12,6 +12,7 @@
const clusterCockpitConfig = {{ .Config }};
const authlevel = {{ .User.GetAuthLevel }};
const roles = {{ .Roles }};
const resampleConfig = {{ .Resampling }};
</script>
<script src='/build/jobs.js'></script>
{{end}}

View File

@ -10,6 +10,7 @@
const userInfos = {{ .Infos }};
const filterPresets = {{ .FilterPresets }};
const clusterCockpitConfig = {{ .Config }};
const resampleConfig = {{ .Resampling }};
</script>
<script src='/build/user.js'></script>
{{end}}

View File

@ -98,6 +98,7 @@ type Page struct {
FilterPresets map[string]interface{} // For pages with the Filter component, this can be used to set initial filters.
Infos map[string]interface{} // For generic use (e.g. username for /monitoring/user/<id>, job id for /monitoring/job/<id>)
Config map[string]interface{} // UI settings for the currently logged in user (e.g. line width, ...)
Resampling *schema.ResampleConfig // If not nil, defines resampling trigger and resolutions
}
func RenderTemplate(rw http.ResponseWriter, file string, page *Page) {