Prepare adaptive binCounts in backend

Christoph Kluge 2025-01-22 12:07:12 +01:00
parent 817076bdbf
commit 05bfa9b546
6 changed files with 1171 additions and 338 deletions

View File

@@ -236,7 +236,7 @@ type Query {
   jobsFootprints(filter: [JobFilter!], metrics: [String!]!): Footprints
   jobs(filter: [JobFilter!], page: PageRequest, order: OrderByInput): JobResultList!
-  jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate): [JobsStatistics!]!
+  jobsStatistics(filter: [JobFilter!], metrics: [String!], page: PageRequest, sortBy: SortByAggregate, groupBy: Aggregate, numDurationBins: Int, numMetricBins: Int): [JobsStatistics!]!
   rooflineHeatmap(filter: [JobFilter!]!, rows: Int!, cols: Int!, minX: Float!, minY: Float!, maxX: Float!, maxY: Float!): [[Float!]!]!
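
Both new arguments are optional Ints, so existing clients are unaffected. As a minimal client-side sketch of how they could be passed (plain net/http; the /query endpoint path and the server address are assumptions, not taken from this commit):

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// Ask for 48 duration bins (half-hour resolution); numMetricBins works
	// the same way for the histMetrics field. Endpoint path is assumed.
	query := []byte(`{"query": "query { jobsStatistics(numDurationBins: 48) { histDuration { value count } } }"}`)
	resp, err := http.Post("http://localhost:8080/query", "application/json", bytes.NewReader(query))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}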

File diff suppressed because it is too large

View File

@@ -2,7 +2,7 @@ package graph

 // This file will be automatically regenerated based on the schema, any resolver implementations
 // will be copied through when generating and any unknown code will be moved to the end.
-// Code generated by github.com/99designs/gqlgen version v0.17.49
+// Code generated by github.com/99designs/gqlgen version v0.17.57

 import (
 	"context"
@@ -357,7 +357,7 @@ func (r *queryResolver) Jobs(ctx context.Context, filter []*model.JobFilter, pag
 }

 // JobsStatistics is the resolver for the jobsStatistics field.
-func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate) ([]*model.JobsStatistics, error) {
+func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobFilter, metrics []string, page *model.PageRequest, sortBy *model.SortByAggregate, groupBy *model.Aggregate, numDurationBins *int, numMetricBins *int) ([]*model.JobsStatistics, error) {
 	var err error
 	var stats []*model.JobsStatistics
@@ -395,7 +395,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
 	if requireField(ctx, "histDuration") || requireField(ctx, "histNumNodes") || requireField(ctx, "histNumCores") || requireField(ctx, "histNumAccs") {
 		if groupBy == nil {
-			stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0])
+			stats[0], err = r.Repo.AddHistograms(ctx, filter, stats[0], numDurationBins)
 			if err != nil {
 				return nil, err
 			}
@@ -406,7 +406,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
 	if requireField(ctx, "histMetrics") {
 		if groupBy == nil {
-			stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0])
+			stats[0], err = r.Repo.AddMetricHistograms(ctx, filter, metrics, stats[0], numMetricBins)
 			if err != nil {
 				return nil, err
 			}
@@ -493,11 +493,9 @@ func (r *Resolver) Query() generated.QueryResolver { return &queryResolver{r} }
 // SubCluster returns generated.SubClusterResolver implementation.
 func (r *Resolver) SubCluster() generated.SubClusterResolver { return &subClusterResolver{r} }

-type (
-	clusterResolver struct{ *Resolver }
-	jobResolver struct{ *Resolver }
-	metricValueResolver struct{ *Resolver }
-	mutationResolver struct{ *Resolver }
-	queryResolver struct{ *Resolver }
-	subClusterResolver struct{ *Resolver }
-)
+type clusterResolver struct{ *Resolver }
+type jobResolver struct{ *Resolver }
+type metricValueResolver struct{ *Resolver }
+type mutationResolver struct{ *Resolver }
+type queryResolver struct{ *Resolver }
+type subClusterResolver struct{ *Resolver }

View File

@@ -447,15 +447,29 @@ func (r *JobRepository) AddHistograms(
 	ctx context.Context,
 	filter []*model.JobFilter,
 	stat *model.JobsStatistics,
+	targetBinCount *int,
 ) (*model.JobsStatistics, error) {
 	start := time.Now()

-	binSeconds := 900
-	binMinutes := binSeconds / 60
+	// targetBinCount: Frontend argument
+	// -> Min Bins: 24 -> Min Resolution: By Hour
+	// -> In Between Bins: 48 -> Resolution by Half Hour
+	//                     96 -> Resolution by Quarter Hour (Skip for Now)
+	//                    144 -> Resolution by 10 Minutes
+	//                    288 -> Resolution by 5 Minutes
+	//                    720 -> Resolution by 2 Minutes (Skip for Now)
+	// -> Max Bins: 1440 -> Max Resolution: By Minute
+	if targetBinCount == nil {
+		binCount := 24
+		targetBinCount = &binCount
+	}
+
+	binSizeSeconds := (86400 / *targetBinCount)

 	castType := r.getCastType()
 	var err error

-	// Bin by job duration in sizes of binSeconds, add +1, gives Integers from 1-XX+1, re-multiply by binMinutes to get final bar x-values (logic: Jobs less than duration X in bin)
-	value := fmt.Sprintf(`CAST((ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) * %d) as %s) as value`, time.Now().Unix(), binSeconds, binMinutes, castType)
+	// Return X-Values always as seconds, will be formatted into minutes and hours in frontend
+	value := fmt.Sprintf(`CAST(ROUND(((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / %d) + 1) as %s) as value`, time.Now().Unix(), binSizeSeconds, castType)
 	stat.HistDuration, err = r.jobsStatisticsHistogram(ctx, value, filter)
 	if err != nil {
 		log.Warn("Error while loading job statistics histogram: job duration")
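
To make the new binning concrete, here is a standalone sketch of the arithmetic the SQL expression performs; multiplying the resulting index by the bin width recovers the bar's x-position in seconds (the helper name is illustrative, not part of the commit):

package main

import "fmt"

// binIndexFor mirrors the SQL above: integer-divide the job duration by
// the bin width (one day split into targetBinCount bins) and add 1, so a
// job shorter than one bin width still lands in bin 1.
func binIndexFor(durationSeconds, targetBinCount int) int {
	binSizeSeconds := 86400 / targetBinCount
	return durationSeconds/binSizeSeconds + 1
}

func main() {
	// 24 bins -> 3600 s wide: a 90-minute (5400 s) job falls into bin 2,
	// i.e. the bar at x = 2*3600 s = 2 h.
	fmt.Println(binIndexFor(5400, 24)) // 2
	// 288 bins -> 300 s wide: the same job falls into bin 19 (x = 5700 s).
	fmt.Println(binIndexFor(5400, 288)) // 19
}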
@@ -490,14 +504,20 @@ func (r *JobRepository) AddMetricHistograms(
 	filter []*model.JobFilter,
 	metrics []string,
 	stat *model.JobsStatistics,
+	targetBinCount *int,
 ) (*model.JobsStatistics, error) {
 	start := time.Now()

+	if targetBinCount == nil {
+		binCount := 10
+		targetBinCount = &binCount
+	}
+
 	// Running Jobs Only: First query jobdata from sqlite, then query data and make bins
 	for _, f := range filter {
 		if f.State != nil {
 			if len(f.State) == 1 && f.State[0] == "running" {
-				stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter)
+				stat.HistMetrics = r.runningJobsMetricStatisticsHistogram(ctx, metrics, filter, targetBinCount)
 				log.Debugf("Timer AddMetricHistograms %s", time.Since(start))
 				return stat, nil
 			}
@@ -506,7 +526,7 @@
 	// All other cases: Query and make bins in sqlite directly
 	for _, m := range metrics {
-		metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter)
+		metricHisto, err := r.jobsMetricStatisticsHistogram(ctx, m, filter, targetBinCount)
 		if err != nil {
 			log.Warnf("Error while loading job metric statistics histogram: %s", m)
 			continue
@@ -560,6 +580,7 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 	ctx context.Context,
 	metric string,
 	filters []*model.JobFilter,
+	bins *int,
 ) (*model.MetricHistoPoints, error) {
 	// Get specific Peak or largest Peak
 	var metricConfig *schema.MetricConfig
@@ -627,7 +648,6 @@ func (r *JobRepository) jobsMetricStatisticsHistogram(
 		return nil, sqlerr
 	}

-	bins := 10
 	binQuery := fmt.Sprintf(`CAST( (case when %s = value.max
 		then value.max*0.999999999 else %s end - value.min) / (value.max -
 		value.min) * %d as INTEGER )`, jm, jm, bins)
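
The binQuery above is a standard min-max histogram: the value is normalized into [0, 1) against the filter-wide min and max, scaled by the bin count, and truncated to an integer, while the *0.999999999 factor keeps a value exactly equal to the max out of a phantom extra bin. A plain-Go sketch of the same formula (function name illustrative; assumes max > min):

// metricBinIndex reproduces in Go what the SQL computes per job:
// a bin index in 0..bins-1 from min-max normalization.
func metricBinIndex(v, min, max float64, bins int) int {
	if v == max {
		v = max * 0.999999999 // nudge the peak below 1.0 after normalizing
	}
	return int((v - min) / (max - min) * float64(bins))
}

With min = 0, max = 100 and bins = 10, a value of 100 then maps to bin 9 rather than a nonexistent bin 10.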
@@ -681,6 +701,7 @@ func (r *JobRepository) runningJobsMetricStatisticsHistogram(
 	ctx context.Context,
 	metrics []string,
 	filters []*model.JobFilter,
+	bins *int,
 ) []*model.MetricHistoPoints {
 	// Get Jobs
 	jobs, err := r.QueryJobs(ctx, filters, &model.PageRequest{Page: 1, ItemsPerPage: 500 + 1}, nil)
@@ -743,8 +764,7 @@
 	}

 	// Make and fill bins
-	bins := 10.0
-	peakBin := peak / bins
+	peakBin := peak / float64(*bins)

 	points := make([]*model.MetricHistoPoint, 0)
 	for b := 0; b < 10; b++ {
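
Note that the fill loop still iterates the literal 10 in this commit; only the bin width (peakBin) is adaptive so far. A sketch of the fully generalized in-memory binning this prepares for (helper name and shape are illustrative, not commit code):

// fillBins splits the metric peak into `bins` equal-width buckets and
// counts each footprint value into the bucket it falls in.
func fillBins(values []float64, peak float64, bins int) []int {
	if bins <= 0 || peak <= 0 {
		return nil
	}
	counts := make([]int, bins)
	peakBin := peak / float64(bins) // width of one bin
	for _, v := range values {
		idx := int(v / peakBin)
		if idx >= bins { // v == peak would spill past the last bin
			idx = bins - 1
		}
		if idx >= 0 {
			counts[idx]++
		}
	}
	return counts
}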

View File

@@ -213,7 +213,7 @@
 <Col class="px-1">
 	{#key $stats.data.jobsStatistics[0].histDuration}
 		<Histogram
-			data={convert2uplot($stats.data.jobsStatistics[0].histDuration, true)}
+			data={convert2uplot($stats.data.jobsStatistics[0].histDuration)}
 			title="Duration Distribution"
 			xlabel="Current Runtimes (Hours)"
 			xtime={true}

View File

@@ -405,7 +405,7 @@ function getMetricConfigDeep(metric, cluster, subCluster) {
 	}
 }

-export function convert2uplot(canvasData, minutesToHours = false) {
+export function convert2uplot(canvasData, secondsToMinutes = false, secondsToHours = false) {
 	// Prep: Uplot Data Structure
 	let uplotData = [[],[]] // [X, Y1, Y2, ...]
 	// Iterate if exists
@@ -415,10 +415,14 @@ export function convert2uplot(canvasData, minutesToHours = false) {
 		uplotData[0].push(cd?.max ? cd.max : 0)
 		uplotData[1].push(cd.count)
 	} else { // Default
-		if (minutesToHours) {
-			let hours = cd.value / 60
-			console.log("x minutes to y hours", cd.value, hours)
+		if (secondsToHours) {
+			let hours = cd.value / 3600
+			console.log("x seconds to y hours", cd.value, hours)
 			uplotData[0].push(hours)
+		} else if (secondsToMinutes) {
+			let minutes = cd.value / 60
+			console.log("x seconds to y minutes", cd.value, minutes)
+			uplotData[0].push(minutes)
 		} else {
 			uplotData[0].push(cd.value)
 		}
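
For consistency with the backend examples above, the same conversion sketched in Go: x-values now always arrive in seconds and are divided down for display (the histoPoint shape mirrors the GraphQL HistoPoint type; names are illustrative):

// histoPoint mirrors a GraphQL HistoPoint: an x-value plus a count.
type histoPoint struct{ Value, Count int }

// toUplot converts histogram points to uPlot's [X, Y] column layout,
// scaling x from seconds to minutes or hours when requested.
func toUplot(points []histoPoint, secondsToMinutes, secondsToHours bool) [2][]float64 {
	var xs, ys []float64
	for _, p := range points {
		x := float64(p.Value)
		switch {
		case secondsToHours:
			x /= 3600
		case secondsToMinutes:
			x /= 60
		}
		xs = append(xs, x)
		ys = append(ys, float64(p.Count))
	}
	return [2][]float64{xs, ys}
}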