From e34623b1ceeae2124ffe4f0ffddf83dfad943b50 Mon Sep 17 00:00:00 2001 From: Christoph Kluge Date: Fri, 24 Nov 2023 15:11:38 +0100 Subject: [PATCH] Add db average stats to gql, use in footprint --- api/schema.graphqls | 5 + internal/graph/generated/generated.go | 226 ++++++++++++++++++++++++ internal/repository/job.go | 5 +- pkg/schema/job.go | 8 +- tools/archive-migration/job.go | 8 +- web/frontend/src/Job.root.svelte | 3 +- web/frontend/src/JobFootprint.svelte | 21 ++- web/frontend/src/joblist/JobList.svelte | 3 + 8 files changed, 264 insertions(+), 15 deletions(-) diff --git a/api/schema.graphqls b/api/schema.graphqls index 69e32e2..01eabc2 100644 --- a/api/schema.graphqls +++ b/api/schema.graphqls @@ -28,6 +28,11 @@ type Job { resources: [Resource!]! concurrentJobs: JobLinkResultList + memUsedMax: Float + flopsAnyAvg: Float + memBwAvg: Float + loadAvg: Float + metaData: Any userData: User } diff --git a/internal/graph/generated/generated.go b/internal/graph/generated/generated.go index f29e2a0..6778e76 100644 --- a/internal/graph/generated/generated.go +++ b/internal/graph/generated/generated.go @@ -88,8 +88,12 @@ type ComplexityRoot struct { ConcurrentJobs func(childComplexity int) int Duration func(childComplexity int) int Exclusive func(childComplexity int) int + FlopsAnyAvg func(childComplexity int) int ID func(childComplexity int) int JobID func(childComplexity int) int + LoadAvg func(childComplexity int) int + MemBwAvg func(childComplexity int) int + MemUsedMax func(childComplexity int) int MetaData func(childComplexity int) int MonitoringStatus func(childComplexity int) int NumAcc func(childComplexity int) int @@ -303,6 +307,7 @@ type JobResolver interface { Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) + MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) UserData(ctx context.Context, obj *schema.Job) (*model.User, error) } @@ -485,6 +490,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Job.Exclusive(childComplexity), true + case "Job.flopsAnyAvg": + if e.complexity.Job.FlopsAnyAvg == nil { + break + } + + return e.complexity.Job.FlopsAnyAvg(childComplexity), true + case "Job.id": if e.complexity.Job.ID == nil { break @@ -499,6 +511,27 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in return e.complexity.Job.JobID(childComplexity), true + case "Job.loadAvg": + if e.complexity.Job.LoadAvg == nil { + break + } + + return e.complexity.Job.LoadAvg(childComplexity), true + + case "Job.memBwAvg": + if e.complexity.Job.MemBwAvg == nil { + break + } + + return e.complexity.Job.MemBwAvg(childComplexity), true + + case "Job.memUsedMax": + if e.complexity.Job.MemUsedMax == nil { + break + } + + return e.complexity.Job.MemUsedMax(childComplexity), true + case "Job.metaData": if e.complexity.Job.MetaData == nil { break @@ -1628,6 +1661,11 @@ type Job { resources: [Resource!]! 
concurrentJobs: JobLinkResultList + memUsedMax: Float + flopsAnyAvg: Float + memBwAvg: Float + loadAvg: Float + metaData: Any userData: User } @@ -4054,6 +4092,170 @@ func (ec *executionContext) fieldContext_Job_concurrentJobs(ctx context.Context, return fc, nil } +func (ec *executionContext) _Job_memUsedMax(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Job_memUsedMax(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.MemUsedMax, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(float64) + fc.Result = res + return ec.marshalOFloat2float64(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Job_memUsedMax(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Job", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Float does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Job_flopsAnyAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Job_flopsAnyAvg(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.FlopsAnyAvg, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(float64) + fc.Result = res + return ec.marshalOFloat2float64(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Job_flopsAnyAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Job", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Float does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Job_memBwAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Job_memBwAvg(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.MemBwAvg, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(float64) + fc.Result = res + return 
ec.marshalOFloat2float64(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Job_memBwAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Job", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Float does not have child fields") + }, + } + return fc, nil +} + +func (ec *executionContext) _Job_loadAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { + fc, err := ec.fieldContext_Job_loadAvg(ctx, field) + if err != nil { + return graphql.Null + } + ctx = graphql.WithFieldContext(ctx, fc) + defer func() { + if r := recover(); r != nil { + ec.Error(ctx, ec.Recover(ctx, r)) + ret = graphql.Null + } + }() + resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) { + ctx = rctx // use context from middleware stack in children + return obj.LoadAvg, nil + }) + if err != nil { + ec.Error(ctx, err) + return graphql.Null + } + if resTmp == nil { + return graphql.Null + } + res := resTmp.(float64) + fc.Result = res + return ec.marshalOFloat2float64(ctx, field.Selections, res) +} + +func (ec *executionContext) fieldContext_Job_loadAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) { + fc = &graphql.FieldContext{ + Object: "Job", + Field: field, + IsMethod: false, + IsResolver: false, + Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) { + return nil, errors.New("field of type Float does not have child fields") + }, + } + return fc, nil +} + func (ec *executionContext) _Job_metaData(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { fc, err := ec.fieldContext_Job_metaData(ctx, field) if err != nil { @@ -4778,6 +4980,14 @@ func (ec *executionContext) fieldContext_JobResultList_items(ctx context.Context return ec.fieldContext_Job_resources(ctx, field) case "concurrentJobs": return ec.fieldContext_Job_concurrentJobs(ctx, field) + case "memUsedMax": + return ec.fieldContext_Job_memUsedMax(ctx, field) + case "flopsAnyAvg": + return ec.fieldContext_Job_flopsAnyAvg(ctx, field) + case "memBwAvg": + return ec.fieldContext_Job_memBwAvg(ctx, field) + case "loadAvg": + return ec.fieldContext_Job_loadAvg(ctx, field) case "metaData": return ec.fieldContext_Job_metaData(ctx, field) case "userData": @@ -7152,6 +7362,14 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr return ec.fieldContext_Job_resources(ctx, field) case "concurrentJobs": return ec.fieldContext_Job_concurrentJobs(ctx, field) + case "memUsedMax": + return ec.fieldContext_Job_memUsedMax(ctx, field) + case "flopsAnyAvg": + return ec.fieldContext_Job_flopsAnyAvg(ctx, field) + case "memBwAvg": + return ec.fieldContext_Job_memBwAvg(ctx, field) + case "loadAvg": + return ec.fieldContext_Job_loadAvg(ctx, field) case "metaData": return ec.fieldContext_Job_metaData(ctx, field) case "userData": @@ -12504,6 +12722,14 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj } out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) + case "memUsedMax": + out.Values[i] = ec._Job_memUsedMax(ctx, field, obj) + case "flopsAnyAvg": + out.Values[i] = ec._Job_flopsAnyAvg(ctx, field, obj) + case "memBwAvg": + out.Values[i] = 
ec._Job_memBwAvg(ctx, field, obj) + case "loadAvg": + out.Values[i] = ec._Job_loadAvg(ctx, field, obj) case "metaData": field := field diff --git a/internal/repository/job.go b/internal/repository/job.go index 76834d1..e1a997a 100644 --- a/internal/repository/job.go +++ b/internal/repository/job.go @@ -60,7 +60,7 @@ func GetJobRepository() *JobRepository { var jobColumns []string = []string{ "job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id", "job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state", - "job.duration", "job.walltime", "job.resources", // "job.meta_data", + "job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data", } func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) { @@ -68,7 +68,7 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) { if err := row.Scan( &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State, - &job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil { + &job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil { log.Warnf("Error while scanning rows (Job): %v", err) return nil, err } @@ -483,6 +483,7 @@ func (r *JobRepository) MarkArchived( case "mem_bw": stmt = stmt.Set("mem_bw_avg", stats.Avg) case "load": + stmt = stmt.Set("load_avg", stats.Avg) case "cpu_load": stmt = stmt.Set("load_avg", stats.Avg) case "net_bw": diff --git a/pkg/schema/job.go b/pkg/schema/job.go index ed3a8b6..90bf2cb 100644 --- a/pkg/schema/job.go +++ b/pkg/schema/job.go @@ -54,10 +54,10 @@ type Job struct { BaseJob StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type - MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64 - FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64 - MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64 - LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64 + MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64 + FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64 + MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64 + LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64 NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64 NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64 FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64 diff --git a/tools/archive-migration/job.go b/tools/archive-migration/job.go index cd54d6c..0dff4b4 100644 --- a/tools/archive-migration/job.go +++ b/tools/archive-migration/job.go @@ -52,10 +52,10 @@ type Job struct { BaseJob StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type - MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64 - FlopsAnyAvg 
float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64 - MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64 - LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64 + MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64 + FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64 + MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64 + LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64 NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64 NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64 FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64 diff --git a/web/frontend/src/Job.root.svelte b/web/frontend/src/Job.root.svelte index 7bd40f8..1b66e33 100644 --- a/web/frontend/src/Job.root.svelte +++ b/web/frontend/src/Job.root.svelte @@ -47,7 +47,8 @@ resources { hostname, hwthreads, accelerators }, metaData, userData { name, email }, - concurrentJobs { items { id, jobId }, count, listQuery } + concurrentJobs { items { id, jobId }, count, listQuery }, + flopsAnyAvg, memBwAvg, loadAvg } `); diff --git a/web/frontend/src/JobFootprint.svelte b/web/frontend/src/JobFootprint.svelte index cf3e227..4313030 100644 --- a/web/frontend/src/JobFootprint.svelte +++ b/web/frontend/src/JobFootprint.svelte @@ -31,9 +31,9 @@ /* NOTES: - 'mem_allocated' für shared jobs (noch todo / nicht in den jobdaten enthalten bisher) - > For now: 'acc_util' gegen 'mem_used' für alex + > For now: 'acc_util' gegen 'mem_used' für alex: Mem bw für shared weggefallen: dann wieder vier bars - Energy Metric Missiing, muss eingebaut werden - - Diese Config in config.json? + - footprintMetrics Config in config.json? */ const footprintMetrics = isAcceleratedJob @@ -60,9 +60,15 @@ const footprintData = footprintMetrics.map((fm) => { const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === 'node') - // ... get Mean + // ... get Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata let mv = null - if (jm?.metric?.statisticsSeries) { + if (fm === 'cpu_load' && job.loadAvg !== 0) { + mv = round(job.loadAvg, 2) + } else if (fm === 'flops_any' && job.flopsAnyAvg !== 0) { + mv = round(job.flopsAnyAvg, 2) + } else if (fm === 'mem_bw' && job.memBwAvg !== 0) { + mv = round(job.memBwAvg, 2) + } else if (jm?.metric?.statisticsSeries) { mv = round(mean(jm.metric.statisticsSeries.mean), 2) } else if (jm?.metric?.series?.length > 1) { const avgs = jm.metric.series.map(jms => jms.statistics.avg) @@ -356,6 +362,13 @@ /> {/each} + {#if job?.metaData?.message}
{@html job.metaData.message} diff --git a/web/frontend/src/joblist/JobList.svelte b/web/frontend/src/joblist/JobList.svelte index 8036361..5f8d89b 100644 --- a/web/frontend/src/joblist/JobList.svelte +++ b/web/frontend/src/joblist/JobList.svelte @@ -74,6 +74,9 @@ name } metaData + flopsAnyAvg + memBwAvg + loadAvg } count }
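
Taken together, the patch threads the four pre-aggregated statistics stored in the job table (mem_used_max, flops_any_avg, mem_bw_avg, load_avg) through to the GraphQL API as nullable Float fields on Job, and the frontend prefers them over recomputing means from raw metric data. Below is a minimal sketch of a client query against the extended schema; it assumes the pre-existing Query.job root field with an id argument (not shown in this diff), and the variable and operation names are illustrative only. Only the last four selections are introduced by this patch.

  # Sketch: read the DB-backed footprint averages for one job.
  # Assumes the existing Query.job(id: ID!) field; jobId and duration
  # were already part of the Job type before this change.
  query JobFootprint($id: ID!) {
    job(id: $id) {
      jobId
      duration
      memUsedMax   # from job.mem_used_max
      flopsAnyAvg  # from job.flops_any_avg
      memBwAvg     # from job.mem_bw_avg
      loadAvg      # from job.load_avg
    }
  }

As the JobFootprint.svelte hunk shows, the frontend treats a value of 0 as "not yet available" and falls back to the mean of statisticsSeries or the per-series averages, so the new fields act as a fast path rather than a replacement for the metric data.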