Add db average stats to gql, use in footprint

This commit is contained in:
Christoph Kluge 2023-11-24 15:11:38 +01:00
parent 4e375ff32b
commit e34623b1ce
8 changed files with 264 additions and 15 deletions

View File

@ -28,6 +28,11 @@ type Job {
resources: [Resource!]!
concurrentJobs: JobLinkResultList
memUsedMax: Float
flopsAnyAvg: Float
memBwAvg: Float
loadAvg: Float
metaData: Any
userData: User
}

View File

@ -88,8 +88,12 @@ type ComplexityRoot struct {
ConcurrentJobs func(childComplexity int) int
Duration func(childComplexity int) int
Exclusive func(childComplexity int) int
FlopsAnyAvg func(childComplexity int) int
ID func(childComplexity int) int
JobID func(childComplexity int) int
LoadAvg func(childComplexity int) int
MemBwAvg func(childComplexity int) int
MemUsedMax func(childComplexity int) int
MetaData func(childComplexity int) int
MonitoringStatus func(childComplexity int) int
NumAcc func(childComplexity int) int
@ -303,6 +307,7 @@ type JobResolver interface {
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error)
MetaData(ctx context.Context, obj *schema.Job) (interface{}, error)
UserData(ctx context.Context, obj *schema.Job) (*model.User, error)
}
@ -485,6 +490,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Job.Exclusive(childComplexity), true
case "Job.flopsAnyAvg":
if e.complexity.Job.FlopsAnyAvg == nil {
break
}
return e.complexity.Job.FlopsAnyAvg(childComplexity), true
case "Job.id":
if e.complexity.Job.ID == nil {
break
@ -499,6 +511,27 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Job.JobID(childComplexity), true
case "Job.loadAvg":
if e.complexity.Job.LoadAvg == nil {
break
}
return e.complexity.Job.LoadAvg(childComplexity), true
case "Job.memBwAvg":
if e.complexity.Job.MemBwAvg == nil {
break
}
return e.complexity.Job.MemBwAvg(childComplexity), true
case "Job.memUsedMax":
if e.complexity.Job.MemUsedMax == nil {
break
}
return e.complexity.Job.MemUsedMax(childComplexity), true
case "Job.metaData":
if e.complexity.Job.MetaData == nil {
break
@ -1628,6 +1661,11 @@ type Job {
resources: [Resource!]!
concurrentJobs: JobLinkResultList
memUsedMax: Float
flopsAnyAvg: Float
memBwAvg: Float
loadAvg: Float
metaData: Any
userData: User
}
@ -4054,6 +4092,170 @@ func (ec *executionContext) fieldContext_Job_concurrentJobs(ctx context.Context,
return fc, nil
}
func (ec *executionContext) _Job_memUsedMax(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_memUsedMax(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.MemUsedMax, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(float64)
fc.Result = res
return ec.marshalOFloat2float64(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_memUsedMax(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Float does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Job_flopsAnyAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_flopsAnyAvg(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.FlopsAnyAvg, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(float64)
fc.Result = res
return ec.marshalOFloat2float64(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_flopsAnyAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Float does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Job_memBwAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_memBwAvg(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.MemBwAvg, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(float64)
fc.Result = res
return ec.marshalOFloat2float64(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_memBwAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Float does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Job_loadAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_loadAvg(ctx, field)
if err != nil {
return graphql.Null
}
ctx = graphql.WithFieldContext(ctx, fc)
defer func() {
if r := recover(); r != nil {
ec.Error(ctx, ec.Recover(ctx, r))
ret = graphql.Null
}
}()
resTmp, err := ec.ResolverMiddleware(ctx, func(rctx context.Context) (interface{}, error) {
ctx = rctx // use context from middleware stack in children
return obj.LoadAvg, nil
})
if err != nil {
ec.Error(ctx, err)
return graphql.Null
}
if resTmp == nil {
return graphql.Null
}
res := resTmp.(float64)
fc.Result = res
return ec.marshalOFloat2float64(ctx, field.Selections, res)
}
func (ec *executionContext) fieldContext_Job_loadAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
fc = &graphql.FieldContext{
Object: "Job",
Field: field,
IsMethod: false,
IsResolver: false,
Child: func(ctx context.Context, field graphql.CollectedField) (*graphql.FieldContext, error) {
return nil, errors.New("field of type Float does not have child fields")
},
}
return fc, nil
}
func (ec *executionContext) _Job_metaData(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_metaData(ctx, field)
if err != nil {
@ -4778,6 +4980,14 @@ func (ec *executionContext) fieldContext_JobResultList_items(ctx context.Context
return ec.fieldContext_Job_resources(ctx, field)
case "concurrentJobs":
return ec.fieldContext_Job_concurrentJobs(ctx, field)
case "memUsedMax":
return ec.fieldContext_Job_memUsedMax(ctx, field)
case "flopsAnyAvg":
return ec.fieldContext_Job_flopsAnyAvg(ctx, field)
case "memBwAvg":
return ec.fieldContext_Job_memBwAvg(ctx, field)
case "loadAvg":
return ec.fieldContext_Job_loadAvg(ctx, field)
case "metaData":
return ec.fieldContext_Job_metaData(ctx, field)
case "userData":
@ -7152,6 +7362,14 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_resources(ctx, field)
case "concurrentJobs":
return ec.fieldContext_Job_concurrentJobs(ctx, field)
case "memUsedMax":
return ec.fieldContext_Job_memUsedMax(ctx, field)
case "flopsAnyAvg":
return ec.fieldContext_Job_flopsAnyAvg(ctx, field)
case "memBwAvg":
return ec.fieldContext_Job_memBwAvg(ctx, field)
case "loadAvg":
return ec.fieldContext_Job_loadAvg(ctx, field)
case "metaData":
return ec.fieldContext_Job_metaData(ctx, field)
case "userData":
@ -12504,6 +12722,14 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
}
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "memUsedMax":
out.Values[i] = ec._Job_memUsedMax(ctx, field, obj)
case "flopsAnyAvg":
out.Values[i] = ec._Job_flopsAnyAvg(ctx, field, obj)
case "memBwAvg":
out.Values[i] = ec._Job_memBwAvg(ctx, field, obj)
case "loadAvg":
out.Values[i] = ec._Job_loadAvg(ctx, field, obj)
case "metaData":
field := field

View File

@ -60,7 +60,7 @@ func GetJobRepository() *JobRepository {
var jobColumns []string = []string{
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
"job.duration", "job.walltime", "job.resources", // "job.meta_data",
"job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data",
}
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
@ -68,7 +68,7 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil {
&job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil {
log.Warnf("Error while scanning rows (Job): %v", err)
return nil, err
}
@ -483,6 +483,7 @@ func (r *JobRepository) MarkArchived(
case "mem_bw":
stmt = stmt.Set("mem_bw_avg", stats.Avg)
case "load":
stmt = stmt.Set("load_avg", stats.Avg)
case "cpu_load":
stmt = stmt.Set("load_avg", stats.Avg)
case "net_bw":

View File

@ -54,10 +54,10 @@ type Job struct {
BaseJob
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64
MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64

View File

@ -52,10 +52,10 @@ type Job struct {
BaseJob
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64
MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64

View File

@ -47,7 +47,8 @@
resources { hostname, hwthreads, accelerators },
metaData,
userData { name, email },
concurrentJobs { items { id, jobId }, count, listQuery }
concurrentJobs { items { id, jobId }, count, listQuery },
flopsAnyAvg, memBwAvg, loadAvg
}
`);

View File

@ -31,9 +31,9 @@
/* NOTES:
- 'mem_allocated' für shared jobs (noch todo / nicht in den jobdaten enthalten bisher)
> For now: 'acc_util' gegen 'mem_used' für alex
> For now: 'acc_util' gegen 'mem_used' für alex: Mem bw für shared weggefallen: dann wieder vier bars
- Energy Metric Missiing, muss eingebaut werden
- Diese Config in config.json?
- footprintMetrics Config in config.json?
*/
const footprintMetrics = isAcceleratedJob
@ -60,9 +60,15 @@
const footprintData = footprintMetrics.map((fm) => {
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === 'node')
// ... get Mean
// ... get Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
let mv = null
if (jm?.metric?.statisticsSeries) {
if (fm === 'cpu_load' && job.loadAvg !== 0) {
mv = round(job.loadAvg, 2)
} else if (fm === 'flops_any' && job.flopsAnyAvg !== 0) {
mv = round(job.flopsAnyAvg, 2)
} else if (fm === 'mem_bw' && job.memBwAvg !== 0) {
mv = round(job.memBwAvg, 2)
} else if (jm?.metric?.statisticsSeries) {
mv = round(mean(jm.metric.statisticsSeries.mean), 2)
} else if (jm?.metric?.series?.length > 1) {
const avgs = jm.metric.series.map(jms => jms.statistics.avg)
@ -356,6 +362,13 @@
/>
</div>
{/each}
<!-- <hr class="mt-1 mb-2"/>
<ul>
<li>Load Avg {round(job.loadAvg, 2)}</li>
<li>Flops Any {round(job.flopsAnyAvg, 2)}</li>
<li>Mem Used Max {round(job.memUsedMax, 2)}</li>
<li>Mem BW Avg {round(job.memBwAvg, 2)}</li>
</ul> -->
{#if job?.metaData?.message}
<hr class="mt-1 mb-2"/>
{@html job.metaData.message}

View File

@ -74,6 +74,9 @@
name
}
metaData
flopsAnyAvg
memBwAvg
loadAvg
}
count
}