Add db average stats to gql, use in footprint

This commit is contained in:
Christoph Kluge 2023-11-24 15:11:38 +01:00
parent 4e375ff32b
commit e34623b1ce
8 changed files with 264 additions and 15 deletions

View File

@ -28,6 +28,11 @@ type Job {
resources: [Resource!]! resources: [Resource!]!
concurrentJobs: JobLinkResultList concurrentJobs: JobLinkResultList
memUsedMax: Float
flopsAnyAvg: Float
memBwAvg: Float
loadAvg: Float
metaData: Any metaData: Any
userData: User userData: User
} }

View File

@ -88,8 +88,12 @@ type ComplexityRoot struct {
ConcurrentJobs func(childComplexity int) int ConcurrentJobs func(childComplexity int) int
Duration func(childComplexity int) int Duration func(childComplexity int) int
Exclusive func(childComplexity int) int Exclusive func(childComplexity int) int
FlopsAnyAvg func(childComplexity int) int
ID func(childComplexity int) int ID func(childComplexity int) int
JobID func(childComplexity int) int JobID func(childComplexity int) int
LoadAvg func(childComplexity int) int
MemBwAvg func(childComplexity int) int
MemUsedMax func(childComplexity int) int
MetaData func(childComplexity int) int MetaData func(childComplexity int) int
MonitoringStatus func(childComplexity int) int MonitoringStatus func(childComplexity int) int
NumAcc func(childComplexity int) int NumAcc func(childComplexity int) int
@ -303,6 +307,7 @@ type JobResolver interface {
Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag, error)
ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error)
MetaData(ctx context.Context, obj *schema.Job) (interface{}, error) MetaData(ctx context.Context, obj *schema.Job) (interface{}, error)
UserData(ctx context.Context, obj *schema.Job) (*model.User, error) UserData(ctx context.Context, obj *schema.Job) (*model.User, error)
} }
@ -485,6 +490,13 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Job.Exclusive(childComplexity), true return e.complexity.Job.Exclusive(childComplexity), true
case "Job.flopsAnyAvg":
if e.complexity.Job.FlopsAnyAvg == nil {
break
}
return e.complexity.Job.FlopsAnyAvg(childComplexity), true
case "Job.id": case "Job.id":
if e.complexity.Job.ID == nil { if e.complexity.Job.ID == nil {
break break
@ -499,6 +511,27 @@ func (e *executableSchema) Complexity(typeName, field string, childComplexity in
return e.complexity.Job.JobID(childComplexity), true return e.complexity.Job.JobID(childComplexity), true
case "Job.loadAvg":
if e.complexity.Job.LoadAvg == nil {
break
}
return e.complexity.Job.LoadAvg(childComplexity), true
case "Job.memBwAvg":
if e.complexity.Job.MemBwAvg == nil {
break
}
return e.complexity.Job.MemBwAvg(childComplexity), true
case "Job.memUsedMax":
if e.complexity.Job.MemUsedMax == nil {
break
}
return e.complexity.Job.MemUsedMax(childComplexity), true
case "Job.metaData": case "Job.metaData":
if e.complexity.Job.MetaData == nil { if e.complexity.Job.MetaData == nil {
break break
@ -1628,6 +1661,11 @@ type Job {
resources: [Resource!]! resources: [Resource!]!
concurrentJobs: JobLinkResultList concurrentJobs: JobLinkResultList
memUsedMax: Float
flopsAnyAvg: Float
memBwAvg: Float
loadAvg: Float
metaData: Any metaData: Any
userData: User userData: User
} }
@ -4054,6 +4092,170 @@ func (ec *executionContext) fieldContext_Job_concurrentJobs(ctx context.Context,
return fc, nil return fc, nil
} }
// _Job_memUsedMax produces the marshaler for the memUsedMax field of a Job.
//
// The value is read straight from obj.MemUsedMax, wrapped in the resolver
// middleware chain. graphql.Null is returned when the field context cannot be
// built, when the middleware reports an error (which is also recorded via
// ec.Error), when the middleware yields a nil result, or when a panic is
// recovered while resolving.
func (ec *executionContext) _Job_memUsedMax(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
	fieldCtx, fcErr := ec.fieldContext_Job_memUsedMax(ctx, field)
	if fcErr != nil {
		return graphql.Null
	}
	ctx = graphql.WithFieldContext(ctx, fieldCtx)

	// Turn a panic raised during resolution into a reported error and a null result.
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		ec.Error(ctx, ec.Recover(ctx, recovered))
		ret = graphql.Null
	}()

	readField := func(rctx context.Context) (interface{}, error) {
		ctx = rctx // use context from middleware stack in children
		return obj.MemUsedMax, nil
	}
	raw, resolveErr := ec.ResolverMiddleware(ctx, readField)
	switch {
	case resolveErr != nil:
		ec.Error(ctx, resolveErr)
		return graphql.Null
	case raw == nil:
		return graphql.Null
	}

	value := raw.(float64)
	fieldCtx.Result = value
	return ec.marshalOFloat2float64(ctx, field.Selections, value)
}
// fieldContext_Job_memUsedMax builds the field context for Job.memUsedMax.
//
// The context marks the field as belonging to object "Job" and as neither a
// method nor a resolver. Its Child callback always fails, because a Float has
// no child fields. The returned error is always nil.
func (ec *executionContext) fieldContext_Job_memUsedMax(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	// Scalar leaf: any attempt to descend into a child field is an error.
	rejectChild := func(context.Context, graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type Float does not have child fields")
	}

	fc = &graphql.FieldContext{
		Object:     "Job",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      rejectChild,
	}
	return fc, nil
}
// _Job_flopsAnyAvg produces the marshaler for the flopsAnyAvg field of a Job.
//
// The value is read straight from obj.FlopsAnyAvg, wrapped in the resolver
// middleware chain. graphql.Null is returned when the field context cannot be
// built, when the middleware reports an error (which is also recorded via
// ec.Error), when the middleware yields a nil result, or when a panic is
// recovered while resolving.
func (ec *executionContext) _Job_flopsAnyAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
	fieldCtx, fcErr := ec.fieldContext_Job_flopsAnyAvg(ctx, field)
	if fcErr != nil {
		return graphql.Null
	}
	ctx = graphql.WithFieldContext(ctx, fieldCtx)

	// Turn a panic raised during resolution into a reported error and a null result.
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		ec.Error(ctx, ec.Recover(ctx, recovered))
		ret = graphql.Null
	}()

	readField := func(rctx context.Context) (interface{}, error) {
		ctx = rctx // use context from middleware stack in children
		return obj.FlopsAnyAvg, nil
	}
	raw, resolveErr := ec.ResolverMiddleware(ctx, readField)
	switch {
	case resolveErr != nil:
		ec.Error(ctx, resolveErr)
		return graphql.Null
	case raw == nil:
		return graphql.Null
	}

	value := raw.(float64)
	fieldCtx.Result = value
	return ec.marshalOFloat2float64(ctx, field.Selections, value)
}
// fieldContext_Job_flopsAnyAvg builds the field context for Job.flopsAnyAvg.
//
// The context marks the field as belonging to object "Job" and as neither a
// method nor a resolver. Its Child callback always fails, because a Float has
// no child fields. The returned error is always nil.
func (ec *executionContext) fieldContext_Job_flopsAnyAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	// Scalar leaf: any attempt to descend into a child field is an error.
	rejectChild := func(context.Context, graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type Float does not have child fields")
	}

	fc = &graphql.FieldContext{
		Object:     "Job",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      rejectChild,
	}
	return fc, nil
}
// _Job_memBwAvg produces the marshaler for the memBwAvg field of a Job.
//
// The value is read straight from obj.MemBwAvg, wrapped in the resolver
// middleware chain. graphql.Null is returned when the field context cannot be
// built, when the middleware reports an error (which is also recorded via
// ec.Error), when the middleware yields a nil result, or when a panic is
// recovered while resolving.
func (ec *executionContext) _Job_memBwAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
	fieldCtx, fcErr := ec.fieldContext_Job_memBwAvg(ctx, field)
	if fcErr != nil {
		return graphql.Null
	}
	ctx = graphql.WithFieldContext(ctx, fieldCtx)

	// Turn a panic raised during resolution into a reported error and a null result.
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		ec.Error(ctx, ec.Recover(ctx, recovered))
		ret = graphql.Null
	}()

	readField := func(rctx context.Context) (interface{}, error) {
		ctx = rctx // use context from middleware stack in children
		return obj.MemBwAvg, nil
	}
	raw, resolveErr := ec.ResolverMiddleware(ctx, readField)
	switch {
	case resolveErr != nil:
		ec.Error(ctx, resolveErr)
		return graphql.Null
	case raw == nil:
		return graphql.Null
	}

	value := raw.(float64)
	fieldCtx.Result = value
	return ec.marshalOFloat2float64(ctx, field.Selections, value)
}
// fieldContext_Job_memBwAvg builds the field context for Job.memBwAvg.
//
// The context marks the field as belonging to object "Job" and as neither a
// method nor a resolver. Its Child callback always fails, because a Float has
// no child fields. The returned error is always nil.
func (ec *executionContext) fieldContext_Job_memBwAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	// Scalar leaf: any attempt to descend into a child field is an error.
	rejectChild := func(context.Context, graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type Float does not have child fields")
	}

	fc = &graphql.FieldContext{
		Object:     "Job",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      rejectChild,
	}
	return fc, nil
}
// _Job_loadAvg produces the marshaler for the loadAvg field of a Job.
//
// The value is read straight from obj.LoadAvg, wrapped in the resolver
// middleware chain. graphql.Null is returned when the field context cannot be
// built, when the middleware reports an error (which is also recorded via
// ec.Error), when the middleware yields a nil result, or when a panic is
// recovered while resolving.
func (ec *executionContext) _Job_loadAvg(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
	fieldCtx, fcErr := ec.fieldContext_Job_loadAvg(ctx, field)
	if fcErr != nil {
		return graphql.Null
	}
	ctx = graphql.WithFieldContext(ctx, fieldCtx)

	// Turn a panic raised during resolution into a reported error and a null result.
	defer func() {
		recovered := recover()
		if recovered == nil {
			return
		}
		ec.Error(ctx, ec.Recover(ctx, recovered))
		ret = graphql.Null
	}()

	readField := func(rctx context.Context) (interface{}, error) {
		ctx = rctx // use context from middleware stack in children
		return obj.LoadAvg, nil
	}
	raw, resolveErr := ec.ResolverMiddleware(ctx, readField)
	switch {
	case resolveErr != nil:
		ec.Error(ctx, resolveErr)
		return graphql.Null
	case raw == nil:
		return graphql.Null
	}

	value := raw.(float64)
	fieldCtx.Result = value
	return ec.marshalOFloat2float64(ctx, field.Selections, value)
}
// fieldContext_Job_loadAvg builds the field context for Job.loadAvg.
//
// The context marks the field as belonging to object "Job" and as neither a
// method nor a resolver. Its Child callback always fails, because a Float has
// no child fields. The returned error is always nil.
func (ec *executionContext) fieldContext_Job_loadAvg(ctx context.Context, field graphql.CollectedField) (fc *graphql.FieldContext, err error) {
	// Scalar leaf: any attempt to descend into a child field is an error.
	rejectChild := func(context.Context, graphql.CollectedField) (*graphql.FieldContext, error) {
		return nil, errors.New("field of type Float does not have child fields")
	}

	fc = &graphql.FieldContext{
		Object:     "Job",
		Field:      field,
		IsMethod:   false,
		IsResolver: false,
		Child:      rejectChild,
	}
	return fc, nil
}
func (ec *executionContext) _Job_metaData(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) { func (ec *executionContext) _Job_metaData(ctx context.Context, field graphql.CollectedField, obj *schema.Job) (ret graphql.Marshaler) {
fc, err := ec.fieldContext_Job_metaData(ctx, field) fc, err := ec.fieldContext_Job_metaData(ctx, field)
if err != nil { if err != nil {
@ -4778,6 +4980,14 @@ func (ec *executionContext) fieldContext_JobResultList_items(ctx context.Context
return ec.fieldContext_Job_resources(ctx, field) return ec.fieldContext_Job_resources(ctx, field)
case "concurrentJobs": case "concurrentJobs":
return ec.fieldContext_Job_concurrentJobs(ctx, field) return ec.fieldContext_Job_concurrentJobs(ctx, field)
case "memUsedMax":
return ec.fieldContext_Job_memUsedMax(ctx, field)
case "flopsAnyAvg":
return ec.fieldContext_Job_flopsAnyAvg(ctx, field)
case "memBwAvg":
return ec.fieldContext_Job_memBwAvg(ctx, field)
case "loadAvg":
return ec.fieldContext_Job_loadAvg(ctx, field)
case "metaData": case "metaData":
return ec.fieldContext_Job_metaData(ctx, field) return ec.fieldContext_Job_metaData(ctx, field)
case "userData": case "userData":
@ -7152,6 +7362,14 @@ func (ec *executionContext) fieldContext_Query_job(ctx context.Context, field gr
return ec.fieldContext_Job_resources(ctx, field) return ec.fieldContext_Job_resources(ctx, field)
case "concurrentJobs": case "concurrentJobs":
return ec.fieldContext_Job_concurrentJobs(ctx, field) return ec.fieldContext_Job_concurrentJobs(ctx, field)
case "memUsedMax":
return ec.fieldContext_Job_memUsedMax(ctx, field)
case "flopsAnyAvg":
return ec.fieldContext_Job_flopsAnyAvg(ctx, field)
case "memBwAvg":
return ec.fieldContext_Job_memBwAvg(ctx, field)
case "loadAvg":
return ec.fieldContext_Job_loadAvg(ctx, field)
case "metaData": case "metaData":
return ec.fieldContext_Job_metaData(ctx, field) return ec.fieldContext_Job_metaData(ctx, field)
case "userData": case "userData":
@ -12504,6 +12722,14 @@ func (ec *executionContext) _Job(ctx context.Context, sel ast.SelectionSet, obj
} }
out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) }) out.Concurrently(i, func(ctx context.Context) graphql.Marshaler { return innerFunc(ctx, out) })
case "memUsedMax":
out.Values[i] = ec._Job_memUsedMax(ctx, field, obj)
case "flopsAnyAvg":
out.Values[i] = ec._Job_flopsAnyAvg(ctx, field, obj)
case "memBwAvg":
out.Values[i] = ec._Job_memBwAvg(ctx, field, obj)
case "loadAvg":
out.Values[i] = ec._Job_loadAvg(ctx, field, obj)
case "metaData": case "metaData":
field := field field := field

View File

@ -60,7 +60,7 @@ func GetJobRepository() *JobRepository {
var jobColumns []string = []string{ var jobColumns []string = []string{
"job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id", "job.id", "job.job_id", "job.user", "job.project", "job.cluster", "job.subcluster", "job.start_time", "job.partition", "job.array_job_id",
"job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state", "job.num_nodes", "job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status", "job.smt", "job.job_state",
"job.duration", "job.walltime", "job.resources", // "job.meta_data", "job.duration", "job.walltime", "job.resources", "job.mem_used_max", "job.flops_any_avg", "job.mem_bw_avg", "job.load_avg", // "job.meta_data",
} }
func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) { func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
@ -68,7 +68,7 @@ func scanJob(row interface{ Scan(...interface{}) error }) (*schema.Job, error) {
if err := row.Scan( if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId, &job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster, &job.StartTimeUnix, &job.Partition, &job.ArrayJobId,
&job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State, &job.NumNodes, &job.NumHWThreads, &job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources /*&job.RawMetaData*/); err != nil { &job.Duration, &job.Walltime, &job.RawResources, &job.MemUsedMax, &job.FlopsAnyAvg, &job.MemBwAvg, &job.LoadAvg /*&job.RawMetaData*/); err != nil {
log.Warnf("Error while scanning rows (Job): %v", err) log.Warnf("Error while scanning rows (Job): %v", err)
return nil, err return nil, err
} }
@ -483,6 +483,7 @@ func (r *JobRepository) MarkArchived(
case "mem_bw": case "mem_bw":
stmt = stmt.Set("mem_bw_avg", stats.Avg) stmt = stmt.Set("mem_bw_avg", stats.Avg)
case "load": case "load":
stmt = stmt.Set("load_avg", stats.Avg)
case "cpu_load": case "cpu_load":
stmt = stmt.Set("load_avg", stats.Avg) stmt = stmt.Set("load_avg", stats.Avg)
case "net_bw": case "net_bw":

View File

@ -54,10 +54,10 @@ type Job struct {
BaseJob BaseJob
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64 MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64 FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64 MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64 LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64 NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64 NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64 FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64

View File

@ -52,10 +52,10 @@ type Job struct {
BaseJob BaseJob
StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds StartTimeUnix int64 `json:"-" db:"start_time" example:"1649723812"` // Start epoch time stamp in seconds
StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type StartTime time.Time `json:"startTime"` // Start time as 'time.Time' data type
MemUsedMax float64 `json:"-" db:"mem_used_max"` // MemUsedMax as Float64 MemUsedMax float64 `json:"memUsedMax" db:"mem_used_max"` // MemUsedMax as Float64
FlopsAnyAvg float64 `json:"-" db:"flops_any_avg"` // FlopsAnyAvg as Float64 FlopsAnyAvg float64 `json:"flopsAnyAvg" db:"flops_any_avg"` // FlopsAnyAvg as Float64
MemBwAvg float64 `json:"-" db:"mem_bw_avg"` // MemBwAvg as Float64 MemBwAvg float64 `json:"memBwAvg" db:"mem_bw_avg"` // MemBwAvg as Float64
LoadAvg float64 `json:"-" db:"load_avg"` // LoadAvg as Float64 LoadAvg float64 `json:"loadAvg" db:"load_avg"` // LoadAvg as Float64
NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64 NetBwAvg float64 `json:"-" db:"net_bw_avg"` // NetBwAvg as Float64
NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64 NetDataVolTotal float64 `json:"-" db:"net_data_vol_total"` // NetDataVolTotal as Float64
FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64 FileBwAvg float64 `json:"-" db:"file_bw_avg"` // FileBwAvg as Float64

View File

@ -47,7 +47,8 @@
resources { hostname, hwthreads, accelerators }, resources { hostname, hwthreads, accelerators },
metaData, metaData,
userData { name, email }, userData { name, email },
concurrentJobs { items { id, jobId }, count, listQuery } concurrentJobs { items { id, jobId }, count, listQuery },
flopsAnyAvg, memBwAvg, loadAvg
} }
`); `);

View File

@ -31,9 +31,9 @@
/* NOTES: /* NOTES:
- 'mem_allocated' für shared jobs (noch todo / nicht in den jobdaten enthalten bisher) - 'mem_allocated' für shared jobs (noch todo / nicht in den jobdaten enthalten bisher)
> For now: 'acc_util' gegen 'mem_used' für alex > For now: 'acc_util' gegen 'mem_used' für alex: Mem bw für shared weggefallen: dann wieder vier bars
- Energy Metric Missing, muss eingebaut werden - Energy Metric Missing, muss eingebaut werden
- Diese Config in config.json? - footprintMetrics Config in config.json?
*/ */
const footprintMetrics = isAcceleratedJob const footprintMetrics = isAcceleratedJob
@ -60,9 +60,15 @@
const footprintData = footprintMetrics.map((fm) => { const footprintData = footprintMetrics.map((fm) => {
const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === 'node') const jm = jobMetrics.find((jm) => jm.name === fm && jm.scope === 'node')
// ... get Mean // ... get Mean: Primarily use backend sourced avgs from job.*, secondarily calculate/read from metricdata
let mv = null let mv = null
if (jm?.metric?.statisticsSeries) { if (fm === 'cpu_load' && job.loadAvg !== 0) {
mv = round(job.loadAvg, 2)
} else if (fm === 'flops_any' && job.flopsAnyAvg !== 0) {
mv = round(job.flopsAnyAvg, 2)
} else if (fm === 'mem_bw' && job.memBwAvg !== 0) {
mv = round(job.memBwAvg, 2)
} else if (jm?.metric?.statisticsSeries) {
mv = round(mean(jm.metric.statisticsSeries.mean), 2) mv = round(mean(jm.metric.statisticsSeries.mean), 2)
} else if (jm?.metric?.series?.length > 1) { } else if (jm?.metric?.series?.length > 1) {
const avgs = jm.metric.series.map(jms => jms.statistics.avg) const avgs = jm.metric.series.map(jms => jms.statistics.avg)
@ -356,6 +362,13 @@
/> />
</div> </div>
{/each} {/each}
<!-- <hr class="mt-1 mb-2"/>
<ul>
<li>Load Avg {round(job.loadAvg, 2)}</li>
<li>Flops Any {round(job.flopsAnyAvg, 2)}</li>
<li>Mem Used Max {round(job.memUsedMax, 2)}</li>
<li>Mem BW Avg {round(job.memBwAvg, 2)}</li>
</ul> -->
{#if job?.metaData?.message} {#if job?.metaData?.message}
<hr class="mt-1 mb-2"/> <hr class="mt-1 mb-2"/>
{@html job.metaData.message} {@html job.metaData.message}

View File

@ -74,6 +74,9 @@
name name
} }
metaData metaData
flopsAnyAvg
memBwAvg
loadAvg
} }
count count
} }