split statsTable data from jobMetrics query, initial commit

- mainly backend changes
- statstable changes only for prototyping
This commit is contained in:
Christoph Kluge
2025-03-13 17:33:55 +01:00
parent d0af933b35
commit f5f36427a4
19 changed files with 1471 additions and 426 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -81,11 +81,6 @@ type JobLinkResultList struct {
Count *int `json:"count,omitempty"`
}
// JobMetricStatWithName pairs a metric name with a pointer to that metric's
// statistics; both fields are always present in the JSON encoding.
type JobMetricStatWithName struct {
Name string `json:"name"`
Stats *schema.MetricStatistics `json:"stats"`
}
type JobMetricWithName struct {
Name string `json:"name"`
Scope schema.MetricScope `json:"scope"`
@@ -100,6 +95,17 @@ type JobResultList struct {
HasNextPage *bool `json:"hasNextPage,omitempty"`
}
// JobStats pairs a metric name with a single set of statistics for that
// metric; both fields are always present in the JSON encoding.
type JobStats struct {
Name string `json:"name"`
Stats *schema.MetricStatistics `json:"stats"`
}
// JobStatsWithScope groups the statistics of one metric at one scope: Name is
// the metric name, Scope the metric scope, and Stats holds one ScopedStats
// entry per reported element.
type JobStatsWithScope struct {
Name string `json:"name"`
Scope schema.MetricScope `json:"scope"`
Stats []*ScopedStats `json:"stats"`
}
type JobsStatistics struct {
ID string `json:"id"`
Name string `json:"name"`
@@ -173,6 +179,12 @@ type PageRequest struct {
Page int `json:"page"`
}
// ScopedStats carries the statistics reported for a single host. ID is
// optional and omitted from the JSON encoding when nil.
// NOTE(review): ID presumably identifies a sub-node element (e.g. a core or
// socket) for scopes below node level — confirm against the producer.
type ScopedStats struct {
Hostname string `json:"hostname"`
ID *string `json:"id,omitempty"`
Data *schema.MetricStatistics `json:"data"`
}
type StringInput struct {
Eq *string `json:"eq,omitempty"`
Neq *string `json:"neq,omitempty"`

View File

@@ -301,24 +301,23 @@ func (r *queryResolver) JobMetrics(ctx context.Context, id string, metrics []str
return res, err
}
// JobMetricStats is the resolver for the jobMetricStats field.
func (r *queryResolver) JobMetricStats(ctx context.Context, id string, metrics []string) ([]*model.JobMetricStatWithName, error) {
// JobStats is the resolver for the jobStats field.
func (r *queryResolver) JobStats(ctx context.Context, id string, metrics []string) ([]*model.JobStats, error) {
job, err := r.Query().Job(ctx, id)
if err != nil {
log.Warn("Error while querying job for metrics")
log.Warnf("Error while querying job %s for metrics", id)
return nil, err
}
data, err := metricDataDispatcher.LoadStatData(job, metrics, ctx)
data, err := metricDataDispatcher.LoadJobStats(job, metrics, ctx)
if err != nil {
log.Warn("Error while loading job stat data")
log.Warnf("Error while loading job stat data for job id %s", id)
return nil, err
}
res := []*model.JobMetricStatWithName{}
res := []*model.JobStats{}
for name, md := range data {
res = append(res, &model.JobMetricStatWithName{
res = append(res, &model.JobStats{
Name: name,
Stats: &md,
})
@@ -327,6 +326,47 @@ func (r *queryResolver) JobMetricStats(ctx context.Context, id string, metrics [
return res, err
}
// ScopedJobStats is the resolver for the scopedJobStats field.
//
// It looks up the job by id, loads its statistics for the requested metrics
// and scopes via metricDataDispatcher.LoadScopedJobStats, and flattens the
// metric -> scope -> stats result into one JobStatsWithScope entry per
// (metric, scope) pair. The order of the returned entries follows map
// iteration and is therefore unspecified.
//
// If the job lookup or the stats loading fails, a warning is logged and the
// error is returned together with a nil result.
func (r *queryResolver) ScopedJobStats(ctx context.Context, id string, metrics []string, scopes []schema.MetricScope) ([]*model.JobStatsWithScope, error) {
	job, err := r.Query().Job(ctx, id)
	if err != nil {
		log.Warnf("Error while querying job %s for metrics", id)
		return nil, err
	}

	data, err := metricDataDispatcher.LoadScopedJobStats(job, metrics, scopes, ctx)
	if err != nil {
		log.Warnf("Error while loading scoped job stat data for job id %s", id)
		return nil, err
	}

	// Kept as a non-nil, empty slice so that an empty result stays a list
	// rather than becoming nil.
	res := make([]*model.JobStatsWithScope, 0)
	for name, scoped := range data {
		for scope, stats := range scoped {
			// The number of converted entries is known up front, so size the
			// slice once instead of growing it on every append.
			mdlStats := make([]*model.ScopedStats, 0, len(stats))
			for _, stat := range stats {
				mdlStats = append(mdlStats, &model.ScopedStats{
					Hostname: stat.Hostname,
					ID:       stat.Id,
					Data:     stat.Data,
				})
			}

			res = append(res, &model.JobStatsWithScope{
				Name:  name,
				Scope: scope,
				Stats: mdlStats,
			})
		}
	}

	return res, nil
}
// JobsFootprints is the resolver for the jobsFootprints field.
func (r *queryResolver) JobsFootprints(ctx context.Context, filter []*model.JobFilter, metrics []string) (*model.Footprints, error) {
// NOTE: Legacy Naming! This resolver is for normalized histograms in analysis view only - *Not* related to DB "footprint" column!

View File

@@ -224,8 +224,34 @@ func LoadAverages(
return nil
}
// Used for polar plots in frontend
func LoadStatData(
// LoadScopedJobStats returns the scoped statistics of a job for the requested
// metrics and scopes (used for the statsTable in the frontend).
//
// Jobs that are not running are served from the job archive unless archiving
// is disabled in the configuration; otherwise the metric data repository
// configured for the job's cluster is queried. On any failure the returned
// stats are nil.
func LoadScopedJobStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	fromArchive := job.State != schema.JobStateRunning && !config.Keys.DisableArchive
	if fromArchive {
		return archive.LoadScopedStatsFromArchive(job, metrics, scopes)
	}

	repo, repoErr := metricdata.GetMetricDataRepo(job.Cluster)
	if repoErr != nil {
		return nil, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
	}

	result, loadErr := repo.LoadScopedStats(job, metrics, scopes, ctx)
	if loadErr != nil {
		log.Errorf("error while loading scoped statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
		return nil, loadErr
	}

	return result, nil
}
// Used for polar plots in frontend: Aggregates statistics for all nodes to single values for job per metric.
func LoadJobStats(
job *schema.Job,
metrics []string,
ctx context.Context,
@@ -237,12 +263,12 @@ func LoadStatData(
data := make(map[string]schema.MetricStatistics, len(metrics))
repo, err := metricdata.GetMetricDataRepo(job.Cluster)
if err != nil {
return data, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
return data, fmt.Errorf("job %d: no metric data repository configured for '%s'", job.JobID, job.Cluster)
}
stats, err := repo.LoadStats(job, metrics, ctx)
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
log.Errorf("error while loading statistics for job %d (User %s, Project %s)", job.JobID, job.User, job.Project)
return data, err
}

View File

@@ -618,7 +618,98 @@ func (ccms *CCMetricStore) LoadStats(
return stats, nil
}
// TODO: Support sub-node-scope metrics! For this, the partition of a node needs to be known!
// LoadScopedStats fetches statistics (avg/min/max) for the given job, metrics
// and scopes from the metric store, requesting stats only and no time series
// data. The result is keyed by local metric name, then by scope.
//
// Per-result errors reported by the store are collected as "partial errors":
// the affected entries are skipped, and the stats gathered so far are returned
// together with a combined error. Metric/scope combinations that end up
// without any entry are removed from the result.
func (ccms *CCMetricStore) LoadScopedStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	// NOTE(review): the trailing 0 is presumably the resolution argument of
	// buildQueries — confirm against its signature.
	queries, assignedScope, err := ccms.buildQueries(job, metrics, scopes, 0)
	if err != nil {
		log.Warn("Error while building queries")
		return nil, err
	}

	jobEnd := job.StartTime.Add(time.Duration(job.Duration) * time.Second)
	req := ApiQueryRequest{
		Cluster:   job.Cluster,
		From:      job.StartTime.Unix(),
		To:        jobEnd.Unix(),
		Queries:   queries,
		WithStats: true,
		WithData:  false,
	}

	resBody, err := ccms.doRequest(ctx, &req)
	if err != nil {
		log.Error("Error while performing request")
		return nil, err
	}

	var partialErrs []string
	result := make(schema.ScopedJobStats)

	for i, row := range resBody.Results {
		query := req.Queries[i]
		metric := ccms.toLocalName(query.Metric)
		scope := assignedScope[i]

		// Make sure the metric -> scope -> list path exists before appending.
		byScope, found := result[metric]
		if !found {
			byScope = make(map[schema.MetricScope][]*schema.ScopedStats)
			result[metric] = byScope
		}
		if _, found := byScope[scope]; !found {
			byScope[scope] = make([]*schema.ScopedStats, 0)
		}

		for ndx, res := range row {
			if res.Error != nil {
				// Remember the partial error and skip this entry.
				partialErrs = append(partialErrs, fmt.Sprintf("failed to fetch '%s' from host '%s': %s", query.Metric, query.Hostname, *res.Error))
				continue
			}

			// Only typed queries carry a per-result type id.
			var id *string
			if query.Type != nil {
				typeID := query.TypeIds[ndx]
				id = &typeID
			}

			// A NaN in any of the three values zeroes all of them: a regular
			// float64 NaN cannot be encoded as JSON.
			avg, lo, hi := res.Avg, res.Min, res.Max
			if avg.IsNaN() || lo.IsNaN() || hi.IsNaN() {
				avg, lo, hi = schema.Float(0), schema.Float(0), schema.Float(0)
			}

			byScope[scope] = append(byScope[scope], &schema.ScopedStats{
				Hostname: query.Hostname,
				Id:       id,
				Data: &schema.MetricStatistics{
					Avg: float64(avg),
					Min: float64(lo),
					Max: float64(hi),
				},
			})
		}

		// Drop empty leaves so that callers can rely on every remaining
		// metric/scope entry holding at least one element.
		if len(byScope[scope]) == 0 {
			delete(byScope, scope)
			if len(byScope) == 0 {
				delete(result, metric)
			}
		}
	}

	if len(partialErrs) != 0 {
		// Partial errors: return what was collected alongside the error.
		return result, fmt.Errorf("METRICDATA/CCMS > Errors: %s", strings.Join(partialErrs, ", "))
	}

	return result, nil
}
// TODO: Support sub-node-scope metrics! For this, the partition of a node needs to be known! - Todo Outdated with NodeListData?
func (ccms *CCMetricStore) LoadNodeData(
cluster string,
metrics, nodes []string,

View File

@@ -301,6 +301,18 @@ func (idb *InfluxDBv2DataRepository) LoadStats(
return stats, nil
}
// LoadScopedStats is not implemented for the InfluxDBv2 repository: it logs
// the call arguments at info level and always returns a nil result together
// with an "unimplemented" error.
func (idb *InfluxDBv2DataRepository) LoadScopedStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	// TODO: Implement; intended for the stats table in the job view.
	log.Infof("LoadScopedStats unimplemented for InfluxDBv2DataRepository, Args: Job-ID %d, metrics %v, scopes %v", job.JobID, metrics, scopes)

	var noStats schema.ScopedJobStats // zero value, i.e. nil
	return noStats, errors.New("METRICDATA/INFLUXV2 > unimplemented for InfluxDBv2DataRepository")
}
func (idb *InfluxDBv2DataRepository) LoadNodeData(
cluster string,
metrics, nodes []string,

View File

@@ -24,9 +24,12 @@ type MetricDataRepository interface {
// Return the JobData for the given job, only with the requested metrics.
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope only.
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
// Return a map of metrics to a map of scopes to the scoped metric statistics of the job.
LoadScopedStats(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.ScopedJobStats, error)
// Return a map of hosts to a map of metrics at the requested scopes (currently only node) for that node.
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)

View File

@@ -448,6 +448,18 @@ func (pdb *PrometheusDataRepository) LoadNodeData(
return data, nil
}
// LoadScopedStats is not implemented for the Prometheus repository: it logs
// the call arguments at info level and always returns a nil result together
// with an "unimplemented" error.
func (pdb *PrometheusDataRepository) LoadScopedStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	// TODO: Implement; intended for the stats table in the job view.
	log.Infof("LoadScopedStats unimplemented for PrometheusDataRepository, Args: job-id %v, metrics %v, scopes %v", job.JobID, metrics, scopes)

	var noStats schema.ScopedJobStats // zero value, i.e. nil
	return noStats, errors.New("METRICDATA/PROMETHEUS > unimplemented for PrometheusDataRepository")
}
func (pdb *PrometheusDataRepository) LoadNodeListData(
cluster, subCluster, nodeFilter string,
metrics []string,
@@ -463,5 +475,5 @@ func (pdb *PrometheusDataRepository) LoadNodeListData(
// TODO : Implement to be used in NodeList-View
log.Infof("LoadNodeListData unimplemented for PrometheusDataRepository, Args: cluster %s, metrics %v, nodeFilter %v, scopes %v", cluster, metrics, nodeFilter, scopes)
return nil, totalNodes, hasNextPage, errors.New("METRICDATA/INFLUXV2 > unimplemented for PrometheusDataRepository")
return nil, totalNodes, hasNextPage, errors.New("METRICDATA/PROMETHEUS > unimplemented for PrometheusDataRepository")
}

View File

@@ -36,7 +36,17 @@ func (tmdr *TestMetricDataRepository) LoadData(
func (tmdr *TestMetricDataRepository) LoadStats(
job *schema.Job,
metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
metrics []string,
ctx context.Context) (map[string]map[string]schema.MetricStatistics, error) {
panic("TODO")
}
// LoadScopedStats is a placeholder on the test repository: it is not
// implemented and panics with "TODO" whenever it is called.
func (tmdr *TestMetricDataRepository) LoadScopedStats(
	job *schema.Job,
	metrics []string,
	scopes []schema.MetricScope,
	ctx context.Context,
) (schema.ScopedJobStats, error) {
	panic("TODO")
}