cc-backend/internal/metricdata/metricdata.go

187 lines
5.4 KiB
Go
Raw Normal View History

2024-04-11 23:04:30 +02:00
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricdata
import (
"context"
"encoding/json"
2021-11-26 10:32:36 +01:00
"fmt"
"time"
2022-06-21 17:52:36 +02:00
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/pkg/archive"
2022-06-21 17:52:36 +02:00
"github.com/ClusterCockpit/cc-backend/pkg/log"
"github.com/ClusterCockpit/cc-backend/pkg/schema"
)
2021-12-08 10:14:45 +01:00
type MetricDataRepository interface {
2021-12-09 16:25:48 +01:00
// Initialize this MetricDataRepository. One instance of
// this interface will only ever be responsible for one cluster.
Init(rawConfig json.RawMessage) error
2021-12-09 16:25:48 +01:00
// Return the JobData for the given job, only with the requested metrics.
LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)
2021-12-09 16:25:48 +01:00
2022-01-12 13:03:01 +01:00
// Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
2021-12-17 15:49:22 +01:00
LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)
2021-12-09 16:25:48 +01:00
// Return a map of hosts to a map of metrics at the requested scopes for that node.
LoadNodeData(cluster string, metrics, nodes []string, scopes []schema.MetricScope, from, to time.Time, ctx context.Context) (map[string]map[string][]*schema.JobMetric, error)
2021-12-08 10:14:45 +01:00
}
// metricDataRepos maps a cluster name to the MetricDataRepository that Init
// registered for it.
var metricDataRepos = make(map[string]MetricDataRepository)
2021-11-26 10:32:36 +01:00
func Init() error {
for _, cluster := range config.Keys.Clusters {
2021-12-08 10:14:45 +01:00
if cluster.MetricDataRepository != nil {
var kind struct {
Kind string `json:"kind"`
}
if err := json.Unmarshal(cluster.MetricDataRepository, &kind); err != nil {
log.Warn("Error while unmarshaling raw json MetricDataRepository")
return err
}
2022-01-24 10:06:25 +01:00
var mdr MetricDataRepository
switch kind.Kind {
2021-12-08 10:14:45 +01:00
case "cc-metric-store":
2022-01-24 10:06:25 +01:00
mdr = &CCMetricStore{}
case "influxdb":
mdr = &InfluxDBv2DataRepository{}
2022-12-08 13:51:44 +01:00
case "prometheus":
mdr = &PrometheusDataRepository{}
2022-01-24 10:06:25 +01:00
case "test":
mdr = &TestMetricDataRepository{}
2021-12-08 10:14:45 +01:00
default:
return fmt.Errorf("METRICDATA/METRICDATA > Unknown MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
2021-12-08 10:14:45 +01:00
}
2022-01-24 10:06:25 +01:00
if err := mdr.Init(cluster.MetricDataRepository); err != nil {
log.Errorf("Error initializing MetricDataRepository %v for cluster %v", kind.Kind, cluster.Name)
2022-01-24 10:06:25 +01:00
return err
}
metricDataRepos[cluster.Name] = mdr
2021-12-08 10:14:45 +01:00
}
2021-11-26 10:32:36 +01:00
}
2021-12-08 10:14:45 +01:00
return nil
2021-11-26 10:32:36 +01:00
}
func GetMetricDataRepo(cluster string) MetricDataRepository {
repo, ok := metricDataRepos[cluster]
2022-09-13 15:21:50 +02:00
if !ok {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster), 0, 0
2022-09-13 15:21:50 +02:00
}
return repo
}
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(
job *schema.Job,
metrics []string,
data [][]schema.Float,
2024-02-23 10:08:41 +01:00
ctx context.Context,
) error {
2021-12-17 15:49:22 +01:00
if job.State != schema.JobStateRunning && useArchive {
return archive.LoadAveragesFromArchive(job, metrics, data) // #166 change also here?
}
repo, ok := metricDataRepos[job.Cluster]
if !ok {
return fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", job.Cluster)
}
2023-07-21 16:33:53 +02:00
stats, err := repo.LoadStats(job, metrics, ctx) // #166 how to handle stats for acc normalizazion?
if err != nil {
log.Errorf("Error while loading statistics for job %v (User %v, Project %v)", job.JobID, job.User, job.Project)
return err
}
for i, m := range metrics {
nodes, ok := stats[m]
if !ok {
data[i] = append(data[i], schema.NaN)
continue
}
sum := 0.0
for _, node := range nodes {
sum += node.Avg
}
data[i] = append(data[i], schema.Float(sum))
}
return nil
}
// Used for the node/system view. Returns a map of nodes to a map of metrics.
func LoadNodeData(
cluster string,
metrics, nodes []string,
scopes []schema.MetricScope,
from, to time.Time,
2024-02-23 10:08:41 +01:00
ctx context.Context,
) (map[string]map[string][]*schema.JobMetric, error) {
repo, ok := metricDataRepos[cluster]
if !ok {
return nil, fmt.Errorf("METRICDATA/METRICDATA > no metric data repository configured for '%s'", cluster)
}
if metrics == nil {
for _, m := range archive.GetCluster(cluster).MetricConfig {
metrics = append(metrics, m.Name)
}
}
data, err := repo.LoadNodeData(cluster, metrics, nodes, scopes, from, to, ctx)
if err != nil {
if len(data) != 0 {
log.Warnf("partial error: %s", err.Error())
} else {
log.Error("Error while loading node data from metric repository")
return nil, err
}
}
if data == nil {
return nil, fmt.Errorf("METRICDATA/METRICDATA > the metric data repository for '%s' does not support this query", cluster)
}
return data, nil
}
// For /monitoring/job/<job> and some other places, flops_any and mem_bw need
// to be available at the scope 'node'. If a job has a lot of nodes,
// statisticsSeries should be available so that a min/median/max Graph can be
// used instead of a lot of single lines.
func prepareJobData(
jobData schema.JobData,
2024-02-23 10:08:41 +01:00
scopes []schema.MetricScope,
) {
2022-01-20 10:08:50 +01:00
const maxSeriesSize int = 15
for _, scopes := range jobData {
for _, jm := range scopes {
if jm.StatisticsSeries != nil || len(jm.Series) <= maxSeriesSize {
continue
}
jm.AddStatisticsSeries()
}
}
nodeScopeRequested := false
for _, scope := range scopes {
if scope == schema.MetricScopeNode {
nodeScopeRequested = true
}
}
if nodeScopeRequested {
jobData.AddNodeScope("flops_any")
jobData.AddNodeScope("mem_bw")
}
}