cc-backend/metricdata/metricdata.go


package metricdata

import (
    "context"
    "fmt"
    "time"

    "github.com/ClusterCockpit/cc-jobarchive/config"
    "github.com/ClusterCockpit/cc-jobarchive/schema"
    "github.com/iamlouk/lrucache"
)

type MetricDataRepository interface {
    // Initialize this MetricDataRepository. One instance of
    // this interface will only ever be responsible for one cluster.
    Init(url, token string) error

    // Return the JobData for the given job, only with the requested metrics.
    LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error)

    // Return a map of metrics to a map of nodes to the metric statistics of the job. node scope assumed for now.
    LoadStats(job *schema.Job, metrics []string, ctx context.Context) (map[string]map[string]schema.MetricStatistics, error)

    // Return a map of nodes to a map of metrics to the data for the requested time.
    LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error)
}
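
// A hedged sketch (not part of the original file) of what an additional backend
// implementing MetricDataRepository could look like. The type name and fields are
// hypothetical; only the method set comes from the interface above. A new backend
// would be hooked up by adding a case for its `kind` to Init below.
//
//    type myMetricStore struct {
//        url, token string
//    }
//
//    func (s *myMetricStore) Init(url, token string) error {
//        s.url, s.token = url, token
//        return nil
//    }
//
//    // LoadData, LoadStats and LoadNodeData would then translate the requested
//    // metrics, scopes and time range into backend queries and convert the results
//    // into schema.JobData, schema.MetricStatistics and schema.Float series.
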
var metricDataRepos map[string]MetricDataRepository = map[string]MetricDataRepository{}
var JobArchivePath string
var useArchive bool
// Init sets the job archive path and the archive usage flag, and initializes one
// metric data repository for every cluster in config.Clusters that has one configured.
func Init(jobArchivePath string, disableArchive bool) error {
    useArchive = !disableArchive
    JobArchivePath = jobArchivePath
    for _, cluster := range config.Clusters {
        if cluster.MetricDataRepository != nil {
            switch cluster.MetricDataRepository.Kind {
            case "cc-metric-store":
                ccms := &CCMetricStore{}
                if err := ccms.Init(cluster.MetricDataRepository.Url, cluster.MetricDataRepository.Token); err != nil {
                    return err
                }
                metricDataRepos[cluster.Name] = ccms
            // case "influxdb-v2":
            //     idb := &InfluxDBv2DataRepository{}
            //     if err := idb.Init(cluster.MetricDataRepository.Url); err != nil {
            //         return err
            //     }
            //     metricDataRepos[cluster.Name] = idb
            default:
                return fmt.Errorf("unknown metric data repository '%s' for cluster '%s'", cluster.MetricDataRepository.Kind, cluster.Name)
            }
        }
    }
    return nil
}
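
// For reference, a hedged sketch (not part of the original file) of the per-cluster
// configuration this switch expects. The JSON keys are inferred from the struct
// fields used above (Kind, Url, Token); consult the config package for the
// authoritative schema, and treat the values as placeholders.
//
//    {
//        "name": "emmy",
//        "metricDataRepository": {
//            "kind": "cc-metric-store",
//            "url": "http://localhost:8081",
//            "token": "<jwt>"
//        }
//    }
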
var cache *lrucache.Cache = lrucache.New(500 * 1024 * 1024)
// Fetches the metric data for a job. Data for running jobs (or for all jobs if
// archiving is disabled) is loaded from the cluster's metric data repository and
// cached for two minutes; finished jobs are read from the job archive and, if
// specific metrics were requested, filtered down to those.
func LoadData(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context) (schema.JobData, error) {
    if job.State == schema.JobStateRunning || !useArchive {
        ckey := cacheKey(job, metrics, scopes)
        if data := cache.Get(ckey, nil); data != nil {
            return data.(schema.JobData), nil
        }

        repo, ok := metricDataRepos[job.Cluster]
        if !ok {
            return nil, fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
        }

        if scopes == nil {
            scopes = append(scopes, schema.MetricScopeNode)
        }

        if metrics == nil {
            cluster := config.GetClusterConfig(job.Cluster)
            for _, mc := range cluster.MetricConfig {
                metrics = append(metrics, mc.Name)
            }
        }

        data, err := repo.LoadData(job, metrics, scopes, ctx)
        if err != nil {
            return nil, err
        }

        // calcStatisticsSeries(job, data, 7)
        cache.Put(ckey, data, data.Size(), 2*time.Minute)
        return data, nil
    }

    data, err := loadFromArchive(job)
    if err != nil {
        return nil, err
    }

    if metrics != nil {
        res := schema.JobData{}
        for _, metric := range metrics {
            if metricdata, ok := data[metric]; ok {
                res[metric] = metricdata
            }
        }
        return res, nil
    }

    return data, nil
}
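
// A hedged usage sketch (not part of the original file): a caller (e.g. a GraphQL
// resolver) can pass nil for metrics/scopes to get everything, or restrict the
// query as below. The metric names are placeholders.
//
//    data, err := LoadData(job, []string{"flops_any", "mem_bw"},
//        []schema.MetricScope{schema.MetricScopeNode}, ctx)
//    if err != nil {
//        return err
//    }
//    _ = data["flops_any"] // per-metric entry as defined by schema.JobData
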
// Used for the jobsFootprint GraphQL-Query. TODO: Rename/Generalize.
func LoadAverages(job *schema.Job, metrics []string, data [][]schema.Float, ctx context.Context) error {
    if job.State != schema.JobStateRunning && useArchive {
        return loadAveragesFromArchive(job, metrics, data)
    }

    repo, ok := metricDataRepos[job.Cluster]
    if !ok {
        return fmt.Errorf("no metric data repository configured for '%s'", job.Cluster)
    }

    stats, err := repo.LoadStats(job, metrics, ctx)
    if err != nil {
        return err
    }

    for i, m := range metrics {
        nodes, ok := stats[m]
        if !ok {
            data[i] = append(data[i], schema.NaN)
            continue
        }

        sum := 0.0
        for _, node := range nodes {
            sum += node.Avg
        }
        data[i] = append(data[i], schema.Float(sum))
    }

    return nil
}
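
// A hedged usage sketch (not part of the original file): data must contain one
// (possibly empty) slice per requested metric; each call appends exactly one value
// per metric. The metric names are placeholders.
//
//    metrics := []string{"flops_any", "mem_bw"}
//    data := make([][]schema.Float, len(metrics))
//    if err := LoadAverages(job, metrics, data, ctx); err != nil {
//        return err
//    }
//    // data[0] now ends with the value for "flops_any", data[1] with the one for "mem_bw".
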
// Used for the node/system view. Returns a map of nodes to a map of metrics (at node scope).
func LoadNodeData(clusterId string, metrics, nodes []string, from, to int64, ctx context.Context) (map[string]map[string][]schema.Float, error) {
    repo, ok := metricDataRepos[clusterId]
    if !ok {
        return nil, fmt.Errorf("no metric data repository configured for '%s'", clusterId)
    }

    if metrics == nil {
        for _, m := range config.GetClusterConfig(clusterId).MetricConfig {
            metrics = append(metrics, m.Name)
        }
    }

    data, err := repo.LoadNodeData(clusterId, metrics, nodes, from, to, ctx)
    if err != nil {
        return nil, err
    }

    if data == nil {
        return nil, fmt.Errorf("the metric data repository for '%s' does not support this query", clusterId)
    }

    return data, nil
}
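
// A hedged sketch (not part of the original file) of a node/system-view style call;
// from/to are interpreted here as Unix timestamps in seconds, and the cluster, node
// and metric names are placeholders.
//
//    to := time.Now().Unix()
//    from := to - 2*60*60 // last two hours
//    nodeData, err := LoadNodeData("emmy", []string{"load_one"},
//        []string{"e0101", "e0102"}, from, to, ctx)
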
func cacheKey(job *schema.Job, metrics []string, scopes []schema.MetricScope) string {
    // Duration and StartTime do not need to be in the cache key as StartTime is less unique than
    // job.ID and the TTL of the cache entry makes sure it does not stay there forever.
    return fmt.Sprintf("%d:[%v],[%v]", job.ID, metrics, scopes)
}
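
// For illustration (not part of the original file): with job.ID 1337, metrics
// []string{"flops_any", "mem_bw"} and a single node scope, the resulting key would
// look something like "1337:[[flops_any mem_bw]],[[node]]".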