cc-backend/pkg/archive/archive.go

// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package archive

import (
	"context"
	"encoding/json"
	"fmt"
	"math"
	"sync"

	"github.com/ClusterCockpit/cc-backend/internal/metricdata"
	"github.com/ClusterCockpit/cc-backend/pkg/log"
	"github.com/ClusterCockpit/cc-backend/pkg/lrucache"
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
)

// Version is the current version of the job-archive schema.
const Version uint64 = 1

// ArchiveBackend is the interface every job-archive storage
// implementation (currently the file-system backend) must satisfy.
type ArchiveBackend interface {
	// Init sets up the backend from its raw JSON configuration and
	// returns the archive's schema version.
	Init(rawConfig json.RawMessage) (uint64, error)

	Info()
	Exists(job *schema.Job) bool
	LoadJobMeta(job *schema.Job) (*schema.JobMeta, error)
	LoadJobData(job *schema.Job) (schema.JobData, error)
	LoadClusterCfg(name string) (*schema.Cluster, error)
	StoreJobMeta(jobMeta *schema.JobMeta) error
	ImportJob(jobMeta *schema.JobMeta, jobData *schema.JobData) error
	GetClusters() []string
	CleanUp(jobs []*schema.Job)
	Move(jobs []*schema.Job, path string)
	Clean(before int64, after int64)
	Compress(jobs []*schema.Job)
	CompressLast(starttime int64) int64

	// Iter streams all archived jobs; metric data is only loaded
	// when loadMetricData is true.
	Iter(loadMetricData bool) <-chan JobContainer
}

// JobContainer pairs a job's metadata with its (optionally loaded)
// metric data, as yielded by ArchiveBackend.Iter.
type JobContainer struct {
	Meta *schema.JobMeta
	Data *schema.JobData
}
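
// A minimal consumption sketch for ArchiveBackend.Iter (assuming an
// initialized backend handle `ar`); Meta is always set, Data only when
// loadMetricData was requested:
//
//	for container := range ar.Iter(false) {
//		fmt.Printf("job %d on cluster %s\n",
//			container.Meta.JobID, container.Meta.Cluster)
//	}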

var (
	initOnce   sync.Once
	cache      *lrucache.Cache = lrucache.New(128 * 1024 * 1024) // 128 MiB LRU cache
	ar         ArchiveBackend
	useArchive bool
)

func Init(rawConfig json.RawMessage, disableArchive bool) error {
	var err error

	initOnce.Do(func() {
		useArchive = !disableArchive

		var cfg struct {
			Kind string `json:"kind"`
		}

		if err = json.Unmarshal(rawConfig, &cfg); err != nil {
			log.Warn("Error while unmarshaling raw config json")
			return
		}

		switch cfg.Kind {
		case "file":
			ar = &FsArchive{}
		// case "s3":
		// 	ar = &S3Archive{}
		default:
			err = fmt.Errorf("ARCHIVE/ARCHIVE > unknown archive backend '%s'", cfg.Kind)
			// Bail out before calling Init on a nil backend.
			return
		}

		var version uint64
		version, err = ar.Init(rawConfig)
		if err != nil {
			log.Error("Error while initializing archiveBackend")
			return
		}
		log.Infof("Load archive version %d", version)

		err = initClusterConfig()
	})

	return err
}

// GetHandle returns the initialized archive backend.
func GetHandle() ArchiveBackend {
	return ar
}
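
// Minimal usage sketch (hypothetical config values; "file" is the only
// built-in backend kind, and the "path" field is assumed to match the
// file backend's configuration):
//
//	rawCfg := json.RawMessage(`{"kind": "file", "path": "./var/job-archive"}`)
//	if err := archive.Init(rawCfg, false); err != nil {
//		panic(err)
//	}
//	backend := archive.GetHandle()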

// ArchiveJob writes a running job's metric data to the job archive and
// returns a JobMeta structure with the computed per-metric statistics.
func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
	allMetrics := make([]string, 0)
	metricConfigs := GetCluster(job.Cluster).MetricConfig
	for _, mc := range metricConfigs {
		allMetrics = append(allMetrics, mc.Name)
	}

	// TODO: Talk about this! What resolutions to store data at...
	scopes := []schema.MetricScope{schema.MetricScopeNode}
	if job.NumNodes <= 8 {
		scopes = append(scopes, schema.MetricScopeCore)
	}

	if job.NumAcc > 0 {
		scopes = append(scopes, schema.MetricScopeAccelerator)
	}

	jobData, err := metricdata.LoadData(job, allMetrics, scopes, ctx)
	if err != nil {
		log.Error("Error while loading job data for archiving")
		return nil, err
	}

	jobMeta := &schema.JobMeta{
		BaseJob:    job.BaseJob,
		StartTime:  job.StartTime.Unix(),
		Statistics: make(map[string]schema.JobStatistics),
	}

	for metric, data := range jobData {
		avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
		nodeData, ok := data["node"]
		if !ok {
			// TODO/FIXME: Calc average for non-node metrics as well!
			continue
		}

		// Aggregate the per-node series statistics into job-level values.
		for _, series := range nodeData.Series {
			avg += series.Statistics.Avg
			min = math.Min(min, series.Statistics.Min)
			max = math.Max(max, series.Statistics.Max)
		}

		jobMeta.Statistics[metric] = schema.JobStatistics{
			Unit: schema.Unit{
				Prefix: GetMetricConfig(job.Cluster, metric).Unit.Prefix,
				Base:   GetMetricConfig(job.Cluster, metric).Unit.Base,
			},
			Avg: avg / float64(job.NumNodes),
			Min: min,
			Max: max,
		}
	}

	// If the file-based archive is disabled, return only the JobMeta
	// structure, as the statistics in it are still needed.
	if !useArchive {
		return jobMeta, nil
	}

	return jobMeta, ar.ImportJob(jobMeta, &jobData)
}
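
// Invocation sketch (hypothetical call site): ArchiveJob is intended to be
// called once when a job is being stopped, with a request-scoped context:
//
//	jobMeta, err := archive.ArchiveJob(job, r.Context())
//	if err != nil {
//		return err
//	}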

// LoadAveragesFromArchive is a helper for metricdata.LoadAverages(): it
// appends, per requested metric, the archived job average to data.
func LoadAveragesFromArchive(
	job *schema.Job,
	metrics []string,
	data [][]schema.Float,
) error {
	metaFile, err := ar.LoadJobMeta(job)
	if err != nil {
		log.Warn("Error while loading job metadata from archiveBackend")
		return err
	}

	for i, m := range metrics {
		if stat, ok := metaFile.Statistics[m]; ok {
			data[i] = append(data[i], schema.Float(stat.Avg))
		} else {
			data[i] = append(data[i], schema.NaN)
		}
	}

	return nil
}
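
// Usage sketch (hypothetical metric names): data and metrics are parallel
// slices; each call appends one value per metric, with NaN for metrics
// that have no archived statistics:
//
//	metrics := []string{"flops_any", "mem_bw"}
//	data := make([][]schema.Float, len(metrics))
//	if err := archive.LoadAveragesFromArchive(job, metrics, data); err != nil {
//		// handle missing or unreadable meta.json
//	}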

// GetStatistics loads the per-metric job statistics from the archived
// metadata of the given job.
func GetStatistics(job *schema.Job) (map[string]schema.JobStatistics, error) {
	metaFile, err := ar.LoadJobMeta(job)
	if err != nil {
		log.Warn("Error while loading job metadata from archiveBackend")
		return nil, err
	}

	return metaFile.Statistics, nil
}

// UpdateMetadata: If the job is archived, find its `meta.json` file and
// override the metadata in that JSON file. If the job is still running or
// the archive is disabled, nothing is done.
func UpdateMetadata(job *schema.Job, metadata map[string]string) error {
	if job.State == schema.JobStateRunning || !useArchive {
		return nil
	}

	jobMeta, err := ar.LoadJobMeta(job)
	if err != nil {
		log.Warn("Error while loading job metadata from archiveBackend")
		return err
	}

	// Guard against a nil map in the loaded metadata.
	if jobMeta.MetaData == nil {
		jobMeta.MetaData = make(map[string]string)
	}
	for k, v := range metadata {
		jobMeta.MetaData[k] = v
	}

	return ar.StoreJobMeta(jobMeta)
}

// UpdateTags: If the job is archived, find its `meta.json` file and
// override the tags list in that JSON file. If the job is still running or
// the archive is disabled, nothing is done.
func UpdateTags(job *schema.Job, tags []*schema.Tag) error {
	if job.State == schema.JobStateRunning || !useArchive {
		return nil
	}

	jobMeta, err := ar.LoadJobMeta(job)
	if err != nil {
		log.Warn("Error while loading job metadata from archiveBackend")
		return err
	}

	jobMeta.Tags = make([]*schema.Tag, 0)
	for _, tag := range tags {
		jobMeta.Tags = append(jobMeta.Tags, &schema.Tag{
			Name: tag.Name,
			Type: tag.Type,
		})
	}

	return ar.StoreJobMeta(jobMeta)
}
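
// Usage sketch (hypothetical tag): the archived tag list is replaced
// wholesale, so callers must always pass the job's complete set of tags:
//
//	tags := []*schema.Tag{{Type: "admin", Name: "energy-hog"}}
//	if err := archive.UpdateTags(job, tags); err != nil {
//		// archived meta.json could not be updated
//	}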