2024-07-14 11:18:38 +02:00
|
|
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
|
|
// All rights reserved.
|
|
|
|
// Use of this source code is governed by a MIT-style
|
|
|
|
// license that can be found in the LICENSE file.
|
2024-07-16 12:34:27 +02:00
|
|
|
package taskManager
|
2024-07-14 11:18:38 +02:00
|
|
|
|
2024-08-29 08:45:04 +02:00
|
|
|
import (
|
2024-08-30 07:22:40 +02:00
|
|
|
"context"
|
|
|
|
"math"
|
2024-08-29 08:45:04 +02:00
|
|
|
"time"
|
|
|
|
|
2024-10-23 16:15:44 +02:00
|
|
|
"github.com/ClusterCockpit/cc-backend/internal/config"
|
2024-11-22 12:42:49 +01:00
|
|
|
"github.com/ClusterCockpit/cc-backend/internal/metricdata"
|
2024-08-30 07:22:40 +02:00
|
|
|
"github.com/ClusterCockpit/cc-backend/pkg/archive"
|
2024-08-29 08:45:04 +02:00
|
|
|
"github.com/ClusterCockpit/cc-backend/pkg/log"
|
2024-08-30 07:22:40 +02:00
|
|
|
"github.com/ClusterCockpit/cc-backend/pkg/schema"
|
2024-09-03 13:41:00 +02:00
|
|
|
sq "github.com/Masterminds/squirrel"
|
2024-08-29 08:45:04 +02:00
|
|
|
"github.com/go-co-op/gocron/v2"
|
|
|
|
)
|
|
|
|
|
2024-09-02 12:07:44 +02:00
|
|
|
func RegisterFootprintWorker() {
|
2024-10-23 16:15:44 +02:00
|
|
|
var frequency string
|
2024-10-28 11:56:34 +01:00
|
|
|
if config.Keys.CronFrequency != nil && config.Keys.CronFrequency.FootprintWorker != "" {
|
2024-10-23 16:15:44 +02:00
|
|
|
frequency = config.Keys.CronFrequency.FootprintWorker
|
|
|
|
} else {
|
|
|
|
frequency = "10m"
|
|
|
|
}
|
|
|
|
d, _ := time.ParseDuration(frequency)
|
|
|
|
log.Infof("Register Footprint Update service with %s interval", frequency)
|
|
|
|
|
2024-08-29 08:45:04 +02:00
|
|
|
s.NewJob(gocron.DurationJob(d),
|
|
|
|
gocron.NewTask(
|
|
|
|
func() {
|
2024-09-03 13:41:00 +02:00
|
|
|
s := time.Now()
|
2024-10-22 14:37:22 +02:00
|
|
|
c := 0
|
|
|
|
ce := 0
|
|
|
|
cl := 0
|
|
|
|
log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
|
2024-09-03 15:59:01 +02:00
|
|
|
|
2024-08-30 07:22:40 +02:00
|
|
|
for _, cluster := range archive.Clusters {
|
2024-11-22 13:13:43 +01:00
|
|
|
s_cluster := time.Now()
|
2024-08-30 07:22:40 +02:00
|
|
|
jobs, err := jobRepo.FindRunningJobs(cluster.Name)
|
|
|
|
if err != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
allMetrics := make([]string, 0)
|
|
|
|
metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
|
|
|
|
for _, mc := range metricConfigs {
|
|
|
|
allMetrics = append(allMetrics, mc.Name)
|
|
|
|
}
|
2024-08-29 08:45:04 +02:00
|
|
|
|
2024-11-22 12:42:49 +01:00
|
|
|
repo, err := metricdata.GetMetricDataRepo(cluster.Name)
|
|
|
|
if err != nil {
|
|
|
|
log.Warnf("no metric data repository configured for '%s'", cluster.Name)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2024-11-22 13:39:59 +01:00
|
|
|
pendingStatements := []sq.UpdateBuilder{}
|
2024-08-30 07:22:40 +02:00
|
|
|
|
|
|
|
for _, job := range jobs {
|
2024-10-22 14:37:22 +02:00
|
|
|
log.Debugf("Try job %d", job.JobID)
|
|
|
|
cl++
|
2024-11-22 12:42:49 +01:00
|
|
|
|
2024-11-22 13:13:43 +01:00
|
|
|
s_job := time.Now()
|
|
|
|
|
2024-11-22 12:42:49 +01:00
|
|
|
jobStats, err := repo.LoadStats(job, allMetrics, context.Background())
|
2024-08-30 07:22:40 +02:00
|
|
|
if err != nil {
|
2024-11-22 13:36:26 +01:00
|
|
|
log.Errorf("error wile loading job data stats for footprint update: %v", err)
|
2024-10-22 14:37:22 +02:00
|
|
|
ce++
|
2024-08-30 07:22:40 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
jobMeta := &schema.JobMeta{
|
|
|
|
BaseJob: job.BaseJob,
|
|
|
|
StartTime: job.StartTime.Unix(),
|
|
|
|
Statistics: make(map[string]schema.JobStatistics),
|
|
|
|
}
|
|
|
|
|
2024-11-22 12:42:49 +01:00
|
|
|
for metric, data := range jobStats { // Metric, Hostname:Stats
|
2024-08-30 07:22:40 +02:00
|
|
|
avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
|
|
|
|
|
2024-11-22 12:42:49 +01:00
|
|
|
for _, hostStats := range data {
|
|
|
|
avg += hostStats.Avg
|
|
|
|
min = math.Min(min, hostStats.Min)
|
|
|
|
max = math.Max(max, hostStats.Max)
|
2024-08-30 07:22:40 +02:00
|
|
|
}
|
|
|
|
|
2024-09-30 16:33:28 +02:00
|
|
|
// Add values rounded to 2 digits
|
2024-08-30 07:22:40 +02:00
|
|
|
jobMeta.Statistics[metric] = schema.JobStatistics{
|
|
|
|
Unit: schema.Unit{
|
|
|
|
Prefix: archive.GetMetricConfig(job.Cluster, metric).Unit.Prefix,
|
|
|
|
Base: archive.GetMetricConfig(job.Cluster, metric).Unit.Base,
|
|
|
|
},
|
2024-09-30 16:33:28 +02:00
|
|
|
Avg: (math.Round((avg/float64(job.NumNodes))*100) / 100),
|
|
|
|
Min: (math.Round(min*100) / 100),
|
|
|
|
Max: (math.Round(max*100) / 100),
|
2024-08-30 07:22:40 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-11-22 12:42:49 +01:00
|
|
|
// Build Statement per Job, Add to Pending Array
|
2024-09-25 18:04:29 +02:00
|
|
|
stmt := sq.Update("job")
|
2024-09-05 14:58:08 +02:00
|
|
|
stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
|
|
|
|
if err != nil {
|
2024-11-22 13:36:26 +01:00
|
|
|
log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
|
2024-10-22 14:37:22 +02:00
|
|
|
ce++
|
2024-08-30 07:22:40 +02:00
|
|
|
continue
|
|
|
|
}
|
2024-09-05 14:58:08 +02:00
|
|
|
stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta)
|
|
|
|
if err != nil {
|
2024-11-22 12:42:49 +01:00
|
|
|
log.Errorf("update job (dbid: %d) statement build failed at energy step: %s", job.ID, err.Error())
|
2024-10-22 14:37:22 +02:00
|
|
|
ce++
|
2024-08-30 07:22:40 +02:00
|
|
|
continue
|
|
|
|
}
|
2024-09-25 18:04:29 +02:00
|
|
|
stmt = stmt.Where("job.id = ?", job.ID)
|
2024-09-03 15:59:01 +02:00
|
|
|
|
2024-11-22 12:42:49 +01:00
|
|
|
pendingStatements = append(pendingStatements, stmt)
|
2024-11-22 13:36:26 +01:00
|
|
|
log.Debugf("Job %d took %s", job.JobID, time.Since(s_job))
|
2024-11-22 12:42:49 +01:00
|
|
|
}
|
2024-11-22 13:36:26 +01:00
|
|
|
log.Debugf("Finish preparation for %d jobs: %d statements", len(jobs), len(pendingStatements))
|
2024-11-22 12:42:49 +01:00
|
|
|
|
|
|
|
t, err := jobRepo.TransactionInit()
|
|
|
|
if err != nil {
|
2024-11-22 13:36:26 +01:00
|
|
|
log.Errorf("failed TransactionInit %v", err)
|
2024-11-22 12:42:49 +01:00
|
|
|
}
|
|
|
|
|
2024-11-22 13:36:26 +01:00
|
|
|
for idx, ps := range pendingStatements {
|
2024-11-22 12:42:49 +01:00
|
|
|
|
|
|
|
query, args, err := ps.ToSql()
|
2024-10-22 14:37:22 +02:00
|
|
|
if err != nil {
|
2024-11-22 13:36:26 +01:00
|
|
|
log.Errorf("failed in ToSQL conversion: %v", err)
|
2024-10-22 14:37:22 +02:00
|
|
|
ce++
|
2024-11-22 13:13:43 +01:00
|
|
|
} else {
|
|
|
|
// Args: JSON, JSON, ENERGY, JOBID
|
2024-11-22 13:36:26 +01:00
|
|
|
log.Infof("add transaction on index %d", idx)
|
2024-11-22 13:13:43 +01:00
|
|
|
jobRepo.TransactionAdd(t, query, args...)
|
|
|
|
c++
|
2024-09-25 18:04:29 +02:00
|
|
|
}
|
|
|
|
}
|
2024-11-22 12:42:49 +01:00
|
|
|
|
|
|
|
jobRepo.TransactionEnd(t)
|
2024-11-22 13:13:43 +01:00
|
|
|
log.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
|
2024-08-30 07:22:40 +02:00
|
|
|
}
|
2024-10-22 14:37:22 +02:00
|
|
|
log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
|
2024-08-29 08:45:04 +02:00
|
|
|
}))
|
2024-07-14 11:18:38 +02:00
|
|
|
}
|