diff --git a/internal/taskManager/updateFootprintService.go b/internal/taskManager/updateFootprintService.go index efca6d1..59c1b12 100644 --- a/internal/taskManager/updateFootprintService.go +++ b/internal/taskManager/updateFootprintService.go @@ -10,7 +10,7 @@ import ( "time" "github.com/ClusterCockpit/cc-backend/internal/config" - "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher" + "github.com/ClusterCockpit/cc-backend/internal/metricdata" "github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/log" "github.com/ClusterCockpit/cc-backend/pkg/schema" @@ -37,32 +37,38 @@ func RegisterFootprintWorker() { cl := 0 log.Printf("Update Footprints started at %s", s.Format(time.RFC3339)) - t, err := jobRepo.TransactionInit() - if err != nil { - log.Errorf("Failed TransactionInit %v", err) - } - for _, cluster := range archive.Clusters { + s_cluster := time.Now() jobs, err := jobRepo.FindRunningJobs(cluster.Name) if err != nil { continue } + // NOTE: Additional Subcluster Loop Could Allow For Limited List Of (Energy)Footprint-Metrics Only. + // - Chunk-Size Would Then Be 'SubCluster' (Running Jobs, Transactions) as Lists Can Change Within SCs + // - Would Require Review of 'updateFootprint' And 'updateEnergy' Usage allMetrics := make([]string, 0) metricConfigs := archive.GetCluster(cluster.Name).MetricConfig for _, mc := range metricConfigs { allMetrics = append(allMetrics, mc.Name) } - scopes := []schema.MetricScope{schema.MetricScopeNode} - scopes = append(scopes, schema.MetricScopeCore) - scopes = append(scopes, schema.MetricScopeAccelerator) + repo, err := metricdata.GetMetricDataRepo(cluster.Name) + if err != nil { + log.Errorf("no metric data repository configured for '%s'", cluster.Name) + continue + } + + pendingStatements := []sq.UpdateBuilder{} for _, job := range jobs { - log.Debugf("Try job %d", job.JobID) + log.Debugf("Prepare job %d", job.JobID) cl++ - jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background(), 0) // 0 Resolution-Value retrieves highest res + + s_job := time.Now() + + jobStats, err := repo.LoadStats(job, allMetrics, context.Background()) if err != nil { - log.Errorf("Error wile loading job data for footprint update: %v", err) + log.Errorf("error wile loading job data stats for footprint update: %v", err) ce++ continue } @@ -73,19 +79,19 @@ func RegisterFootprintWorker() { Statistics: make(map[string]schema.JobStatistics), } - for metric, data := range jobData { - avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32 - nodeData, ok := data["node"] - if !ok { - // This should never happen ? - ce++ - continue - } + for _, metric := range allMetrics { + avg, min, max := 0.0, 0.0, 0.0 + data, ok := jobStats[metric] // JobStats[Metric1:[Hostname1:[Stats], Hostname2:[Stats], ...], Metric2[...] ...] + if ok { + for _, res := range job.Resources { + hostStats, ok := data[res.Hostname] + if ok { + avg += hostStats.Avg + min = math.Min(min, hostStats.Min) + max = math.Max(max, hostStats.Max) + } - for _, series := range nodeData.Series { - avg += series.Statistics.Avg - min = math.Min(min, series.Statistics.Min) - max = math.Max(max, series.Statistics.Max) + } } // Add values rounded to 2 digits @@ -100,44 +106,47 @@ func RegisterFootprintWorker() { } } - // Init UpdateBuilder + // Build Statement per Job, Add to Pending Array stmt := sq.Update("job") - // Add SET queries stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta) if err != nil { - log.Errorf("Update job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error()) + log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error()) ce++ continue } stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta) if err != nil { - log.Errorf("Update job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error()) + log.Errorf("update job (dbid: %d) statement build failed at energy step: %s", job.ID, err.Error()) ce++ continue } - // Add WHERE Filter stmt = stmt.Where("job.id = ?", job.ID) - query, args, err := stmt.ToSql() - if err != nil { - log.Errorf("Failed in ToSQL conversion: %v", err) - ce++ - continue - } - - // Args: JSON, JSON, ENERGY, JOBID - jobRepo.TransactionAdd(t, query, args...) - // if err := jobRepo.Execute(stmt); err != nil { - // log.Errorf("Update job footprint (dbid: %d) failed at db execute: %s", job.ID, err.Error()) - // continue - // } - c++ - log.Debugf("Finish Job %d", job.JobID) + pendingStatements = append(pendingStatements, stmt) + log.Debugf("Job %d took %s", job.JobID, time.Since(s_job)) } - jobRepo.TransactionCommit(t) - log.Debugf("Finish Cluster %s", cluster.Name) + + t, err := jobRepo.TransactionInit() + if err != nil { + log.Errorf("failed TransactionInit %v", err) + log.Errorf("skipped %d transactions for cluster %s", len(pendingStatements), cluster.Name) + ce += len(pendingStatements) + } else { + for _, ps := range pendingStatements { + query, args, err := ps.ToSql() + if err != nil { + log.Errorf("failed in ToSQL conversion: %v", err) + ce++ + } else { + // args...: Footprint-JSON, Energyfootprint-JSON, TotalEnergy, JobID + jobRepo.TransactionAdd(t, query, args...) + c++ + } + } + jobRepo.TransactionEnd(t) + } + log.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster)) } - jobRepo.TransactionEnd(t) log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s)) })) }