Mirror of https://github.com/ClusterCockpit/cc-backend, synced 2024-12-26 13:29:05 +01:00
Merge pull request #300 from ClusterCockpit/improve_footprint_transactions
Improve footprint transactions
This commit is contained in: commit f1e6dedd44
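In short, the footprint worker no longer opens a single transaction up front and adds each job's UPDATE as soon as it is built; it now collects the per-job statements in a slice and flushes them in one transaction per cluster. Below is a minimal, self-contained sketch of that batching pattern, assuming the sq alias refers to github.com/Masterminds/squirrel as in the diff further down; the stubRepo/tx types and the example job IDs are hypothetical stand-ins for cc-backend's JobRepository transaction helpers and real job rows, so only the call shape is mirrored, not the actual implementation.

package main

import (
	"fmt"
	"log"

	sq "github.com/Masterminds/squirrel"
)

// tx collects the queries a real transaction would execute.
type tx struct{ queries []string }

// stubRepo mimics the TransactionInit/TransactionAdd/TransactionEnd helpers used
// in the diff; it only records queries so the sketch runs without a database.
type stubRepo struct{}

func (r *stubRepo) TransactionInit() (*tx, error) { return &tx{}, nil }

func (r *stubRepo) TransactionAdd(t *tx, query string, args ...interface{}) {
	t.queries = append(t.queries, fmt.Sprintf("%s %v", query, args))
}

func (r *stubRepo) TransactionEnd(t *tx) {
	log.Printf("committed %d statements", len(t.queries))
}

func main() {
	jobRepo := &stubRepo{}

	// Per job: build the UPDATE statement, but defer execution by collecting it.
	pendingStatements := []sq.UpdateBuilder{}
	for _, jobID := range []int64{42, 43} { // hypothetical job IDs
		stmt := sq.Update("job").
			Set("footprint", `{"mem_used_avg": 30.2}`). // stand-in for UpdateFootprint/UpdateEnergy
			Where("job.id = ?", jobID)
		pendingStatements = append(pendingStatements, stmt)
	}

	// Per cluster: one transaction for the whole batch of pending statements.
	t, err := jobRepo.TransactionInit()
	if err != nil {
		log.Printf("failed TransactionInit %v: skipping %d statements", err, len(pendingStatements))
		return
	}
	for _, ps := range pendingStatements {
		query, args, err := ps.ToSql()
		if err != nil {
			log.Printf("failed in ToSql conversion: %v", err)
			continue
		}
		jobRepo.TransactionAdd(t, query, args...)
	}
	jobRepo.TransactionEnd(t)
}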
@@ -10,7 +10,7 @@ import (
     "time"
 
     "github.com/ClusterCockpit/cc-backend/internal/config"
-    "github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
+    "github.com/ClusterCockpit/cc-backend/internal/metricdata"
     "github.com/ClusterCockpit/cc-backend/pkg/archive"
     "github.com/ClusterCockpit/cc-backend/pkg/log"
     "github.com/ClusterCockpit/cc-backend/pkg/schema"
@@ -37,32 +37,38 @@ func RegisterFootprintWorker() {
                 cl := 0
                 log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
 
-                t, err := jobRepo.TransactionInit()
-                if err != nil {
-                    log.Errorf("Failed TransactionInit %v", err)
-                }
-
                 for _, cluster := range archive.Clusters {
+                    s_cluster := time.Now()
                     jobs, err := jobRepo.FindRunningJobs(cluster.Name)
                     if err != nil {
                         continue
                     }
+                    // NOTE: Additional Subcluster Loop Could Allow For Limited List Of (Energy)Footprint-Metrics Only.
+                    // - Chunk-Size Would Then Be 'SubCluster' (Running Jobs, Transactions) as Lists Can Change Within SCs
+                    // - Would Require Review of 'updateFootprint' And 'updateEnergy' Usage
                     allMetrics := make([]string, 0)
                     metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
                     for _, mc := range metricConfigs {
                         allMetrics = append(allMetrics, mc.Name)
                     }
 
-                    scopes := []schema.MetricScope{schema.MetricScopeNode}
-                    scopes = append(scopes, schema.MetricScopeCore)
-                    scopes = append(scopes, schema.MetricScopeAccelerator)
+                    repo, err := metricdata.GetMetricDataRepo(cluster.Name)
+                    if err != nil {
+                        log.Errorf("no metric data repository configured for '%s'", cluster.Name)
+                        continue
+                    }
+
+                    pendingStatements := []sq.UpdateBuilder{}
 
                     for _, job := range jobs {
-                        log.Debugf("Try job %d", job.JobID)
+                        log.Debugf("Prepare job %d", job.JobID)
                         cl++
-                        jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background(), 0) // 0 Resolution-Value retrieves highest res
+
+                        s_job := time.Now()
+
+                        jobStats, err := repo.LoadStats(job, allMetrics, context.Background())
                         if err != nil {
-                            log.Errorf("Error wile loading job data for footprint update: %v", err)
+                            log.Errorf("error wile loading job data stats for footprint update: %v", err)
                             ce++
                             continue
                         }
@@ -73,19 +79,19 @@ func RegisterFootprintWorker() {
                             Statistics: make(map[string]schema.JobStatistics),
                         }
 
-                        for metric, data := range jobData {
-                            avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
-                            nodeData, ok := data["node"]
-                            if !ok {
-                                // This should never happen ?
-                                ce++
-                                continue
-                            }
-
-                            for _, series := range nodeData.Series {
-                                avg += series.Statistics.Avg
-                                min = math.Min(min, series.Statistics.Min)
-                                max = math.Max(max, series.Statistics.Max)
+                        for _, metric := range allMetrics {
+                            avg, min, max := 0.0, 0.0, 0.0
+                            data, ok := jobStats[metric] // JobStats[Metric1:[Hostname1:[Stats], Hostname2:[Stats], ...], Metric2[...] ...]
+                            if ok {
+                                for _, res := range job.Resources {
+                                    hostStats, ok := data[res.Hostname]
+                                    if ok {
+                                        avg += hostStats.Avg
+                                        min = math.Min(min, hostStats.Min)
+                                        max = math.Max(max, hostStats.Max)
+                                    }
+
+                                }
                             }
 
                             // Add values rounded to 2 digits
@@ -100,44 +106,47 @@ func RegisterFootprintWorker() {
                             }
                         }
 
-                        // Init UpdateBuilder
+                        // Build Statement per Job, Add to Pending Array
                         stmt := sq.Update("job")
-                        // Add SET queries
                         stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
                         if err != nil {
-                            log.Errorf("Update job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
+                            log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
                            ce++
                            continue
                        }
                        stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta)
                        if err != nil {
-                            log.Errorf("Update job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
+                            log.Errorf("update job (dbid: %d) statement build failed at energy step: %s", job.ID, err.Error())
                            ce++
                            continue
                        }
-                        // Add WHERE Filter
                        stmt = stmt.Where("job.id = ?", job.ID)
 
-                        query, args, err := stmt.ToSql()
-                        if err != nil {
-                            log.Errorf("Failed in ToSQL conversion: %v", err)
-                            ce++
-                            continue
-                        }
-
-                        // Args: JSON, JSON, ENERGY, JOBID
-                        jobRepo.TransactionAdd(t, query, args...)
-                        // if err := jobRepo.Execute(stmt); err != nil {
-                        // log.Errorf("Update job footprint (dbid: %d) failed at db execute: %s", job.ID, err.Error())
-                        // continue
-                        // }
-                        c++
-                        log.Debugf("Finish Job %d", job.JobID)
+                        pendingStatements = append(pendingStatements, stmt)
+                        log.Debugf("Job %d took %s", job.JobID, time.Since(s_job))
                    }
-                    jobRepo.TransactionCommit(t)
-                    log.Debugf("Finish Cluster %s", cluster.Name)
+
+                    t, err := jobRepo.TransactionInit()
+                    if err != nil {
+                        log.Errorf("failed TransactionInit %v", err)
+                        log.Errorf("skipped %d transactions for cluster %s", len(pendingStatements), cluster.Name)
+                        ce += len(pendingStatements)
+                    } else {
+                        for _, ps := range pendingStatements {
+                            query, args, err := ps.ToSql()
+                            if err != nil {
+                                log.Errorf("failed in ToSQL conversion: %v", err)
+                                ce++
+                            } else {
+                                // args...: Footprint-JSON, Energyfootprint-JSON, TotalEnergy, JobID
+                                jobRepo.TransactionAdd(t, query, args...)
+                                c++
+                            }
+                        }
+                        jobRepo.TransactionEnd(t)
+                    }
+                    log.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
                }
-                jobRepo.TransactionEnd(t)
                log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
            }))
 }
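The new inner loop aggregates the LoadStats result per metric across the hosts assigned to a job (per the comment in the diff, a map keyed by metric and then by hostname). Below is a standalone sketch of that aggregation; hostStat and the sample data are simplified stand-ins for schema.MetricStatistics and real job resources, and unlike the diff this sketch seeds min with +Inf so the helper also behaves sensibly when used on its own.

package main

import (
	"fmt"
	"math"
)

// hostStat is a simplified stand-in for the per-host statistics returned by LoadStats.
type hostStat struct{ Avg, Min, Max float64 }

// aggregate sums the per-host averages and takes min/max across the hosts of a job.
func aggregate(stats map[string]hostStat, hostnames []string) (avg, min, max float64) {
	min, max = math.Inf(1), math.Inf(-1)
	found := false
	for _, h := range hostnames {
		hs, ok := stats[h]
		if !ok {
			continue // host missing from the stats map is simply skipped
		}
		found = true
		avg += hs.Avg
		min = math.Min(min, hs.Min)
		max = math.Max(max, hs.Max)
	}
	if !found {
		return 0, 0, 0
	}
	return avg, min, max
}

func main() {
	// Hypothetical sample data in the shape JobStats[Metric][Hostname] = Stats.
	jobStats := map[string]map[string]hostStat{
		"mem_used": {
			"node01": {Avg: 30.1, Min: 12.5, Max: 64.0},
			"node02": {Avg: 28.7, Min: 10.0, Max: 61.3},
		},
	}
	hosts := []string{"node01", "node02"}

	for metric, perHost := range jobStats {
		avg, min, max := aggregate(perHost, hosts)
		// In the worker, these values are rounded to 2 digits and stored in jobMeta.Statistics.
		fmt.Printf("%s: avg=%.2f min=%.2f max=%.2f\n", metric, avg, min, max)
	}
}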