mirror of
				https://github.com/ClusterCockpit/cc-backend
				synced 2025-11-04 01:25:06 +01:00 
			
		
		
		
	Merge pull request #300 from ClusterCockpit/improve_footprint_transactions
Improve footprint transactions
This commit is contained in:
		@@ -10,7 +10,7 @@ import (
 | 
				
			|||||||
	"time"
 | 
						"time"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	"github.com/ClusterCockpit/cc-backend/internal/config"
 | 
						"github.com/ClusterCockpit/cc-backend/internal/config"
 | 
				
			||||||
	"github.com/ClusterCockpit/cc-backend/internal/metricDataDispatcher"
 | 
						"github.com/ClusterCockpit/cc-backend/internal/metricdata"
 | 
				
			||||||
	"github.com/ClusterCockpit/cc-backend/pkg/archive"
 | 
						"github.com/ClusterCockpit/cc-backend/pkg/archive"
 | 
				
			||||||
	"github.com/ClusterCockpit/cc-backend/pkg/log"
 | 
						"github.com/ClusterCockpit/cc-backend/pkg/log"
 | 
				
			||||||
	"github.com/ClusterCockpit/cc-backend/pkg/schema"
 | 
						"github.com/ClusterCockpit/cc-backend/pkg/schema"
 | 
				
			||||||
@@ -37,32 +37,38 @@ func RegisterFootprintWorker() {
 | 
				
			|||||||
				cl := 0
 | 
									cl := 0
 | 
				
			||||||
				log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
 | 
									log.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
				t, err := jobRepo.TransactionInit()
 | 
					 | 
				
			||||||
				if err != nil {
 | 
					 | 
				
			||||||
					log.Errorf("Failed TransactionInit %v", err)
 | 
					 | 
				
			||||||
				}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
				for _, cluster := range archive.Clusters {
 | 
									for _, cluster := range archive.Clusters {
 | 
				
			||||||
 | 
										s_cluster := time.Now()
 | 
				
			||||||
					jobs, err := jobRepo.FindRunningJobs(cluster.Name)
 | 
										jobs, err := jobRepo.FindRunningJobs(cluster.Name)
 | 
				
			||||||
					if err != nil {
 | 
										if err != nil {
 | 
				
			||||||
						continue
 | 
											continue
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
 | 
										// NOTE: Additional Subcluster Loop Could Allow For Limited List Of (Energy)Footprint-Metrics Only.
 | 
				
			||||||
 | 
										//       - Chunk-Size Would Then Be 'SubCluster' (Running Jobs, Transactions) as Lists Can Change Within SCs
 | 
				
			||||||
 | 
										//       - Would Require Review of 'updateFootprint' And 'updateEnergy' Usage
 | 
				
			||||||
					allMetrics := make([]string, 0)
 | 
										allMetrics := make([]string, 0)
 | 
				
			||||||
					metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
 | 
										metricConfigs := archive.GetCluster(cluster.Name).MetricConfig
 | 
				
			||||||
					for _, mc := range metricConfigs {
 | 
										for _, mc := range metricConfigs {
 | 
				
			||||||
						allMetrics = append(allMetrics, mc.Name)
 | 
											allMetrics = append(allMetrics, mc.Name)
 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					scopes := []schema.MetricScope{schema.MetricScopeNode}
 | 
										repo, err := metricdata.GetMetricDataRepo(cluster.Name)
 | 
				
			||||||
					scopes = append(scopes, schema.MetricScopeCore)
 | 
										if err != nil {
 | 
				
			||||||
					scopes = append(scopes, schema.MetricScopeAccelerator)
 | 
											log.Errorf("no metric data repository configured for '%s'", cluster.Name)
 | 
				
			||||||
 | 
											continue
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
										pendingStatements := []sq.UpdateBuilder{}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
					for _, job := range jobs {
 | 
										for _, job := range jobs {
 | 
				
			||||||
						log.Debugf("Try job %d", job.JobID)
 | 
											log.Debugf("Prepare job %d", job.JobID)
 | 
				
			||||||
						cl++
 | 
											cl++
 | 
				
			||||||
						jobData, err := metricDataDispatcher.LoadData(job, allMetrics, scopes, context.Background(), 0) // 0 Resolution-Value retrieves highest res
 | 
					
 | 
				
			||||||
 | 
											s_job := time.Now()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
											jobStats, err := repo.LoadStats(job, allMetrics, context.Background())
 | 
				
			||||||
						if err != nil {
 | 
											if err != nil {
 | 
				
			||||||
							log.Errorf("Error wile loading job data for footprint update: %v", err)
 | 
												log.Errorf("error wile loading job data stats for footprint update: %v", err)
 | 
				
			||||||
							ce++
 | 
												ce++
 | 
				
			||||||
							continue
 | 
												continue
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
@@ -73,19 +79,19 @@ func RegisterFootprintWorker() {
 | 
				
			|||||||
							Statistics: make(map[string]schema.JobStatistics),
 | 
												Statistics: make(map[string]schema.JobStatistics),
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						for metric, data := range jobData {
 | 
											for _, metric := range allMetrics {
 | 
				
			||||||
							avg, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
 | 
												avg, min, max := 0.0, 0.0, 0.0
 | 
				
			||||||
							nodeData, ok := data["node"]
 | 
												data, ok := jobStats[metric] // JobStats[Metric1:[Hostname1:[Stats], Hostname2:[Stats], ...], Metric2[...] ...]
 | 
				
			||||||
							if !ok {
 | 
												if ok {
 | 
				
			||||||
								// This should never happen ?
 | 
													for _, res := range job.Resources {
 | 
				
			||||||
								ce++
 | 
														hostStats, ok := data[res.Hostname]
 | 
				
			||||||
								continue
 | 
														if ok {
 | 
				
			||||||
							}
 | 
															avg += hostStats.Avg
 | 
				
			||||||
 | 
															min = math.Min(min, hostStats.Min)
 | 
				
			||||||
 | 
															max = math.Max(max, hostStats.Max)
 | 
				
			||||||
 | 
														}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							for _, series := range nodeData.Series {
 | 
													}
 | 
				
			||||||
								avg += series.Statistics.Avg
 | 
					 | 
				
			||||||
								min = math.Min(min, series.Statistics.Min)
 | 
					 | 
				
			||||||
								max = math.Max(max, series.Statistics.Max)
 | 
					 | 
				
			||||||
							}
 | 
												}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							// Add values rounded to 2 digits
 | 
												// Add values rounded to 2 digits
 | 
				
			||||||
@@ -100,44 +106,47 @@ func RegisterFootprintWorker() {
 | 
				
			|||||||
							}
 | 
												}
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						// Init UpdateBuilder
 | 
											// Build Statement per Job, Add to Pending Array
 | 
				
			||||||
						stmt := sq.Update("job")
 | 
											stmt := sq.Update("job")
 | 
				
			||||||
						// Add SET queries
 | 
					 | 
				
			||||||
						stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
 | 
											stmt, err = jobRepo.UpdateFootprint(stmt, jobMeta)
 | 
				
			||||||
						if err != nil {
 | 
											if err != nil {
 | 
				
			||||||
							log.Errorf("Update job (dbid: %d) failed at update Footprint step: %s", job.ID, err.Error())
 | 
												log.Errorf("update job (dbid: %d) statement build failed at footprint step: %s", job.ID, err.Error())
 | 
				
			||||||
							ce++
 | 
												ce++
 | 
				
			||||||
							continue
 | 
												continue
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
						stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta)
 | 
											stmt, err = jobRepo.UpdateEnergy(stmt, jobMeta)
 | 
				
			||||||
						if err != nil {
 | 
											if err != nil {
 | 
				
			||||||
							log.Errorf("Update job (dbid: %d) failed at update Energy step: %s", job.ID, err.Error())
 | 
												log.Errorf("update job (dbid: %d) statement build failed at energy step: %s", job.ID, err.Error())
 | 
				
			||||||
							ce++
 | 
												ce++
 | 
				
			||||||
							continue
 | 
												continue
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
						// Add WHERE Filter
 | 
					 | 
				
			||||||
						stmt = stmt.Where("job.id = ?", job.ID)
 | 
											stmt = stmt.Where("job.id = ?", job.ID)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
						query, args, err := stmt.ToSql()
 | 
											pendingStatements = append(pendingStatements, stmt)
 | 
				
			||||||
						if err != nil {
 | 
											log.Debugf("Job %d took %s", job.JobID, time.Since(s_job))
 | 
				
			||||||
							log.Errorf("Failed in ToSQL conversion: %v", err)
 | 
					 | 
				
			||||||
							ce++
 | 
					 | 
				
			||||||
							continue
 | 
					 | 
				
			||||||
						}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
						// Args: JSON, JSON, ENERGY, JOBID
 | 
					 | 
				
			||||||
						jobRepo.TransactionAdd(t, query, args...)
 | 
					 | 
				
			||||||
						// if err := jobRepo.Execute(stmt); err != nil {
 | 
					 | 
				
			||||||
						// 	log.Errorf("Update job footprint (dbid: %d) failed at db execute: %s", job.ID, err.Error())
 | 
					 | 
				
			||||||
						// 	continue
 | 
					 | 
				
			||||||
						// }
 | 
					 | 
				
			||||||
						c++
 | 
					 | 
				
			||||||
						log.Debugf("Finish Job %d", job.JobID)
 | 
					 | 
				
			||||||
					}
 | 
										}
 | 
				
			||||||
					jobRepo.TransactionCommit(t)
 | 
					
 | 
				
			||||||
					log.Debugf("Finish Cluster %s", cluster.Name)
 | 
										t, err := jobRepo.TransactionInit()
 | 
				
			||||||
 | 
										if err != nil {
 | 
				
			||||||
 | 
											log.Errorf("failed TransactionInit %v", err)
 | 
				
			||||||
 | 
											log.Errorf("skipped %d transactions for cluster %s", len(pendingStatements), cluster.Name)
 | 
				
			||||||
 | 
											ce += len(pendingStatements)
 | 
				
			||||||
 | 
										} else {
 | 
				
			||||||
 | 
											for _, ps := range pendingStatements {
 | 
				
			||||||
 | 
												query, args, err := ps.ToSql()
 | 
				
			||||||
 | 
												if err != nil {
 | 
				
			||||||
 | 
													log.Errorf("failed in ToSQL conversion: %v", err)
 | 
				
			||||||
 | 
													ce++
 | 
				
			||||||
 | 
												} else {
 | 
				
			||||||
 | 
													// args...: Footprint-JSON, Energyfootprint-JSON, TotalEnergy, JobID
 | 
				
			||||||
 | 
													jobRepo.TransactionAdd(t, query, args...)
 | 
				
			||||||
 | 
													c++
 | 
				
			||||||
 | 
												}
 | 
				
			||||||
 | 
											}
 | 
				
			||||||
 | 
											jobRepo.TransactionEnd(t)
 | 
				
			||||||
 | 
										}
 | 
				
			||||||
 | 
										log.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
 | 
				
			||||||
				}
 | 
									}
 | 
				
			||||||
				jobRepo.TransactionEnd(t)
 | 
					 | 
				
			||||||
				log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
 | 
									log.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
 | 
				
			||||||
			}))
 | 
								}))
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user