// Copyright (C) NHR@FAU, University Erlangen-Nuremberg. // All rights reserved. This file is part of cc-backend. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. // Package importer provides functionality for importing job data into the ClusterCockpit database. // // The package supports two primary use cases: // 1. Bulk database initialization from archived jobs via InitDB() // 2. Individual job import from file pairs via HandleImportFlag() // // Both operations enrich job metadata by calculating footprints and energy metrics // before persisting to the database. package importer import ( "encoding/json" "fmt" "math" "strings" "time" "github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/pkg/archive" cclog "github.com/ClusterCockpit/cc-lib/ccLogger" "github.com/ClusterCockpit/cc-lib/schema" ) const ( addTagQuery = "INSERT INTO tag (tag_name, tag_type) VALUES (?, ?)" setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)" ) // InitDB reinitializes the job database from archived job data. // // This function performs the following operations: // 1. Flushes existing job, tag, and jobtag tables // 2. Iterates through all jobs in the archive // 3. Enriches each job with calculated footprints and energy metrics // 4. Inserts jobs and tags into the database in batched transactions // // Jobs are processed in batches of 100 for optimal performance. The function // continues processing even if individual jobs fail, logging errors and // returning a summary at the end. // // Returns an error if database initialization, transaction management, or // critical operations fail. Individual job failures are logged but do not // stop the overall import process. func InitDB() error { r := repository.GetJobRepository() if err := r.Flush(); err != nil { cclog.Errorf("repository initDB(): %v", err) return err } starttime := time.Now() cclog.Print("Building job table...") t, err := r.TransactionInit() if err != nil { cclog.Warn("Error while initializing SQL transactions") return err } tags := make(map[string]int64) // Not using cclog.Print because we want the line to end with `\r` and // this function is only ever called when a special command line flag // is passed anyways. fmt.Printf("%d jobs inserted...\r", 0) ar := archive.GetHandle() i := 0 errorOccured := 0 for jobContainer := range ar.Iter(false) { jobMeta := jobContainer.Meta if jobMeta == nil { cclog.Warn("skipping job with nil metadata") errorOccured++ continue } // Bundle 100 inserts into one transaction for better performance if i%100 == 0 { if i > 0 { if err := t.Commit(); err != nil { cclog.Errorf("transaction commit error: %v", err) return err } // Start a new transaction for the next batch t, err = r.TransactionInit() if err != nil { cclog.Errorf("transaction init error: %v", err) return err } } fmt.Printf("%d jobs inserted...\r", i) } jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful if err := enrichJobMetadata(jobMeta); err != nil { cclog.Errorf("repository initDB(): %v", err) errorOccured++ continue } if err := SanityChecks(jobMeta); err != nil { cclog.Errorf("repository initDB(): %v", err) errorOccured++ continue } id, err := r.TransactionAddNamed(t, repository.NamedJobInsert, jobMeta) if err != nil { cclog.Errorf("repository initDB(): %v", err) errorOccured++ continue } for _, tag := range jobMeta.Tags { tagstr := tag.Name + ":" + tag.Type tagID, ok := tags[tagstr] if !ok { tagID, err = r.TransactionAdd(t, addTagQuery, tag.Name, tag.Type) if err != nil { cclog.Errorf("Error adding tag: %v", err) errorOccured++ continue } tags[tagstr] = tagID } r.TransactionAdd(t, setTagQuery, id, tagID) } if err == nil { i += 1 } } if errorOccured > 0 { cclog.Warnf("Error in import of %d jobs!", errorOccured) } r.TransactionEnd(t) cclog.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds()) return nil } // enrichJobMetadata calculates and populates job footprints, energy metrics, and serialized fields. // // This function performs the following enrichment operations: // 1. Calculates job footprint metrics based on the subcluster configuration // 2. Computes energy footprint and total energy consumption in kWh // 3. Marshals footprints, resources, and metadata into JSON for database storage // // The function expects the job's MonitoringStatus and SubCluster to be already set. // Energy calculations convert power metrics (Watts) to energy (kWh) using the formula: // // Energy (kWh) = (Power (W) * Duration (s) / 3600) / 1000 // // Returns an error if subcluster retrieval, metric indexing, or JSON marshaling fails. func enrichJobMetadata(job *schema.Job) error { sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster) if err != nil { cclog.Errorf("cannot get subcluster: %s", err.Error()) return err } job.Footprint = make(map[string]float64) for _, fp := range sc.Footprint { statType := "avg" if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil { statType = sc.MetricConfig[i].Footprint } name := fmt.Sprintf("%s_%s", fp, statType) job.Footprint[name] = repository.LoadJobStat(job, fp, statType) } job.RawFootprint, err = json.Marshal(job.Footprint) if err != nil { cclog.Warn("Error while marshaling job footprint") return err } job.EnergyFootprint = make(map[string]float64) // Total Job Energy Outside Loop totalEnergy := 0.0 for _, fp := range sc.EnergyFootprint { // Always Init Metric Energy Inside Loop metricEnergy := 0.0 if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil { // Note: For DB data, calculate and save as kWh switch sc.MetricConfig[i].Energy { case "energy": // this metric has energy as unit (Joules) cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp) // FIXME: Needs sum as stats type case "power": // this metric has power as unit (Watt) // Energy: Power (in Watts) * Time (in Seconds) // Unit: (W * (s / 3600)) / 1000 = kWh // Round 2 Digits: round(Energy * 100) / 100 // Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000 // Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1 rawEnergy := ((repository.LoadJobStat(job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0 metricEnergy = math.Round(rawEnergy*100.0) / 100.0 } } else { cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID) } job.EnergyFootprint[fp] = metricEnergy totalEnergy += metricEnergy } job.Energy = (math.Round(totalEnergy*100.0) / 100.0) if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil { cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID) return err } job.RawResources, err = json.Marshal(job.Resources) if err != nil { cclog.Warn("Error while marshaling job resources") return err } job.RawMetaData, err = json.Marshal(job.MetaData) if err != nil { cclog.Warn("Error while marshaling job metadata") return err } return nil } // SanityChecks validates job metadata and ensures cluster/subcluster configuration is valid. // // This function performs the following validations: // 1. Verifies the cluster exists in the archive configuration // 2. Assigns and validates the subcluster (may modify job.SubCluster) // 3. Validates job state is a recognized value // 4. Ensures resources and user fields are populated // 5. Validates node counts and hardware thread counts are positive // 6. Verifies the number of resources matches the declared node count // // The function may modify the job's SubCluster field if it needs to be assigned. // // Returns an error if any validation check fails. func SanityChecks(job *schema.Job) error { if c := archive.GetCluster(job.Cluster); c == nil { return fmt.Errorf("no such cluster: %v", job.Cluster) } if err := archive.AssignSubCluster(job); err != nil { cclog.Warn("Error while assigning subcluster to job") return err } if !job.State.Valid() { return fmt.Errorf("not a valid job state: %v", job.State) } if len(job.Resources) == 0 || len(job.User) == 0 { return fmt.Errorf("'resources' and 'user' should not be empty") } if job.NumAcc < 0 || job.NumHWThreads < 0 || job.NumNodes < 1 { return fmt.Errorf("'numNodes', 'numAcc' or 'numHWThreads' invalid") } if len(job.Resources) != int(job.NumNodes) { return fmt.Errorf("len(resources) does not equal numNodes (%d vs %d)", len(job.Resources), job.NumNodes) } return nil } // checkJobData normalizes metric units in job data based on average values. // // NOTE: This function is currently unused and contains incomplete implementation. // It was intended to normalize byte and file-related metrics to appropriate SI prefixes, // but the normalization logic is commented out. Consider removing or completing this // function based on project requirements. // // TODO: Either implement the metric normalization or remove this dead code. func checkJobData(d *schema.JobData) error { for _, scopes := range *d { // var newUnit schema.Unit // TODO Add node scope if missing for _, metric := range scopes { if strings.Contains(metric.Unit.Base, "B/s") || strings.Contains(metric.Unit.Base, "F/s") || strings.Contains(metric.Unit.Base, "B") { // get overall avg sum := 0.0 for _, s := range metric.Series { sum += s.Statistics.Avg } avg := sum / float64(len(metric.Series)) f, p := Normalize(avg, metric.Unit.Prefix) if p != metric.Unit.Prefix { fmt.Printf("Convert %e", f) // for _, s := range metric.Series { // fp := schema.ConvertFloatToFloat64(s.Data) // // for i := 0; i < len(fp); i++ { // fp[i] *= f // fp[i] = math.Ceil(fp[i]) // } // // s.Data = schema.GetFloat64ToFloat(fp) // } metric.Unit.Prefix = p } } } } return nil }