mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2025-12-16 04:06:16 +01:00
Add documentation for importer
This commit is contained in:
@@ -2,6 +2,15 @@
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package importer provides functionality for importing job data into the ClusterCockpit database.
|
||||
//
|
||||
// The package supports two primary use cases:
|
||||
// 1. Bulk database initialization from archived jobs via InitDB()
|
||||
// 2. Individual job import from file pairs via HandleImportFlag()
|
||||
//
|
||||
// Both operations enrich job metadata by calculating footprints and energy metrics
|
||||
// before persisting to the database.
|
||||
package importer
|
||||
|
||||
import (
|
||||
@@ -22,8 +31,21 @@ const (
|
||||
setTagQuery = "INSERT INTO jobtag (job_id, tag_id) VALUES (?, ?)"
|
||||
)
|
||||
|
||||
// Delete the tables "job", "tag" and "jobtag" from the database and
|
||||
// repopulate them using the jobs found in `archive`.
|
||||
// InitDB reinitializes the job database from archived job data.
|
||||
//
|
||||
// This function performs the following operations:
|
||||
// 1. Flushes existing job, tag, and jobtag tables
|
||||
// 2. Iterates through all jobs in the archive
|
||||
// 3. Enriches each job with calculated footprints and energy metrics
|
||||
// 4. Inserts jobs and tags into the database in batched transactions
|
||||
//
|
||||
// Jobs are processed in batches of 100 for optimal performance. The function
|
||||
// continues processing even if individual jobs fail, logging errors and
|
||||
// returning a summary at the end.
|
||||
//
|
||||
// Returns an error if database initialization, transaction management, or
|
||||
// critical operations fail. Individual job failures are logged but do not
|
||||
// stop the overall import process.
|
||||
func InitDB() error {
|
||||
r := repository.GetJobRepository()
|
||||
if err := r.Flush(); err != nil {
|
||||
@@ -72,76 +94,7 @@ func InitDB() error {
|
||||
|
||||
jobMeta.MonitoringStatus = schema.MonitoringStatusArchivingSuccessful
|
||||
|
||||
sc, err := archive.GetSubCluster(jobMeta.Cluster, jobMeta.SubCluster)
|
||||
if err != nil {
|
||||
cclog.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
jobMeta.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
jobMeta.Footprint[name] = repository.LoadJobStat(jobMeta, fp, statType)
|
||||
}
|
||||
|
||||
jobMeta.RawFootprint, err = json.Marshal(jobMeta.Footprint)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
jobMeta.EnergyFootprint = make(map[string]float64)
|
||||
|
||||
// Total Job Energy Outside Loop
|
||||
totalEnergy := 0.0
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
// Always Init Metric Energy Inside Loop
|
||||
metricEnergy := 0.0
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
|
||||
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
|
||||
// FIXME: Needs sum as stats type
|
||||
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
// Unit: (W * (s / 3600)) / 1000 = kWh
|
||||
// Round 2 Digits: round(Energy * 100) / 100
|
||||
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
|
||||
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
|
||||
rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
|
||||
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
|
||||
}
|
||||
} else {
|
||||
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
|
||||
}
|
||||
|
||||
jobMeta.EnergyFootprint[fp] = metricEnergy
|
||||
totalEnergy += metricEnergy
|
||||
}
|
||||
|
||||
jobMeta.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||
if jobMeta.RawEnergyFootprint, err = json.Marshal(jobMeta.EnergyFootprint); err != nil {
|
||||
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
jobMeta.RawResources, err = json.Marshal(jobMeta.Resources)
|
||||
if err != nil {
|
||||
cclog.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
|
||||
jobMeta.RawMetaData, err = json.Marshal(jobMeta.MetaData)
|
||||
if err != nil {
|
||||
if err := enrichJobMetadata(jobMeta); err != nil {
|
||||
cclog.Errorf("repository initDB(): %v", err)
|
||||
errorOccured++
|
||||
continue
|
||||
@@ -163,9 +116,9 @@ func InitDB() error {
|
||||
|
||||
for _, tag := range jobMeta.Tags {
|
||||
tagstr := tag.Name + ":" + tag.Type
|
||||
tagId, ok := tags[tagstr]
|
||||
tagID, ok := tags[tagstr]
|
||||
if !ok {
|
||||
tagId, err = r.TransactionAdd(t,
|
||||
tagID, err = r.TransactionAdd(t,
|
||||
addTagQuery,
|
||||
tag.Name, tag.Type)
|
||||
if err != nil {
|
||||
@@ -173,12 +126,12 @@ func InitDB() error {
|
||||
errorOccured++
|
||||
continue
|
||||
}
|
||||
tags[tagstr] = tagId
|
||||
tags[tagstr] = tagID
|
||||
}
|
||||
|
||||
r.TransactionAdd(t,
|
||||
setTagQuery,
|
||||
id, tagId)
|
||||
id, tagID)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
@@ -195,7 +148,110 @@ func InitDB() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// This function also sets the subcluster if necessary!
|
||||
// enrichJobMetadata calculates and populates job footprints, energy metrics, and serialized fields.
|
||||
//
|
||||
// This function performs the following enrichment operations:
|
||||
// 1. Calculates job footprint metrics based on the subcluster configuration
|
||||
// 2. Computes energy footprint and total energy consumption in kWh
|
||||
// 3. Marshals footprints, resources, and metadata into JSON for database storage
|
||||
//
|
||||
// The function expects the job's MonitoringStatus and SubCluster to be already set.
|
||||
// Energy calculations convert power metrics (Watts) to energy (kWh) using the formula:
|
||||
//
|
||||
// Energy (kWh) = (Power (W) * Duration (s) / 3600) / 1000
|
||||
//
|
||||
// Returns an error if subcluster retrieval, metric indexing, or JSON marshaling fails.
|
||||
func enrichJobMetadata(job *schema.Job) error {
|
||||
sc, err := archive.GetSubCluster(job.Cluster, job.SubCluster)
|
||||
if err != nil {
|
||||
cclog.Errorf("cannot get subcluster: %s", err.Error())
|
||||
return err
|
||||
}
|
||||
|
||||
job.Footprint = make(map[string]float64)
|
||||
|
||||
for _, fp := range sc.Footprint {
|
||||
statType := "avg"
|
||||
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err != nil {
|
||||
statType = sc.MetricConfig[i].Footprint
|
||||
}
|
||||
|
||||
name := fmt.Sprintf("%s_%s", fp, statType)
|
||||
|
||||
job.Footprint[name] = repository.LoadJobStat(job, fp, statType)
|
||||
}
|
||||
|
||||
job.RawFootprint, err = json.Marshal(job.Footprint)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job footprint")
|
||||
return err
|
||||
}
|
||||
|
||||
job.EnergyFootprint = make(map[string]float64)
|
||||
|
||||
// Total Job Energy Outside Loop
|
||||
totalEnergy := 0.0
|
||||
for _, fp := range sc.EnergyFootprint {
|
||||
// Always Init Metric Energy Inside Loop
|
||||
metricEnergy := 0.0
|
||||
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
|
||||
// Note: For DB data, calculate and save as kWh
|
||||
switch sc.MetricConfig[i].Energy {
|
||||
case "energy": // this metric has energy as unit (Joules)
|
||||
cclog.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
|
||||
// FIXME: Needs sum as stats type
|
||||
case "power": // this metric has power as unit (Watt)
|
||||
// Energy: Power (in Watts) * Time (in Seconds)
|
||||
// Unit: (W * (s / 3600)) / 1000 = kWh
|
||||
// Round 2 Digits: round(Energy * 100) / 100
|
||||
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
|
||||
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
|
||||
rawEnergy := ((repository.LoadJobStat(job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
|
||||
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
|
||||
}
|
||||
} else {
|
||||
cclog.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
|
||||
}
|
||||
|
||||
job.EnergyFootprint[fp] = metricEnergy
|
||||
totalEnergy += metricEnergy
|
||||
}
|
||||
|
||||
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
|
||||
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
|
||||
cclog.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawResources, err = json.Marshal(job.Resources)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job resources")
|
||||
return err
|
||||
}
|
||||
|
||||
job.RawMetaData, err = json.Marshal(job.MetaData)
|
||||
if err != nil {
|
||||
cclog.Warn("Error while marshaling job metadata")
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SanityChecks validates job metadata and ensures cluster/subcluster configuration is valid.
|
||||
//
|
||||
// This function performs the following validations:
|
||||
// 1. Verifies the cluster exists in the archive configuration
|
||||
// 2. Assigns and validates the subcluster (may modify job.SubCluster)
|
||||
// 3. Validates job state is a recognized value
|
||||
// 4. Ensures resources and user fields are populated
|
||||
// 5. Validates node counts and hardware thread counts are positive
|
||||
// 6. Verifies the number of resources matches the declared node count
|
||||
//
|
||||
// The function may modify the job's SubCluster field if it needs to be assigned.
|
||||
//
|
||||
// Returns an error if any validation check fails.
|
||||
func SanityChecks(job *schema.Job) error {
|
||||
if c := archive.GetCluster(job.Cluster); c == nil {
|
||||
return fmt.Errorf("no such cluster: %v", job.Cluster)
|
||||
@@ -220,6 +276,14 @@ func SanityChecks(job *schema.Job) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkJobData normalizes metric units in job data based on average values.
|
||||
//
|
||||
// NOTE: This function is currently unused and contains incomplete implementation.
|
||||
// It was intended to normalize byte and file-related metrics to appropriate SI prefixes,
|
||||
// but the normalization logic is commented out. Consider removing or completing this
|
||||
// function based on project requirements.
|
||||
//
|
||||
// TODO: Either implement the metric normalization or remove this dead code.
|
||||
func checkJobData(d *schema.JobData) error {
|
||||
for _, scopes := range *d {
|
||||
// var newUnit schema.Unit
|
||||
|
||||
Reference in New Issue
Block a user