Merge pull request #354 from ClusterCockpit/dev

Dev
This commit is contained in:
Jan Eitzinger 2025-03-13 14:22:44 +01:00 committed by GitHub
commit 590bfd3a10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 115 additions and 66 deletions

View File

@ -1008,8 +1008,8 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
return
}
if job == nil || job.StartTime.Unix() >= req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
if job == nil || job.StartTime.Unix() > req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
return
}

View File

@ -96,27 +96,35 @@ func HandleImportFlag(flag string) error {
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err

View File

@ -93,27 +93,35 @@ func InitDB() error {
}
job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
job.EnergyFootprint[fp] = energy
totalEnergy += energy
job.EnergyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
}
job.Energy = (math.Round(totalEnergy*100) / 100)
job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err

View File

@ -590,28 +590,34 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err
}
energyFootprint := make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds)
// Unit: (( W * s ) / 3600) / 1000 = kWh ; Rounded to 2 nearest digits: (Energy * 100) / 100
// Here: All-Node Metric Average * Number of Nodes * Job Runtime
// Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
metricNodeSum := LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes) * float64(jobMeta.Duration)
energy = math.Round(((metricNodeSum/3600)/1000)*100) / 100
rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
}
} else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
}
energyFootprint[fp] = energy
totalEnergy += energy
energyFootprint[fp] = metricEnergy
totalEnergy += metricEnergy
// log.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
}
var rawFootprint []byte
@ -620,7 +626,7 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err
}
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
}
func (r *JobRepository) UpdateFootprint(

View File

@ -128,15 +128,24 @@
if (!job) return;
const pendingMetrics = [
...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] ||
$initq.data.globalMetrics.reduce((names, gm) => {
...(
(
ccconfig[`job_view_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`job_view_selectedMetrics:${job.cluster}`]
) ||
$initq.data.globalMetrics
.reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster && av.subClusters.includes(job.subCluster))) {
names.push(gm.name);
}
return names;
}, [])
),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
...(
(
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`]
) ||
ccconfig[`job_view_nodestats_selectedMetrics`]
),
];

View File

@ -137,5 +137,5 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
footprintSelect={true}
footprintSelect
/>

View File

@ -29,8 +29,8 @@
import Refresher from "./generic/helper/Refresher.svelte";
export let displayType;
export let cluster;
export let subCluster = "";
export let cluster = null;
export let subCluster = null;
export let from = null;
export let to = null;
@ -60,7 +60,10 @@
let hostnameFilter = "";
let pendingHostnameFilter = "";
let selectedMetric = ccconfig.system_view_selectedMetric || "";
let selectedMetrics = ccconfig[`node_list_selectedMetrics:${cluster}`] || [ccconfig.system_view_selectedMetric];
let selectedMetrics = (
ccconfig[`node_list_selectedMetrics:${cluster}:${subCluster}`] ||
ccconfig[`node_list_selectedMetrics:${cluster}`]
) || [ccconfig.system_view_selectedMetric];
let isMetricsSelectionOpen = false;
/*
@ -191,6 +194,7 @@
<MetricSelection
{cluster}
{subCluster}
configName="node_list_selectedMetrics"
metrics={selectedMetrics}
bind:isOpen={isMetricsSelectionOpen}

View File

@ -352,7 +352,7 @@
bind:metrics
bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint
footprintSelect={true}
footprintSelect
/>
<HistogramSelection

View File

@ -47,12 +47,12 @@
$: {
if (allMetrics != null) {
if (cluster == null) {
if (!cluster) {
for (let metric of globalMetrics) allMetrics.add(metric.name);
} else {
allMetrics.clear();
for (let gm of globalMetrics) {
if (subCluster == null) {
if (!subCluster) {
if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name);
} else {
if (gm.availability.find((av) => av.cluster === cluster && av.subClusters.includes(subCluster))) allMetrics.add(gm.name);
@ -67,7 +67,7 @@
function printAvailability(metric, cluster) {
const avail = globalMetrics.find((gm) => gm.name === metric)?.availability
if (cluster == null) {
if (!cluster) {
return avail.map((av) => av.cluster).join(',')
} else {
return avail.find((av) => av.cluster === cluster).subClusters.join(',')
@ -112,10 +112,17 @@
metrics = newMetricsOrder.filter((m) => unorderedMetrics.includes(m));
isOpen = false;
showFootprint = !!pendingShowFootprint;
let configKey;
if (cluster && subCluster) {
configKey = `${configName}:${cluster}:${subCluster}`;
} else if (cluster && !subCluster) {
configKey = `${configName}:${cluster}`;
} else {
configKey = `${configName}`;
}
updateConfigurationMutation({
name: cluster == null ? configName : `${configName}:${cluster}`,
name: configKey,
value: JSON.stringify(metrics),
}).subscribe((res) => {
if (res.fetching === false && res.error) {
@ -123,9 +130,11 @@
}
});
if (footprintSelect) {
showFootprint = !!pendingShowFootprint;
updateConfigurationMutation({
name:
cluster == null
!cluster
? "plot_list_showFootprint"
: `plot_list_showFootprint:${cluster}`,
value: JSON.stringify(showFootprint),
@ -134,6 +143,7 @@
throw res.error;
}
});
};
dispatch('update-metrics', metrics);
}

View File

@ -37,9 +37,10 @@
sorting = {},
isMetricSelectionOpen = false,
availableMetrics = new Set(),
selectedMetrics =
getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`] ||
getContext("cc-config")["job_view_nodestats_selectedMetrics"];
selectedMetrics = (
getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`]
) || getContext("cc-config")["job_view_nodestats_selectedMetrics"];
for (let metric of sortedJobMetrics) {
// Not Exclusive or Multi-Node: get maxScope directly (mostly: node)
@ -169,6 +170,7 @@
<MetricSelection
cluster={job.cluster}
subCluster={job.subCluster}
configName="job_view_nodestats_selectedMetrics"
bind:allMetrics={availableMetrics}
bind:metrics={selectedMetrics}

View File

@ -217,6 +217,7 @@
<tr>
<td colspan={selectedMetrics.length + 1}>
<div style="text-align:center;">
{#if !usePaging}
<p><b>
Loading nodes {nodes.length + 1} to
{ matchedNodes
@ -224,6 +225,7 @@
: (nodes.length + paging.itemsPerPage)
}
</b></p>
{/if}
<Spinner secondary />
</div>
</td>