Merge pull request #354 from ClusterCockpit/dev

Dev
This commit is contained in:
Jan Eitzinger 2025-03-13 14:22:44 +01:00 committed by GitHub
commit 590bfd3a10
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 115 additions and 66 deletions

View File

@@ -1008,8 +1008,8 @@ func (api *RestApi) checkAndHandleStopJob(rw http.ResponseWriter, job *schema.Jo
return return
} }
if job == nil || job.StartTime.Unix() >= req.StopTime { if job == nil || job.StartTime.Unix() > req.StopTime {
handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw) handleError(fmt.Errorf("jobId %d (id %d) on %s : stopTime %d must be larger/equal than startTime %d", job.JobID, job.ID, job.Cluster, req.StopTime, job.StartTime.Unix()), http.StatusBadRequest, rw)
return return
} }

View File

@@ -96,27 +96,35 @@ func HandleImportFlag(flag string) error {
} }
job.EnergyFootprint = make(map[string]float64) job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint { for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil { if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh // Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules) if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", job.JobID, job.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt) } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits // Energy: Power (in Watts) * Time (in Seconds)
energy = math.Round(((repository.LoadJobStat(&job, fp, "avg")*float64(job.Duration))/3600/1000)*100) / 100 // Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(&job, fp, "avg") * float64(job.NumNodes)) * (float64(job.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
} }
} else { } else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID) log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, job.ID)
} }
job.EnergyFootprint[fp] = energy job.EnergyFootprint[fp] = metricEnergy
totalEnergy += energy totalEnergy += metricEnergy
} }
job.Energy = (math.Round(totalEnergy*100) / 100) job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil { if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID) log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", job.ID)
return err return err

View File

@@ -93,27 +93,35 @@ func InitDB() error {
} }
job.EnergyFootprint = make(map[string]float64) job.EnergyFootprint = make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint { for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil { if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh // Note: For DB data, calculate and save as kWh
// Energy: Power (in Watts) * Time (in Seconds)
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules) if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt) } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Unit: ( W * s ) / 3600 / 1000 = kWh ; Rounded to 2 nearest digits // Energy: Power (in Watts) * Time (in Seconds)
energy = math.Round(((repository.LoadJobStat(jobMeta, fp, "avg")*float64(jobMeta.Duration))/3600/1000)*100) / 100 // Unit: (W * (s / 3600)) / 1000 = kWh
// Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
rawEnergy := ((repository.LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
metricEnergy = math.Round(rawEnergy*100.0) / 100.0
} }
} else { } else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID) log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
} }
job.EnergyFootprint[fp] = energy job.EnergyFootprint[fp] = metricEnergy
totalEnergy += energy totalEnergy += metricEnergy
} }
job.Energy = (math.Round(totalEnergy*100) / 100) job.Energy = (math.Round(totalEnergy*100.0) / 100.0)
if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil { if job.RawEnergyFootprint, err = json.Marshal(job.EnergyFootprint); err != nil {
log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID) log.Warnf("Error while marshaling energy footprint for job INTO BYTES, DB ID '%v'", jobMeta.ID)
return err return err

View File

@@ -590,28 +590,34 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err return stmt, err
} }
energyFootprint := make(map[string]float64) energyFootprint := make(map[string]float64)
var totalEnergy float64
var energy float64
// Total Job Energy Outside Loop
totalEnergy := 0.0
for _, fp := range sc.EnergyFootprint { for _, fp := range sc.EnergyFootprint {
// Always Init Metric Energy Inside Loop
metricEnergy := 0.0
if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil { if i, err := archive.MetricIndex(sc.MetricConfig, fp); err == nil {
// Note: For DB data, calculate and save as kWh // Note: For DB data, calculate and save as kWh
if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh) if sc.MetricConfig[i].Energy == "energy" { // this metric has energy as unit (Joules or Wh)
log.Warnf("Update EnergyFootprint for Job %d and Metric %s on cluster %s: Set to 'energy' in cluster.json: Not implemented, will return 0.0", jobMeta.JobID, jobMeta.Cluster, fp)
// FIXME: Needs sum as stats type // FIXME: Needs sum as stats type
} else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt) } else if sc.MetricConfig[i].Energy == "power" { // this metric has power as unit (Watt)
// Energy: Power (in Watts) * Time (in Seconds) // Energy: Power (in Watts) * Time (in Seconds)
// Unit: (( W * s ) / 3600) / 1000 = kWh ; Rounded to 2 nearest digits: (Energy * 100) / 100 // Unit: (W * (s / 3600)) / 1000 = kWh
// Here: All-Node Metric Average * Number of Nodes * Job Runtime // Round 2 Digits: round(Energy * 100) / 100
// Here: (All-Node Metric Average * Number of Nodes) * (Job Duration in Seconds / 3600) / 1000
// Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1 // Note: Shared Jobs handled correctly since "Node Average" is based on partial resources, while "numNodes" factor is 1
metricNodeSum := LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes) * float64(jobMeta.Duration) rawEnergy := ((LoadJobStat(jobMeta, fp, "avg") * float64(jobMeta.NumNodes)) * (float64(jobMeta.Duration) / 3600.0)) / 1000.0
energy = math.Round(((metricNodeSum/3600)/1000)*100) / 100 metricEnergy = math.Round(rawEnergy*100.0) / 100.0
} }
} else { } else {
log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID) log.Warnf("Error while collecting energy metric %s for job, DB ID '%v', return '0.0'", fp, jobMeta.ID)
} }
energyFootprint[fp] = energy energyFootprint[fp] = metricEnergy
totalEnergy += energy totalEnergy += metricEnergy
// log.Infof("Metric %s Average %f -> %f kWh | Job %d Total -> %f kWh", fp, LoadJobStat(jobMeta, fp, "avg"), energy, jobMeta.JobID, totalEnergy)
} }
var rawFootprint []byte var rawFootprint []byte
@@ -620,7 +626,7 @@ func (r *JobRepository) UpdateEnergy(
return stmt, err return stmt, err
} }
return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100) / 100)), nil return stmt.Set("energy_footprint", string(rawFootprint)).Set("energy", (math.Round(totalEnergy*100.0) / 100.0)), nil
} }
func (r *JobRepository) UpdateFootprint( func (r *JobRepository) UpdateFootprint(

View File

@@ -128,15 +128,24 @@
if (!job) return; if (!job) return;
const pendingMetrics = [ const pendingMetrics = [
...(ccconfig[`job_view_selectedMetrics:${job.cluster}`] || ...(
$initq.data.globalMetrics.reduce((names, gm) => { (
if (gm.availability.find((av) => av.cluster === job.cluster && av.subClusters.includes(job.subCluster))) { ccconfig[`job_view_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
names.push(gm.name); ccconfig[`job_view_selectedMetrics:${job.cluster}`]
} ) ||
return names; $initq.data.globalMetrics
}, []) .reduce((names, gm) => {
if (gm.availability.find((av) => av.cluster === job.cluster && av.subClusters.includes(job.subCluster))) {
names.push(gm.name);
}
return names;
}, [])
), ),
...(ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`] || ...(
(
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
ccconfig[`job_view_nodestats_selectedMetrics:${job.cluster}`]
) ||
ccconfig[`job_view_nodestats_selectedMetrics`] ccconfig[`job_view_nodestats_selectedMetrics`]
), ),
]; ];

View File

@@ -137,5 +137,5 @@
bind:metrics bind:metrics
bind:isOpen={isMetricsSelectionOpen} bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint bind:showFootprint
footprintSelect={true} footprintSelect
/> />

View File

@@ -29,8 +29,8 @@
import Refresher from "./generic/helper/Refresher.svelte"; import Refresher from "./generic/helper/Refresher.svelte";
export let displayType; export let displayType;
export let cluster; export let cluster = null;
export let subCluster = ""; export let subCluster = null;
export let from = null; export let from = null;
export let to = null; export let to = null;
@@ -60,7 +60,10 @@
let hostnameFilter = ""; let hostnameFilter = "";
let pendingHostnameFilter = ""; let pendingHostnameFilter = "";
let selectedMetric = ccconfig.system_view_selectedMetric || ""; let selectedMetric = ccconfig.system_view_selectedMetric || "";
let selectedMetrics = ccconfig[`node_list_selectedMetrics:${cluster}`] || [ccconfig.system_view_selectedMetric]; let selectedMetrics = (
ccconfig[`node_list_selectedMetrics:${cluster}:${subCluster}`] ||
ccconfig[`node_list_selectedMetrics:${cluster}`]
) || [ccconfig.system_view_selectedMetric];
let isMetricsSelectionOpen = false; let isMetricsSelectionOpen = false;
/* /*
@@ -191,6 +194,7 @@
<MetricSelection <MetricSelection
{cluster} {cluster}
{subCluster}
configName="node_list_selectedMetrics" configName="node_list_selectedMetrics"
metrics={selectedMetrics} metrics={selectedMetrics}
bind:isOpen={isMetricsSelectionOpen} bind:isOpen={isMetricsSelectionOpen}

View File

@@ -352,7 +352,7 @@
bind:metrics bind:metrics
bind:isOpen={isMetricsSelectionOpen} bind:isOpen={isMetricsSelectionOpen}
bind:showFootprint bind:showFootprint
footprintSelect={true} footprintSelect
/> />
<HistogramSelection <HistogramSelection

View File

@@ -47,12 +47,12 @@
$: { $: {
if (allMetrics != null) { if (allMetrics != null) {
if (cluster == null) { if (!cluster) {
for (let metric of globalMetrics) allMetrics.add(metric.name); for (let metric of globalMetrics) allMetrics.add(metric.name);
} else { } else {
allMetrics.clear(); allMetrics.clear();
for (let gm of globalMetrics) { for (let gm of globalMetrics) {
if (subCluster == null) { if (!subCluster) {
if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name); if (gm.availability.find((av) => av.cluster === cluster)) allMetrics.add(gm.name);
} else { } else {
if (gm.availability.find((av) => av.cluster === cluster && av.subClusters.includes(subCluster))) allMetrics.add(gm.name); if (gm.availability.find((av) => av.cluster === cluster && av.subClusters.includes(subCluster))) allMetrics.add(gm.name);
@@ -67,7 +67,7 @@
function printAvailability(metric, cluster) { function printAvailability(metric, cluster) {
const avail = globalMetrics.find((gm) => gm.name === metric)?.availability const avail = globalMetrics.find((gm) => gm.name === metric)?.availability
if (cluster == null) { if (!cluster) {
return avail.map((av) => av.cluster).join(',') return avail.map((av) => av.cluster).join(',')
} else { } else {
return avail.find((av) => av.cluster === cluster).subClusters.join(',') return avail.find((av) => av.cluster === cluster).subClusters.join(',')
@@ -112,10 +112,17 @@
metrics = newMetricsOrder.filter((m) => unorderedMetrics.includes(m)); metrics = newMetricsOrder.filter((m) => unorderedMetrics.includes(m));
isOpen = false; isOpen = false;
showFootprint = !!pendingShowFootprint; let configKey;
if (cluster && subCluster) {
configKey = `${configName}:${cluster}:${subCluster}`;
} else if (cluster && !subCluster) {
configKey = `${configName}:${cluster}`;
} else {
configKey = `${configName}`;
}
updateConfigurationMutation({ updateConfigurationMutation({
name: cluster == null ? configName : `${configName}:${cluster}`, name: configKey,
value: JSON.stringify(metrics), value: JSON.stringify(metrics),
}).subscribe((res) => { }).subscribe((res) => {
if (res.fetching === false && res.error) { if (res.fetching === false && res.error) {
@@ -123,17 +130,20 @@
} }
}); });
updateConfigurationMutation({ if (footprintSelect) {
name: showFootprint = !!pendingShowFootprint;
cluster == null updateConfigurationMutation({
? "plot_list_showFootprint" name:
: `plot_list_showFootprint:${cluster}`, !cluster
value: JSON.stringify(showFootprint), ? "plot_list_showFootprint"
}).subscribe((res) => { : `plot_list_showFootprint:${cluster}`,
if (res.fetching === false && res.error) { value: JSON.stringify(showFootprint),
throw res.error; }).subscribe((res) => {
} if (res.fetching === false && res.error) {
}); throw res.error;
}
});
};
dispatch('update-metrics', metrics); dispatch('update-metrics', metrics);
} }

View File

@@ -37,9 +37,10 @@
sorting = {}, sorting = {},
isMetricSelectionOpen = false, isMetricSelectionOpen = false,
availableMetrics = new Set(), availableMetrics = new Set(),
selectedMetrics = selectedMetrics = (
getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`] || getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}:${job.subCluster}`] ||
getContext("cc-config")["job_view_nodestats_selectedMetrics"]; getContext("cc-config")[`job_view_nodestats_selectedMetrics:${job.cluster}`]
) || getContext("cc-config")["job_view_nodestats_selectedMetrics"];
for (let metric of sortedJobMetrics) { for (let metric of sortedJobMetrics) {
// Not Exclusive or Multi-Node: get maxScope directly (mostly: node) // Not Exclusive or Multi-Node: get maxScope directly (mostly: node)
@@ -169,6 +170,7 @@
<MetricSelection <MetricSelection
cluster={job.cluster} cluster={job.cluster}
subCluster={job.subCluster}
configName="job_view_nodestats_selectedMetrics" configName="job_view_nodestats_selectedMetrics"
bind:allMetrics={availableMetrics} bind:allMetrics={availableMetrics}
bind:metrics={selectedMetrics} bind:metrics={selectedMetrics}

View File

@@ -217,13 +217,15 @@
<tr> <tr>
<td colspan={selectedMetrics.length + 1}> <td colspan={selectedMetrics.length + 1}>
<div style="text-align:center;"> <div style="text-align:center;">
<p><b> {#if !usePaging}
Loading nodes {nodes.length + 1} to <p><b>
{ matchedNodes Loading nodes {nodes.length + 1} to
? `${(nodes.length + paging.itemsPerPage) > matchedNodes ? matchedNodes : (nodes.length + paging.itemsPerPage)} of ${matchedNodes} total` { matchedNodes
: (nodes.length + paging.itemsPerPage) ? `${(nodes.length + paging.itemsPerPage) > matchedNodes ? matchedNodes : (nodes.length + paging.itemsPerPage)} of ${matchedNodes} total`
} : (nodes.length + paging.itemsPerPage)
</b></p> }
</b></p>
{/if}
<Spinner secondary /> <Spinner secondary />
</div> </div>
</td> </td>