Merge branch 'dev' into 275_add_tag_scope

This commit is contained in:
Christoph Kluge 2024-08-23 14:52:42 +02:00
commit 31765ce0ef
11 changed files with 539 additions and 442 deletions

View File

@ -24,9 +24,19 @@ SVELTE_COMPONENTS = status \
SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS))) SVELTE_TARGETS = $(addprefix $(FRONTEND)/public/build/,$(addsuffix .js, $(SVELTE_COMPONENTS)))
SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \ SVELTE_SRC = $(wildcard $(FRONTEND)/src/*.svelte) \
$(wildcard $(FRONTEND)/src/*.js) \ $(wildcard $(FRONTEND)/src/*.js) \
$(wildcard $(FRONTEND)/src/filters/*.svelte) \ $(wildcard $(FRONTEND)/src/analysis/*.svelte) \
$(wildcard $(FRONTEND)/src/plots/*.svelte) \ $(wildcard $(FRONTEND)/src/config/*.svelte) \
$(wildcard $(FRONTEND)/src/joblist/*.svelte) $(wildcard $(FRONTEND)/src/config/admin/*.svelte) \
$(wildcard $(FRONTEND)/src/config/user/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/*.js) \
$(wildcard $(FRONTEND)/src/generic/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/filters/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/plots/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/joblist/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/helper/*.svelte) \
$(wildcard $(FRONTEND)/src/generic/select/*.svelte) \
$(wildcard $(FRONTEND)/src/header/*.svelte) \
$(wildcard $(FRONTEND)/src/job/*.svelte)
.PHONY: clean distclean test tags frontend swagger graphql $(TARGET) .PHONY: clean distclean test tags frontend swagger graphql $(TARGET)

View File

@ -38,6 +38,15 @@ var (
apiHandle *api.RestApi apiHandle *api.RestApi
) )
// onFailureResponse is the authentication-failure callback handed to the API
// auth middlewares. It answers with HTTP 401 and a JSON object containing the
// status text ("status") and the failure reason ("error").
//
// rw receives the response; r is the rejected request and is not inspected;
// err is the reason reported by the auth handler. A nil err is tolerated and
// produces an empty "error" field instead of a nil-pointer panic.
func onFailureResponse(rw http.ResponseWriter, r *http.Request, err error) {
	msg := ""
	if err != nil {
		msg = err.Error()
	}

	// Set (not Add) so that a Content-Type placed on the response earlier is
	// replaced rather than duplicated.
	rw.Header().Set("Content-Type", "application/json")
	rw.WriteHeader(http.StatusUnauthorized)

	// The status line has already been written, so an encode/write failure
	// (typically a disconnected client) can no longer be reported to the
	// client; it is discarded deliberately.
	_ = json.NewEncoder(rw).Encode(map[string]string{
		"status": http.StatusText(http.StatusUnauthorized),
		"error":  msg,
	})
}
func serverInit() { func serverInit() {
// Setup the http.Handler/Router used by the server // Setup the http.Handler/Router used by the server
graph.Init() graph.Init()
@ -166,64 +175,32 @@ func serverInit() {
return authHandle.AuthApi( return authHandle.AuthApi(
// On success; // On success;
next, next,
// On failure: JSON Response // On failure: JSON Response
func(rw http.ResponseWriter, r *http.Request, err error) { onFailureResponse)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusUnauthorized)
json.NewEncoder(rw).Encode(map[string]string{
"status": http.StatusText(http.StatusUnauthorized),
"error": err.Error(),
})
})
}) })
userapi.Use(func(next http.Handler) http.Handler { userapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthUserApi( return authHandle.AuthUserApi(
// On success; // On success;
next, next,
// On failure: JSON Response // On failure: JSON Response
func(rw http.ResponseWriter, r *http.Request, err error) { onFailureResponse)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusUnauthorized)
json.NewEncoder(rw).Encode(map[string]string{
"status": http.StatusText(http.StatusUnauthorized),
"error": err.Error(),
})
})
}) })
configapi.Use(func(next http.Handler) http.Handler { configapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthConfigApi( return authHandle.AuthConfigApi(
// On success; // On success;
next, next,
// On failure: JSON Response // On failure: JSON Response
func(rw http.ResponseWriter, r *http.Request, err error) { onFailureResponse)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusUnauthorized)
json.NewEncoder(rw).Encode(map[string]string{
"status": http.StatusText(http.StatusUnauthorized),
"error": err.Error(),
})
})
}) })
frontendapi.Use(func(next http.Handler) http.Handler { frontendapi.Use(func(next http.Handler) http.Handler {
return authHandle.AuthFrontendApi( return authHandle.AuthFrontendApi(
// On success; // On success;
next, next,
// On failure: JSON Response // On failure: JSON Response
func(rw http.ResponseWriter, r *http.Request, err error) { onFailureResponse)
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(http.StatusUnauthorized)
json.NewEncoder(rw).Encode(map[string]string{
"status": http.StatusText(http.StatusUnauthorized),
"error": err.Error(),
})
})
}) })
} }

View File

@ -307,6 +307,10 @@ func ArchiveJob(job *schema.Job, ctx context.Context) (*schema.JobMeta, error) {
scopes = append(scopes, schema.MetricScopeCore) scopes = append(scopes, schema.MetricScopeCore)
} }
if job.NumAcc > 0 {
scopes = append(scopes, schema.MetricScopeAccelerator)
}
jobData, err := LoadData(job, allMetrics, scopes, ctx) jobData, err := LoadData(job, allMetrics, scopes, ctx)
if err != nil { if err != nil {
log.Error("Error wile loading job data for archiving") log.Error("Error wile loading job data for archiving")

View File

@ -0,0 +1,21 @@
-- Down migration for the `job` table: removes the energy columns, re-creates
-- one column per footprint metric, fills those columns from the `footprint`
-- JSON document, and finally removes the `footprint` column.
--
-- NOTE(review): the re-created columns carry no declared type or default;
-- confirm this matches what the pre-upgrade schema expects.

-- 1. Remove the energy bookkeeping columns.
ALTER TABLE job DROP COLUMN energy;
ALTER TABLE job DROP COLUMN energy_footprint;

-- 2. Re-create the individual footprint metric columns.
ALTER TABLE job ADD COLUMN flops_any_avg;
ALTER TABLE job ADD COLUMN mem_bw_avg;
ALTER TABLE job ADD COLUMN mem_used_max;
ALTER TABLE job ADD COLUMN load_avg;
ALTER TABLE job ADD COLUMN net_bw_avg;
ALTER TABLE job ADD COLUMN net_data_vol_total;
ALTER TABLE job ADD COLUMN file_bw_avg;
ALTER TABLE job ADD COLUMN file_data_vol_total;

-- 3. Populate every re-created column from the JSON document in one pass.
--    Note the key mismatch: column `load_avg` is read from '$.cpu_load_avg'.
UPDATE job SET
    flops_any_avg       = json_extract(footprint, '$.flops_any_avg'),
    mem_bw_avg          = json_extract(footprint, '$.mem_bw_avg'),
    mem_used_max        = json_extract(footprint, '$.mem_used_max'),
    load_avg            = json_extract(footprint, '$.cpu_load_avg'),
    net_bw_avg          = json_extract(footprint, '$.net_bw_avg'),
    net_data_vol_total  = json_extract(footprint, '$.net_data_vol_total'),
    file_bw_avg         = json_extract(footprint, '$.file_bw_avg'),
    file_data_vol_total = json_extract(footprint, '$.file_data_vol_total');

-- 4. The JSON document has been unpacked; remove its column.
ALTER TABLE job DROP COLUMN footprint;

View File

@ -1,12 +1,27 @@
CREATE INDEX IF NOT EXISTS job_by_project ON job (project);
CREATE INDEX IF NOT EXISTS job_list_projects ON job (project, job_state);
ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0; ALTER TABLE job ADD COLUMN energy REAL NOT NULL DEFAULT 0.0;
ALTER TABLE job ADD COLUMN energy_footprint TEXT DEFAULT NULL;
ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL; ALTER TABLE job ADD COLUMN footprint TEXT DEFAULT NULL;
ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global'; ALTER TABLE tag ADD COLUMN tag_scope TEXT NOT NULL DEFAULT 'global';
UPDATE job SET footprint = '{"flops_any_avg": 0.0}'; UPDATE job SET footprint = '{"flops_any_avg": 0.0}';
UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg); UPDATE job SET footprint = json_replace(footprint, '$.flops_any_avg', job.flops_any_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg); UPDATE job SET footprint = json_insert(footprint, '$.mem_bw_avg', job.mem_bw_avg);
UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max); UPDATE job SET footprint = json_insert(footprint, '$.mem_used_max', job.mem_used_max);
UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg); UPDATE job SET footprint = json_insert(footprint, '$.cpu_load_avg', job.load_avg);
UPDATE job SET footprint = json_insert(footprint, '$.net_bw_avg', job.net_bw_avg) WHERE job.net_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.net_data_vol_total', job.net_data_vol_total) WHERE job.net_data_vol_total != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_bw_avg', job.file_bw_avg) WHERE job.file_bw_avg != 0;
UPDATE job SET footprint = json_insert(footprint, '$.file_data_vol_total', job.file_data_vol_total) WHERE job.file_data_vol_total != 0;
ALTER TABLE job DROP flops_any_avg; ALTER TABLE job DROP flops_any_avg;
ALTER TABLE job DROP mem_bw_avg; ALTER TABLE job DROP mem_bw_avg;
ALTER TABLE job DROP mem_used_max; ALTER TABLE job DROP mem_used_max;
ALTER TABLE job DROP load_avg; ALTER TABLE job DROP load_avg;
ALTER TABLE job DROP net_bw_avg;
ALTER TABLE job DROP net_data_vol_total;
ALTER TABLE job DROP file_bw_avg;
ALTER TABLE job DROP file_data_vol_total;

View File

@ -47,11 +47,11 @@ type SubCluster struct {
type SubClusterConfig struct { type SubClusterConfig struct {
Name string `json:"name"` Name string `json:"name"`
Footprint string `json:"footprint,omitempty"`
Peak float64 `json:"peak"` Peak float64 `json:"peak"`
Normal float64 `json:"normal"` Normal float64 `json:"normal"`
Caution float64 `json:"caution"` Caution float64 `json:"caution"`
Alert float64 `json:"alert"` Alert float64 `json:"alert"`
Footprint string `json:"footprint,omitempty"`
Remove bool `json:"remove"` Remove bool `json:"remove"`
LowerIsBetter bool `json:"lowerIsBetter"` LowerIsBetter bool `json:"lowerIsBetter"`
Energy bool `json:"energy"` Energy bool `json:"energy"`
@ -62,14 +62,14 @@ type MetricConfig struct {
Name string `json:"name"` Name string `json:"name"`
Scope MetricScope `json:"scope"` Scope MetricScope `json:"scope"`
Aggregation string `json:"aggregation"` Aggregation string `json:"aggregation"`
Footprint string `json:"footprint,omitempty"`
SubClusters []*SubClusterConfig `json:"subClusters,omitempty"` SubClusters []*SubClusterConfig `json:"subClusters,omitempty"`
Timestep int `json:"timestep"`
Peak float64 `json:"peak"` Peak float64 `json:"peak"`
Normal float64 `json:"normal"` Normal float64 `json:"normal"`
Caution float64 `json:"caution"` Caution float64 `json:"caution"`
Alert float64 `json:"alert"` Alert float64 `json:"alert"`
Timestep int `json:"timestep"`
LowerIsBetter bool `json:"lowerIsBetter"` LowerIsBetter bool `json:"lowerIsBetter"`
Footprint string `json:"footprint,omitempty"`
Energy bool `json:"energy"` Energy bool `json:"energy"`
} }

View File

@ -32,7 +32,7 @@ type BaseJob struct {
Footprint map[string]float64 `json:"footprint"` Footprint map[string]float64 `json:"footprint"`
MetaData map[string]string `json:"metaData"` MetaData map[string]string `json:"metaData"`
ConcurrentJobs JobLinkResultList `json:"concurrentJobs"` ConcurrentJobs JobLinkResultList `json:"concurrentJobs"`
Energy float64 `json:"energy"` Energy float64 `json:"energy" db:"energy"`
ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"` ArrayJobId int64 `json:"arrayJobId,omitempty" db:"array_job_id" example:"123000"`
Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"` Walltime int64 `json:"walltime,omitempty" db:"walltime" example:"86400" minimum:"1"`
JobID int64 `json:"jobId" db:"job_id" example:"123000"` JobID int64 `json:"jobId" db:"job_id" example:"123000"`

View File

@ -39,6 +39,23 @@
"avg" "avg"
] ]
}, },
"footprint": {
"description": "Is it a footprint metric and what type",
"type": "string",
"enum": [
"avg",
"max",
"min"
]
},
"energy": {
"description": "Is it used to calculate job energy",
"type": "boolean"
},
"lowerIsBetter": {
"description": "Is lower better.",
"type": "boolean"
},
"peak": { "peak": {
"description": "Metric peak threshold (Upper metric limit)", "description": "Metric peak threshold (Upper metric limit)",
"type": "number" "type": "number"
@ -65,6 +82,23 @@
"description": "Hardware partition name", "description": "Hardware partition name",
"type": "string" "type": "string"
}, },
"footprint": {
"description": "Is it a footprint metric and what type. Overwrite global setting",
"type": "string",
"enum": [
"avg",
"max",
"min"
]
},
"energy": {
"description": "Is it used to calculate job energy. Overwrite global",
"type": "boolean"
},
"lowerIsBetter": {
"description": "Is lower better. Overwrite global",
"type": "boolean"
},
"peak": { "peak": {
"type": "number" "type": "number"
}, },
@ -78,6 +112,7 @@
"type": "number" "type": "number"
}, },
"remove": { "remove": {
"description": "Remove this metric for this subcluster",
"type": "boolean" "type": "boolean"
} }
}, },

View File

@ -75,7 +75,7 @@
duration, numNodes, numHWThreads, numAcc, duration, numNodes, numHWThreads, numAcc,
SMT, exclusive, partition, subCluster, arrayJobId, SMT, exclusive, partition, subCluster, arrayJobId,
monitoringStatus, state, walltime, monitoringStatus, state, walltime,
tags { id, type, name, scope }, tags { id, type, name },
resources { hostname, hwthreads, accelerators }, resources { hostname, hwthreads, accelerators },
metaData, metaData,
userData { name, email }, userData { name, email },

View File

@ -67,8 +67,9 @@
export let height = "310px"; export let height = "310px";
const footprintData = job?.footprint?.map((jf) => { const footprintData = job?.footprint?.map((jf) => {
// Unit
const fmc = getContext("getMetricConfig")(job.cluster, job.subCluster, jf.name); const fmc = getContext("getMetricConfig")(job.cluster, job.subCluster, jf.name);
if (fmc) {
// Unit
const unit = (fmc?.unit?.prefix ? fmc.unit.prefix : "") + (fmc?.unit?.base ? fmc.unit.base : "") const unit = (fmc?.unit?.prefix ? fmc.unit.prefix : "") + (fmc?.unit?.base ? fmc.unit.base : "")
// Threshold / -Differences // Threshold / -Differences
@ -122,7 +123,18 @@
impact: -1, impact: -1,
}; };
} }
}); } else { // No matching metric config: display as single value
return {
name: jf.name + ' (' + jf.stat + ')',
avg: jf.value,
message:
`No config for metric ${jf.name} found.`,
impact: 4,
};
}
}).sort(function (a, b) { // Sort by impact value primarily, within impact sort name alphabetically
return a.impact - b.impact || ((a.name > b.name) ? 1 : ((b.name > a.name) ? -1 : 0));
});;
function evalFootprint(mean, thresholds, lowerIsBetter, level) { function evalFootprint(mean, thresholds, lowerIsBetter, level) {
// Handle Metrics in which less value is better // Handle Metrics in which less value is better
@ -159,6 +171,7 @@
{/if} {/if}
<CardBody> <CardBody>
{#each footprintData as fpd, index} {#each footprintData as fpd, index}
{#if fpd.impact !== 4}
<div class="mb-1 d-flex justify-content-between"> <div class="mb-1 d-flex justify-content-between">
<div>&nbsp;<b>{fpd.name}</b></div> <div>&nbsp;<b>{fpd.name}</b></div>
<!-- For symmetry, see below ...--> <!-- For symmetry, see below ...-->
@ -213,6 +226,29 @@
</Col> </Col>
{/if} {/if}
</Row> </Row>
{:else}
<div class="mb-1 d-flex justify-content-between">
<div>
&nbsp;<b>{fpd.name}</b>
</div>
<div
class="cursor-help d-inline-flex"
id={`footprint-${job.jobId}-${index}`}
>
<div class="mx-1">
<Icon name="info-circle"/>
</div>
<div>
{fpd.avg}&nbsp;
</div>
</div>
</div>
<Tooltip
target={`footprint-${job.jobId}-${index}`}
placement="right"
offset={[0, 20]}>{fpd.message}</Tooltip
>
{/if}
{/each} {/each}
{#if job?.metaData?.message} {#if job?.metaData?.message}
<hr class="mt-1 mb-2" /> <hr class="mt-1 mb-2" />

View File

@ -309,15 +309,13 @@ export function checkMetricDisabled(m, c, s) { //[m]etric, [c]luster, [s]ubclust
export function getStatsItems() { export function getStatsItems() {
// console.time('stats') // console.time('stats')
// console.log('getStatsItems ...')
const globalMetrics = getContext("globalMetrics") const globalMetrics = getContext("globalMetrics")
const result = globalMetrics.map((gm) => { const result = globalMetrics.map((gm) => {
if (gm?.footprint) { if (gm?.footprint) {
// Footprint contains suffix naming the used stat-type
// console.time('deep') // console.time('deep')
// console.log('Deep Config for', gm.name)
const mc = getMetricConfigDeep(gm.name, null, null) const mc = getMetricConfigDeep(gm.name, null, null)
// console.timeEnd('deep') // console.timeEnd('deep')
if (mc) {
return { return {
field: gm.name + '_' + gm.footprint, field: gm.name + '_' + gm.footprint,
text: gm.name + ' (' + gm.footprint + ')', text: gm.name + ' (' + gm.footprint + ')',
@ -328,6 +326,7 @@ export function getStatsItems() {
enabled: false enabled: false
} }
} }
}
return null return null
}).filter((r) => r != null) }).filter((r) => r != null)
// console.timeEnd('stats') // console.timeEnd('stats')
@ -336,11 +335,9 @@ export function getStatsItems() {
export function getSortItems() { export function getSortItems() {
//console.time('sort') //console.time('sort')
//console.log('getSortItems ...')
const globalMetrics = getContext("globalMetrics") const globalMetrics = getContext("globalMetrics")
const result = globalMetrics.map((gm) => { const result = globalMetrics.map((gm) => {
if (gm?.footprint) { if (gm?.footprint) {
// Footprint contains suffix naming the used stat-type
return { return {
field: gm.name + '_' + gm.footprint, field: gm.name + '_' + gm.footprint,
type: 'foot', type: 'foot',
@ -357,21 +354,22 @@ export function getSortItems() {
function getMetricConfigDeep(metric, cluster, subCluster) { function getMetricConfigDeep(metric, cluster, subCluster) {
const clusters = getContext("clusters"); const clusters = getContext("clusters");
if (cluster != null) { if (cluster != null) {
let c = clusters.find((c) => c.name == cluster); const c = clusters.find((c) => c.name == cluster);
if (subCluster != null) { if (subCluster != null) {
let sc = c.subClusters.find((sc) => sc.name == subCluster); const sc = c.subClusters.find((sc) => sc.name == subCluster);
return sc.metricConfig.find((mc) => mc.name == metric) return sc.metricConfig.find((mc) => mc.name == metric)
} else { } else {
let result; let result;
for (let sc of c.subClusters) { for (let sc of c.subClusters) {
const mc = sc.metricConfig.find((mc) => mc.name == metric) const mc = sc.metricConfig.find((mc) => mc.name == metric)
if (result) { // If lowerIsBetter: Peak is still maximum value, no special case required if (result && mc) { // update result; If lowerIsBetter: Peak is still maximum value, no special case required
result.alert = (mc.alert > result.alert) ? mc.alert : result.alert result.alert = (mc.alert > result.alert) ? mc.alert : result.alert
result.caution = (mc.caution > result.caution) ? mc.caution : result.caution result.caution = (mc.caution > result.caution) ? mc.caution : result.caution
result.normal = (mc.normal > result.normal) ? mc.normal : result.normal result.normal = (mc.normal > result.normal) ? mc.normal : result.normal
result.peak = (mc.peak > result.peak) ? mc.peak : result.peak result.peak = (mc.peak > result.peak) ? mc.peak : result.peak
} else { } else if (mc) {
if (mc) result = {...mc}; // start new result
result = {...mc};
} }
} }
return result return result
@ -381,13 +379,14 @@ function getMetricConfigDeep(metric, cluster, subCluster) {
for (let c of clusters) { for (let c of clusters) {
for (let sc of c.subClusters) { for (let sc of c.subClusters) {
const mc = sc.metricConfig.find((mc) => mc.name == metric) const mc = sc.metricConfig.find((mc) => mc.name == metric)
if (result) { // If lowerIsBetter: Peak is still maximum value, no special case required if (result && mc) { // update result; If lowerIsBetter: Peak is still maximum value, no special case required
result.alert = (mc.alert > result.alert) ? mc.alert : result.alert result.alert = (mc.alert > result.alert) ? mc.alert : result.alert
result.caution = (mc.caution > result.caution) ? mc.caution : result.caution result.caution = (mc.caution > result.caution) ? mc.caution : result.caution
result.normal = (mc.normal > result.normal) ? mc.normal : result.normal result.normal = (mc.normal > result.normal) ? mc.normal : result.normal
result.peak = (mc.peak > result.peak) ? mc.peak : result.peak result.peak = (mc.peak > result.peak) ? mc.peak : result.peak
} else { } else if (mc) {
if (mc) result = {...mc}; // Start new result
result = {...mc};
} }
} }
} }