Merge pull request #493 from ClusterCockpit/dev

Dev
This commit is contained in:
Jan Eitzinger
2026-02-20 07:49:13 +01:00
committed by GitHub
7 changed files with 91 additions and 31 deletions

View File

@@ -279,8 +279,6 @@ func initSubsystems() error {
return fmt.Errorf("initializing archive: %w", err) return fmt.Errorf("initializing archive: %w", err)
} }
// Note: metricstore.Init() is called later in runServer() with proper configuration
// Handle database re-initialization // Handle database re-initialization
if flagReinitDB { if flagReinitDB {
if err := importer.InitDB(); err != nil { if err := importer.InitDB(); err != nil {

View File

@@ -113,8 +113,6 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
} }
} }
fmt.Printf("Result: %#v\n", healthResults)
cclog.Debugf("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs)) cclog.Debugf("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs))
startDB := time.Now() startDB := time.Now()

View File

@@ -274,7 +274,7 @@ type NodeStateWithNode struct {
func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) { func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) {
rows, err := sq.Select( rows, err := sq.Select(
"node_state.id", "node_state.time_stamp", "node_state.node_state", "node_state.id", "node_state.time_stamp", "node_state.node_state",
"node_state.health_state", "node_state.health_metrics", "node_state.health_state", "COALESCE(node_state.health_metrics, '')",
"node_state.cpus_allocated", "node_state.memory_allocated", "node_state.cpus_allocated", "node_state.memory_allocated",
"node_state.gpus_allocated", "node_state.jobs_running", "node_state.gpus_allocated", "node_state.jobs_running",
"node.hostname", "node.cluster", "node.subcluster", "node.hostname", "node.cluster", "node.subcluster",

View File

@@ -19,6 +19,14 @@ import (
"github.com/ClusterCockpit/cc-lib/v2/util" "github.com/ClusterCockpit/cc-lib/v2/util"
) )
// metadataKeys collects the keys of m into a freshly allocated slice.
//
// The result always has exactly len(m) entries and is never nil, even for a
// nil or empty map. Keys appear in map iteration order, which Go leaves
// unspecified, so callers must not rely on any particular ordering.
func metadataKeys(m map[string]string) []string {
	names := make([]string, len(m))
	idx := 0
	for name := range m {
		names[idx] = name
		idx++
	}
	return names
}
const ( const (
// defaultConfigPath is the default path for application tagging configuration // defaultConfigPath is the default path for application tagging configuration
defaultConfigPath = "./var/tagger/apps" defaultConfigPath = "./var/tagger/apps"
@@ -158,29 +166,54 @@ func (t *AppTagger) Register() error {
// Only the first matching application is tagged. // Only the first matching application is tagged.
func (t *AppTagger) Match(job *schema.Job) { func (t *AppTagger) Match(job *schema.Job) {
r := repository.GetJobRepository() r := repository.GetJobRepository()
if len(t.apps) == 0 {
cclog.Warn("AppTagger: no app patterns loaded, skipping match")
return
}
metadata, err := r.FetchMetadata(job) metadata, err := r.FetchMetadata(job)
if err != nil { if err != nil {
cclog.Infof("Cannot fetch metadata for job: %d on %s", job.JobID, job.Cluster) cclog.Infof("AppTagger: cannot fetch metadata for job %d on %s: %v", job.JobID, job.Cluster, err)
return
}
if metadata == nil {
cclog.Infof("AppTagger: metadata is nil for job %d on %s", job.JobID, job.Cluster)
return return
} }
jobscript, ok := metadata["jobScript"] jobscript, ok := metadata["jobScript"]
if ok { if !ok {
cclog.Infof("AppTagger: no 'jobScript' key in metadata for job %d on %s (keys: %v)",
job.JobID, job.Cluster, metadataKeys(metadata))
return
}
if len(jobscript) == 0 {
cclog.Infof("AppTagger: empty jobScript for job %d on %s", job.JobID, job.Cluster)
return
}
id := *job.ID id := *job.ID
jobscriptLower := strings.ToLower(jobscript) jobscriptLower := strings.ToLower(jobscript)
cclog.Debugf("AppTagger: matching job %d (script length: %d) against %d apps", id, len(jobscriptLower), len(t.apps))
out:
for _, a := range t.apps { for _, a := range t.apps {
for _, re := range a.patterns { for _, re := range a.patterns {
if re.MatchString(jobscriptLower) { if re.MatchString(jobscriptLower) {
if !r.HasTag(id, t.tagType, a.tag) { if r.HasTag(id, t.tagType, a.tag) {
r.AddTagOrCreateDirect(id, t.tagType, a.tag) cclog.Debugf("AppTagger: job %d already has tag %s:%s, skipping", id, t.tagType, a.tag)
}
break out
}
}
}
} else { } else {
cclog.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster) cclog.Infof("AppTagger: pattern '%s' matched for app '%s' on job %d", re.String(), a.tag, id)
if _, err := r.AddTagOrCreateDirect(id, t.tagType, a.tag); err != nil {
cclog.Errorf("AppTagger: failed to add tag '%s' to job %d: %v", a.tag, id, err)
} }
} }
return
}
}
}
cclog.Debugf("AppTagger: no pattern matched for job %d on %s", id, job.Cluster)
}

View File

@@ -20,6 +20,7 @@
import { queryStore, gql, getContextClient } from "@urql/svelte"; import { queryStore, gql, getContextClient } from "@urql/svelte";
import { Card, Spinner } from "@sveltestrap/sveltestrap"; import { Card, Spinner } from "@sveltestrap/sveltestrap";
import { maxScope, checkMetricAvailability } from "../utils.js"; import { maxScope, checkMetricAvailability } from "../utils.js";
import uPlot from "uplot";
import JobInfo from "./JobInfo.svelte"; import JobInfo from "./JobInfo.svelte";
import MetricPlot from "../plots/MetricPlot.svelte"; import MetricPlot from "../plots/MetricPlot.svelte";
import JobFootprint from "../helper/JobFootprint.svelte"; import JobFootprint from "../helper/JobFootprint.svelte";
@@ -74,13 +75,17 @@
} }
`; `;
/* Var Init*/
// svelte-ignore state_referenced_locally
let plotSync = uPlot.sync(`jobMetricStack-${job.cluster}-${job.id}`);
/* State Init */ /* State Init */
let zoomStates = $state({}); let zoomStates = $state({});
let thresholdStates = $state({}); let thresholdStates = $state({});
/* Derived */ /* Derived */
const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0); const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0);
const jobId = $derived(job?.id); const jobId = $derived(job.id);
const scopes = $derived.by(() => { const scopes = $derived.by(() => {
if (job.numNodes == 1) { if (job.numNodes == 1) {
if (job.numAcc >= 1) return ["core", "accelerator"]; if (job.numAcc >= 1) return ["core", "accelerator"];
@@ -233,6 +238,7 @@
numaccs={job.numAcc} numaccs={job.numAcc}
zoomState={zoomStates[metric.data.name] || null} zoomState={zoomStates[metric.data.name] || null}
thresholdState={thresholdStates[metric.data.name] || null} thresholdState={thresholdStates[metric.data.name] || null}
{plotSync}
/> />
{:else} {:else}
<Card body class="mx-2" color="warning"> <Card body class="mx-2" color="warning">

View File

@@ -32,12 +32,28 @@
/* Const Init */ /* Const Init */
const client = getContextClient(); const client = getContextClient();
const stateOptions = [
"all",
"allocated",
"idle",
"down",
"mixed",
"reserved",
"unknown",
];
const healthOptions = [
"all",
"full",
"partial",
"failed",
];
/* State Init */ /* State Init */
let pieWidth = $state(0); let pieWidth = $state(0);
let querySorting = $state({ field: "startTime", type: "col", order: "DESC" })
let tableHostFilter = $state(""); let tableHostFilter = $state("");
let tableStateFilter = $state(""); let tableStateFilter = $state(stateOptions[0]);
let tableHealthFilter = $state(""); let tableHealthFilter = $state(healthOptions[0]);
let healthTableSorting = $state( let healthTableSorting = $state(
{ {
schedulerState: { dir: "down", active: true }, schedulerState: { dir: "down", active: true },
@@ -78,7 +94,7 @@
`, `,
variables: { variables: {
nodeFilter: { cluster: { eq: cluster }}, nodeFilter: { cluster: { eq: cluster }},
sorting: { field: "startTime", type: "col", order: "DESC" }, sorting: querySorting,
}, },
requestPolicy: "network-only" requestPolicy: "network-only"
})); }));
@@ -98,10 +114,10 @@
if (tableHostFilter != "") { if (tableHostFilter != "") {
pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter)) pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter))
} }
if (tableStateFilter != "") { if (tableStateFilter != "all") {
pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter)) pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter))
} }
if (tableHealthFilter != "") { if (tableHealthFilter != "all") {
pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter)) pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter))
} }
return pendingTableData return pendingTableData
@@ -148,7 +164,7 @@
<Refresher <Refresher
initially={120} initially={120}
onRefresh={(interval) => { onRefresh={(interval) => {
sorting = { field: "startTime", type: "col", order: "DESC" } querySorting = { field: "startTime", type: "col", order: "DESC" };
}} }}
/> />
</Col> </Col>
@@ -280,8 +296,8 @@
<thead> <thead>
<!-- Header Row 1: Titles and Sorting --> <!-- Header Row 1: Titles and Sorting -->
<tr> <tr>
<th style="width: 7.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('hostname')}> <th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('hostname')}>
Host Hosts ({filteredTableData.length})
<Icon <Icon
name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname'] name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname']
.active .active
@@ -289,7 +305,7 @@
: ''}" : ''}"
/> />
</th> </th>
<th style="width: 8.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('schedulerState')}> <th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('schedulerState')}>
Scheduler State Scheduler State
<Icon <Icon
name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState'] name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState']
@@ -298,7 +314,7 @@
: ''}" : ''}"
/> />
</th> </th>
<th style="width: 7.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('healthState')}> <th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('healthState')}>
Health State Health State
<Icon <Icon
name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState'] name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState']
@@ -322,7 +338,11 @@
</th> </th>
<th> <th>
<InputGroup size="sm"> <InputGroup size="sm">
<Input type="text" bind:value={tableStateFilter}/> <Input type="select" bind:value={tableStateFilter}>
{#each stateOptions as so}
<option value={so}>{so}</option>
{/each}
</Input>
<InputGroupText> <InputGroupText>
<Icon name="search"></Icon> <Icon name="search"></Icon>
</InputGroupText> </InputGroupText>
@@ -330,7 +350,11 @@
</th> </th>
<th> <th>
<InputGroup size="sm"> <InputGroup size="sm">
<Input type="text" bind:value={tableHealthFilter}/> <Input type="select" bind:value={tableHealthFilter}>
{#each healthOptions as ho}
<option value={ho}>{ho}</option>
{/each}
</Input>
<InputGroupText> <InputGroupText>
<Icon name="search"></Icon> <Icon name="search"></Icon>
</InputGroupText> </InputGroupText>

View File

@@ -211,6 +211,7 @@
timestep={metricData.data.metric.timestep} timestep={metricData.data.metric.timestep}
series={metricData.data.metric.series} series={metricData.data.metric.series}
height={375} height={375}
{plotSync}
forNode forNode
/> />
{/if} {/if}