mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-20 09:47:30 +01:00
@@ -279,8 +279,6 @@ func initSubsystems() error {
|
||||
return fmt.Errorf("initializing archive: %w", err)
|
||||
}
|
||||
|
||||
// Note: metricstore.Init() is called later in runServer() with proper configuration
|
||||
|
||||
// Handle database re-initialization
|
||||
if flagReinitDB {
|
||||
if err := importer.InitDB(); err != nil {
|
||||
|
||||
@@ -113,8 +113,6 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("Result: %#v\n", healthResults)
|
||||
|
||||
cclog.Debugf("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs))
|
||||
startDB := time.Now()
|
||||
|
||||
|
||||
@@ -274,7 +274,7 @@ type NodeStateWithNode struct {
|
||||
func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) {
|
||||
rows, err := sq.Select(
|
||||
"node_state.id", "node_state.time_stamp", "node_state.node_state",
|
||||
"node_state.health_state", "node_state.health_metrics",
|
||||
"node_state.health_state", "COALESCE(node_state.health_metrics, '')",
|
||||
"node_state.cpus_allocated", "node_state.memory_allocated",
|
||||
"node_state.gpus_allocated", "node_state.jobs_running",
|
||||
"node.hostname", "node.cluster", "node.subcluster",
|
||||
|
||||
@@ -19,6 +19,14 @@ import (
|
||||
"github.com/ClusterCockpit/cc-lib/v2/util"
|
||||
)
|
||||
|
||||
func metadataKeys(m map[string]string) []string {
|
||||
keys := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
const (
|
||||
// defaultConfigPath is the default path for application tagging configuration
|
||||
defaultConfigPath = "./var/tagger/apps"
|
||||
@@ -158,29 +166,54 @@ func (t *AppTagger) Register() error {
|
||||
// Only the first matching application is tagged.
|
||||
func (t *AppTagger) Match(job *schema.Job) {
|
||||
r := repository.GetJobRepository()
|
||||
|
||||
if len(t.apps) == 0 {
|
||||
cclog.Warn("AppTagger: no app patterns loaded, skipping match")
|
||||
return
|
||||
}
|
||||
|
||||
metadata, err := r.FetchMetadata(job)
|
||||
if err != nil {
|
||||
cclog.Infof("Cannot fetch metadata for job: %d on %s", job.JobID, job.Cluster)
|
||||
cclog.Infof("AppTagger: cannot fetch metadata for job %d on %s: %v", job.JobID, job.Cluster, err)
|
||||
return
|
||||
}
|
||||
|
||||
if metadata == nil {
|
||||
cclog.Infof("AppTagger: metadata is nil for job %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
}
|
||||
|
||||
jobscript, ok := metadata["jobScript"]
|
||||
if ok {
|
||||
if !ok {
|
||||
cclog.Infof("AppTagger: no 'jobScript' key in metadata for job %d on %s (keys: %v)",
|
||||
job.JobID, job.Cluster, metadataKeys(metadata))
|
||||
return
|
||||
}
|
||||
|
||||
if len(jobscript) == 0 {
|
||||
cclog.Infof("AppTagger: empty jobScript for job %d on %s", job.JobID, job.Cluster)
|
||||
return
|
||||
}
|
||||
|
||||
id := *job.ID
|
||||
jobscriptLower := strings.ToLower(jobscript)
|
||||
cclog.Debugf("AppTagger: matching job %d (script length: %d) against %d apps", id, len(jobscriptLower), len(t.apps))
|
||||
|
||||
out:
|
||||
for _, a := range t.apps {
|
||||
for _, re := range a.patterns {
|
||||
if re.MatchString(jobscriptLower) {
|
||||
if !r.HasTag(id, t.tagType, a.tag) {
|
||||
r.AddTagOrCreateDirect(id, t.tagType, a.tag)
|
||||
}
|
||||
break out
|
||||
}
|
||||
}
|
||||
}
|
||||
if r.HasTag(id, t.tagType, a.tag) {
|
||||
cclog.Debugf("AppTagger: job %d already has tag %s:%s, skipping", id, t.tagType, a.tag)
|
||||
} else {
|
||||
cclog.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster)
|
||||
cclog.Infof("AppTagger: pattern '%s' matched for app '%s' on job %d", re.String(), a.tag, id)
|
||||
if _, err := r.AddTagOrCreateDirect(id, t.tagType, a.tag); err != nil {
|
||||
cclog.Errorf("AppTagger: failed to add tag '%s' to job %d: %v", a.tag, id, err)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cclog.Debugf("AppTagger: no pattern matched for job %d on %s", id, job.Cluster)
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
||||
import { Card, Spinner } from "@sveltestrap/sveltestrap";
|
||||
import { maxScope, checkMetricAvailability } from "../utils.js";
|
||||
import uPlot from "uplot";
|
||||
import JobInfo from "./JobInfo.svelte";
|
||||
import MetricPlot from "../plots/MetricPlot.svelte";
|
||||
import JobFootprint from "../helper/JobFootprint.svelte";
|
||||
@@ -74,13 +75,17 @@
|
||||
}
|
||||
`;
|
||||
|
||||
/* Var Init*/
|
||||
// svelte-ignore state_referenced_locally
|
||||
let plotSync = uPlot.sync(`jobMetricStack-${job.cluster}-${job.id}`);
|
||||
|
||||
/* State Init */
|
||||
let zoomStates = $state({});
|
||||
let thresholdStates = $state({});
|
||||
|
||||
/* Derived */
|
||||
const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0);
|
||||
const jobId = $derived(job?.id);
|
||||
const jobId = $derived(job.id);
|
||||
const scopes = $derived.by(() => {
|
||||
if (job.numNodes == 1) {
|
||||
if (job.numAcc >= 1) return ["core", "accelerator"];
|
||||
@@ -233,6 +238,7 @@
|
||||
numaccs={job.numAcc}
|
||||
zoomState={zoomStates[metric.data.name] || null}
|
||||
thresholdState={thresholdStates[metric.data.name] || null}
|
||||
{plotSync}
|
||||
/>
|
||||
{:else}
|
||||
<Card body class="mx-2" color="warning">
|
||||
|
||||
@@ -32,12 +32,28 @@
|
||||
|
||||
/* Const Init */
|
||||
const client = getContextClient();
|
||||
const stateOptions = [
|
||||
"all",
|
||||
"allocated",
|
||||
"idle",
|
||||
"down",
|
||||
"mixed",
|
||||
"reserved",
|
||||
"unknown",
|
||||
];
|
||||
const healthOptions = [
|
||||
"all",
|
||||
"full",
|
||||
"partial",
|
||||
"failed",
|
||||
];
|
||||
|
||||
/* State Init */
|
||||
let pieWidth = $state(0);
|
||||
let querySorting = $state({ field: "startTime", type: "col", order: "DESC" })
|
||||
let tableHostFilter = $state("");
|
||||
let tableStateFilter = $state("");
|
||||
let tableHealthFilter = $state("");
|
||||
let tableStateFilter = $state(stateOptions[0]);
|
||||
let tableHealthFilter = $state(healthOptions[0]);
|
||||
let healthTableSorting = $state(
|
||||
{
|
||||
schedulerState: { dir: "down", active: true },
|
||||
@@ -78,7 +94,7 @@
|
||||
`,
|
||||
variables: {
|
||||
nodeFilter: { cluster: { eq: cluster }},
|
||||
sorting: { field: "startTime", type: "col", order: "DESC" },
|
||||
sorting: querySorting,
|
||||
},
|
||||
requestPolicy: "network-only"
|
||||
}));
|
||||
@@ -98,10 +114,10 @@
|
||||
if (tableHostFilter != "") {
|
||||
pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter))
|
||||
}
|
||||
if (tableStateFilter != "") {
|
||||
if (tableStateFilter != "all") {
|
||||
pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter))
|
||||
}
|
||||
if (tableHealthFilter != "") {
|
||||
if (tableHealthFilter != "all") {
|
||||
pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter))
|
||||
}
|
||||
return pendingTableData
|
||||
@@ -148,7 +164,7 @@
|
||||
<Refresher
|
||||
initially={120}
|
||||
onRefresh={(interval) => {
|
||||
sorting = { field: "startTime", type: "col", order: "DESC" }
|
||||
querySorting = { field: "startTime", type: "col", order: "DESC" };
|
||||
}}
|
||||
/>
|
||||
</Col>
|
||||
@@ -280,8 +296,8 @@
|
||||
<thead>
|
||||
<!-- Header Row 1: Titles and Sorting -->
|
||||
<tr>
|
||||
<th style="width: 7.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('hostname')}>
|
||||
Host
|
||||
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('hostname')}>
|
||||
Hosts ({filteredTableData.length})
|
||||
<Icon
|
||||
name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname']
|
||||
.active
|
||||
@@ -289,7 +305,7 @@
|
||||
: ''}"
|
||||
/>
|
||||
</th>
|
||||
<th style="width: 8.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('schedulerState')}>
|
||||
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('schedulerState')}>
|
||||
Scheduler State
|
||||
<Icon
|
||||
name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState']
|
||||
@@ -298,7 +314,7 @@
|
||||
: ''}"
|
||||
/>
|
||||
</th>
|
||||
<th style="width: 7.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('healthState')}>
|
||||
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('healthState')}>
|
||||
Health State
|
||||
<Icon
|
||||
name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState']
|
||||
@@ -322,7 +338,11 @@
|
||||
</th>
|
||||
<th>
|
||||
<InputGroup size="sm">
|
||||
<Input type="text" bind:value={tableStateFilter}/>
|
||||
<Input type="select" bind:value={tableStateFilter}>
|
||||
{#each stateOptions as so}
|
||||
<option value={so}>{so}</option>
|
||||
{/each}
|
||||
</Input>
|
||||
<InputGroupText>
|
||||
<Icon name="search"></Icon>
|
||||
</InputGroupText>
|
||||
@@ -330,7 +350,11 @@
|
||||
</th>
|
||||
<th>
|
||||
<InputGroup size="sm">
|
||||
<Input type="text" bind:value={tableHealthFilter}/>
|
||||
<Input type="select" bind:value={tableHealthFilter}>
|
||||
{#each healthOptions as ho}
|
||||
<option value={ho}>{ho}</option>
|
||||
{/each}
|
||||
</Input>
|
||||
<InputGroupText>
|
||||
<Icon name="search"></Icon>
|
||||
</InputGroupText>
|
||||
|
||||
@@ -211,6 +211,7 @@
|
||||
timestep={metricData.data.metric.timestep}
|
||||
series={metricData.data.metric.series}
|
||||
height={375}
|
||||
{plotSync}
|
||||
forNode
|
||||
/>
|
||||
{/if}
|
||||
|
||||
Reference in New Issue
Block a user