mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-20 09:47:30 +01:00
@@ -279,8 +279,6 @@ func initSubsystems() error {
|
|||||||
return fmt.Errorf("initializing archive: %w", err)
|
return fmt.Errorf("initializing archive: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: metricstore.Init() is called later in runServer() with proper configuration
|
|
||||||
|
|
||||||
// Handle database re-initialization
|
// Handle database re-initialization
|
||||||
if flagReinitDB {
|
if flagReinitDB {
|
||||||
if err := importer.InitDB(); err != nil {
|
if err := importer.InitDB(); err != nil {
|
||||||
|
|||||||
@@ -113,8 +113,6 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("Result: %#v\n", healthResults)
|
|
||||||
|
|
||||||
cclog.Debugf("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs))
|
cclog.Debugf("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs))
|
||||||
startDB := time.Now()
|
startDB := time.Now()
|
||||||
|
|
||||||
|
|||||||
@@ -274,7 +274,7 @@ type NodeStateWithNode struct {
|
|||||||
func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) {
|
func (r *NodeRepository) FindNodeStatesBefore(cutoff int64) ([]NodeStateWithNode, error) {
|
||||||
rows, err := sq.Select(
|
rows, err := sq.Select(
|
||||||
"node_state.id", "node_state.time_stamp", "node_state.node_state",
|
"node_state.id", "node_state.time_stamp", "node_state.node_state",
|
||||||
"node_state.health_state", "node_state.health_metrics",
|
"node_state.health_state", "COALESCE(node_state.health_metrics, '')",
|
||||||
"node_state.cpus_allocated", "node_state.memory_allocated",
|
"node_state.cpus_allocated", "node_state.memory_allocated",
|
||||||
"node_state.gpus_allocated", "node_state.jobs_running",
|
"node_state.gpus_allocated", "node_state.jobs_running",
|
||||||
"node.hostname", "node.cluster", "node.subcluster",
|
"node.hostname", "node.cluster", "node.subcluster",
|
||||||
|
|||||||
@@ -19,6 +19,14 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-lib/v2/util"
|
"github.com/ClusterCockpit/cc-lib/v2/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func metadataKeys(m map[string]string) []string {
|
||||||
|
keys := make([]string, 0, len(m))
|
||||||
|
for k := range m {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// defaultConfigPath is the default path for application tagging configuration
|
// defaultConfigPath is the default path for application tagging configuration
|
||||||
defaultConfigPath = "./var/tagger/apps"
|
defaultConfigPath = "./var/tagger/apps"
|
||||||
@@ -158,29 +166,54 @@ func (t *AppTagger) Register() error {
|
|||||||
// Only the first matching application is tagged.
|
// Only the first matching application is tagged.
|
||||||
func (t *AppTagger) Match(job *schema.Job) {
|
func (t *AppTagger) Match(job *schema.Job) {
|
||||||
r := repository.GetJobRepository()
|
r := repository.GetJobRepository()
|
||||||
|
|
||||||
|
if len(t.apps) == 0 {
|
||||||
|
cclog.Warn("AppTagger: no app patterns loaded, skipping match")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
metadata, err := r.FetchMetadata(job)
|
metadata, err := r.FetchMetadata(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Infof("Cannot fetch metadata for job: %d on %s", job.JobID, job.Cluster)
|
cclog.Infof("AppTagger: cannot fetch metadata for job %d on %s: %v", job.JobID, job.Cluster, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if metadata == nil {
|
||||||
|
cclog.Infof("AppTagger: metadata is nil for job %d on %s", job.JobID, job.Cluster)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
jobscript, ok := metadata["jobScript"]
|
jobscript, ok := metadata["jobScript"]
|
||||||
if ok {
|
if !ok {
|
||||||
id := *job.ID
|
cclog.Infof("AppTagger: no 'jobScript' key in metadata for job %d on %s (keys: %v)",
|
||||||
jobscriptLower := strings.ToLower(jobscript)
|
job.JobID, job.Cluster, metadataKeys(metadata))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
if len(jobscript) == 0 {
|
||||||
for _, a := range t.apps {
|
cclog.Infof("AppTagger: empty jobScript for job %d on %s", job.JobID, job.Cluster)
|
||||||
for _, re := range a.patterns {
|
return
|
||||||
if re.MatchString(jobscriptLower) {
|
}
|
||||||
if !r.HasTag(id, t.tagType, a.tag) {
|
|
||||||
r.AddTagOrCreateDirect(id, t.tagType, a.tag)
|
id := *job.ID
|
||||||
|
jobscriptLower := strings.ToLower(jobscript)
|
||||||
|
cclog.Debugf("AppTagger: matching job %d (script length: %d) against %d apps", id, len(jobscriptLower), len(t.apps))
|
||||||
|
|
||||||
|
for _, a := range t.apps {
|
||||||
|
for _, re := range a.patterns {
|
||||||
|
if re.MatchString(jobscriptLower) {
|
||||||
|
if r.HasTag(id, t.tagType, a.tag) {
|
||||||
|
cclog.Debugf("AppTagger: job %d already has tag %s:%s, skipping", id, t.tagType, a.tag)
|
||||||
|
} else {
|
||||||
|
cclog.Infof("AppTagger: pattern '%s' matched for app '%s' on job %d", re.String(), a.tag, id)
|
||||||
|
if _, err := r.AddTagOrCreateDirect(id, t.tagType, a.tag); err != nil {
|
||||||
|
cclog.Errorf("AppTagger: failed to add tag '%s' to job %d: %v", a.tag, id, err)
|
||||||
}
|
}
|
||||||
break out
|
|
||||||
}
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
cclog.Infof("Cannot extract job script for job: %d on %s", job.JobID, job.Cluster)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cclog.Debugf("AppTagger: no pattern matched for job %d on %s", id, job.Cluster)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
import { queryStore, gql, getContextClient } from "@urql/svelte";
|
||||||
import { Card, Spinner } from "@sveltestrap/sveltestrap";
|
import { Card, Spinner } from "@sveltestrap/sveltestrap";
|
||||||
import { maxScope, checkMetricAvailability } from "../utils.js";
|
import { maxScope, checkMetricAvailability } from "../utils.js";
|
||||||
|
import uPlot from "uplot";
|
||||||
import JobInfo from "./JobInfo.svelte";
|
import JobInfo from "./JobInfo.svelte";
|
||||||
import MetricPlot from "../plots/MetricPlot.svelte";
|
import MetricPlot from "../plots/MetricPlot.svelte";
|
||||||
import JobFootprint from "../helper/JobFootprint.svelte";
|
import JobFootprint from "../helper/JobFootprint.svelte";
|
||||||
@@ -74,13 +75,17 @@
|
|||||||
}
|
}
|
||||||
`;
|
`;
|
||||||
|
|
||||||
|
/* Var Init*/
|
||||||
|
// svelte-ignore state_referenced_locally
|
||||||
|
let plotSync = uPlot.sync(`jobMetricStack-${job.cluster}-${job.id}`);
|
||||||
|
|
||||||
/* State Init */
|
/* State Init */
|
||||||
let zoomStates = $state({});
|
let zoomStates = $state({});
|
||||||
let thresholdStates = $state({});
|
let thresholdStates = $state({});
|
||||||
|
|
||||||
/* Derived */
|
/* Derived */
|
||||||
const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0);
|
const resampleDefault = $derived(resampleConfig ? Math.max(...resampleConfig.resolutions) : 0);
|
||||||
const jobId = $derived(job?.id);
|
const jobId = $derived(job.id);
|
||||||
const scopes = $derived.by(() => {
|
const scopes = $derived.by(() => {
|
||||||
if (job.numNodes == 1) {
|
if (job.numNodes == 1) {
|
||||||
if (job.numAcc >= 1) return ["core", "accelerator"];
|
if (job.numAcc >= 1) return ["core", "accelerator"];
|
||||||
@@ -233,6 +238,7 @@
|
|||||||
numaccs={job.numAcc}
|
numaccs={job.numAcc}
|
||||||
zoomState={zoomStates[metric.data.name] || null}
|
zoomState={zoomStates[metric.data.name] || null}
|
||||||
thresholdState={thresholdStates[metric.data.name] || null}
|
thresholdState={thresholdStates[metric.data.name] || null}
|
||||||
|
{plotSync}
|
||||||
/>
|
/>
|
||||||
{:else}
|
{:else}
|
||||||
<Card body class="mx-2" color="warning">
|
<Card body class="mx-2" color="warning">
|
||||||
|
|||||||
@@ -32,12 +32,28 @@
|
|||||||
|
|
||||||
/* Const Init */
|
/* Const Init */
|
||||||
const client = getContextClient();
|
const client = getContextClient();
|
||||||
|
const stateOptions = [
|
||||||
|
"all",
|
||||||
|
"allocated",
|
||||||
|
"idle",
|
||||||
|
"down",
|
||||||
|
"mixed",
|
||||||
|
"reserved",
|
||||||
|
"unknown",
|
||||||
|
];
|
||||||
|
const healthOptions = [
|
||||||
|
"all",
|
||||||
|
"full",
|
||||||
|
"partial",
|
||||||
|
"failed",
|
||||||
|
];
|
||||||
|
|
||||||
/* State Init */
|
/* State Init */
|
||||||
let pieWidth = $state(0);
|
let pieWidth = $state(0);
|
||||||
|
let querySorting = $state({ field: "startTime", type: "col", order: "DESC" })
|
||||||
let tableHostFilter = $state("");
|
let tableHostFilter = $state("");
|
||||||
let tableStateFilter = $state("");
|
let tableStateFilter = $state(stateOptions[0]);
|
||||||
let tableHealthFilter = $state("");
|
let tableHealthFilter = $state(healthOptions[0]);
|
||||||
let healthTableSorting = $state(
|
let healthTableSorting = $state(
|
||||||
{
|
{
|
||||||
schedulerState: { dir: "down", active: true },
|
schedulerState: { dir: "down", active: true },
|
||||||
@@ -78,7 +94,7 @@
|
|||||||
`,
|
`,
|
||||||
variables: {
|
variables: {
|
||||||
nodeFilter: { cluster: { eq: cluster }},
|
nodeFilter: { cluster: { eq: cluster }},
|
||||||
sorting: { field: "startTime", type: "col", order: "DESC" },
|
sorting: querySorting,
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}));
|
}));
|
||||||
@@ -98,10 +114,10 @@
|
|||||||
if (tableHostFilter != "") {
|
if (tableHostFilter != "") {
|
||||||
pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter))
|
pendingTableData = pendingTableData.filter((e) => e.hostname.includes(tableHostFilter))
|
||||||
}
|
}
|
||||||
if (tableStateFilter != "") {
|
if (tableStateFilter != "all") {
|
||||||
pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter))
|
pendingTableData = pendingTableData.filter((e) => e.schedulerState.includes(tableStateFilter))
|
||||||
}
|
}
|
||||||
if (tableHealthFilter != "") {
|
if (tableHealthFilter != "all") {
|
||||||
pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter))
|
pendingTableData = pendingTableData.filter((e) => e.healthState.includes(tableHealthFilter))
|
||||||
}
|
}
|
||||||
return pendingTableData
|
return pendingTableData
|
||||||
@@ -148,7 +164,7 @@
|
|||||||
<Refresher
|
<Refresher
|
||||||
initially={120}
|
initially={120}
|
||||||
onRefresh={(interval) => {
|
onRefresh={(interval) => {
|
||||||
sorting = { field: "startTime", type: "col", order: "DESC" }
|
querySorting = { field: "startTime", type: "col", order: "DESC" };
|
||||||
}}
|
}}
|
||||||
/>
|
/>
|
||||||
</Col>
|
</Col>
|
||||||
@@ -280,8 +296,8 @@
|
|||||||
<thead>
|
<thead>
|
||||||
<!-- Header Row 1: Titles and Sorting -->
|
<!-- Header Row 1: Titles and Sorting -->
|
||||||
<tr>
|
<tr>
|
||||||
<th style="width: 7.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('hostname')}>
|
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('hostname')}>
|
||||||
Host
|
Hosts ({filteredTableData.length})
|
||||||
<Icon
|
<Icon
|
||||||
name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname']
|
name="caret-{healthTableSorting['hostname'].dir}{healthTableSorting['hostname']
|
||||||
.active
|
.active
|
||||||
@@ -289,7 +305,7 @@
|
|||||||
: ''}"
|
: ''}"
|
||||||
/>
|
/>
|
||||||
</th>
|
</th>
|
||||||
<th style="width: 8.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('schedulerState')}>
|
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('schedulerState')}>
|
||||||
Scheduler State
|
Scheduler State
|
||||||
<Icon
|
<Icon
|
||||||
name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState']
|
name="caret-{healthTableSorting['schedulerState'].dir}{healthTableSorting['schedulerState']
|
||||||
@@ -298,7 +314,7 @@
|
|||||||
: ''}"
|
: ''}"
|
||||||
/>
|
/>
|
||||||
</th>
|
</th>
|
||||||
<th style="width: 7.5%; min-width: 100px; max-width:10%;" onclick={() => sortBy('healthState')}>
|
<th style="width: 9%; min-width: 100px; max-width:10%;" onclick={() => sortBy('healthState')}>
|
||||||
Health State
|
Health State
|
||||||
<Icon
|
<Icon
|
||||||
name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState']
|
name="caret-{healthTableSorting['healthState'].dir}{healthTableSorting['healthState']
|
||||||
@@ -322,7 +338,11 @@
|
|||||||
</th>
|
</th>
|
||||||
<th>
|
<th>
|
||||||
<InputGroup size="sm">
|
<InputGroup size="sm">
|
||||||
<Input type="text" bind:value={tableStateFilter}/>
|
<Input type="select" bind:value={tableStateFilter}>
|
||||||
|
{#each stateOptions as so}
|
||||||
|
<option value={so}>{so}</option>
|
||||||
|
{/each}
|
||||||
|
</Input>
|
||||||
<InputGroupText>
|
<InputGroupText>
|
||||||
<Icon name="search"></Icon>
|
<Icon name="search"></Icon>
|
||||||
</InputGroupText>
|
</InputGroupText>
|
||||||
@@ -330,7 +350,11 @@
|
|||||||
</th>
|
</th>
|
||||||
<th>
|
<th>
|
||||||
<InputGroup size="sm">
|
<InputGroup size="sm">
|
||||||
<Input type="text" bind:value={tableHealthFilter}/>
|
<Input type="select" bind:value={tableHealthFilter}>
|
||||||
|
{#each healthOptions as ho}
|
||||||
|
<option value={ho}>{ho}</option>
|
||||||
|
{/each}
|
||||||
|
</Input>
|
||||||
<InputGroupText>
|
<InputGroupText>
|
||||||
<Icon name="search"></Icon>
|
<Icon name="search"></Icon>
|
||||||
</InputGroupText>
|
</InputGroupText>
|
||||||
|
|||||||
@@ -211,6 +211,7 @@
|
|||||||
timestep={metricData.data.metric.timestep}
|
timestep={metricData.data.metric.timestep}
|
||||||
series={metricData.data.metric.series}
|
series={metricData.data.metric.series}
|
||||||
height={375}
|
height={375}
|
||||||
|
{plotSync}
|
||||||
forNode
|
forNode
|
||||||
/>
|
/>
|
||||||
{/if}
|
{/if}
|
||||||
|
|||||||
Reference in New Issue
Block a user