Merge pull request #482 from ClusterCockpit/dev

Dev
This commit is contained in:
Jan Eitzinger
2026-02-06 09:51:47 +01:00
committed by GitHub
9 changed files with 80 additions and 18 deletions

View File

@@ -1,2 +1,3 @@
vasp vasp_gam
VASP vasp_ncl
vasp_std

View File

@@ -15,6 +15,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/repository" "github.com/ClusterCockpit/cc-backend/internal/repository"
"github.com/ClusterCockpit/cc-backend/pkg/archive" "github.com/ClusterCockpit/cc-backend/pkg/archive"
"github.com/ClusterCockpit/cc-backend/pkg/metricstore" "github.com/ClusterCockpit/cc-backend/pkg/metricstore"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/ClusterCockpit/cc-lib/v2/schema"
) )
@@ -81,6 +82,8 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
m := make(map[string][]string) m := make(map[string][]string)
healthStates := make(map[string]schema.MonitoringState) healthStates := make(map[string]schema.MonitoringState)
startMs := time.Now()
for _, node := range req.Nodes { for _, node := range req.Nodes {
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil { if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
m[sc] = append(m[sc], node.Hostname) m[sc] = append(m[sc], node.Hostname)
@@ -97,6 +100,9 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
} }
} }
cclog.Infof("Timer updateNodeStates, MemStore HealthCheck: %s", time.Since(startMs))
startDb := time.Now()
for _, node := range req.Nodes { for _, node := range req.Nodes {
state := determineState(node.States) state := determineState(node.States)
healthState := schema.MonitoringStateFailed healthState := schema.MonitoringStateFailed
@@ -115,4 +121,6 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState) repo.UpdateNodeState(node.Hostname, req.Cluster, &nodeState)
} }
cclog.Infof("Timer updateNodeStates, SQLite Inserts: %s", time.Since(startDb))
} }

View File

@@ -81,7 +81,7 @@ func (api *RestAPI) MountAPIRoutes(r *mux.Router) {
// Cluster List // Cluster List
r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet) r.HandleFunc("/clusters/", api.getClusters).Methods(http.MethodGet)
// Slurm node state // Slurm node state
r.HandleFunc("/nodestates/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut) r.HandleFunc("/nodestate/", api.updateNodeStates).Methods(http.MethodPost, http.MethodPut)
// Job Handler // Job Handler
if config.Keys.APISubjects == nil { if config.Keys.APISubjects == nil {
cclog.Info("Enabling REST start/stop job API") cclog.Info("Enabling REST start/stop job API")

View File

@@ -198,7 +198,6 @@ func (r *NodeRepository) UpdateNodeState(hostname string, cluster string, nodeSt
} }
cclog.Debugf("Added node '%s' to database", hostname) cclog.Debugf("Added node '%s' to database", hostname)
return nil
} else { } else {
cclog.Warnf("Error while querying node '%v' from database", id) cclog.Warnf("Error while querying node '%v' from database", id)
return err return err

View File

@@ -11,6 +11,7 @@ import (
"slices" "slices"
"time" "time"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema" "github.com/ClusterCockpit/cc-lib/v2/schema"
) )
@@ -244,7 +245,16 @@ func (m *MemoryStore) HealthCheck(cluster string,
missingCount = len(missingList) missingCount = len(missingList)
degradedCount = len(degradedList) degradedCount = len(degradedList)
healthyCount = len(expectedMetrics) - (missingCount + degradedCount) uniqueList := mergeList(missingList, degradedList)
healthyCount = len(expectedMetrics) - len(uniqueList)
// Debug log missing and degraded metrics
if missingCount > 0 {
cclog.ComponentDebug("metricstore", "HealthCheck: node", hostname, "missing metrics:", missingList)
}
if degradedCount > 0 {
cclog.ComponentDebug("metricstore", "HealthCheck: node", hostname, "degraded metrics:", degradedList)
}
// Determine overall health status // Determine overall health status
if missingCount > 0 || degradedCount > 0 { if missingCount > 0 || degradedCount > 0 {

View File

@@ -319,11 +319,11 @@
<tr> <tr>
<td> <td>
{#if type == "USER"} {#if type == "USER"}
<a href="/monitoring/user/{row.id}" <a href="/monitoring/user/{row.id}{fetchRunning ? '?state=running' : ''}"
>{scrambleNames ? scramble(row.id) : row.id}</a >{scrambleNames ? scramble(row.id) : row.id}</a
> >
{:else if type == "PROJECT"} {:else if type == "PROJECT"}
<a href="/monitoring/jobs/?project={row.id}" <a href="/monitoring/jobs/?project={row.id}{fetchRunning ? '&state=running' : ''}"
>{scrambleNames ? scramble(row.id) : row.id}</a >{scrambleNames ? scramble(row.id) : row.id}</a
> >
{:else} {:else}

View File

@@ -19,7 +19,8 @@
Spinner, Spinner,
Input, Input,
InputGroup, InputGroup,
InputGroupText InputGroupText,
Tooltip
} from "@sveltestrap/sveltestrap"; } from "@sveltestrap/sveltestrap";
import { import {
queryStore, queryStore,
@@ -32,6 +33,10 @@
scramble, scramble,
scrambleNames, scrambleNames,
} from "./generic/utils.js"; } from "./generic/utils.js";
import {
formatNumber,
formatDurationTime
} from "./generic/units.js";
import JobList from "./generic/JobList.svelte"; import JobList from "./generic/JobList.svelte";
import JobCompare from "./generic/JobCompare.svelte"; import JobCompare from "./generic/JobCompare.svelte";
import Filters from "./generic/Filters.svelte"; import Filters from "./generic/Filters.svelte";
@@ -56,6 +61,7 @@
const durationBinOptions = ["1m","10m","1h","6h","12h"]; const durationBinOptions = ["1m","10m","1h","6h","12h"];
const metricBinOptions = [10, 20, 50, 100]; const metricBinOptions = [10, 20, 50, 100];
const matchedJobCompareLimit = 500; const matchedJobCompareLimit = 500;
const shortDuration = ccconfig.jobList_hideShortRunningJobs; // Always configured
/* State Init */ /* State Init */
// List & Control Vars // List & Control Vars
@@ -108,6 +114,7 @@
shortJobs shortJobs
totalWalltime totalWalltime
totalCoreHours totalCoreHours
totalAccHours
histDuration { histDuration {
count count
value value
@@ -133,6 +140,7 @@
variables: { jobFilters, selectedHistograms, numDurationBins, numMetricBins }, variables: { jobFilters, selectedHistograms, numDurationBins, numMetricBins },
}) })
); );
const hasAccHours = $derived($stats?.data?.jobsStatistics[0]?.totalAccHours != 0);
/* Functions */ /* Functions */
function resetJobSelection() { function resetJobSelection() {
@@ -290,20 +298,54 @@
{/if} {/if}
<tr> <tr>
<th scope="row">Total Jobs</th> <th scope="row">Total Jobs</th>
<td>{$stats.data.jobsStatistics[0].totalJobs}</td> <td>
<span style="cursor: help;" title="{$stats.data.jobsStatistics[0].totalJobs} Jobs">
{formatNumber($stats.data.jobsStatistics[0].totalJobs)} Jobs
</span>
</td>
</tr> </tr>
<tr> <tr>
<th scope="row">Short Jobs</th> <th scope="row">
<td>{$stats.data.jobsStatistics[0].shortJobs}</td> <span class="mr-1">
Short Jobs
<Icon name="info-circle" id="shortjobs-info" style="margin-left:5px; cursor:help;"/>
</span>
<Tooltip target={`shortjobs-info`} placement="right">
Job duration less than {formatDurationTime(shortDuration)}
</Tooltip>
</th>
<td>
<span style="cursor: help;" title="{$stats.data.jobsStatistics[0].shortJobs} Jobs">
{formatNumber($stats.data.jobsStatistics[0].shortJobs)} Jobs
</span>
</td>
</tr> </tr>
<tr> <tr>
<th scope="row">Total Walltime</th> <th scope="row">Total Walltime</th>
<td>{$stats.data.jobsStatistics[0].totalWalltime}</td> <td>
<span style="cursor: help;" title="{$stats.data.jobsStatistics[0].totalWalltime} Hours">
{formatNumber($stats.data.jobsStatistics[0].totalWalltime)} Hours
</span>
</td>
</tr> </tr>
<tr> <tr>
<th scope="row">Total Core Hours</th> <th scope="row">Total Core Hours</th>
<td>{$stats.data.jobsStatistics[0].totalCoreHours}</td> <td>
<span style="cursor: help;" title="{$stats.data.jobsStatistics[0].totalCoreHours} Hours">
{formatNumber($stats.data.jobsStatistics[0].totalCoreHours)} Hours
</span>
</td>
</tr> </tr>
{#if hasAccHours}
<tr>
<th scope="row">Total Accelerator Hours</th>
<td>
<span style="cursor: help;" title="{$stats.data.jobsStatistics[0].totalAccHours} Hours">
{formatNumber($stats.data.jobsStatistics[0].totalAccHours)} Hours
</span>
</td>
</tr>
{/if}
</tbody> </tbody>
</Table> </Table>
</Col> </Col>
@@ -316,6 +358,7 @@
xunit="Runtime" xunit="Runtime"
ylabel="Number of Jobs" ylabel="Number of Jobs"
yunit="Jobs" yunit="Jobs"
height={hasAccHours ? 290 : 250}
usesBins usesBins
xtime xtime
/> />
@@ -330,6 +373,7 @@
xunit="Nodes" xunit="Nodes"
ylabel="Number of Jobs" ylabel="Number of Jobs"
yunit="Jobs" yunit="Jobs"
height={hasAccHours ? 290 : 250}
/> />
{/key} {/key}
</Col> </Col>

View File

@@ -79,7 +79,6 @@
/* Derived */ /* Derived */
const jobId = $derived(job?.id); const jobId = $derived(job?.id);
const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []);
const scopes = $derived.by(() => { const scopes = $derived.by(() => {
if (job.numNodes == 1) { if (job.numNodes == 1) {
if (job.numAcc >= 1) return ["core", "accelerator"]; if (job.numAcc >= 1) return ["core", "accelerator"];
@@ -95,6 +94,7 @@
variables: { id: jobId, metrics, scopes, selectedResolution }, variables: { id: jobId, metrics, scopes, selectedResolution },
}) })
); );
const refinedData = $derived($metricsQuery?.data?.jobMetrics ? sortAndSelectScope($metricsQuery.data.jobMetrics) : []);
/* Effects */ /* Effects */
$effect(() => { $effect(() => {

View File

@@ -32,10 +32,6 @@ export function scaleNumber(x, p = '') {
} }
} }
/**
 * Round a numeric value to two decimal places.
 *
 * @param {number} x - Value to round.
 * @returns {number} `x` scaled by 100, rounded with `Math.round`, then scaled back down.
 */
export function roundTwoDigits(x) {
  const hundredths = Math.round(100 * x)
  return hundredths / 100
}
export function scaleNumbers(x, y, p = '') { export function scaleNumbers(x, y, p = '') {
const oldPower = power[prefix.indexOf(p)] const oldPower = power[prefix.indexOf(p)]
const rawXValue = x * oldPower const rawXValue = x * oldPower
@@ -77,6 +73,10 @@ export function formatUnixTime(t, withDate = false) {
} }
} }
/**
 * Round a numeric value to two decimal places.
 *
 * @param {number} x - Value to round.
 * @returns {number} `x` scaled by 100, rounded with `Math.round`, then scaled back down.
 */
export function roundTwoDigits(x) {
  const hundredths = Math.round(100 * x)
  return hundredths / 100
}
// const equalsCheck = (a, b) => { // const equalsCheck = (a, b) => {
// return JSON.stringify(a) === JSON.stringify(b); // return JSON.stringify(a) === JSON.stringify(b);
// } // }