Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev

This commit is contained in:
2026-01-26 08:38:56 +01:00
12 changed files with 402 additions and 140 deletions

View File

@@ -11,6 +11,7 @@ import (
"encoding/json"
"fmt"
"slices"
"sort"
"strings"
"sync"
"time"
@@ -683,6 +684,11 @@ func (r *NodeRepository) GetNodesForList(
hasNextPage = len(nextNodes) == 1
}
// Fallback for non-init'd node table in DB; Ignores stateFilter
if stateFilter == "all" && countNodes == 0 {
nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page)
}
return nodes, stateMap, countNodes, hasNextPage, nil
}
@@ -707,3 +713,51 @@ func AccessCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBu
return qnil, fmt.Errorf("user has no or unknown roles")
}
}
// getNodesFromTopol builds a node list from the cluster topology held in
// archive.NodeLists. GetNodesForList uses it as a fallback when the node table
// in the DB has not been initialised; no state filter is applied here.
//
// With a non-empty subCluster only that subcluster's node list is used,
// otherwise the lists of all subclusters of cluster are concatenated.
// Hostnames are kept when they contain nodeFilter as a substring (an empty
// filter keeps everything) and are then sorted lexicographically.
//
// Paging is only applied when there are more matching nodes than
// page.ItemsPerPage. A nil page or a non-positive ItemsPerPage returns the
// full list, a page number below 1 is treated as page 1, and a page past the
// end yields an empty slice instead of an out-of-range slice panic.
//
// It returns the (paged) hostnames, the number of matching nodes before
// paging, and whether a further page exists.
func getNodesFromTopol(cluster string, subCluster string, nodeFilter string, page *model.PageRequest) ([]string, int, bool) {
	// 1) Get list of all nodes
	var topolNodes []string
	if subCluster != "" {
		scNodes := archive.NodeLists[cluster][subCluster]
		topolNodes = scNodes.PrintList()
	} else {
		for _, nodeList := range archive.NodeLists[cluster] {
			topolNodes = append(topolNodes, nodeList.PrintList()...)
		}
	}

	// 2) Filter nodes
	if nodeFilter != "" {
		filteredNodes := make([]string, 0, len(topolNodes))
		for _, node := range topolNodes {
			if strings.Contains(node, nodeFilter) {
				filteredNodes = append(filteredNodes, node)
			}
		}
		topolNodes = filteredNodes
	}

	// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
	totalNodes := len(topolNodes)
	sort.Strings(topolNodes)

	// 3) Apply paging; everything fits on one page (or no usable paging info)
	if page == nil || page.ItemsPerPage <= 0 || totalNodes <= page.ItemsPerPage {
		return topolNodes, totalNodes, false
	}

	pageNo := page.Page
	if pageNo < 1 {
		pageNo = 1
	}

	start := (pageNo - 1) * page.ItemsPerPage
	if start < 0 || start >= totalNodes {
		// Requested page lies beyond the last node: nothing to show, nothing to follow.
		return []string{}, totalNodes, false
	}

	end := start + page.ItemsPerPage
	hasNextPage := end < totalNodes
	if !hasNextPage {
		end = totalNodes
	}

	return topolNodes[start:end], totalNodes, hasNextPage
}

View File

@@ -456,7 +456,7 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&arrayJobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
case "username":
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound)
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " "))+"&startTime=last30d", http.StatusFound)
} else {
web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Error", MsgType: "alert-danger", Message: "Missing Access Rights", User: *user, Roles: availableRoles, Build: buildInfo})
}
@@ -464,10 +464,10 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
usernames, _ := repo.FindColumnValues(user, strings.Trim(splitSearch[1], " "), "user", "username", "name")
if len(usernames) != 0 {
joinedNames := strings.Join(usernames, "&user=")
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames, http.StatusFound)
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames+"&startTime=last30d", http.StatusFound)
} else {
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
http.Redirect(rw, r, "/monitoring/users/?user=NoUserNameFound", http.StatusPermanentRedirect)
http.Redirect(rw, r, "/monitoring/users/?user=NoUserNameFound&startTime=last30d", http.StatusPermanentRedirect)
} else {
web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Error", MsgType: "alert-danger", Message: "Missing Access Rights", User: *user, Roles: availableRoles, Build: buildInfo})
}

View File

@@ -912,9 +912,6 @@ func buildNodeQueries(
scopes []schema.MetricScope,
resolution int64,
) ([]APIQuery, []schema.MetricScope, error) {
if len(nodes) == 0 {
return nil, nil, fmt.Errorf("METRICDATA/CCMS > no nodes specified for query")
}
queries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(nodes))
assignedScope := []schema.MetricScope{}

197
tools/dataGenerator.sh Normal file
View File

@@ -0,0 +1,197 @@
#!/bin/bash
# Sample data generator: once per cycle it writes random metric samples for the
# "alex" and "fritz" host lists to temporary files and sends them either via
# REST (curl) or via NATS (nats pub), depending on the flags below.
# ==========================================
# CONFIGURATION & FLAGS
# ==========================================
# MODE SETTINGS
# TRANSPORT_MODE: "NATS" publishes with `nats pub`; any other value posts to REST_URL with curl.
TRANSPORT_MODE="REST" # Options: "REST" or "NATS"
# CONNECTION_SCOPE: "INTERNAL" targets <SERVICE_ADDRESS>/metricstore/api/write and generates a JWT
# through ./cc-backend; any other value targets <SERVICE_ADDRESS>/api/write and uses JWT_STATIC.
CONNECTION_SCOPE="INTERNAL" # Options: "INTERNAL" or "EXTERNAL"
API_USER="demo" # User for JWT generation
# BASE NETWORK CONFIG
SERVICE_ADDRESS="http://localhost:8080"
NATS_SERVER="nats://0.0.0.0:4222"
# NATS CREDENTIALS
NATS_USER="root"
NATS_PASS="root"
NATS_SUBJECT="hpc-nats"
# EXTERNAL JWT (Used if CONNECTION_SCOPE is EXTERNAL)
# NOTE(review): this is a bearer token committed to the repository; its payload appears to carry
# admin/api roles for user "demo" and an expiry claim. Confirm it is only valid against demo
# setups, and expect to replace it once it has expired.
JWT_STATIC="eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzU3Nzg4NDQsImlhdCI6MTc2ODU3ODg0NCwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9._SDEW9WaUVXSBFmWqGhyIZXLoqoDU8F1hkfh4cXKIqF4yw7w50IUpfUBtwUFUOnoviFKoi563f6RAMC7XxeLDA"
# ==========================================
# DATA DEFINITIONS
# ==========================================
# Space-separated lists; the generator loops iterate over them via word splitting.
# Hostnames per cluster. Each host must appear only once, otherwise duplicate
# samples with an identical timestamp are emitted for it in every cycle.
ALEX_HOSTS="a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904"
FRITZ_HOSTS="f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378"
# Metric names used for the sub-node scopes (hwthread, accelerator, memoryDomain, socket).
METRICS_STD="cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock"
# Metric names used for the node scope.
METRICS_NODE="cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts"
# Identifiers used as type-id for the accelerator scope.
ACCEL_IDS="00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0"
# ==========================================
# SETUP ENV (URL & TOKEN)
# ==========================================
# Derive REST_URL and JWT from CONNECTION_SCOPE: "INTERNAL" asks the local
# cc-backend binary for a fresh token, every other value uses JWT_STATIC.
case "$CONNECTION_SCOPE" in
    INTERNAL)
        # Internal mode: write endpoint below /metricstore, token generated on the fly
        REST_URL="${SERVICE_ADDRESS}/metricstore/api/write"
        echo "Setup: INTERNAL mode selected."
        echo "Generating JWT for user: $API_USER"
        # Keep only the token that follows the success message printed by cc-backend
        JWT=$(./cc-backend -jwt "$API_USER" | grep -oP "(?<=JWT: Successfully generated JWT for user '${API_USER}': ).*")
        if [[ -z "$JWT" ]]; then
            echo "Error: Failed to generate JWT from cc-backend."
            exit 1
        fi
        ;;
    *)
        # External mode: plain /api/write endpoint, pre-built token
        REST_URL="${SERVICE_ADDRESS}/api/write"
        echo "Setup: EXTERNAL mode selected."
        echo "Using static JWT."
        JWT="$JWT_STATIC"
        ;;
esac
echo "Target URL: $REST_URL"
# ==========================================
# FUNCTIONS
# ==========================================
# send_payload FILE CLUSTER
#   FILE     path of the payload file to transmit; it is removed afterwards
#   CLUSTER  cluster name, passed as the "cluster" query parameter in REST mode
#
# Sends FILE via NATS when TRANSPORT_MODE is "NATS", otherwise via an HTTP POST
# to REST_URL authenticated with JWT. A failed transfer is reported on stderr
# and reflected in the return status, but never aborts the script, so the
# generator loop keeps running.
send_payload() {
    local file_path="$1"
    local cluster_name="$2"
    local status=0

    if [ "$TRANSPORT_MODE" == "NATS" ]; then
        # Piping file content directly to nats stdin
        cat "$file_path" | nats pub "$NATS_SUBJECT" -s "$NATS_SERVER" --user "$NATS_USER" --password "$NATS_PASS" || status=$?
    else
        # Sending via REST API; -S keeps curl quiet but still prints transfer errors
        curl -sS -X 'POST' "${REST_URL}/?cluster=${cluster_name}" \
            -H "Authorization: Bearer $JWT" \
            --data-binary "@$file_path" || status=$?
    fi

    if [ "$status" -ne 0 ]; then
        echo "Warning: sending '$file_path' for cluster '$cluster_name' failed (exit code $status)" >&2
    fi

    # Clean up immediately; -f so an already missing file is not an error
    rm -f "$file_path"
    return "$status"
}
# ==========================================
# MAIN LOOP
# ==========================================
# emit_scoped CLUSTER SCOPE METRICS HOSTS IDS TIMESTAMP
# Prints one sample line per metric/host/id combination for a sub-node scope.
# METRICS, HOSTS and IDS are space-separated lists and are therefore expanded
# unquoted on purpose. Each value is a random integer in 1..100 with ".0" appended.
emit_scoped() {
    local cluster="$1" scope="$2" metrics="$3" hosts="$4" ids="$5" ts="$6"
    local metric hostname id
    for metric in $metrics; do
        for hostname in $hosts; do
            for id in $ids; do
                echo "$metric,cluster=$cluster,hostname=$hostname,type=$scope,type-id=$id value=$((1 + RANDOM % 100)).0 $ts"
            done
        done
    done
}

# emit_node CLUSTER METRICS HOSTS TIMESTAMP
# Prints one sample line per metric/host combination for the node scope (no type-id).
emit_node() {
    local cluster="$1" metrics="$2" hosts="$3" ts="$4"
    local metric hostname
    for metric in $metrics; do
        for hostname in $hosts; do
            echo "$metric,cluster=$cluster,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $ts"
        done
    done
}

# Numeric type-id ranges per scope, expanded once
ALEX_HWTHREAD_IDS="$(echo {0..127})"
FRITZ_HWTHREAD_IDS="$(echo {0..71})"
MEMDOMAIN_IDS="$(echo {0..7})"
SOCKET_IDS="$(echo {0..1})"

# Clean up leftovers
rm -f sample_fritz.txt sample_alex.txt

while true; do
    timestamp="$(date '+%s')"
    echo "--- Cycle Start: $timestamp [Mode: $TRANSPORT_MODE | Scope: $CONNECTION_SCOPE] ---"

    # 1. ALEX: HWTHREAD
    echo "Generating Alex: hwthread"
    emit_scoped "alex" "hwthread" "$METRICS_STD" "$ALEX_HOSTS" "$ALEX_HWTHREAD_IDS" "$timestamp" > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 2. FRITZ: HWTHREAD
    echo "Generating Fritz: hwthread"
    emit_scoped "fritz" "hwthread" "$METRICS_STD" "$FRITZ_HOSTS" "$FRITZ_HWTHREAD_IDS" "$timestamp" > sample_fritz.txt
    send_payload "sample_fritz.txt" "fritz"

    # 3. ALEX: ACCELERATOR
    echo "Generating Alex: accelerator"
    emit_scoped "alex" "accelerator" "$METRICS_STD" "$ALEX_HOSTS" "$ACCEL_IDS" "$timestamp" > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 4. ALEX: MEMORY DOMAIN
    echo "Generating Alex: memoryDomain"
    emit_scoped "alex" "memoryDomain" "$METRICS_STD" "$ALEX_HOSTS" "$MEMDOMAIN_IDS" "$timestamp" > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 5. ALEX: SOCKET
    echo "Generating Alex: socket"
    emit_scoped "alex" "socket" "$METRICS_STD" "$ALEX_HOSTS" "$SOCKET_IDS" "$timestamp" > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 6. FRITZ: SOCKET
    echo "Generating Fritz: socket"
    emit_scoped "fritz" "socket" "$METRICS_STD" "$FRITZ_HOSTS" "$SOCKET_IDS" "$timestamp" > sample_fritz.txt
    send_payload "sample_fritz.txt" "fritz"

    # 7. ALEX: NODE
    echo "Generating Alex: node"
    emit_node "alex" "$METRICS_NODE" "$ALEX_HOSTS" "$timestamp" > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 8. FRITZ: NODE
    echo "Generating Fritz: node"
    emit_node "fritz" "$METRICS_NODE" "$FRITZ_HOSTS" "$timestamp" > sample_fritz.txt
    send_payload "sample_fritz.txt" "fritz"

    # One cycle per minute; plain seconds instead of the non-portable "1m" suffix
    sleep 60
done

View File

@@ -149,7 +149,7 @@
filter: $jobFilter
page: $paging
sortBy: TOTALJOBS
groupBy: SUBCLUSTER
groupBy: CLUSTER
) {
id
totalJobs
@@ -193,30 +193,25 @@
}));
const clusterInfo = $derived.by(() => {
let rawInfos = {};
if ($initq?.data?.clusters) {
let rawInfos = {};
// Grouped By Cluster
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalCores || 0;
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalAccs || 0;
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalUsers || 0;
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalJobs || 0;
// Collected By Subcluster
let subClusters = $initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || [];
for (let subCluster of subClusters) {
// Allocations
if (!rawInfos['allocatedNodes']) rawInfos['allocatedNodes'] = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
else rawInfos['allocatedNodes'] += $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
else rawInfos['allocatedCores'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
else rawInfos['allocatedAccs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
// Infos
if (!rawInfos['processorTypes']) rawInfos['processorTypes'] = subCluster?.processorType ? new Set([subCluster.processorType]) : new Set([]);
else rawInfos['processorTypes'].add(subCluster.processorType);
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
else rawInfos['activeUsers'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
else rawInfos['runningJobs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
if (!rawInfos['totalNodes']) rawInfos['totalNodes'] = subCluster?.numberOfNodes || 0;
else rawInfos['totalNodes'] += subCluster?.numberOfNodes || 0;
@@ -281,11 +276,8 @@
let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null
rawInfos['gpuPwrUnit'] = rawGpuUnit ? rawGpuUnit.prefix + rawGpuUnit.base : ''
}
return rawInfos
} else {
return {};
}
return rawInfos;
});
const refinedStateData = $derived.by(() => {
@@ -518,7 +510,7 @@
</Col>
</Row>
<Row cols={{xs:1, md:2}} style="height: 35vh; margin-bottom: 1rem;">
<Row cols={{xs:1, md:2}} style="height: 34vh; margin-bottom: 1rem;">
<!-- Total Cluster Metric in Time SUMS-->
<Col class="text-center">
<h5 class="mt-2 mb-0">
@@ -537,6 +529,7 @@
timestep={$statusQuery?.data?.clusterMetrics[0]?.timestep || 60}
numNodes={$statusQuery?.data?.clusterMetrics?.nodeCount || 0}
metricData={$statusQuery?.data?.clusterMetrics?.metrics || []}
height={250}
publicMode
/>
{/key}
@@ -557,14 +550,14 @@
nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics)}
fixTitle="Node Utilization"
yMinimum={1.0}
height={330}
height={280}
/>
{/key}
</div>
</Col>
</Row>
<Row cols={{xs:1, md:2}} style="height: 35vh;">
<Row cols={{xs:1, md:2}} style="height: 34vh;">
<Col> <!-- Pie Last States -->
<Row>
{#if refinedStateData.length > 0}
@@ -621,7 +614,7 @@
{#key $statesTimed?.data?.nodeStatesTimed}
<Stacked
data={$statesTimed?.data?.nodeStatesTimed}
height={300}
height={250}
ylabel="Nodes"
yunit = "#Count"
title = "Cluster Status"

View File

@@ -95,19 +95,7 @@
}
/* On Mount */
onMount(() => {
// By default, look at the jobs of the last 30 days:
if (filterPresets?.startTime == null) {
if (filterPresets == null) filterPresets = {};
filterPresets.startTime = {
range: "last30d",
text: "Last 30 Days",
};
};
// Init Filter
filterComponent.updateFilters();
});
onMount(() => filterComponent.updateFilters());
</script>
<Row cols={{ xs: 1, md: 2}}>

View File

@@ -91,38 +91,38 @@
/* State Init */
// svelte-ignore state_referenced_locally
let filters = $state({
dbId: filterPresets.dbId || [],
jobId: filterPresets.jobId || "",
jobIdMatch: filterPresets.jobIdMatch || "eq",
arrayJobId: filterPresets.arrayJobId || null,
jobName: filterPresets.jobName || "",
project: filterPresets.project || "",
projectMatch: filterPresets.projectMatch || "contains",
user: filterPresets.user || "",
userMatch: filterPresets.userMatch || "contains",
cluster: filterPresets.cluster || null,
partition: filterPresets.partition || null,
dbId: filterPresets?.dbId || [],
jobId: filterPresets?.jobId || "",
jobIdMatch: filterPresets?.jobIdMatch || "eq",
arrayJobId: filterPresets?.arrayJobId || null,
jobName: filterPresets?.jobName || "",
project: filterPresets?.project || "",
projectMatch: filterPresets?.projectMatch || "contains",
user: filterPresets?.user || "",
userMatch: filterPresets?.userMatch || "contains",
cluster: filterPresets?.cluster || null,
partition: filterPresets?.partition || null,
states:
filterPresets.states || filterPresets.state
filterPresets?.states || filterPresets?.state
? [filterPresets.state].flat()
: allJobStates,
shared: filterPresets.shared || "",
schedule: filterPresets.schedule || "",
startTime: filterPresets.startTime || { from: null, to: null, range: ""},
duration: filterPresets.duration || {
shared: filterPresets?.shared || "",
schedule: filterPresets?.schedule || "",
startTime: filterPresets?.startTime || { from: null, to: null, range: ""},
duration: filterPresets?.duration || {
lessThan: null,
moreThan: null,
from: null,
to: null,
},
tags: filterPresets.tags || [],
numNodes: filterPresets.numNodes || { from: null, to: null },
numHWThreads: filterPresets.numHWThreads || { from: null, to: null },
numAccelerators: filterPresets.numAccelerators || { from: null, to: null },
node: filterPresets.node || null,
nodeMatch: filterPresets.nodeMatch || "eq",
energy: filterPresets.energy || { from: null, to: null },
stats: filterPresets.stats || [],
tags: filterPresets?.tags || [],
numNodes: filterPresets?.numNodes || { from: null, to: null },
numHWThreads: filterPresets?.numHWThreads || { from: null, to: null },
numAccelerators: filterPresets?.numAccelerators || { from: null, to: null },
node: filterPresets?.node || null,
nodeMatch: filterPresets?.nodeMatch || "eq",
energy: filterPresets?.energy || { from: null, to: null },
stats: filterPresets?.stats || [],
});
/* Opened States */

View File

@@ -92,7 +92,7 @@
{/each}
</DropdownMenu>
</Dropdown>
{:else}
{:else} <!-- Lists -->
<Dropdown nav inNavbar {direction}>
<DropdownToggle nav caret>
<Icon name={item.icon} />
@@ -100,7 +100,7 @@
</DropdownToggle>
<DropdownMenu class="dropdown-menu-lg-end">
<DropdownItem
href={item.href}
href={`${item.href}?startTime=last30d`}
>
All Clusters
</DropdownItem>
@@ -112,12 +112,12 @@
</DropdownToggle>
<DropdownMenu>
<DropdownItem class="py-1 px-2"
href={`${item.href}?cluster=${cn}`}
href={`${item.href}?cluster=${cn}&startTime=last30d`}
>
All Jobs
</DropdownItem>
<DropdownItem class="py-1 px-2"
href={`${item.href}?cluster=${cn}&state=running`}
href={`${item.href}?cluster=${cn}&state=running&startTime=last30d`}
>
Running Jobs
</DropdownItem>

View File

@@ -155,7 +155,7 @@
filter: $jobFilter
page: $paging
sortBy: TOTALJOBS
groupBy: SUBCLUSTER
groupBy: CLUSTER
) {
id
totalJobs
@@ -222,30 +222,25 @@
}));
const clusterInfo = $derived.by(() => {
let rawInfos = {};
if ($initq?.data?.clusters) {
let rawInfos = {};
// Grouped By Cluster
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalCores || 0;
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalAccs || 0;
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalUsers || 0;
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalJobs || 0;
// Collected By Subcluster
let subClusters = $initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || [];
for (let subCluster of subClusters) {
// Allocations
if (!rawInfos['allocatedNodes']) rawInfos['allocatedNodes'] = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
else rawInfos['allocatedNodes'] += $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
else rawInfos['allocatedCores'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
else rawInfos['allocatedAccs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
// Infos
if (!rawInfos['processorTypes']) rawInfos['processorTypes'] = subCluster?.processorType ? new Set([subCluster.processorType]) : new Set([]);
else rawInfos['processorTypes'].add(subCluster.processorType);
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
else rawInfos['activeUsers'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
else rawInfos['runningJobs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
if (!rawInfos['totalNodes']) rawInfos['totalNodes'] = subCluster?.numberOfNodes || 0;
else rawInfos['totalNodes'] += subCluster?.numberOfNodes || 0;
@@ -290,11 +285,8 @@
0, // Initial Value
) || 0;
rawInfos['memBwRate'] = Math.floor((rawMemBw * 100) / 100)
return rawInfos
} else {
return {};
}
return rawInfos;
});
/* Functions */
@@ -410,17 +402,17 @@
<span>{[...clusterInfo?.processorTypes].join(', ')}</span>
</CardHeader>
<CardBody>
<Table borderless>
<Table borderless class="mb-0">
<tr class="py-2">
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
<td style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
</tr>
<hr class="my-1"/>
<tr class="pt-2">
<td style="font-size: large;">
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
</td>
<td colspan="2" style="font-size: large;">
<td style="font-size: large;">
Memory BW Rate
</td>
</tr>
@@ -429,49 +421,52 @@
{clusterInfo?.flopRate}
{clusterInfo?.flopRateUnit}
</td>
<td colspan="2" style="font-size:x-large;">
<td style="font-size:x-large;">
{clusterInfo?.memBwRate}
{clusterInfo?.memBwRateUnit}
</td>
</tr>
<hr class="my-1"/>
<tr class="py-2">
<tr class="pt-2">
<td>{formatNumber(clusterInfo?.allocatedNodes)} Active Nodes</td>
<td style="min-width: 100px;"
><div class="col">
<Progress multi max={clusterInfo?.totalNodes} style="cursor: help;height:1.5rem;" title={`${formatNumber(clusterInfo?.totalNodes)} Total Nodes`}>
<Progress bar color="success" value={clusterInfo?.allocatedNodes}/>
<Progress bar color="light" value={clusterInfo?.idleNodes}/>
</Progress>
</div></td
>
<td>{formatNumber(clusterInfo?.idleNodes)} Idle Nodes</td>
</tr>
<tr class="py-2">
<tr class="pb-2">
<td colspan="2"> <Col class="p-0">
<Progress
color="success" style="cursor:help; height:1rem;"
value={clusterInfo?.allocatedNodes} max={clusterInfo?.totalNodes}
title={`${formatNumber(clusterInfo?.totalNodes)} Total Nodes`}
/>
</Col> </td>
</tr>
<tr class="pt-2">
<td>{formatNumber(clusterInfo?.allocatedCores)} Active Cores</td>
<td style="min-width: 100px;"
><div class="col">
<Progress multi max={clusterInfo?.totalCores} style="cursor: help;height:1.5rem;" title={`${formatNumber(clusterInfo?.totalCores)} Total Cores`}>
<Progress bar color="success" value={clusterInfo?.allocatedCores}/>
<Progress bar color="light" value={clusterInfo?.idleCores}/>
</Progress>
</div></td
>
<td>{formatNumber(clusterInfo?.idleCores)} Idle Cores</td>
</tr>
<tr class="pb-2">
<td colspan="2"> <Col class="p-0">
<Progress
color="success" style="cursor:help; height:1rem;"
value={clusterInfo?.allocatedCores} max={clusterInfo?.totalCores}
title={`${formatNumber(clusterInfo?.totalCores)} Total Cores`}
/>
</Col> </td>
</tr>
{#if clusterInfo?.totalAccs !== 0}
<tr class="py-2">
<tr class="pt-2">
<td>{formatNumber(clusterInfo?.allocatedAccs)} Active Accelerators</td>
<td style="min-width: 100px;"
><div class="col">
<Progress multi max={clusterInfo?.totalAccs} style="cursor: help;height:1.5rem;" title={`${formatNumber(clusterInfo?.totalAccs)} Total Accelerators`}>
<Progress bar color="success" value={clusterInfo?.allocatedAccs}/>
<Progress bar color="light" value={clusterInfo?.idleAccs}/>
</Progress>
</div></td
>
<td>{formatNumber(clusterInfo?.idleAccs)} Idle Accelerators</td>
</tr>
<tr class="pb-2">
<td colspan="2"> <Col class="p-0">
<Progress
color="success" style="cursor:help; height:1rem;"
value={clusterInfo?.allocatedAccs} max={clusterInfo?.totalAccs}
title={`${formatNumber(clusterInfo?.totalAccs)} Total Accelerators`}
/>
</Col> </td >
</tr>
{/if}
</Table>
</CardBody>

View File

@@ -16,6 +16,7 @@
Table,
Progress,
Icon,
Spinner
} from "@sveltestrap/sveltestrap";
import {
queryStore,
@@ -408,7 +409,19 @@
<hr/>
<!-- Node Stack Charts Dev-->
{#if $statesTimed.data}
{#if $statesTimed.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statesTimed.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">{$statesTimed.error.message}</Card>
</Col>
</Row>
{:else if $statesTimed.data}
<Row cols={{ md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<div>
@@ -450,7 +463,19 @@
<hr/>
<!-- Node Health Pis, later Charts -->
{#if $statusQuery?.data?.nodeStates}
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<!-- This branch is entered on $statusQuery.error, so show that store's message -->
<Row cols={1} class="text-center mt-3">
  <Col>
    <Card body color="danger">{$statusQuery.error.message}</Card>
  </Col>
</Row>
{:else if $statusQuery?.data?.nodeStates}
<Row cols={{ lg: 4, md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<div bind:clientWidth={pieWidth}>
@@ -536,7 +561,19 @@
<hr/>
<!-- Gauges & Roofline per Subcluster-->
{#if $statusQuery.data}
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">{$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery.data}
{#each clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
<Row cols={{ lg: 3, md: 1 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3">

View File

@@ -95,7 +95,6 @@
let nodes = $state([]);
let page = $state(1);
let headerPaddingTop = $state(0);
let matchedNodes = $state(0);
/* Derived */
let selectedMetrics = $derived(pendingSelectedMetrics);
@@ -120,6 +119,8 @@
requestPolicy: "network-only", // Resolution queries are cached, but how to access them? For now: reload on every change
}));
const matchedNodes = $derived($nodesQuery?.data?.nodeMetricsList?.totalNodes || 0);
/* Effects */
$effect(() => {
if (!usePaging) {
@@ -141,8 +142,7 @@
$effect(() => {
if ($nodesQuery?.data) {
untrack(() => {
nodes = handleNodes($nodesQuery?.data?.nodeMetricsList);
matchedNodes = $nodesQuery?.data?.totalNodes || 0;
handleNodes($nodesQuery?.data?.nodeMetricsList?.items);
});
selectedMetrics = [...pendingSelectedMetrics]; // Trigger Rerender in NodeListRow Only After Data is Fetched
};
@@ -161,18 +161,18 @@
});
/* Functions */
function handleNodes(data) {
if (data) {
function handleNodes(newNodes) {
if (newNodes) {
if (usePaging) {
// console.log('New Paging', $state.snapshot(paging))
return [...data.items].sort((a, b) => a.host.localeCompare(b.host));
nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host));
} else {
if ($state.snapshot(page) == 1) {
// console.log('Page 1 Reset', [...data.items])
return [...data.items].sort((a, b) => a.host.localeCompare(b.host));
nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host));
} else {
// console.log('Add Nodes', $state.snapshot(nodes), [...data.items])
return nodes.concat([...data.items])
nodes = nodes.concat([...newNodes])
}
}
};
@@ -248,7 +248,16 @@
<Card body color="danger">{$nodesQuery.error.message}</Card>
</Col>
</Row>
{:else if $nodesQuery.fetching || !$nodesQuery.data}
{:else}
{#each nodes as nodeData (nodeData.host)}
<NodeListRow {nodeData} {cluster} {selectedMetrics}/>
{:else}
<tr>
<td colspan={selectedMetrics.length + 1}> No nodes found </td>
</tr>
{/each}
{/if}
{#if $nodesQuery.fetching || !$nodesQuery.data}
<tr>
<td colspan={pendingSelectedMetrics.length + 1}>
<div style="text-align:center;">
@@ -265,14 +274,6 @@
</div>
</td>
</tr>
{:else}
{#each nodes as nodeData (nodeData.host)}
<NodeListRow {nodeData} {cluster} {selectedMetrics}/>
{:else}
<tr>
<td colspan={selectedMetrics.length + 1}> No nodes found </td>
</tr>
{/each}
{/if}
</tbody>
</Table>

View File

@@ -171,7 +171,7 @@
Users
</InputGroupText>
<Input class="flex-grow-1" style="background-color: white;" type="text" value="{userList?.length || 0} User{(userList?.length == 1) ? '': 's'}" disabled />
<a title="Show users active on this node" href="/monitoring/users/?cluster={cluster}&state=running&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
<a title="Show users active on this node" href="/monitoring/users/?cluster={cluster}&state=running&startTime=last30d&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
<Icon name="view-list" />
List
</a>
@@ -192,7 +192,7 @@
Projects
</InputGroupText>
<Input class="flex-grow-1" style="background-color: white;" type="text" value="{projectList?.length || 0} Project{(projectList?.length == 1) ? '': 's'}" disabled />
<a title="Show projects active on this node" href="/monitoring/projects/?cluster={cluster}&state=running&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
<a title="Show projects active on this node" href="/monitoring/projects/?cluster={cluster}&state=running&startTime=last30d&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
<Icon name="view-list" />
List
</a>