mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-01-27 22:41:46 +01:00
Merge branch 'dev' of github.com:ClusterCockpit/cc-backend into dev
This commit is contained in:
@@ -11,6 +11,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -683,6 +684,11 @@ func (r *NodeRepository) GetNodesForList(
|
||||
hasNextPage = len(nextNodes) == 1
|
||||
}
|
||||
|
||||
// Fallback for non-init'd node table in DB; Ignores stateFilter
|
||||
if stateFilter == "all" && countNodes == 0 {
|
||||
nodes, countNodes, hasNextPage = getNodesFromTopol(cluster, subCluster, nodeFilter, page)
|
||||
}
|
||||
|
||||
return nodes, stateMap, countNodes, hasNextPage, nil
|
||||
}
|
||||
|
||||
@@ -707,3 +713,51 @@ func AccessCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBu
|
||||
return qnil, fmt.Errorf("user has no or unknown roles")
|
||||
}
|
||||
}
|
||||
|
||||
func getNodesFromTopol(cluster string, subCluster string, nodeFilter string, page *model.PageRequest) ([]string, int, bool) {
|
||||
// 0) Init additional vars
|
||||
var hasNextPage bool = false
|
||||
var totalNodes int = 0
|
||||
|
||||
// 1) Get list of all nodes
|
||||
var topolNodes []string
|
||||
if subCluster != "" {
|
||||
scNodes := archive.NodeLists[cluster][subCluster]
|
||||
topolNodes = scNodes.PrintList()
|
||||
} else {
|
||||
subClusterNodeLists := archive.NodeLists[cluster]
|
||||
for _, nodeList := range subClusterNodeLists {
|
||||
topolNodes = append(topolNodes, nodeList.PrintList()...)
|
||||
}
|
||||
}
|
||||
|
||||
// 2) Filter nodes
|
||||
if nodeFilter != "" {
|
||||
filteredNodes := []string{}
|
||||
for _, node := range topolNodes {
|
||||
if strings.Contains(node, nodeFilter) {
|
||||
filteredNodes = append(filteredNodes, node)
|
||||
}
|
||||
}
|
||||
topolNodes = filteredNodes
|
||||
}
|
||||
|
||||
// 2.1) Count total nodes && Sort nodes -> Sorting invalidated after ccms return ...
|
||||
totalNodes = len(topolNodes)
|
||||
sort.Strings(topolNodes)
|
||||
|
||||
// 3) Apply paging
|
||||
if len(topolNodes) > page.ItemsPerPage {
|
||||
start := (page.Page - 1) * page.ItemsPerPage
|
||||
end := start + page.ItemsPerPage
|
||||
if end >= len(topolNodes) {
|
||||
end = len(topolNodes)
|
||||
hasNextPage = false
|
||||
} else {
|
||||
hasNextPage = true
|
||||
}
|
||||
topolNodes = topolNodes[start:end]
|
||||
}
|
||||
|
||||
return topolNodes, totalNodes, hasNextPage
|
||||
}
|
||||
|
||||
@@ -456,7 +456,7 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
|
||||
http.Redirect(rw, r, "/monitoring/jobs/?startTime="+fromTime+"-"+untilTime+"&arrayJobId="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound) // All Users: Redirect to Tablequery
|
||||
case "username":
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
|
||||
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " ")), http.StatusFound)
|
||||
http.Redirect(rw, r, "/monitoring/users/?user="+url.QueryEscape(strings.Trim(splitSearch[1], " "))+"&startTime=last30d", http.StatusFound)
|
||||
} else {
|
||||
web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Error", MsgType: "alert-danger", Message: "Missing Access Rights", User: *user, Roles: availableRoles, Build: buildInfo})
|
||||
}
|
||||
@@ -464,10 +464,10 @@ func HandleSearchBar(rw http.ResponseWriter, r *http.Request, buildInfo web.Buil
|
||||
usernames, _ := repo.FindColumnValues(user, strings.Trim(splitSearch[1], " "), "user", "username", "name")
|
||||
if len(usernames) != 0 {
|
||||
joinedNames := strings.Join(usernames, "&user=")
|
||||
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames, http.StatusFound)
|
||||
http.Redirect(rw, r, "/monitoring/users/?user="+joinedNames+"&startTime=last30d", http.StatusFound)
|
||||
} else {
|
||||
if user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport, schema.RoleManager}) {
|
||||
http.Redirect(rw, r, "/monitoring/users/?user=NoUserNameFound", http.StatusPermanentRedirect)
|
||||
http.Redirect(rw, r, "/monitoring/users/?user=NoUserNameFound&startTime=last30d", http.StatusPermanentRedirect)
|
||||
} else {
|
||||
web.RenderTemplate(rw, "message.tmpl", &web.Page{Title: "Error", MsgType: "alert-danger", Message: "Missing Access Rights", User: *user, Roles: availableRoles, Build: buildInfo})
|
||||
}
|
||||
|
||||
@@ -912,9 +912,6 @@ func buildNodeQueries(
|
||||
scopes []schema.MetricScope,
|
||||
resolution int64,
|
||||
) ([]APIQuery, []schema.MetricScope, error) {
|
||||
if len(nodes) == 0 {
|
||||
return nil, nil, fmt.Errorf("METRICDATA/CCMS > no nodes specified for query")
|
||||
}
|
||||
|
||||
queries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(nodes))
|
||||
assignedScope := []schema.MetricScope{}
|
||||
|
||||
197
tools/dataGenerator.sh
Normal file
197
tools/dataGenerator.sh
Normal file
@@ -0,0 +1,197 @@
|
||||
#!/bin/bash
#
# Generates random metric samples for the demo clusters "alex" and "fritz" and
# sends them either to the REST write endpoint or to a NATS subject.
# All settings are configured through the variables below.

# ==========================================
# CONFIGURATION & FLAGS
# ==========================================

# MODE SETTINGS
TRANSPORT_MODE="REST"       # Options: "REST" or "NATS"
CONNECTION_SCOPE="INTERNAL" # Options: "INTERNAL" or "EXTERNAL"
API_USER="demo"             # User for JWT generation

# BASE NETWORK CONFIG
SERVICE_ADDRESS="http://localhost:8080"
NATS_SERVER="nats://0.0.0.0:4222"

# NATS CREDENTIALS
NATS_USER="root"
NATS_PASS="root"
NATS_SUBJECT="hpc-nats"

# EXTERNAL JWT (Used if CONNECTION_SCOPE is EXTERNAL)
# NOTE(review): this token is committed in plain text. Keep it limited to
# demo setups and never replace it with a token for a real deployment.
JWT_STATIC="eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzU3Nzg4NDQsImlhdCI6MTc2ODU3ODg0NCwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9._SDEW9WaUVXSBFmWqGhyIZXLoqoDU8F1hkfh4cXKIqF4yw7w50IUpfUBtwUFUOnoviFKoi563f6RAMC7XxeLDA"

# ==========================================
# DATA DEFINITIONS
# ==========================================
# Each host must be listed only once: a duplicate would emit two samples with
# different random values for the same metric, host and timestamp.
ALEX_HOSTS="a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904"
FRITZ_HOSTS="f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378"

METRICS_STD="cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock"
METRICS_NODE="cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts"
ACCEL_IDS="00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0"

# ==========================================
# SETUP ENV (URL & TOKEN)
# ==========================================

if [ "$CONNECTION_SCOPE" == "INTERNAL" ]; then
    # 1. Set URL for Internal Mode
    REST_URL="${SERVICE_ADDRESS}/metricstore/api/write"

    # 2. Generate JWT dynamically
    echo "Setup: INTERNAL mode selected."
    echo "Generating JWT for user: $API_USER"
    # Extract the token from the success message printed by cc-backend
    # (grep -P needs a grep with PCRE support, e.g. GNU grep)
    JWT=$(./cc-backend -jwt "$API_USER" | grep -oP "(?<=JWT: Successfully generated JWT for user '${API_USER}': ).*")

    if [ -z "$JWT" ]; then
        # Diagnostics belong on stderr so they survive stdout redirection
        echo "Error: Failed to generate JWT from cc-backend." >&2
        exit 1
    fi
else
    # 1. Set URL for External Mode
    REST_URL="${SERVICE_ADDRESS}/api/write"

    # 2. Use Static JWT
    echo "Setup: EXTERNAL mode selected."
    echo "Using static JWT."
    JWT="$JWT_STATIC"
fi

echo "Target URL: $REST_URL"
|
||||
|
||||
# ==========================================
|
||||
# FUNCTIONS
|
||||
# ==========================================
|
||||
|
||||
# send_payload FILE CLUSTER
#
# Sends the samples stored in FILE using the transport selected by
# TRANSPORT_MODE: a NATS publish to NATS_SUBJECT, or otherwise a REST POST to
# REST_URL. CLUSTER is only used by the REST transport, as the "cluster" query
# parameter. FILE is deleted afterwards in either case.
#
# Returns the exit status of the transport command (0 on success) and prints a
# warning to stderr when sending failed.
send_payload() {
    local file_path=$1
    local cluster_name=$2
    local status=0

    if [ "$TRANSPORT_MODE" == "NATS" ]; then
        # Piping file content directly to nats stdin
        cat "$file_path" | nats pub "$NATS_SUBJECT" -s "$NATS_SERVER" --user "$NATS_USER" --password "$NATS_PASS" || status=$?
    else
        # Sending via REST API.
        # --fail turns HTTP errors (e.g. a rejected or expired JWT) into a
        # non-zero exit code; --show-error prints the reason despite -s.
        curl -s --show-error --fail -X 'POST' "${REST_URL}/?cluster=${cluster_name}" \
            -H "Authorization: Bearer $JWT" \
            --data-binary "@$file_path" || status=$?
    fi

    if [ "$status" -ne 0 ]; then
        echo "Warning: sending $file_path for cluster '$cluster_name' failed (exit code $status)" >&2
    fi

    # Clean up immediately, also after a failed transfer; -f avoids a second
    # error if the file is already gone
    rm -f -- "$file_path"

    return "$status"
}
|
||||
|
||||
# ==========================================
|
||||
# MAIN LOOP
|
||||
# ==========================================
|
||||
|
||||
# emit_scoped CLUSTER TYPE METRICS HOSTS ID...
#
# Prints one sample line per metric, host and ID (in that nesting order) for
# metrics that carry a type-id. METRICS and HOSTS are space-separated lists;
# every remaining argument is one type-id. Values are random integers in
# 1..100 rendered as "<n>.0". Reads the global $timestamp set by the main loop.
emit_scoped() {
    local cluster=$1 type=$2 metrics=$3 hosts=$4
    shift 4
    local metric hostname id

    for metric in $metrics; do
        for hostname in $hosts; do
            for id in "$@"; do
                echo "$metric,cluster=$cluster,hostname=$hostname,type=$type,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp"
            done
        done
    done
}

# emit_node CLUSTER METRICS HOSTS
#
# Prints one node-level sample line (no type-id) per metric and host.
# Reads the global $timestamp set by the main loop.
emit_node() {
    local cluster=$1 metrics=$2 hosts=$3
    local metric hostname

    for metric in $metrics; do
        for hostname in $hosts; do
            echo "$metric,cluster=$cluster,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp"
        done
    done
}

# Clean up leftovers
rm -f sample_fritz.txt sample_alex.txt

# One cycle per minute: every batch is written to a scratch file and handed to
# send_payload, which also removes the file again.
while true; do
    timestamp="$(date '+%s')"
    echo "--- Cycle Start: $timestamp [Mode: $TRANSPORT_MODE | Scope: $CONNECTION_SCOPE] ---"

    # 1. ALEX: HWTHREAD
    echo "Generating Alex: hwthread"
    emit_scoped alex hwthread "$METRICS_STD" "$ALEX_HOSTS" {0..127} > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 2. FRITZ: HWTHREAD
    echo "Generating Fritz: hwthread"
    emit_scoped fritz hwthread "$METRICS_STD" "$FRITZ_HOSTS" {0..71} > sample_fritz.txt
    send_payload "sample_fritz.txt" "fritz"

    # 3. ALEX: ACCELERATOR
    echo "Generating Alex: accelerator"
    # ACCEL_IDS is intentionally unquoted so that each ID becomes one argument
    emit_scoped alex accelerator "$METRICS_STD" "$ALEX_HOSTS" $ACCEL_IDS > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 4. ALEX: MEMORY DOMAIN
    echo "Generating Alex: memoryDomain"
    emit_scoped alex memoryDomain "$METRICS_STD" "$ALEX_HOSTS" {0..7} > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 5. ALEX: SOCKET
    echo "Generating Alex: socket"
    emit_scoped alex socket "$METRICS_STD" "$ALEX_HOSTS" {0..1} > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 6. FRITZ: SOCKET
    echo "Generating Fritz: socket"
    emit_scoped fritz socket "$METRICS_STD" "$FRITZ_HOSTS" {0..1} > sample_fritz.txt
    send_payload "sample_fritz.txt" "fritz"

    # 7. ALEX: NODE
    echo "Generating Alex: node"
    emit_node alex "$METRICS_NODE" "$ALEX_HOSTS" > sample_alex.txt
    send_payload "sample_alex.txt" "alex"

    # 8. FRITZ: NODE
    echo "Generating Fritz: node"
    emit_node fritz "$METRICS_NODE" "$FRITZ_HOSTS" > sample_fritz.txt
    send_payload "sample_fritz.txt" "fritz"

    sleep 1m
done
|
||||
@@ -149,7 +149,7 @@
|
||||
filter: $jobFilter
|
||||
page: $paging
|
||||
sortBy: TOTALJOBS
|
||||
groupBy: SUBCLUSTER
|
||||
groupBy: CLUSTER
|
||||
) {
|
||||
id
|
||||
totalJobs
|
||||
@@ -193,30 +193,25 @@
|
||||
}));
|
||||
|
||||
const clusterInfo = $derived.by(() => {
|
||||
let rawInfos = {};
|
||||
if ($initq?.data?.clusters) {
|
||||
let rawInfos = {};
|
||||
// Grouped By Cluster
|
||||
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalCores || 0;
|
||||
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalAccs || 0;
|
||||
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalUsers || 0;
|
||||
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalJobs || 0;
|
||||
|
||||
// Collected By Subcluster
|
||||
let subClusters = $initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || [];
|
||||
for (let subCluster of subClusters) {
|
||||
// Allocations
|
||||
if (!rawInfos['allocatedNodes']) rawInfos['allocatedNodes'] = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
|
||||
else rawInfos['allocatedNodes'] += $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
|
||||
|
||||
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
|
||||
else rawInfos['allocatedCores'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
|
||||
|
||||
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
|
||||
else rawInfos['allocatedAccs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
|
||||
|
||||
// Infos
|
||||
if (!rawInfos['processorTypes']) rawInfos['processorTypes'] = subCluster?.processorType ? new Set([subCluster.processorType]) : new Set([]);
|
||||
else rawInfos['processorTypes'].add(subCluster.processorType);
|
||||
|
||||
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
|
||||
else rawInfos['activeUsers'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
|
||||
|
||||
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
|
||||
else rawInfos['runningJobs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
|
||||
|
||||
if (!rawInfos['totalNodes']) rawInfos['totalNodes'] = subCluster?.numberOfNodes || 0;
|
||||
else rawInfos['totalNodes'] += subCluster?.numberOfNodes || 0;
|
||||
|
||||
@@ -281,11 +276,8 @@
|
||||
let rawGpuUnit = $statusQuery?.data?.nodeMetrics[0]?.metrics.find((m) => m.name == 'acc_power')?.metric?.unit || null
|
||||
rawInfos['gpuPwrUnit'] = rawGpuUnit ? rawGpuUnit.prefix + rawGpuUnit.base : ''
|
||||
}
|
||||
|
||||
return rawInfos
|
||||
} else {
|
||||
return {};
|
||||
}
|
||||
return rawInfos;
|
||||
});
|
||||
|
||||
const refinedStateData = $derived.by(() => {
|
||||
@@ -518,7 +510,7 @@
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
<Row cols={{xs:1, md:2}} style="height: 35vh; margin-bottom: 1rem;">
|
||||
<Row cols={{xs:1, md:2}} style="height: 34vh; margin-bottom: 1rem;">
|
||||
<!-- Total Cluster Metric in Time SUMS-->
|
||||
<Col class="text-center">
|
||||
<h5 class="mt-2 mb-0">
|
||||
@@ -537,6 +529,7 @@
|
||||
timestep={$statusQuery?.data?.clusterMetrics[0]?.timestep || 60}
|
||||
numNodes={$statusQuery?.data?.clusterMetrics?.nodeCount || 0}
|
||||
metricData={$statusQuery?.data?.clusterMetrics?.metrics || []}
|
||||
height={250}
|
||||
publicMode
|
||||
/>
|
||||
{/key}
|
||||
@@ -557,14 +550,14 @@
|
||||
nodesData={transformNodesStatsToInfo($statusQuery?.data?.nodeMetrics)}
|
||||
fixTitle="Node Utilization"
|
||||
yMinimum={1.0}
|
||||
height={330}
|
||||
height={280}
|
||||
/>
|
||||
{/key}
|
||||
</div>
|
||||
</Col>
|
||||
</Row>
|
||||
|
||||
<Row cols={{xs:1, md:2}} style="height: 35vh;">
|
||||
<Row cols={{xs:1, md:2}} style="height: 34vh;">
|
||||
<Col> <!-- Pie Last States -->
|
||||
<Row>
|
||||
{#if refinedStateData.length > 0}
|
||||
@@ -621,7 +614,7 @@
|
||||
{#key $statesTimed?.data?.nodeStatesTimed}
|
||||
<Stacked
|
||||
data={$statesTimed?.data?.nodeStatesTimed}
|
||||
height={300}
|
||||
height={250}
|
||||
ylabel="Nodes"
|
||||
yunit = "#Count"
|
||||
title = "Cluster Status"
|
||||
|
||||
@@ -95,19 +95,7 @@
|
||||
}
|
||||
|
||||
/* On Mount */
|
||||
onMount(() => {
|
||||
// By default, look at the jobs of the last 30 days:
|
||||
if (filterPresets?.startTime == null) {
|
||||
if (filterPresets == null) filterPresets = {};
|
||||
|
||||
filterPresets.startTime = {
|
||||
range: "last30d",
|
||||
text: "Last 30 Days",
|
||||
};
|
||||
};
|
||||
// Init Filter
|
||||
filterComponent.updateFilters();
|
||||
});
|
||||
onMount(() => filterComponent.updateFilters());
|
||||
</script>
|
||||
|
||||
<Row cols={{ xs: 1, md: 2}}>
|
||||
|
||||
@@ -91,38 +91,38 @@
|
||||
/* State Init */
|
||||
// svelte-ignore state_referenced_locally
|
||||
let filters = $state({
|
||||
dbId: filterPresets.dbId || [],
|
||||
jobId: filterPresets.jobId || "",
|
||||
jobIdMatch: filterPresets.jobIdMatch || "eq",
|
||||
arrayJobId: filterPresets.arrayJobId || null,
|
||||
jobName: filterPresets.jobName || "",
|
||||
project: filterPresets.project || "",
|
||||
projectMatch: filterPresets.projectMatch || "contains",
|
||||
user: filterPresets.user || "",
|
||||
userMatch: filterPresets.userMatch || "contains",
|
||||
cluster: filterPresets.cluster || null,
|
||||
partition: filterPresets.partition || null,
|
||||
dbId: filterPresets?.dbId || [],
|
||||
jobId: filterPresets?.jobId || "",
|
||||
jobIdMatch: filterPresets?.jobIdMatch || "eq",
|
||||
arrayJobId: filterPresets?.arrayJobId || null,
|
||||
jobName: filterPresets?.jobName || "",
|
||||
project: filterPresets?.project || "",
|
||||
projectMatch: filterPresets?.projectMatch || "contains",
|
||||
user: filterPresets?.user || "",
|
||||
userMatch: filterPresets?.userMatch || "contains",
|
||||
cluster: filterPresets?.cluster || null,
|
||||
partition: filterPresets?.partition || null,
|
||||
states:
|
||||
filterPresets.states || filterPresets.state
|
||||
filterPresets?.states || filterPresets?.state
|
||||
? [filterPresets.state].flat()
|
||||
: allJobStates,
|
||||
shared: filterPresets.shared || "",
|
||||
schedule: filterPresets.schedule || "",
|
||||
startTime: filterPresets.startTime || { from: null, to: null, range: ""},
|
||||
duration: filterPresets.duration || {
|
||||
shared: filterPresets?.shared || "",
|
||||
schedule: filterPresets?.schedule || "",
|
||||
startTime: filterPresets?.startTime || { from: null, to: null, range: ""},
|
||||
duration: filterPresets?.duration || {
|
||||
lessThan: null,
|
||||
moreThan: null,
|
||||
from: null,
|
||||
to: null,
|
||||
},
|
||||
tags: filterPresets.tags || [],
|
||||
numNodes: filterPresets.numNodes || { from: null, to: null },
|
||||
numHWThreads: filterPresets.numHWThreads || { from: null, to: null },
|
||||
numAccelerators: filterPresets.numAccelerators || { from: null, to: null },
|
||||
node: filterPresets.node || null,
|
||||
nodeMatch: filterPresets.nodeMatch || "eq",
|
||||
energy: filterPresets.energy || { from: null, to: null },
|
||||
stats: filterPresets.stats || [],
|
||||
tags: filterPresets?.tags || [],
|
||||
numNodes: filterPresets?.numNodes || { from: null, to: null },
|
||||
numHWThreads: filterPresets?.numHWThreads || { from: null, to: null },
|
||||
numAccelerators: filterPresets?.numAccelerators || { from: null, to: null },
|
||||
node: filterPresets?.node || null,
|
||||
nodeMatch: filterPresets?.nodeMatch || "eq",
|
||||
energy: filterPresets?.energy || { from: null, to: null },
|
||||
stats: filterPresets?.stats || [],
|
||||
});
|
||||
|
||||
/* Opened States */
|
||||
|
||||
@@ -92,7 +92,7 @@
|
||||
{/each}
|
||||
</DropdownMenu>
|
||||
</Dropdown>
|
||||
{:else}
|
||||
{:else} <!-- Lists -->
|
||||
<Dropdown nav inNavbar {direction}>
|
||||
<DropdownToggle nav caret>
|
||||
<Icon name={item.icon} />
|
||||
@@ -100,7 +100,7 @@
|
||||
</DropdownToggle>
|
||||
<DropdownMenu class="dropdown-menu-lg-end">
|
||||
<DropdownItem
|
||||
href={item.href}
|
||||
href={`${item.href}?startTime=last30d`}
|
||||
>
|
||||
All Clusters
|
||||
</DropdownItem>
|
||||
@@ -112,12 +112,12 @@
|
||||
</DropdownToggle>
|
||||
<DropdownMenu>
|
||||
<DropdownItem class="py-1 px-2"
|
||||
href={`${item.href}?cluster=${cn}`}
|
||||
href={`${item.href}?cluster=${cn}&startTime=last30d`}
|
||||
>
|
||||
All Jobs
|
||||
</DropdownItem>
|
||||
<DropdownItem class="py-1 px-2"
|
||||
href={`${item.href}?cluster=${cn}&state=running`}
|
||||
href={`${item.href}?cluster=${cn}&state=running&startTime=last30d`}
|
||||
>
|
||||
Running Jobs
|
||||
</DropdownItem>
|
||||
|
||||
@@ -155,7 +155,7 @@
|
||||
filter: $jobFilter
|
||||
page: $paging
|
||||
sortBy: TOTALJOBS
|
||||
groupBy: SUBCLUSTER
|
||||
groupBy: CLUSTER
|
||||
) {
|
||||
id
|
||||
totalJobs
|
||||
@@ -222,30 +222,25 @@
|
||||
}));
|
||||
|
||||
const clusterInfo = $derived.by(() => {
|
||||
let rawInfos = {};
|
||||
if ($initq?.data?.clusters) {
|
||||
let rawInfos = {};
|
||||
// Grouped By Cluster
|
||||
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalCores || 0;
|
||||
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalAccs || 0;
|
||||
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalUsers || 0;
|
||||
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == presetCluster)?.totalJobs || 0;
|
||||
|
||||
// Collected By Subcluster
|
||||
let subClusters = $initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || [];
|
||||
for (let subCluster of subClusters) {
|
||||
// Allocations
|
||||
if (!rawInfos['allocatedNodes']) rawInfos['allocatedNodes'] = $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
|
||||
else rawInfos['allocatedNodes'] += $statusQuery?.data?.allocatedNodes?.find(({ name }) => name == subCluster.name)?.count || 0;
|
||||
|
||||
if (!rawInfos['allocatedCores']) rawInfos['allocatedCores'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
|
||||
else rawInfos['allocatedCores'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalCores || 0;
|
||||
|
||||
if (!rawInfos['allocatedAccs']) rawInfos['allocatedAccs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
|
||||
else rawInfos['allocatedAccs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalAccs || 0;
|
||||
|
||||
// Infos
|
||||
if (!rawInfos['processorTypes']) rawInfos['processorTypes'] = subCluster?.processorType ? new Set([subCluster.processorType]) : new Set([]);
|
||||
else rawInfos['processorTypes'].add(subCluster.processorType);
|
||||
|
||||
if (!rawInfos['activeUsers']) rawInfos['activeUsers'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
|
||||
else rawInfos['activeUsers'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalUsers || 0;
|
||||
|
||||
if (!rawInfos['runningJobs']) rawInfos['runningJobs'] = $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
|
||||
else rawInfos['runningJobs'] += $statusQuery?.data?.jobsStatistics?.find(({ id }) => id == subCluster.name)?.totalJobs || 0;
|
||||
|
||||
if (!rawInfos['totalNodes']) rawInfos['totalNodes'] = subCluster?.numberOfNodes || 0;
|
||||
else rawInfos['totalNodes'] += subCluster?.numberOfNodes || 0;
|
||||
|
||||
@@ -290,11 +285,8 @@
|
||||
0, // Initial Value
|
||||
) || 0;
|
||||
rawInfos['memBwRate'] = Math.floor((rawMemBw * 100) / 100)
|
||||
|
||||
return rawInfos
|
||||
} else {
|
||||
return {};
|
||||
}
|
||||
return rawInfos;
|
||||
});
|
||||
|
||||
/* Functions */
|
||||
@@ -410,17 +402,17 @@
|
||||
<span>{[...clusterInfo?.processorTypes].join(', ')}</span>
|
||||
</CardHeader>
|
||||
<CardBody>
|
||||
<Table borderless>
|
||||
<Table borderless class="mb-0">
|
||||
<tr class="py-2">
|
||||
<td style="font-size:x-large;">{clusterInfo?.runningJobs} Running Jobs</td>
|
||||
<td colspan="2" style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
|
||||
<td style="font-size:x-large;">{clusterInfo?.activeUsers} Active Users</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="pt-2">
|
||||
<td style="font-size: large;">
|
||||
Flop Rate (<span style="cursor: help;" title="Flops[Any] = (Flops[Double] x 2) + Flops[Single]">Any</span>)
|
||||
</td>
|
||||
<td colspan="2" style="font-size: large;">
|
||||
<td style="font-size: large;">
|
||||
Memory BW Rate
|
||||
</td>
|
||||
</tr>
|
||||
@@ -429,49 +421,52 @@
|
||||
{clusterInfo?.flopRate}
|
||||
{clusterInfo?.flopRateUnit}
|
||||
</td>
|
||||
<td colspan="2" style="font-size:x-large;">
|
||||
<td style="font-size:x-large;">
|
||||
{clusterInfo?.memBwRate}
|
||||
{clusterInfo?.memBwRateUnit}
|
||||
</td>
|
||||
</tr>
|
||||
<hr class="my-1"/>
|
||||
<tr class="py-2">
|
||||
<tr class="pt-2">
|
||||
<td>{formatNumber(clusterInfo?.allocatedNodes)} Active Nodes</td>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress multi max={clusterInfo?.totalNodes} style="cursor: help;height:1.5rem;" title={`${formatNumber(clusterInfo?.totalNodes)} Total Nodes`}>
|
||||
<Progress bar color="success" value={clusterInfo?.allocatedNodes}/>
|
||||
<Progress bar color="light" value={clusterInfo?.idleNodes}/>
|
||||
</Progress>
|
||||
</div></td
|
||||
>
|
||||
<td>{formatNumber(clusterInfo?.idleNodes)} Idle Nodes</td>
|
||||
</tr>
|
||||
<tr class="py-2">
|
||||
<tr class="pb-2">
|
||||
<td colspan="2"> <Col class="p-0">
|
||||
<Progress
|
||||
color="success" style="cursor:help; height:1rem;"
|
||||
value={clusterInfo?.allocatedNodes} max={clusterInfo?.totalNodes}
|
||||
title={`${formatNumber(clusterInfo?.totalNodes)} Total Nodes`}
|
||||
/>
|
||||
</Col> </td>
|
||||
</tr>
|
||||
<tr class="pt-2">
|
||||
<td>{formatNumber(clusterInfo?.allocatedCores)} Active Cores</td>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress multi max={clusterInfo?.totalCores} style="cursor: help;height:1.5rem;" title={`${formatNumber(clusterInfo?.totalCores)} Total Cores`}>
|
||||
<Progress bar color="success" value={clusterInfo?.allocatedCores}/>
|
||||
<Progress bar color="light" value={clusterInfo?.idleCores}/>
|
||||
</Progress>
|
||||
</div></td
|
||||
>
|
||||
<td>{formatNumber(clusterInfo?.idleCores)} Idle Cores</td>
|
||||
</tr>
|
||||
<tr class="pb-2">
|
||||
<td colspan="2"> <Col class="p-0">
|
||||
<Progress
|
||||
color="success" style="cursor:help; height:1rem;"
|
||||
value={clusterInfo?.allocatedCores} max={clusterInfo?.totalCores}
|
||||
title={`${formatNumber(clusterInfo?.totalCores)} Total Cores`}
|
||||
/>
|
||||
</Col> </td>
|
||||
</tr>
|
||||
{#if clusterInfo?.totalAccs !== 0}
|
||||
<tr class="py-2">
|
||||
<tr class="pt-2">
|
||||
<td>{formatNumber(clusterInfo?.allocatedAccs)} Active Accelerators</td>
|
||||
<td style="min-width: 100px;"
|
||||
><div class="col">
|
||||
<Progress multi max={clusterInfo?.totalAccs} style="cursor: help;height:1.5rem;" title={`${formatNumber(clusterInfo?.totalAccs)} Total Accelerators`}>
|
||||
<Progress bar color="success" value={clusterInfo?.allocatedAccs}/>
|
||||
<Progress bar color="light" value={clusterInfo?.idleAccs}/>
|
||||
</Progress>
|
||||
</div></td
|
||||
>
|
||||
<td>{formatNumber(clusterInfo?.idleAccs)} Idle Accelerators</td>
|
||||
</tr>
|
||||
<tr class="pb-2">
|
||||
<td colspan="2"> <Col class="p-0">
|
||||
<Progress
|
||||
color="success" style="cursor:help; height:1rem;"
|
||||
value={clusterInfo?.allocatedAccs} max={clusterInfo?.totalAccs}
|
||||
title={`${formatNumber(clusterInfo?.totalAccs)} Total Accelerators`}
|
||||
/>
|
||||
</Col> </td >
|
||||
</tr>
|
||||
{/if}
|
||||
</Table>
|
||||
</CardBody>
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
Table,
|
||||
Progress,
|
||||
Icon,
|
||||
Spinner
|
||||
} from "@sveltestrap/sveltestrap";
|
||||
import {
|
||||
queryStore,
|
||||
@@ -408,7 +409,19 @@
|
||||
<hr/>
|
||||
|
||||
<!-- Node Stack Charts Dev-->
|
||||
{#if $statesTimed.data}
|
||||
{#if $statesTimed.fetching}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Spinner />
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $statesTimed.error}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Card body color="danger">{$statesTimed.error.message}</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $statesTimed.data}
|
||||
<Row cols={{ md: 2 , sm: 1}} class="mb-3 justify-content-center">
|
||||
<Col class="px-3 mt-2 mt-lg-0">
|
||||
<div>
|
||||
@@ -450,7 +463,19 @@
|
||||
<hr/>
|
||||
|
||||
<!-- Node Health Pis, later Charts -->
|
||||
{#if $statusQuery?.data?.nodeStates}
|
||||
{#if $statusQuery.fetching}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Spinner />
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $statusQuery.error}
|
||||
<Row cols={1} class="text-center mt-3">
  <Col>
    <!-- This branch handles a failed $statusQuery, so show that store's error -->
    <Card body color="danger">{$statusQuery.error.message}</Card>
  </Col>
</Row>
|
||||
{:else if $statusQuery?.data?.nodeStates}
|
||||
<Row cols={{ lg: 4, md: 2 , sm: 1}} class="mb-3 justify-content-center">
|
||||
<Col class="px-3 mt-2 mt-lg-0">
|
||||
<div bind:clientWidth={pieWidth}>
|
||||
@@ -536,7 +561,19 @@
|
||||
|
||||
<hr/>
|
||||
<!-- Gauges & Roofline per Subcluster-->
|
||||
{#if $statusQuery.data}
|
||||
{#if $statusQuery.fetching}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Spinner />
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $statusQuery.error}
|
||||
<Row cols={1} class="text-center mt-3">
|
||||
<Col>
|
||||
<Card body color="danger">{$statusQuery.error.message}</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $statusQuery.data}
|
||||
{#each clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
|
||||
<Row cols={{ lg: 3, md: 1 , sm: 1}} class="mb-3 justify-content-center">
|
||||
<Col class="px-3">
|
||||
|
||||
@@ -95,7 +95,6 @@
|
||||
let nodes = $state([]);
|
||||
let page = $state(1);
|
||||
let headerPaddingTop = $state(0);
|
||||
let matchedNodes = $state(0);
|
||||
|
||||
/* Derived */
|
||||
let selectedMetrics = $derived(pendingSelectedMetrics);
|
||||
@@ -119,6 +118,8 @@
|
||||
},
|
||||
requestPolicy: "network-only", // Resolution queries are cached, but how to access them? For now: reload on every change
|
||||
}));
|
||||
|
||||
const matchedNodes = $derived($nodesQuery?.data?.nodeMetricsList?.totalNodes || 0);
|
||||
|
||||
/* Effects */
|
||||
$effect(() => {
|
||||
@@ -141,8 +142,7 @@
|
||||
$effect(() => {
|
||||
if ($nodesQuery?.data) {
|
||||
untrack(() => {
|
||||
nodes = handleNodes($nodesQuery?.data?.nodeMetricsList);
|
||||
matchedNodes = $nodesQuery?.data?.totalNodes || 0;
|
||||
handleNodes($nodesQuery?.data?.nodeMetricsList?.items);
|
||||
});
|
||||
selectedMetrics = [...pendingSelectedMetrics]; // Trigger Rerender in NodeListRow Only After Data is Fetched
|
||||
};
|
||||
@@ -161,18 +161,18 @@
|
||||
});
|
||||
|
||||
/* Functions */
|
||||
function handleNodes(data) {
|
||||
if (data) {
|
||||
function handleNodes(newNodes) {
|
||||
if (newNodes) {
|
||||
if (usePaging) {
|
||||
// console.log('New Paging', $state.snapshot(paging))
|
||||
return [...data.items].sort((a, b) => a.host.localeCompare(b.host));
|
||||
nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host));
|
||||
} else {
|
||||
if ($state.snapshot(page) == 1) {
|
||||
// console.log('Page 1 Reset', [...data.items])
|
||||
return [...data.items].sort((a, b) => a.host.localeCompare(b.host));
|
||||
nodes = [...newNodes].sort((a, b) => a.host.localeCompare(b.host));
|
||||
} else {
|
||||
// console.log('Add Nodes', $state.snapshot(nodes), [...data.items])
|
||||
return nodes.concat([...data.items])
|
||||
nodes = nodes.concat([...newNodes])
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -248,7 +248,16 @@
|
||||
<Card body color="danger">{$nodesQuery.error.message}</Card>
|
||||
</Col>
|
||||
</Row>
|
||||
{:else if $nodesQuery.fetching || !$nodesQuery.data}
|
||||
{:else}
|
||||
{#each nodes as nodeData (nodeData.host)}
|
||||
<NodeListRow {nodeData} {cluster} {selectedMetrics}/>
|
||||
{:else}
|
||||
<tr>
|
||||
<td colspan={selectedMetrics.length + 1}> No nodes found </td>
|
||||
</tr>
|
||||
{/each}
|
||||
{/if}
|
||||
{#if $nodesQuery.fetching || !$nodesQuery.data}
|
||||
<tr>
|
||||
<td colspan={pendingSelectedMetrics.length + 1}>
|
||||
<div style="text-align:center;">
|
||||
@@ -265,14 +274,6 @@
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
{:else}
|
||||
{#each nodes as nodeData (nodeData.host)}
|
||||
<NodeListRow {nodeData} {cluster} {selectedMetrics}/>
|
||||
{:else}
|
||||
<tr>
|
||||
<td colspan={selectedMetrics.length + 1}> No nodes found </td>
|
||||
</tr>
|
||||
{/each}
|
||||
{/if}
|
||||
</tbody>
|
||||
</Table>
|
||||
|
||||
@@ -171,7 +171,7 @@
|
||||
Users
|
||||
</InputGroupText>
|
||||
<Input class="flex-grow-1" style="background-color: white;" type="text" value="{userList?.length || 0} User{(userList?.length == 1) ? '': 's'}" disabled />
|
||||
<a title="Show users active on this node" href="/monitoring/users/?cluster={cluster}&state=running&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
|
||||
<a title="Show users active on this node" href="/monitoring/users/?cluster={cluster}&state=running&startTime=last30d&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
|
||||
<Icon name="view-list" />
|
||||
List
|
||||
</a>
|
||||
@@ -192,7 +192,7 @@
|
||||
Projects
|
||||
</InputGroupText>
|
||||
<Input class="flex-grow-1" style="background-color: white;" type="text" value="{projectList?.length || 0} Project{(projectList?.length == 1) ? '': 's'}" disabled />
|
||||
<a title="Show projects active on this node" href="/monitoring/projects/?cluster={cluster}&state=running&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
|
||||
<a title="Show projects active on this node" href="/monitoring/projects/?cluster={cluster}&state=running&startTime=last30d&node={hostname}" target="_blank" class="btn btn-outline-primary" role="button" aria-disabled="true" >
|
||||
<Icon name="view-list" />
|
||||
List
|
||||
</a>
|
||||
|
||||
Reference in New Issue
Block a user