Fix exclusive to shared in svlete and graphql

This commit is contained in:
Aditya Ujeniya
2025-09-09 14:57:05 +02:00
parent d00881de2e
commit 3b9d05cc6d
16 changed files with 35 additions and 132 deletions

View File

@@ -419,7 +419,7 @@ input JobFilter {
startTime: TimeRange
state: [JobState!]
metricStats: [MetricStatItem!]
shared: StringInput
shared: String
node: StringInput
}

View File

@@ -80,7 +80,7 @@
"restore": "48h"
},
"archive": {
"interval": "48h",
"interval": "2h",
"directory": "./var/archive"
},
"retention-in-memory": "48h"

View File

@@ -6,7 +6,7 @@ package avro
import (
"context"
"fmt"
"log"
"slices"
"strconv"
"sync"
@@ -35,7 +35,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
// Fetch the frequency of the metric from the global configuration
freq, err := config.GetMetricFrequency(val.MetricName)
if err != nil {
fmt.Printf("Error fetching metric frequency: %s\n", err)
log.Printf("Error fetching metric frequency: %s\n", err)
continue
}
@@ -60,7 +60,7 @@ func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
// If the Avro level is nil, create a new one
if avroLevel == nil {
fmt.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
log.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
}
oldSelector = slices.Clone(selector)
}

View File

@@ -2741,7 +2741,7 @@ input JobFilter {
startTime: TimeRange
state: [JobState!]
metricStats: [MetricStatItem!]
shared: StringInput
shared: String
node: StringInput
}
@@ -16490,7 +16490,7 @@ func (ec *executionContext) unmarshalInputJobFilter(ctx context.Context, obj any
it.MetricStats = data
case "shared":
ctx := graphql.WithPathContext(ctx, graphql.NewPathWithField("shared"))
data, err := ec.unmarshalOStringInput2ᚖgithubᚗcomᚋClusterCockpitᚋccᚑbackendᚋinternalᚋgraphᚋmodelᚐStringInput(ctx, v)
data, err := ec.unmarshalOString2ᚖstring(ctx, v)
if err != nil {
return it, err
}

View File

@@ -69,7 +69,7 @@ type JobFilter struct {
StartTime *config.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Shared *StringInput `json:"shared,omitempty"`
Shared *string `json:"shared,omitempty"`
Node *StringInput `json:"node,omitempty"`
}

View File

@@ -831,3 +831,15 @@ type mutationResolver struct{ *Resolver }
type nodeResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }
// !!! WARNING !!!
// The code below was going to be deleted when updating resolvers. It has been copied here so you have
// one last chance to move it out of harms way if you want. There are two reasons this happens:
// - When renaming or deleting a resolver the old code will be put in here. You can safely delete
// it when you're done.
// - You have helper methods in this file. Move them out to keep these resolver files clean.
/*
func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
}
*/

View File

@@ -11,6 +11,7 @@ import (
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"sync"
@@ -26,7 +27,7 @@ func Archiving(wg *sync.WaitGroup, ctx context.Context) {
defer wg.Done()
d, err := time.ParseDuration(config.MetricStoreKeys.Archive.Interval)
if err != nil {
cclog.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
log.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
}
if d <= 0 {
return
@@ -44,14 +45,14 @@ func Archiving(wg *sync.WaitGroup, ctx context.Context) {
return
case <-ticks:
t := time.Now().Add(-d)
cclog.Infof("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339))
log.Printf("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339))
n, err := ArchiveCheckpoints(config.MetricStoreKeys.Checkpoints.RootDir,
config.MetricStoreKeys.Archive.RootDir, t.Unix(), config.MetricStoreKeys.Archive.DeleteInstead)
if err != nil {
cclog.Warnf("[METRICSTORE]> archiving failed: %s\n", err.Error())
log.Printf("[METRICSTORE]> archiving failed: %s\n", err.Error())
} else {
cclog.Infof("[METRICSTORE]> done: %d files zipped and moved to archive\n", n)
log.Printf("[METRICSTORE]> done: %d files zipped and moved to archive\n", n)
}
}
}

View File

@@ -34,7 +34,7 @@ func RegisterFootprintWorker() {
c := 0
ce := 0
cl := 0
cclog.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
cclog.Printf("Update Footprints started at %s\n", s.Format(time.RFC3339))
for _, cluster := range archive.Clusters {
s_cluster := time.Now()

View File

@@ -1,110 +0,0 @@
JWT="eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NjQ1NjMzOTUsImlhdCI6MTc1NzM2MzM5NSwicm9sZXMiOlsiYWRtaW4iLCJhcGkiXSwic3ViIjoiZGVtbyJ9.uhtEbS-ty4xNc8GWTKjyh1b06j6b3vtEw7lzQy0Eht5LtISZwRfyRBfdKjbm_t25xGrNH9sxINq4qiYKBjAaDQ"
# curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296'
rm sample_fritz.txt
rm sample_alex.txt
while [ true ]; do
echo "Alex Metrics for hwthread types and type-ids"
timestamp="$(date '+%s')"
echo "Timestamp : "+$timestamp
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..127}; do
echo "$metric,cluster=alex,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
echo "Fritz Metrics for hwthread types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
for id in {0..71}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=hwthread,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
rm sample_fritz.txt
rm sample_alex.txt
echo "Alex Metrics for accelerator types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in 00000000:49:00.0 00000000:0E:00.0 00000000:D1:00.0 00000000:90:00.0 00000000:13:00.0 00000000:96:00.0 00000000:CC:00.0 00000000:4F:00.0; do
echo "$metric,cluster=alex,hostname=$hostname,type=accelerator,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
rm sample_alex.txt
echo "Alex Metrics for memoryDomain types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..7}; do
echo "$metric,cluster=alex,hostname=$hostname,type=memoryDomain,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
rm sample_alex.txt
echo "Alex Metrics for socket types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
for id in {0..1}; do
echo "$metric,cluster=alex,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
echo "Fritz Metrics for socket types and type-ids"
for metric in cpu_load cpu_user flops_any cpu_irq cpu_system ipc cpu_idle cpu_iowait core_power clock; do
for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
for id in {0..1}; do
echo "$metric,cluster=fritz,hostname=$hostname,type=socket,type-id=$id value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
rm sample_fritz.txt
rm sample_alex.txt
echo "Alex Metrics for nodes"
for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
for hostname in a0603 a0903 a0832 a0329 a0702 a0122 a1624 a0731 a0224 a0704 a0631 a0225 a0222 a0427 a0603 a0429 a0833 a0705 a0901 a0601 a0227 a0804 a0322 a0226 a0126 a0129 a0605 a0801 a0934 a1622 a0902 a0428 a0537 a1623 a1722 a0228 a0701 a0326 a0327 a0123 a0321 a1621 a0323 a0124 a0534 a0931 a0324 a0933 a0424 a0905 a0128 a0532 a0805 a0521 a0535 a0932 a0127 a0325 a0633 a0831 a0803 a0426 a0425 a0229 a1721 a0602 a0632 a0223 a0422 a0423 a0536 a0328 a0703 anvme7 a0125 a0221 a0604 a0802 a0522 a0531 a0533 a0904; do
echo "$metric,cluster=alex,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_alex.txt
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" --data-binary @sample_alex.txt
echo "Fritz Metrics for nodes"
for metric in cpu_irq cpu_load mem_cached net_bytes_in cpu_user cpu_idle nfs4_read mem_used nfs4_write nfs4_total ib_xmit ib_xmit_pkts net_bytes_out cpu_iowait ib_recv cpu_system ib_recv_pkts; do
for hostname in f0201 f0202 f0203 f0204 f0205 f0206 f0207 f0208 f0209 f0210 f0211 f0212 f0213 f0214 f0215 f0217 f0218 f0219 f0220 f0221 f0222 f0223 f0224 f0225 f0226 f0227 f0228 f0229 f0230 f0231 f0232 f0233 f0234 f0235 f0236 f0237 f0238 f0239 f0240 f0241 f0242 f0243 f0244 f0245 f0246 f0247 f0248 f0249 f0250 f0251 f0252 f0253 f0254 f0255 f0256 f0257 f0258 f0259 f0260 f0261 f0262 f0263 f0264 f0378; do
echo "$metric,cluster=fritz,hostname=$hostname,type=node value=$((1 + RANDOM % 100)).0 $timestamp" >>sample_fritz.txt
done
done
curl -X 'POST' 'http://localhost:8080/metricstore/api/write/?cluster=fritz' -H "Authorization: Bearer $JWT" --data-binary @sample_fritz.txt
rm sample_fritz.txt
rm sample_alex.txt
sleep 1m
done
# curl -X 'POST' 'http://localhost:8081/api/write/?cluster=alex' -H "Authorization: Bearer $JWT" -d $'cpu_load,cluster=alex,hostname=a042,type=hwthread,type-id=0 value=35.0 1725827464642231296'

Binary file not shown.

View File

@@ -56,7 +56,7 @@
job(id: "${dbid}") {
id, jobId, user, project, cluster, startTime,
duration, numNodes, numHWThreads, numAcc, energy,
SMT, exclusive, partition, subCluster, arrayJobId,
SMT, shared, partition, subCluster, arrayJobId,
monitoringStatus, state, walltime,
tags { id, type, scope, name },
resources { hostname, hwthreads, accelerators },
@@ -325,7 +325,7 @@
metricUnit={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.unit}
nativeScope={$initq.data.globalMetrics.find((gm) => gm.name == item.metric)?.scope}
presetScopes={item.data.map((x) => x.scope)}
isShared={$initq.data.job.exclusive != 1}
isShared={$initq.data.job.shared != "none"}
/>
{:else if item.disabled == true}
<Card color="info">

View File

@@ -69,7 +69,7 @@
hostname
}
SMT
exclusive
shared
partition
arrayJobId
monitoringStatus

View File

@@ -172,7 +172,7 @@
{job.numNodes}
{/if}
<Icon name="pc-horizontal" />
{#if job.exclusive != 1}
{#if job.shared != "none"}
(shared)
{/if}
{#if job.numAcc > 0}

View File

@@ -213,7 +213,7 @@
metric={metric.data.name}
cluster={cluster.find((c) => c.name == job.cluster)}
subCluster={job.subCluster}
isShared={job.exclusive != 1}
isShared={job.shared != "none"}
numhwthreads={job.numHWThreads}
numaccs={job.numAcc}
zoomState={zoomStates[metric.data.name] || null}

View File

@@ -92,7 +92,7 @@
Missing Metric
</Button>
</InputGroup>
{:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].exclusive}
{:else if nodeJobsData.jobs.count == 1 && nodeJobsData.jobs.items[0].shared == "none"}
<InputGroup>
<InputGroupText>
<Icon name="circle-fill"/>
@@ -104,7 +104,7 @@
Exclusive
</Button>
</InputGroup>
{:else if nodeJobsData.jobs.count >= 1 && !nodeJobsData.jobs.items[0].exclusive}
{:else if nodeJobsData.jobs.count >= 1 && !(nodeJobsData.jobs.items[0].shared == "none")}
<InputGroup>
<InputGroupText>
<Icon name="circle-half"/>

View File

@@ -45,7 +45,7 @@
jobId
user
project
exclusive
shared
resources {
hostname
accelerators
@@ -101,7 +101,7 @@
function buildExtendedLegend() {
let pendingExtendedLegendData = null
// Build Extended for allocated nodes [Commented: Only Build extended Legend For Shared Nodes]
if ($nodeJobsData.data.jobs.count >= 1) { // "&& !$nodeJobsData.data.jobs.items[0].exclusive)"
if ($nodeJobsData.data.jobs.count >= 1) {
const accSet = Array.from(new Set($nodeJobsData.data.jobs.items
.map((i) => i.resources
.filter((r) => (r.hostname === nodeData.host) && r?.accelerators)