mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-26 09:37:30 +01:00
Compare commits
18 Commits
v1.5.2
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0d06b86e79 | ||
|
|
b7e133fbaf | ||
| c267501a1b | |||
| a550344f13 | |||
|
|
c3b6d93941 | ||
|
|
bd7125a52e | ||
| 93a9d732a4 | |||
| 6f7dda53ee | |||
| 0325d9e866 | |||
|
|
c13fd68aa9 | ||
| 3d94b0bf79 | |||
|
|
d5ea2b4cf5 | ||
| 45f329e5fb | |||
|
|
100dd7dacf | ||
| 192c94a78d | |||
| e41d1251ba | |||
| 586c902044 | |||
| 01ec70baa8 |
@@ -11,7 +11,8 @@ import "flag"
|
||||
|
||||
var (
|
||||
flagReinitDB, flagInit, flagServer, flagSyncLDAP, flagGops, flagMigrateDB, flagRevertDB,
|
||||
flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags, flagOptimizeDB bool
|
||||
flagForceDB, flagDev, flagVersion, flagLogDateTime, flagApplyTags, flagOptimizeDB,
|
||||
flagCleanupCheckpoints bool
|
||||
flagNewUser, flagDelUser, flagGenJWT, flagConfigFile, flagImportJob, flagLogLevel string
|
||||
)
|
||||
|
||||
@@ -28,6 +29,7 @@ func cliInit() {
|
||||
flag.BoolVar(&flagApplyTags, "apply-tags", false, "Run taggers on all completed jobs and exit")
|
||||
flag.BoolVar(&flagForceDB, "force-db", false, "Force database version, clear dirty flag and exit")
|
||||
flag.BoolVar(&flagOptimizeDB, "optimize-db", false, "Optimize database: run VACUUM to reclaim space, then ANALYZE to update query planner statistics")
|
||||
flag.BoolVar(&flagCleanupCheckpoints, "cleanup-checkpoints", false, "Clean up old checkpoint files (delete or archive) based on retention settings, then exit")
|
||||
flag.BoolVar(&flagLogDateTime, "logdate", false, "Set this flag to add date and time to log messages")
|
||||
flag.StringVar(&flagConfigFile, "config", "./config.json", "Specify alternative path to `config.json`")
|
||||
flag.StringVar(&flagNewUser, "add-user", "", "Add a new user. Argument format: <username>:[admin,support,manager,api,user]:<password>")
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
goruntime "runtime"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -536,6 +537,43 @@ func run() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Handle checkpoint cleanup
|
||||
if flagCleanupCheckpoints {
|
||||
mscfg := ccconf.GetPackageConfig("metric-store")
|
||||
if mscfg == nil {
|
||||
return fmt.Errorf("metric-store configuration required for checkpoint cleanup")
|
||||
}
|
||||
if err := json.Unmarshal(mscfg, &metricstore.Keys); err != nil {
|
||||
return fmt.Errorf("decoding metric-store config: %w", err)
|
||||
}
|
||||
if metricstore.Keys.NumWorkers <= 0 {
|
||||
metricstore.Keys.NumWorkers = min(goruntime.NumCPU()/2+1, metricstore.DefaultMaxWorkers)
|
||||
}
|
||||
|
||||
d, err := time.ParseDuration(metricstore.Keys.RetentionInMemory)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing retention-in-memory: %w", err)
|
||||
}
|
||||
from := time.Now().Add(-d)
|
||||
deleteMode := metricstore.Keys.Cleanup == nil || metricstore.Keys.Cleanup.Mode != "archive"
|
||||
cleanupDir := ""
|
||||
if !deleteMode {
|
||||
cleanupDir = metricstore.Keys.Cleanup.RootDir
|
||||
}
|
||||
|
||||
cclog.Infof("Cleaning up checkpoints older than %s...", from.Format(time.RFC3339))
|
||||
n, err := metricstore.CleanupCheckpoints(
|
||||
metricstore.Keys.Checkpoints.RootDir, cleanupDir, from.Unix(), deleteMode)
|
||||
if err != nil {
|
||||
return fmt.Errorf("checkpoint cleanup: %w", err)
|
||||
}
|
||||
if deleteMode {
|
||||
cclog.Exitf("Cleanup done: %d checkpoint files deleted.", n)
|
||||
} else {
|
||||
cclog.Exitf("Cleanup done: %d checkpoint files archived to parquet.", n)
|
||||
}
|
||||
}
|
||||
|
||||
// Exit if start server is not requested
|
||||
if !flagServer {
|
||||
cclog.Exit("No errors, server flag not set. Exiting cc-backend.")
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/99designs/gqlgen/graphql"
|
||||
@@ -344,20 +345,20 @@ func (s *Server) init() error {
|
||||
|
||||
// Server timeout defaults (in seconds)
|
||||
const (
|
||||
defaultReadTimeout = 20
|
||||
defaultWriteTimeout = 20
|
||||
defaultReadHeaderTimeout = 20
|
||||
defaultWriteTimeout = 20
|
||||
)
|
||||
|
||||
func (s *Server) Start(ctx context.Context) error {
|
||||
// Use configurable timeouts with defaults
|
||||
readTimeout := time.Duration(defaultReadTimeout) * time.Second
|
||||
readHeaderTimeout := time.Duration(defaultReadHeaderTimeout) * time.Second
|
||||
writeTimeout := time.Duration(defaultWriteTimeout) * time.Second
|
||||
|
||||
s.server = &http.Server{
|
||||
ReadTimeout: readTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
Handler: s.router,
|
||||
Addr: config.Keys.Addr,
|
||||
ReadHeaderTimeout: readHeaderTimeout,
|
||||
WriteTimeout: writeTimeout,
|
||||
Handler: s.router,
|
||||
Addr: config.Keys.Addr,
|
||||
}
|
||||
|
||||
// Start http or https server
|
||||
@@ -399,16 +400,6 @@ func (s *Server) Start(ctx context.Context) error {
|
||||
return fmt.Errorf("dropping privileges: %w", err)
|
||||
}
|
||||
|
||||
// Handle context cancellation for graceful shutdown
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
shutdownCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||
cclog.Errorf("Server shutdown error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
if err = s.server.Serve(listener); err != nil && err != http.ErrServerClosed {
|
||||
return fmt.Errorf("server failed: %w", err)
|
||||
}
|
||||
@@ -416,8 +407,7 @@ func (s *Server) Start(ctx context.Context) error {
|
||||
}
|
||||
|
||||
func (s *Server) Shutdown(ctx context.Context) {
|
||||
// Create a shutdown context with timeout
|
||||
shutdownCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
|
||||
shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
nc := nats.GetClient()
|
||||
@@ -425,20 +415,36 @@ func (s *Server) Shutdown(ctx context.Context) {
|
||||
nc.Close()
|
||||
}
|
||||
|
||||
// First shut down the server gracefully (waiting for all ongoing requests)
|
||||
if err := s.server.Shutdown(shutdownCtx); err != nil {
|
||||
cclog.Errorf("Server shutdown error: %v", err)
|
||||
}
|
||||
|
||||
// Archive all the metric store data
|
||||
ms := metricstore.GetMemoryStore()
|
||||
// Run metricstore and archiver shutdown concurrently.
|
||||
// They are independent: metricstore writes .bin snapshots,
|
||||
// archiver flushes pending job archives.
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
var wg sync.WaitGroup
|
||||
|
||||
if ms != nil {
|
||||
metricstore.Shutdown()
|
||||
}
|
||||
if ms := metricstore.GetMemoryStore(); ms != nil {
|
||||
wg.Go(func() {
|
||||
metricstore.Shutdown()
|
||||
})
|
||||
}
|
||||
|
||||
// Shutdown archiver with 10 second timeout for fast shutdown
|
||||
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown: %v", err)
|
||||
wg.Go(func() {
|
||||
if err := archiver.Shutdown(10 * time.Second); err != nil {
|
||||
cclog.Warnf("Archiver shutdown: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
wg.Wait()
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
case <-time.After(60 * time.Second):
|
||||
cclog.Warn("Shutdown deadline exceeded, forcing exit")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,11 +280,11 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
|
||||
|
||||
// buildIntCondition creates clauses for integer range filters, using BETWEEN only if required.
|
||||
func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if cond.From != 1 && cond.To != 0 {
|
||||
if cond.From > 0 && cond.To > 0 {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
} else if cond.From != 1 && cond.To == 0 {
|
||||
} else if cond.From > 0 && cond.To == 0 {
|
||||
return query.Where(field+" >= ?", cond.From)
|
||||
} else if cond.From == 1 && cond.To != 0 {
|
||||
} else if cond.From == 0 && cond.To > 0 {
|
||||
return query.Where(field+" <= ?", cond.To)
|
||||
} else {
|
||||
return query
|
||||
@@ -293,11 +293,11 @@ func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuild
|
||||
|
||||
// buildFloatCondition creates a clauses for float range filters, using BETWEEN only if required.
|
||||
func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
if cond.From != 1.0 && cond.To != 0.0 {
|
||||
if cond.From > 0.0 && cond.To > 0.0 {
|
||||
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
|
||||
} else if cond.From != 1.0 && cond.To == 0.0 {
|
||||
} else if cond.From > 0.0 && cond.To == 0.0 {
|
||||
return query.Where(field+" >= ?", cond.From)
|
||||
} else if cond.From == 1.0 && cond.To != 0.0 {
|
||||
} else if cond.From == 0.0 && cond.To > 0.0 {
|
||||
return query.Where(field+" <= ?", cond.To)
|
||||
} else {
|
||||
return query
|
||||
@@ -339,11 +339,11 @@ func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBui
|
||||
// buildFloatJSONCondition creates a filter on a numeric field within the footprint JSON column, using BETWEEN only if required.
|
||||
func buildFloatJSONCondition(jsonField string, cond *model.FloatRange, query sq.SelectBuilder) sq.SelectBuilder {
|
||||
query = query.Where("JSON_VALID(footprint)")
|
||||
if cond.From != 1.0 && cond.To != 0.0 {
|
||||
if cond.From > 0.0 && cond.To > 0.0 {
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") BETWEEN ? AND ?", cond.From, cond.To)
|
||||
} else if cond.From != 1.0 && cond.To == 0.0 {
|
||||
} else if cond.From > 0.0 && cond.To == 0.0 {
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") >= ?", cond.From)
|
||||
} else if cond.From == 1.0 && cond.To != 0.0 {
|
||||
} else if cond.From == 0.0 && cond.To > 0.0 {
|
||||
return query.Where("JSON_EXTRACT(footprint, \"$."+jsonField+"\") <= ?", cond.To)
|
||||
} else {
|
||||
return query
|
||||
|
||||
@@ -308,7 +308,7 @@ func buildFilterPresets(query url.Values) map[string]any {
|
||||
if parts[0] == "lessthan" {
|
||||
lt, lte := strconv.Atoi(parts[1])
|
||||
if lte == nil {
|
||||
filterPresets["numNodes"] = map[string]int{"from": 1, "to": lt}
|
||||
filterPresets["numNodes"] = map[string]int{"from": 0, "to": lt}
|
||||
}
|
||||
} else if parts[0] == "morethan" {
|
||||
mt, mte := strconv.Atoi(parts[1])
|
||||
@@ -330,7 +330,7 @@ func buildFilterPresets(query url.Values) map[string]any {
|
||||
if parts[0] == "lessthan" {
|
||||
lt, lte := strconv.Atoi(parts[1])
|
||||
if lte == nil {
|
||||
filterPresets["numHWThreads"] = map[string]int{"from": 1, "to": lt}
|
||||
filterPresets["numHWThreads"] = map[string]int{"from": 0, "to": lt}
|
||||
}
|
||||
} else if parts[0] == "morethan" {
|
||||
mt, mte := strconv.Atoi(parts[1])
|
||||
@@ -352,7 +352,7 @@ func buildFilterPresets(query url.Values) map[string]any {
|
||||
if parts[0] == "lessthan" {
|
||||
lt, lte := strconv.Atoi(parts[1])
|
||||
if lte == nil {
|
||||
filterPresets["numAccelerators"] = map[string]int{"from": 1, "to": lt}
|
||||
filterPresets["numAccelerators"] = map[string]int{"from": 0, "to": lt}
|
||||
}
|
||||
} else if parts[0] == "morethan" {
|
||||
mt, mte := strconv.Atoi(parts[1])
|
||||
@@ -408,7 +408,7 @@ func buildFilterPresets(query url.Values) map[string]any {
|
||||
if parts[0] == "lessthan" {
|
||||
lt, lte := strconv.Atoi(parts[1])
|
||||
if lte == nil {
|
||||
filterPresets["energy"] = map[string]int{"from": 1, "to": lt}
|
||||
filterPresets["energy"] = map[string]int{"from": 0, "to": lt}
|
||||
}
|
||||
} else if parts[0] == "morethan" {
|
||||
mt, mte := strconv.Atoi(parts[1])
|
||||
@@ -434,7 +434,7 @@ func buildFilterPresets(query url.Values) map[string]any {
|
||||
if lte == nil {
|
||||
statEntry := map[string]any{
|
||||
"field": parts[0],
|
||||
"from": 1,
|
||||
"from": 0,
|
||||
"to": lt,
|
||||
}
|
||||
statList = append(statList, statEntry)
|
||||
|
||||
@@ -363,7 +363,7 @@ func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
for _, m := range ri.metrics {
|
||||
stats, ok := jobStats[m]
|
||||
if !ok {
|
||||
cclog.Errorf("job classification: missing metric '%s' for rule %s on job %d", m, tag, job.JobID)
|
||||
cclog.Debugf("job classification: missing metric '%s' for rule %s on job %d", m, tag, job.JobID)
|
||||
skipRule = true
|
||||
break
|
||||
}
|
||||
@@ -388,7 +388,7 @@ func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
for _, r := range ri.requirements {
|
||||
ok, err := expr.Run(r, env)
|
||||
if err != nil {
|
||||
cclog.Errorf("error running requirement for rule %s: %#v", tag, err)
|
||||
cclog.Debugf("error running requirement for rule %s: %#v", tag, err)
|
||||
requirementsMet = false
|
||||
break
|
||||
}
|
||||
@@ -407,7 +407,7 @@ func (t *JobClassTagger) Match(job *schema.Job) {
|
||||
for _, v := range ri.variables {
|
||||
value, err := expr.Run(v.expr, env)
|
||||
if err != nil {
|
||||
cclog.Errorf("error evaluating variable %s for rule %s: %#v", v.name, tag, err)
|
||||
cclog.Debugf("error evaluating variable %s for rule %s: %#v", v.name, tag, err)
|
||||
varError = true
|
||||
break
|
||||
}
|
||||
|
||||
@@ -198,36 +198,19 @@ func GetSubCluster(cluster, subcluster string) (*schema.SubCluster, error) {
|
||||
func GetMetricConfigSubCluster(cluster, subcluster string) map[string]*schema.Metric {
|
||||
metrics := make(map[string]*schema.Metric)
|
||||
|
||||
for _, c := range Clusters {
|
||||
if c.Name == cluster {
|
||||
for _, m := range c.MetricConfig {
|
||||
for _, s := range m.SubClusters {
|
||||
if s.Name == subcluster {
|
||||
metrics[m.Name] = &schema.Metric{
|
||||
Name: m.Name,
|
||||
Unit: s.Unit,
|
||||
Peak: s.Peak,
|
||||
Normal: s.Normal,
|
||||
Caution: s.Caution,
|
||||
Alert: s.Alert,
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
sc, err := GetSubCluster(cluster, subcluster)
|
||||
if err != nil {
|
||||
return metrics
|
||||
}
|
||||
|
||||
_, ok := metrics[m.Name]
|
||||
if !ok {
|
||||
metrics[m.Name] = &schema.Metric{
|
||||
Name: m.Name,
|
||||
Unit: m.Unit,
|
||||
Peak: m.Peak,
|
||||
Normal: m.Normal,
|
||||
Caution: m.Caution,
|
||||
Alert: m.Alert,
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
for _, m := range sc.MetricConfig {
|
||||
metrics[m.Name] = &schema.Metric{
|
||||
Name: m.Name,
|
||||
Unit: m.Unit,
|
||||
Peak: m.Peak,
|
||||
Normal: m.Normal,
|
||||
Caution: m.Caution,
|
||||
Alert: m.Alert,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,3 +37,27 @@ func TestClusterConfig(t *testing.T) {
|
||||
// spew.Dump(archive.GlobalMetricList)
|
||||
// t.Fail()
|
||||
}
|
||||
|
||||
func TestGetMetricConfigSubClusterRespectsRemovedMetrics(t *testing.T) {
|
||||
if err := archive.Init(json.RawMessage(`{"kind": "file","path": "testdata/archive"}`)); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
sc, err := archive.GetSubCluster("fritz", "spr2tb")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
metrics := archive.GetMetricConfigSubCluster("fritz", "spr2tb")
|
||||
if len(metrics) != len(sc.MetricConfig) {
|
||||
t.Fatalf("GetMetricConfigSubCluster() returned %d metrics, want %d", len(metrics), len(sc.MetricConfig))
|
||||
}
|
||||
|
||||
if _, ok := metrics["flops_any"]; ok {
|
||||
t.Fatalf("GetMetricConfigSubCluster() returned removed metric flops_any for subcluster spr2tb")
|
||||
}
|
||||
|
||||
if _, ok := metrics["cpu_power"]; !ok {
|
||||
t.Fatalf("GetMetricConfigSubCluster() missing active metric cpu_power for subcluster spr2tb")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@@ -22,6 +23,7 @@ import (
|
||||
|
||||
func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
||||
if Keys.Cleanup.Mode == "archive" {
|
||||
cclog.Info("[METRICSTORE]> enable archive cleanup to parquet")
|
||||
// Run as Archiver
|
||||
cleanUpWorker(wg, ctx,
|
||||
Keys.RetentionInMemory,
|
||||
@@ -43,7 +45,6 @@ func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
||||
// cleanUpWorker takes simple values to configure what it does
|
||||
func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {
|
||||
wg.Go(func() {
|
||||
|
||||
d, err := time.ParseDuration(interval)
|
||||
if err != nil {
|
||||
cclog.Fatalf("[METRICSTORE]> error parsing %s interval duration: %v\n", mode, err)
|
||||
@@ -99,8 +100,8 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
||||
}
|
||||
|
||||
type workItem struct {
|
||||
dir string
|
||||
cluster, host string
|
||||
dir string
|
||||
cluster, host string
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
@@ -181,6 +182,7 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
}
|
||||
|
||||
totalFiles := 0
|
||||
var clusterErrors []string
|
||||
|
||||
for _, clusterEntry := range clusterEntries {
|
||||
if !clusterEntry.IsDir() {
|
||||
@@ -190,7 +192,9 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
cluster := clusterEntry.Name()
|
||||
hostEntries, err := os.ReadDir(filepath.Join(checkpointsDir, cluster))
|
||||
if err != nil {
|
||||
return totalFiles, err
|
||||
cclog.Errorf("[METRICSTORE]> error reading host entries for cluster %s: %s", cluster, err.Error())
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
continue
|
||||
}
|
||||
|
||||
// Workers load checkpoint files from disk; main thread writes to parquet.
|
||||
@@ -255,7 +259,9 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
// Drain results channel to unblock workers
|
||||
for range results {
|
||||
}
|
||||
return totalFiles, fmt.Errorf("creating parquet writer for cluster %s: %w", cluster, err)
|
||||
cclog.Errorf("[METRICSTORE]> error creating parquet writer for cluster %s: %s", cluster, err.Error())
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
continue
|
||||
}
|
||||
|
||||
type deleteItem struct {
|
||||
@@ -275,6 +281,12 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
break
|
||||
}
|
||||
}
|
||||
// Flush once per host to keep row group count within parquet limits.
|
||||
if writeErr == nil {
|
||||
if err := writer.FlushRowGroup(); err != nil {
|
||||
writeErr = err
|
||||
}
|
||||
}
|
||||
}
|
||||
// Always track files for deletion (even if write failed, we still drain)
|
||||
toDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})
|
||||
@@ -285,7 +297,10 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
}
|
||||
|
||||
if errs > 0 {
|
||||
return totalFiles, fmt.Errorf("%d errors reading checkpoints for cluster %s", errs, cluster)
|
||||
cclog.Errorf("[METRICSTORE]> %d errors reading checkpoints for cluster %s", errs, cluster)
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
os.Remove(parquetFile)
|
||||
continue
|
||||
}
|
||||
|
||||
if writer.count == 0 {
|
||||
@@ -296,7 +311,9 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
|
||||
if writeErr != nil {
|
||||
os.Remove(parquetFile)
|
||||
return totalFiles, fmt.Errorf("writing parquet archive for cluster %s: %w", cluster, writeErr)
|
||||
cclog.Errorf("[METRICSTORE]> error writing parquet archive for cluster %s: %s", cluster, writeErr.Error())
|
||||
clusterErrors = append(clusterErrors, cluster)
|
||||
continue
|
||||
}
|
||||
|
||||
// Delete archived checkpoint files
|
||||
@@ -316,5 +333,10 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
||||
}
|
||||
|
||||
cclog.Infof("[METRICSTORE]> archiving checkpoints completed in %s (%d files)", time.Since(startTime).Round(time.Millisecond), totalFiles)
|
||||
|
||||
if len(clusterErrors) > 0 {
|
||||
return totalFiles, fmt.Errorf("archiving failed for clusters: %s", strings.Join(clusterErrors, ", "))
|
||||
}
|
||||
|
||||
return totalFiles, nil
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ func newParquetArchiveWriter(filename string) (*parquetArchiveWriter, error) {
|
||||
|
||||
// WriteCheckpointFile streams a CheckpointFile tree directly to Parquet rows,
|
||||
// writing metrics in sorted order without materializing all rows in memory.
|
||||
// Produces one row group per call (typically one host's data).
|
||||
// Call FlushRowGroup() after writing all checkpoint files for a host.
|
||||
func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster, hostname, scope, scopeID string) error {
|
||||
w.writeLevel(cf, cluster, hostname, scope, scopeID)
|
||||
|
||||
@@ -112,10 +112,15 @@ func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster,
|
||||
w.batch = w.batch[:0]
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushRowGroup flushes the current row group to the Parquet file.
|
||||
// Should be called once per host after all checkpoint files for that host are written.
|
||||
func (w *parquetArchiveWriter) FlushRowGroup() error {
|
||||
if err := w.writer.Flush(); err != nil {
|
||||
return fmt.Errorf("flushing parquet row group: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -69,6 +69,7 @@ import (
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
@@ -111,10 +112,16 @@ type walRotateReq struct {
|
||||
|
||||
// walFileState holds an open WAL file handle and buffered writer for one host directory.
|
||||
type walFileState struct {
|
||||
f *os.File
|
||||
w *bufio.Writer
|
||||
f *os.File
|
||||
w *bufio.Writer
|
||||
dirty bool
|
||||
}
|
||||
|
||||
// walFlushInterval controls how often dirty WAL files are flushed to disk.
|
||||
// Decoupling flushes from message processing lets the consumer run at memory
|
||||
// speed, amortizing syscall overhead across many writes.
|
||||
const walFlushInterval = 5 * time.Second
|
||||
|
||||
// walShardIndex computes which shard a message belongs to based on cluster+node.
|
||||
// Uses FNV-1a hash for fast, well-distributed mapping.
|
||||
func walShardIndex(cluster, node string) int {
|
||||
@@ -222,6 +229,7 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||
if err := writeWALRecordDirect(ws.w, msg); err != nil {
|
||||
cclog.Errorf("[METRICSTORE]> WAL: write record: %v", err)
|
||||
}
|
||||
ws.dirty = true
|
||||
}
|
||||
|
||||
processRotate := func(req walRotateReq) {
|
||||
@@ -238,10 +246,11 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||
close(req.done)
|
||||
}
|
||||
|
||||
flushAll := func() {
|
||||
flushDirty := func() {
|
||||
for _, ws := range hostFiles {
|
||||
if ws.f != nil {
|
||||
if ws.dirty {
|
||||
ws.w.Flush()
|
||||
ws.dirty = false
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -257,12 +266,35 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||
case req := <-rotateCh:
|
||||
processRotate(req)
|
||||
default:
|
||||
flushAll()
|
||||
flushDirty()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(walFlushInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
// drainBatch processes up to 4096 pending messages without blocking.
|
||||
// Returns false if the channel was closed.
|
||||
drainBatch := func() bool {
|
||||
for range 4096 {
|
||||
select {
|
||||
case msg, ok := <-msgCh:
|
||||
if !ok {
|
||||
flushDirty()
|
||||
return false
|
||||
}
|
||||
processMsg(msg)
|
||||
case req := <-rotateCh:
|
||||
processRotate(req)
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -273,23 +305,12 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||
return
|
||||
}
|
||||
processMsg(msg)
|
||||
|
||||
// Drain up to 256 more messages without blocking to batch writes.
|
||||
for range 256 {
|
||||
select {
|
||||
case msg, ok := <-msgCh:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
processMsg(msg)
|
||||
case req := <-rotateCh:
|
||||
processRotate(req)
|
||||
default:
|
||||
goto flushed
|
||||
}
|
||||
if !drainBatch() {
|
||||
return
|
||||
}
|
||||
flushed:
|
||||
flushAll()
|
||||
// No flush here — timer handles periodic flushing.
|
||||
case <-ticker.C:
|
||||
flushDirty()
|
||||
case req := <-rotateCh:
|
||||
processRotate(req)
|
||||
}
|
||||
@@ -413,69 +434,6 @@ func writeWALRecordDirect(w *bufio.Writer, msg *WALMessage) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// buildWALPayload encodes a WALMessage into a binary payload (without magic/length/CRC).
|
||||
func buildWALPayload(msg *WALMessage) []byte {
|
||||
size := 8 + 2 + len(msg.MetricName) + 1 + 4
|
||||
for _, s := range msg.Selector {
|
||||
size += 1 + len(s)
|
||||
}
|
||||
|
||||
buf := make([]byte, 0, size)
|
||||
|
||||
// Timestamp (8 bytes, little-endian int64)
|
||||
var ts [8]byte
|
||||
binary.LittleEndian.PutUint64(ts[:], uint64(msg.Timestamp))
|
||||
buf = append(buf, ts[:]...)
|
||||
|
||||
// Metric name (2-byte length prefix + bytes)
|
||||
var mLen [2]byte
|
||||
binary.LittleEndian.PutUint16(mLen[:], uint16(len(msg.MetricName)))
|
||||
buf = append(buf, mLen[:]...)
|
||||
buf = append(buf, msg.MetricName...)
|
||||
|
||||
// Selector count (1 byte)
|
||||
buf = append(buf, byte(len(msg.Selector)))
|
||||
|
||||
// Selectors (1-byte length prefix + bytes each)
|
||||
for _, sel := range msg.Selector {
|
||||
buf = append(buf, byte(len(sel)))
|
||||
buf = append(buf, sel...)
|
||||
}
|
||||
|
||||
// Value (4 bytes, float32 bit representation)
|
||||
var val [4]byte
|
||||
binary.LittleEndian.PutUint32(val[:], math.Float32bits(float32(msg.Value)))
|
||||
buf = append(buf, val[:]...)
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
// writeWALRecord appends a binary WAL record to the writer.
|
||||
// Format: [4B magic][4B payload_len][payload][4B CRC32]
|
||||
func writeWALRecord(w io.Writer, msg *WALMessage) error {
|
||||
payload := buildWALPayload(msg)
|
||||
crc := crc32.ChecksumIEEE(payload)
|
||||
|
||||
record := make([]byte, 0, 4+4+len(payload)+4)
|
||||
|
||||
var magic [4]byte
|
||||
binary.LittleEndian.PutUint32(magic[:], walRecordMagic)
|
||||
record = append(record, magic[:]...)
|
||||
|
||||
var pLen [4]byte
|
||||
binary.LittleEndian.PutUint32(pLen[:], uint32(len(payload)))
|
||||
record = append(record, pLen[:]...)
|
||||
|
||||
record = append(record, payload...)
|
||||
|
||||
var crcBytes [4]byte
|
||||
binary.LittleEndian.PutUint32(crcBytes[:], crc)
|
||||
record = append(record, crcBytes[:]...)
|
||||
|
||||
_, err := w.Write(record)
|
||||
return err
|
||||
}
|
||||
|
||||
// readWALRecord reads one WAL record from the reader.
|
||||
// Returns (nil, nil) on clean EOF. Returns error on data corruption.
|
||||
// A CRC mismatch indicates a truncated trailing record (expected on crash).
|
||||
|
||||
51
web/frontend/package-lock.json
generated
51
web/frontend/package-lock.json
generated
@@ -328,9 +328,6 @@
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -344,9 +341,6 @@
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -360,9 +354,6 @@
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -376,9 +367,6 @@
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -392,9 +380,6 @@
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -408,9 +393,6 @@
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -424,9 +406,6 @@
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -440,9 +419,6 @@
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -456,9 +432,6 @@
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -472,9 +445,6 @@
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -488,9 +458,6 @@
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -504,9 +471,6 @@
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -520,9 +484,6 @@
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@@ -998,9 +959,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/picomatch": {
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz",
|
||||
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz",
|
||||
"integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
@@ -1134,9 +1095,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/rollup-plugin-svelte/node_modules/picomatch": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
|
||||
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
|
||||
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
|
||||
@@ -54,11 +54,16 @@
|
||||
const paging = { itemsPerPage: 50, page: 1 };
|
||||
const sorting = { field: "startTime", type: "col", order: "DESC" };
|
||||
const nodeMetricsQuery = gql`
|
||||
query ($cluster: String!, $nodes: [String!], $from: Time!, $to: Time!) {
|
||||
query (
|
||||
$cluster: String!,
|
||||
$nodes: [String!],
|
||||
$from: Time!,
|
||||
$to: Time!,
|
||||
$nodeFilter: [NodeFilter!]!,
|
||||
$sorting: OrderByInput!
|
||||
) {
|
||||
nodeMetrics(cluster: $cluster, nodes: $nodes, from: $from, to: $to) {
|
||||
host
|
||||
nodeState
|
||||
metricHealth
|
||||
subCluster
|
||||
metrics {
|
||||
name
|
||||
@@ -79,7 +84,14 @@
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
nodeStatus: nodes(filter: $nodeFilter, order: $sorting) {
|
||||
count
|
||||
items {
|
||||
schedulerState
|
||||
healthState
|
||||
}
|
||||
}
|
||||
}
|
||||
`;
|
||||
const nodeJobsQuery = gql`
|
||||
@@ -146,6 +158,8 @@
|
||||
nodes: [hostname],
|
||||
from: from?.toISOString(),
|
||||
to: to?.toISOString(),
|
||||
nodeFilter: { hostname: { eq: hostname }},
|
||||
sorting // $sorting unused in backend: Use placeholder
|
||||
},
|
||||
})
|
||||
);
|
||||
@@ -157,8 +171,8 @@
|
||||
})
|
||||
);
|
||||
|
||||
const thisNodeState = $derived($nodeMetricsData?.data?.nodeMetrics[0]?.nodeState || 'notindb');
|
||||
const thisMetricHealth = $derived($nodeMetricsData?.data?.nodeMetrics[0]?.metricHealth || 'unknown');
|
||||
const thisNodeState = $derived($nodeMetricsData?.data?.nodeStatus?.items[0]?.schedulerState || 'notindb');
|
||||
const thisMetricHealth = $derived($nodeMetricsData?.data?.nodeStatus?.items[0]?.healthState || 'unknown');
|
||||
</script>
|
||||
|
||||
<Row cols={{ xs: 2, lg: 3}}>
|
||||
|
||||
@@ -166,12 +166,12 @@
|
||||
items.push({ project: { [filters.projectMatch]: filters.project } });
|
||||
if (filters.user)
|
||||
items.push({ user: { [filters.userMatch]: filters.user } });
|
||||
if (filters.numNodes.from != null || filters.numNodes.to != null) {
|
||||
if (filters.numNodes.from != null && filters.numNodes.to != null) {
|
||||
items.push({
|
||||
numNodes: { from: filters.numNodes.from, to: filters.numNodes.to },
|
||||
});
|
||||
}
|
||||
if (filters.numAccelerators.from != null || filters.numAccelerators.to != null) {
|
||||
if (filters.numAccelerators.from != null && filters.numAccelerators.to != null) {
|
||||
items.push({
|
||||
numAccelerators: {
|
||||
from: filters.numAccelerators.from,
|
||||
@@ -179,7 +179,7 @@
|
||||
},
|
||||
});
|
||||
}
|
||||
if (filters.numHWThreads.from != null || filters.numHWThreads.to != null) {
|
||||
if (filters.numHWThreads.from != null && filters.numHWThreads.to != null) {
|
||||
items.push({
|
||||
numHWThreads: {
|
||||
from: filters.numHWThreads.from,
|
||||
@@ -206,14 +206,21 @@
|
||||
items.push({ duration: { to: filters.duration.lessThan, from: 0 } });
|
||||
if (filters.duration.moreThan)
|
||||
items.push({ duration: { to: 0, from: filters.duration.moreThan } });
|
||||
if (filters.energy.from != null || filters.energy.to != null)
|
||||
if (filters.energy.from != null && filters.energy.to != null)
|
||||
items.push({
|
||||
energy: { from: filters.energy.from, to: filters.energy.to },
|
||||
});
|
||||
if (filters.jobId)
|
||||
items.push({ jobId: { [filters.jobIdMatch]: filters.jobId } });
|
||||
if (filters.stats.length != 0)
|
||||
items.push({ metricStats: filters.stats.map((st) => { return { metricName: st.field, range: { from: st.from, to: st.to }} }) });
|
||||
if (filters.stats.length != 0) {
|
||||
const metricStats = [];
|
||||
filters.stats.forEach((st) => {
|
||||
if (st.from != null && st.to != null)
|
||||
metricStats.push({ metricName: st.field, range: { from: st.from, to: st.to }});
|
||||
});
|
||||
if (metricStats.length != 0)
|
||||
items.push({metricStats})
|
||||
};
|
||||
if (filters.node) items.push({ node: { [filters.nodeMatch]: filters.node } });
|
||||
if (filters.jobName) items.push({ jobName: { contains: filters.jobName } });
|
||||
if (filters.schedule) items.push({ schedule: filters.schedule });
|
||||
@@ -280,40 +287,40 @@
|
||||
opts.push(`duration=morethan-${filters.duration.moreThan}`);
|
||||
if (filters.tags.length != 0)
|
||||
for (let tag of filters.tags) opts.push(`tag=${tag}`);
|
||||
if (filters.numNodes.from > 1 && filters.numNodes.to > 0)
|
||||
if (filters.numNodes.from > 0 && filters.numNodes.to > 0)
|
||||
opts.push(`numNodes=${filters.numNodes.from}-${filters.numNodes.to}`);
|
||||
else if (filters.numNodes.from > 1 && filters.numNodes.to == 0)
|
||||
else if (filters.numNodes.from > 0 && filters.numNodes.to == 0)
|
||||
opts.push(`numNodes=morethan-${filters.numNodes.from}`);
|
||||
else if (filters.numNodes.from == 1 && filters.numNodes.to > 0)
|
||||
else if (filters.numNodes.from == 0 && filters.numNodes.to > 0)
|
||||
opts.push(`numNodes=lessthan-${filters.numNodes.to}`);
|
||||
if (filters.numHWThreads.from > 1 && filters.numHWThreads.to > 0)
|
||||
if (filters.numHWThreads.from > 0 && filters.numHWThreads.to > 0)
|
||||
opts.push(`numHWThreads=${filters.numHWThreads.from}-${filters.numHWThreads.to}`);
|
||||
else if (filters.numHWThreads.from > 1 && filters.numHWThreads.to == 0)
|
||||
else if (filters.numHWThreads.from > 0 && filters.numHWThreads.to == 0)
|
||||
opts.push(`numHWThreads=morethan-${filters.numHWThreads.from}`);
|
||||
else if (filters.numHWThreads.from == 1 && filters.numHWThreads.to > 0)
|
||||
else if (filters.numHWThreads.from == 0 && filters.numHWThreads.to > 0)
|
||||
opts.push(`numHWThreads=lessthan-${filters.numHWThreads.to}`);
|
||||
if (filters.numAccelerators.from && filters.numAccelerators.to)
|
||||
if (filters.numAccelerators.from > 0 && filters.numAccelerators.to > 0)
|
||||
opts.push(`numAccelerators=${filters.numAccelerators.from}-${filters.numAccelerators.to}`);
|
||||
else if (filters.numAccelerators.from > 1 && filters.numAccelerators.to == 0)
|
||||
else if (filters.numAccelerators.from > 0 && filters.numAccelerators.to == 0)
|
||||
opts.push(`numAccelerators=morethan-${filters.numAccelerators.from}`);
|
||||
else if (filters.numAccelerators.from == 1 && filters.numAccelerators.to > 0)
|
||||
else if (filters.numAccelerators.from == 0 && filters.numAccelerators.to > 0)
|
||||
opts.push(`numAccelerators=lessthan-${filters.numAccelerators.to}`);
|
||||
if (filters.node) opts.push(`node=${filters.node}`);
|
||||
if (filters.node && filters.nodeMatch != "eq") // "eq" is default-case
|
||||
opts.push(`nodeMatch=${filters.nodeMatch}`);
|
||||
if (filters.energy.from > 1 && filters.energy.to > 0)
|
||||
if (filters.energy.from > 0 && filters.energy.to > 0)
|
||||
opts.push(`energy=${filters.energy.from}-${filters.energy.to}`);
|
||||
else if (filters.energy.from > 1 && filters.energy.to == 0)
|
||||
else if (filters.energy.from > 0 && filters.energy.to == 0)
|
||||
opts.push(`energy=morethan-${filters.energy.from}`);
|
||||
else if (filters.energy.from == 1 && filters.energy.to > 0)
|
||||
else if (filters.energy.from == 0 && filters.energy.to > 0)
|
||||
opts.push(`energy=lessthan-${filters.energy.to}`);
|
||||
if (filters.stats.length > 0)
|
||||
for (let stat of filters.stats) {
|
||||
if (stat.from > 1 && stat.to > 0)
|
||||
if (stat.from > 0 && stat.to > 0)
|
||||
opts.push(`stat=${stat.field}-${stat.from}-${stat.to}`);
|
||||
else if (stat.from > 1 && stat.to == 0)
|
||||
else if (stat.from > 0 && stat.to == 0)
|
||||
opts.push(`stat=${stat.field}-morethan-${stat.from}`);
|
||||
else if (stat.from == 1 && stat.to > 0)
|
||||
else if (stat.from == 0 && stat.to > 0)
|
||||
opts.push(`stat=${stat.field}-lessthan-${stat.to}`);
|
||||
}
|
||||
// Build && Return
|
||||
@@ -511,43 +518,43 @@
|
||||
</Info>
|
||||
{/if}
|
||||
|
||||
{#if filters.numNodes.from > 1 && filters.numNodes.to > 0}
|
||||
{#if filters.numNodes.from > 0 && filters.numNodes.to > 0}
|
||||
<Info icon="hdd-stack" onclick={() => (isResourcesOpen = true)}>
|
||||
Nodes: {filters.numNodes.from} - {filters.numNodes.to}
|
||||
</Info>
|
||||
{:else if filters.numNodes.from > 1 && filters.numNodes.to == 0}
|
||||
{:else if filters.numNodes.from > 0 && filters.numNodes.to == 0}
|
||||
<Info icon="hdd-stack" onclick={() => (isResourcesOpen = true)}>
|
||||
≥ {filters.numNodes.from} Node(s)
|
||||
</Info>
|
||||
{:else if filters.numNodes.from == 1 && filters.numNodes.to > 0}
|
||||
{:else if filters.numNodes.from == 0 && filters.numNodes.to > 0}
|
||||
<Info icon="hdd-stack" onclick={() => (isResourcesOpen = true)}>
|
||||
≤ {filters.numNodes.to} Node(s)
|
||||
</Info>
|
||||
{/if}
|
||||
|
||||
{#if filters.numHWThreads.from > 1 && filters.numHWThreads.to > 0}
|
||||
{#if filters.numHWThreads.from > 0 && filters.numHWThreads.to > 0}
|
||||
<Info icon="cpu" onclick={() => (isResourcesOpen = true)}>
|
||||
HWThreads: {filters.numHWThreads.from} - {filters.numHWThreads.to}
|
||||
</Info>
|
||||
{:else if filters.numHWThreads.from > 1 && filters.numHWThreads.to == 0}
|
||||
{:else if filters.numHWThreads.from > 0 && filters.numHWThreads.to == 0}
|
||||
<Info icon="cpu" onclick={() => (isResourcesOpen = true)}>
|
||||
≥ {filters.numHWThreads.from} HWThread(s)
|
||||
</Info>
|
||||
{:else if filters.numHWThreads.from == 1 && filters.numHWThreads.to > 0}
|
||||
{:else if filters.numHWThreads.from == 0 && filters.numHWThreads.to > 0}
|
||||
<Info icon="cpu" onclick={() => (isResourcesOpen = true)}>
|
||||
≤ {filters.numHWThreads.to} HWThread(s)
|
||||
</Info>
|
||||
{/if}
|
||||
|
||||
{#if filters.numAccelerators.from > 1 && filters.numAccelerators.to > 0}
|
||||
{#if filters.numAccelerators.from > 0 && filters.numAccelerators.to > 0}
|
||||
<Info icon="gpu-card" onclick={() => (isResourcesOpen = true)}>
|
||||
Accelerators: {filters.numAccelerators.from} - {filters.numAccelerators.to}
|
||||
</Info>
|
||||
{:else if filters.numAccelerators.from > 1 && filters.numAccelerators.to == 0}
|
||||
{:else if filters.numAccelerators.from > 0 && filters.numAccelerators.to == 0}
|
||||
<Info icon="gpu-card" onclick={() => (isResourcesOpen = true)}>
|
||||
≥ {filters.numAccelerators.from} Acc(s)
|
||||
</Info>
|
||||
{:else if filters.numAccelerators.from == 1 && filters.numAccelerators.to > 0}
|
||||
{:else if filters.numAccelerators.from == 0 && filters.numAccelerators.to > 0}
|
||||
<Info icon="gpu-card" onclick={() => (isResourcesOpen = true)}>
|
||||
≤ {filters.numAccelerators.to} Acc(s)
|
||||
</Info>
|
||||
@@ -559,15 +566,15 @@
|
||||
</Info>
|
||||
{/if}
|
||||
|
||||
{#if filters.energy.from > 1 && filters.energy.to > 0}
|
||||
{#if filters.energy.from > 0 && filters.energy.to > 0}
|
||||
<Info icon="lightning-charge-fill" onclick={() => (isEnergyOpen = true)}>
|
||||
Total Energy: {filters.energy.from} - {filters.energy.to} kWh
|
||||
</Info>
|
||||
{:else if filters.energy.from > 1 && filters.energy.to == 0}
|
||||
{:else if filters.energy.from > 0 && filters.energy.to == 0}
|
||||
<Info icon="lightning-charge-fill" onclick={() => (isEnergyOpen = true)}>
|
||||
Total Energy ≥ {filters.energy.from} kWh
|
||||
</Info>
|
||||
{:else if filters.energy.from == 1 && filters.energy.to > 0}
|
||||
{:else if filters.energy.from == 0 && filters.energy.to > 0}
|
||||
<Info icon="lightning-charge-fill" onclick={() => (isEnergyOpen = true)}>
|
||||
Total Energy ≤ {filters.energy.to} kWh
|
||||
</Info>
|
||||
@@ -575,15 +582,15 @@
|
||||
|
||||
{#if filters.stats.length > 0}
|
||||
{#each filters.stats as stat}
|
||||
{#if stat.from > 1 && stat.to > 0}
|
||||
{#if stat.from > 0 && stat.to > 0}
|
||||
<Info icon="bar-chart" onclick={() => (isStatsOpen = true)}>
|
||||
{stat.field}: {stat.from} - {stat.to} {stat.unit}
|
||||
</Info> 
|
||||
{:else if stat.from > 1 && stat.to == 0}
|
||||
{:else if stat.from > 0 && stat.to == 0}
|
||||
<Info icon="bar-chart" onclick={() => (isStatsOpen = true)}>
|
||||
{stat.field} ≥ {stat.from} {stat.unit}
|
||||
</Info> 
|
||||
{:else if stat.from == 1 && stat.to > 0}
|
||||
{:else if stat.from == 0 && stat.to > 0}
|
||||
<Info icon="bar-chart" onclick={() => (isStatsOpen = true)}>
|
||||
{stat.field} ≤ {stat.to} {stat.unit}
|
||||
</Info> 
|
||||
|
||||
@@ -28,31 +28,29 @@
|
||||
} = $props();
|
||||
|
||||
/* Const */
|
||||
const minEnergyPreset = 1;
|
||||
const minEnergyPreset = 0;
|
||||
const maxEnergyPreset = 100;
|
||||
|
||||
/* Derived */
|
||||
// Pending
|
||||
let pendingEnergyState = $derived({
|
||||
from: presetEnergy?.from ? presetEnergy.from : minEnergyPreset,
|
||||
to: !(presetEnergy.to == null || presetEnergy.to == 0) ? presetEnergy.to : maxEnergyPreset,
|
||||
from: presetEnergy?.from || minEnergyPreset,
|
||||
to: (presetEnergy.to == 0) ? null : presetEnergy.to,
|
||||
});
|
||||
// Changable
|
||||
let energyState = $derived({
|
||||
from: presetEnergy?.from ? presetEnergy.from : minEnergyPreset,
|
||||
to: !(presetEnergy.to == null || presetEnergy.to == 0) ? presetEnergy.to : maxEnergyPreset,
|
||||
from: presetEnergy?.from || minEnergyPreset,
|
||||
to: (presetEnergy.to == 0) ? null : presetEnergy.to,
|
||||
});
|
||||
|
||||
const energyActive = $derived(!(JSON.stringify(energyState) === JSON.stringify({ from: minEnergyPreset, to: maxEnergyPreset })));
|
||||
// Block Apply if null
|
||||
const disableApply = $derived(energyState.from === null || energyState.to === null);
|
||||
const energyActive = $derived(!(JSON.stringify(energyState) === JSON.stringify({ from: minEnergyPreset, to: null })));
|
||||
|
||||
/* Function */
|
||||
function setEnergy() {
|
||||
if (energyActive) {
|
||||
pendingEnergyState = {
|
||||
from: energyState.from,
|
||||
to: (energyState.to == maxEnergyPreset) ? 0 : energyState.to
|
||||
from: (!energyState?.from) ? 0 : energyState.from,
|
||||
to: (energyState.to === null) ? 0 : energyState.to
|
||||
};
|
||||
} else {
|
||||
pendingEnergyState = { from: null, to: null};
|
||||
@@ -86,7 +84,6 @@
|
||||
<ModalFooter>
|
||||
<Button
|
||||
color="primary"
|
||||
disabled={disableApply}
|
||||
onclick={() => {
|
||||
isOpen = false;
|
||||
setEnergy();
|
||||
|
||||
@@ -98,44 +98,38 @@
|
||||
// Pending
|
||||
let pendingNumNodes = $derived({
|
||||
from: presetNumNodes.from,
|
||||
to: (presetNumNodes.to == 0) ? maxNumNodes : presetNumNodes.to
|
||||
to: (presetNumNodes.to == 0) ? null : presetNumNodes.to
|
||||
});
|
||||
let pendingNumHWThreads = $derived({
|
||||
from: presetNumHWThreads.from,
|
||||
to: (presetNumHWThreads.to == 0) ? maxNumHWThreads : presetNumHWThreads.to
|
||||
to: (presetNumHWThreads.to == 0) ? null : presetNumHWThreads.to
|
||||
});
|
||||
let pendingNumAccelerators = $derived({
|
||||
from: presetNumAccelerators.from,
|
||||
to: (presetNumAccelerators.to == 0) ? maxNumAccelerators : presetNumAccelerators.to
|
||||
to: (presetNumAccelerators.to == 0) ? null : presetNumAccelerators.to
|
||||
});
|
||||
let pendingNamedNode = $derived(presetNamedNode);
|
||||
let pendingNodeMatch = $derived(presetNodeMatch);
|
||||
// Changable States
|
||||
let nodesState = $derived({
|
||||
from: presetNumNodes.from,
|
||||
to: (presetNumNodes.to == 0) ? maxNumNodes : presetNumNodes.to
|
||||
from: presetNumNodes?.from || 0,
|
||||
to: (presetNumNodes.to == 0) ? null : presetNumNodes.to
|
||||
});
|
||||
let threadState = $derived({
|
||||
from: presetNumHWThreads.from,
|
||||
to: (presetNumHWThreads.to == 0) ? maxNumHWThreads : presetNumHWThreads.to
|
||||
from: presetNumHWThreads?.from || 0,
|
||||
to: (presetNumHWThreads.to == 0) ? null : presetNumHWThreads.to
|
||||
});
|
||||
let accState = $derived({
|
||||
from: presetNumAccelerators.from,
|
||||
to: (presetNumAccelerators.to == 0) ? maxNumAccelerators : presetNumAccelerators.to
|
||||
from: presetNumAccelerators?.from || 0,
|
||||
to: (presetNumAccelerators.to == 0) ? null : presetNumAccelerators.to
|
||||
});
|
||||
|
||||
const initialized = $derived(getContext("initialized") || false);
|
||||
const clusterInfos = $derived($initialized ? getContext("clusters") : null);
|
||||
// Is Selection Active
|
||||
const nodesActive = $derived(!(JSON.stringify(nodesState) === JSON.stringify({ from: 1, to: maxNumNodes })));
|
||||
const threadActive = $derived(!(JSON.stringify(threadState) === JSON.stringify({ from: 1, to: maxNumHWThreads })));
|
||||
const accActive = $derived(!(JSON.stringify(accState) === JSON.stringify({ from: 1, to: maxNumAccelerators })));
|
||||
// Block Apply if null
|
||||
const disableApply = $derived(
|
||||
nodesState.from === null || nodesState.to === null ||
|
||||
threadState.from === null || threadState.to === null ||
|
||||
accState.from === null || accState.to === null
|
||||
);
|
||||
const nodesActive = $derived(!(JSON.stringify(nodesState) === JSON.stringify({ from: 0, to: null })));
|
||||
const threadActive = $derived(!(JSON.stringify(threadState) === JSON.stringify({ from: 0, to: null })));
|
||||
const accActive = $derived(!(JSON.stringify(accState) === JSON.stringify({ from: 0, to: null })));
|
||||
|
||||
/* Reactive Effects | Svelte 5 onMount */
|
||||
$effect(() => {
|
||||
@@ -153,58 +147,28 @@
|
||||
}
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
if (
|
||||
$initialized &&
|
||||
pendingNumNodes.from == null &&
|
||||
pendingNumNodes.to == null
|
||||
) {
|
||||
nodesState = { from: 1, to: maxNumNodes };
|
||||
}
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
if (
|
||||
$initialized &&
|
||||
pendingNumHWThreads.from == null &&
|
||||
pendingNumHWThreads.to == null
|
||||
) {
|
||||
threadState = { from: 1, to: maxNumHWThreads };
|
||||
}
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
if (
|
||||
$initialized &&
|
||||
pendingNumAccelerators.from == null &&
|
||||
pendingNumAccelerators.to == null
|
||||
) {
|
||||
accState = { from: 1, to: maxNumAccelerators };
|
||||
}
|
||||
});
|
||||
|
||||
/* Functions */
|
||||
function setResources() {
|
||||
if (nodesActive) {
|
||||
pendingNumNodes = {
|
||||
from: nodesState.from,
|
||||
to: (nodesState.to == maxNumNodes) ? 0 : nodesState.to
|
||||
from: (!nodesState?.from) ? 0 : nodesState.from,
|
||||
to: (nodesState.to === null) ? 0 : nodesState.to
|
||||
};
|
||||
} else {
|
||||
pendingNumNodes = { from: null, to: null};
|
||||
};
|
||||
if (threadActive) {
|
||||
pendingNumHWThreads = {
|
||||
from: threadState.from,
|
||||
to: (threadState.to == maxNumHWThreads) ? 0 : threadState.to
|
||||
from: (!threadState?.from) ? 0 : threadState.from,
|
||||
to: (threadState.to === null) ? 0 : threadState.to
|
||||
};
|
||||
} else {
|
||||
pendingNumHWThreads = { from: null, to: null};
|
||||
};
|
||||
if (accActive) {
|
||||
pendingNumAccelerators = {
|
||||
from: accState.from,
|
||||
to: (accState.to == maxNumAccelerators) ? 0 : accState.to
|
||||
from: (!accState?.from) ? 0 : accState.from,
|
||||
to: (accState.to === null) ? 0 : accState.to
|
||||
};
|
||||
} else {
|
||||
pendingNumAccelerators = { from: null, to: null};
|
||||
@@ -249,7 +213,7 @@
|
||||
nodesState.from = detail[0];
|
||||
nodesState.to = detail[1];
|
||||
}}
|
||||
sliderMin={1}
|
||||
sliderMin={0}
|
||||
sliderMax={maxNumNodes}
|
||||
fromPreset={nodesState.from}
|
||||
toPreset={nodesState.to}
|
||||
@@ -269,7 +233,7 @@
|
||||
threadState.from = detail[0];
|
||||
threadState.to = detail[1];
|
||||
}}
|
||||
sliderMin={1}
|
||||
sliderMin={0}
|
||||
sliderMax={maxNumHWThreads}
|
||||
fromPreset={threadState.from}
|
||||
toPreset={threadState.to}
|
||||
@@ -289,7 +253,7 @@
|
||||
accState.from = detail[0];
|
||||
accState.to = detail[1];
|
||||
}}
|
||||
sliderMin={1}
|
||||
sliderMin={0}
|
||||
sliderMax={maxNumAccelerators}
|
||||
fromPreset={accState.from}
|
||||
toPreset={accState.to}
|
||||
@@ -300,7 +264,6 @@
|
||||
<ModalFooter>
|
||||
<Button
|
||||
color="primary"
|
||||
disabled={disableApply}
|
||||
onclick={() => {
|
||||
isOpen = false;
|
||||
setResources();
|
||||
|
||||
@@ -34,7 +34,8 @@
|
||||
function setRanges() {
|
||||
for (let as of availableStats) {
|
||||
if (as.enabled) {
|
||||
as.to = (as.to == as.peak) ? 0 : as.to
|
||||
as.from = (!as?.from) ? 0 : as.from,
|
||||
as.to = (as.to == null) ? 0 : as.to
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -42,8 +43,8 @@
|
||||
function resetRanges() {
|
||||
for (let as of availableStats) {
|
||||
as.enabled = false
|
||||
as.from = 1
|
||||
as.to = as.peak
|
||||
as.from = null
|
||||
as.to = null
|
||||
};
|
||||
}
|
||||
</script>
|
||||
@@ -66,13 +67,13 @@
|
||||
changeRange={(detail) => {
|
||||
aStat.from = detail[0];
|
||||
aStat.to = detail[1];
|
||||
if (aStat.from == 1 && aStat.to == aStat.peak) {
|
||||
if (aStat.from == 0 && aStat.to === null) {
|
||||
aStat.enabled = false;
|
||||
} else {
|
||||
aStat.enabled = true;
|
||||
}
|
||||
}}
|
||||
sliderMin={1}
|
||||
sliderMin={0}
|
||||
sliderMax={aStat.peak}
|
||||
fromPreset={aStat.from}
|
||||
toPreset={aStat.to}
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
let {
|
||||
sliderMin,
|
||||
sliderMax,
|
||||
fromPreset = 1,
|
||||
fromPreset = 0,
|
||||
toPreset = 100,
|
||||
changeRange
|
||||
} = $props();
|
||||
@@ -33,9 +33,9 @@
|
||||
/* Derived */
|
||||
let pendingValues = $derived([fromPreset, toPreset]);
|
||||
let sliderFrom = $derived(Math.max(((fromPreset == null ? sliderMin : fromPreset) - sliderMin) / (sliderMax - sliderMin), 0.));
|
||||
let sliderTo = $derived(Math.min(((toPreset == null ? sliderMin : toPreset) - sliderMin) / (sliderMax - sliderMin), 1.));
|
||||
let inputFieldFrom = $derived(fromPreset ? fromPreset.toString() : null);
|
||||
let inputFieldTo = $derived(toPreset ? toPreset.toString() : null);
|
||||
let sliderTo = $derived(Math.min(((toPreset == null ? sliderMax : toPreset) - sliderMin) / (sliderMax - sliderMin), 1.));
|
||||
let inputFieldFrom = $derived(fromPreset != null ? fromPreset.toString() : null);
|
||||
let inputFieldTo = $derived(toPreset != null ? toPreset.toString() : null);
|
||||
|
||||
/* Var Init */
|
||||
let timeoutId = null;
|
||||
@@ -79,17 +79,22 @@
|
||||
evt.preventDefault()
|
||||
evt.stopPropagation()
|
||||
const newV = Number.parseInt(evt.target.value);
|
||||
const newP = clamp((newV - sliderMin) / (sliderMax - sliderMin), 0., 1.)
|
||||
const newP = clamp((newV - sliderMin) / (sliderMax - sliderMin), 0., 1., target)
|
||||
updateStates(newV, newP, target);
|
||||
}
|
||||
|
||||
function clamp(x, testMin, testMax) {
|
||||
return x < testMin
|
||||
? testMin
|
||||
: (x > testMax
|
||||
? testMax
|
||||
: x
|
||||
);
|
||||
function clamp(x, testMin, testMax, target) {
|
||||
if (isNaN(x)) {
|
||||
if (target == 'from') return testMin
|
||||
else if (target == 'to') return testMax
|
||||
} else {
|
||||
return x < testMin
|
||||
? testMin
|
||||
: (x > testMax
|
||||
? testMax
|
||||
: x
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function draggable(node) {
|
||||
@@ -159,23 +164,23 @@
|
||||
|
||||
<div class="double-range-container">
|
||||
<div class="header">
|
||||
<input class="form-control" type="text" placeholder="from..." value={inputFieldFrom}
|
||||
<input class="form-control" type="text" placeholder={`${sliderMin} ...`} value={inputFieldFrom}
|
||||
oninput={(e) => {
|
||||
inputChanged(e, 'from');
|
||||
}}
|
||||
/>
|
||||
|
||||
{#if inputFieldFrom != sliderMin?.toString() && inputFieldTo != sliderMax?.toString() }
|
||||
{#if (inputFieldFrom && inputFieldFrom != sliderMin?.toString()) && inputFieldTo != null }
|
||||
<span>Selected: Range <b> {inputFieldFrom} </b> - <b> {inputFieldTo} </b></span>
|
||||
{:else if inputFieldFrom != sliderMin?.toString() && inputFieldTo == sliderMax?.toString() }
|
||||
<span>Selected: More than <b> {inputFieldFrom} </b> </span>
|
||||
{:else if inputFieldFrom == sliderMin?.toString() && inputFieldTo != sliderMax?.toString() }
|
||||
<span>Selected: Less than <b> {inputFieldTo} </b></span>
|
||||
{:else if (inputFieldFrom && inputFieldFrom != sliderMin?.toString()) && inputFieldTo == null }
|
||||
<span>Selected: More Than Equal <b> {inputFieldFrom} </b> </span>
|
||||
{:else if (!inputFieldFrom || inputFieldFrom == sliderMin?.toString()) && inputFieldTo != null }
|
||||
<span>Selected: Less Than Equal <b> {inputFieldTo} </b></span>
|
||||
{:else}
|
||||
<span><i>No Selection</i></span>
|
||||
{/if}
|
||||
|
||||
<input class="form-control" type="text" placeholder="to..." value={inputFieldTo}
|
||||
<input class="form-control" type="text" placeholder={`... ${sliderMax} ...`} value={inputFieldTo}
|
||||
oninput={(e) => {
|
||||
inputChanged(e, 'to');
|
||||
}}
|
||||
|
||||
@@ -347,8 +347,8 @@ export function getStatsItems(presetStats = []) {
|
||||
field: presetEntry.field,
|
||||
text: `${gm.name} (${gm.footprint})`,
|
||||
metric: gm.name,
|
||||
from: presetEntry.from,
|
||||
to: (presetEntry.to == 0) ? mc.peak : presetEntry.to,
|
||||
from: presetEntry?.from || 0,
|
||||
to: (presetEntry.to == 0) ? null : presetEntry.to,
|
||||
peak: mc.peak,
|
||||
enabled: true,
|
||||
unit: `${gm?.unit?.prefix ? gm.unit.prefix : ''}${gm.unit.base}`
|
||||
@@ -358,8 +358,8 @@ export function getStatsItems(presetStats = []) {
|
||||
field: `${gm.name}_${gm.footprint}`,
|
||||
text: `${gm.name} (${gm.footprint})`,
|
||||
metric: gm.name,
|
||||
from: 1,
|
||||
to: mc.peak,
|
||||
from: 0,
|
||||
to: null,
|
||||
peak: mc.peak,
|
||||
enabled: false,
|
||||
unit: `${gm?.unit?.prefix ? gm.unit.prefix : ''}${gm.unit.base}`
|
||||
|
||||
Reference in New Issue
Block a user