{"parentUuid":null,"isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","type":"progress","data":{"type":"hook_progress","hookEvent":"SessionStart","hookName":"SessionStart:startup","command":"entire hooks claude-code session-start"},"parentToolUseID":"67d0ff87-ef96-4cf3-8684-38a007a4811b","toolUseID":"67d0ff87-ef96-4cf3-8684-38a007a4811b","timestamp":"2026-03-29T19:33:13.957Z","uuid":"52fd3207-ac2e-4215-8a38-9b2ab5e8f46f"}
{"type":"file-history-snapshot","messageId":"5850443c-83f3-4981-96ba-d5e60679f257","snapshot":{"messageId":"5850443c-83f3-4981-96ba-d5e60679f257","trackedFileBackups":{},"timestamp":"2026-03-29T19:34:20.233Z"},"isSnapshotUpdate":false}
{"parentUuid":"52fd3207-ac2e-4215-8a38-9b2ab5e8f46f","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","type":"user","message":{"role":"user","content":"Update the doc comments in metricstore package to reflect the current implementation"},"uuid":"5850443c-83f3-4981-96ba-d5e60679f257","timestamp":"2026-03-29T19:34:19.191Z","permissionMode":"default"}
{"parentUuid":"5850443c-83f3-4981-96ba-d5e60679f257","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","message":{"model":"claude-sonnet-4-6","id":"msg_01Fpc6oNk65makHiWeyd7osZ","type":"message","role":"assistant","content":[{"type":"thinking","thinking":"Let me explore the metricstore package to understand the current implementation and then update the doc comments accordingly.","signature":"ErsCClsIDBgCKkBfHMg2Xz8WAQN8zq+d6Lbfil3NuM2iqsbjseX/iPn1xGoKI6vnPk8zEylLXprVKS0JZArhuNBLXvyWlFm5vWM/MhFjbGF1ZGUtc29ubmV0LTQtNjgAEgzRsaTw8e0cUuYaVG4aDLBwJ1OLP6MLojdHgSIw4SDDez6AgxM6jqhUiEA9+lbSzeNGT4nrF5AQ2Bofx5qNaygK4mgEIo7WfIeo3oIOKo0Btz7Y7EVfsVIpof31RdoVlwlFFruxNBfoqBl1/rhdKWFtmLr0Ct0KwhG4bIHwKSHb7zUtCPZZFOkRvn0z/E8TatHPOG4TBqgOevF40RTNXpjws8kn4GUn7P2nUZu6I2aYLjdJgXYjlZAUXf5OAyB43lGLDD53myOaiVXWTNRDhAMuDK6nu8qQxuBxKVTjGAE="}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":13490,"cache_read_input_tokens":10318,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":13490},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvymPsMMquBmnYSgPHY","type":"assistant","uuid":"ea5ed8bc-a203-4b1b-b089-7c690f355d6b","timestamp":"2026-03-29T19:34:23.374Z"}
{"parentUuid":"ea5ed8bc-a203-4b1b-b089-7c690f355d6b","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01Fpc6oNk65makHiWeyd7osZ","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_017CTfKDyeifPxzSdyH6JnK6","name":"Glob","input":{"pattern":"pkg/metricstore/**/*.go"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":13490,"cache_read_input_tokens":10318,"output_tokens":95,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":13490,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXvymPsMMquBmnYSgPHY","type":"assistant","uuid":"56eb77c8-d2b0-462b-b57b-d79458966d3c","timestamp":"2026-03-29T19:34:23.461Z"}
{"parentUuid":"56eb77c8-d2b0-462b-b57b-d79458966d3c","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Glob","command":"callback"},"parentToolUseID":"toolu_017CTfKDyeifPxzSdyH6JnK6","toolUseID":"toolu_017CTfKDyeifPxzSdyH6JnK6","timestamp":"2026-03-29T19:34:23.889Z","uuid":"787097ef-8033-4253-9be8-9d699d7a9ac9"}
{"parentUuid":"56eb77c8-d2b0-462b-b57b-d79458966d3c","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_017CTfKDyeifPxzSdyH6JnK6","type":"tool_result","content":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/debug.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore_test.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/query.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/scopequery_test.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/scopequery.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/healthcheck.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/level.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/api.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/lineprotocol.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/parquetArchive_test.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/buffer.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/checkpoint.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/configSchema.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/config.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/parquetArchive.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/walCheckpoint.go\n/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/stats.go"}]},"uuid":"79c961cb-f43b-4cf7-b834-e871e257be61","timestamp":"2026-03-29T19:34:23.888Z","toolUseResult":{"filenames":["/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/debug.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore_test.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/query.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/scopequery_test.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/scopequery.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/healthcheck.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/level.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/api.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/lineprotocol.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/parquetArchive_test.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/buffer.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/checkpoint.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/configSchema.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/config.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/parquetArchive.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/walCheckpoint.go","/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/stats.go"],"durationMs":403,"numFiles":19,"truncated":false},"sourceToolAssistantUUID":"56eb77c8-d2b0-462b-b57b-d79458966d3c"}
{"parentUuid":"79c961cb-f43b-4cf7-b834-e871e257be61","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01LtgGFtH91tDuHvidxukNFL","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01UNY8XZYCLtZMWvKGtxwfx8","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":668,"cache_read_input_tokens":23808,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":668},"output_tokens":51,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvz1o3wmRUN8MJsqWbJ","type":"assistant","uuid":"a0e838c9-c432-4d6d-a3be-dd6d5d6490cb","timestamp":"2026-03-29T19:34:26.465Z"}
{"parentUuid":"a0e838c9-c432-4d6d-a3be-dd6d5d6490cb","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01LtgGFtH91tDuHvidxukNFL","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01VCK2d7dHwMHgU82JkSjG3b","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/api.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":668,"cache_read_input_tokens":23808,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":668},"output_tokens":51,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvz1o3wmRUN8MJsqWbJ","type":"assistant","uuid":"770a0e0c-edd3-4d77-a367-6e6a6a0efed7","timestamp":"2026-03-29T19:34:26.862Z"}
{"parentUuid":"770a0e0c-edd3-4d77-a367-6e6a6a0efed7","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01UNY8XZYCLtZMWvKGtxwfx8","toolUseID":"toolu_01UNY8XZYCLtZMWvKGtxwfx8","timestamp":"2026-03-29T19:34:26.882Z","uuid":"4c361635-de9f-4f03-bd37-400cfa73712c"}
{"parentUuid":"a0e838c9-c432-4d6d-a3be-dd6d5d6490cb","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01UNY8XZYCLtZMWvKGtxwfx8","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// Package metricstore provides an efficient in-memory time-series metric storage system\n 7→// with support for hierarchical data organization, checkpointing, and archiving.\n 8→//\n 9→// The package organizes metrics in a tree structure (cluster → host → component) and\n 10→// provides concurrent read/write access to metric data with configurable aggregation strategies.\n 11→// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,\n 12→// and enforcing retention policies.\n 13→//\n 14→// Key features:\n 15→// - In-memory metric storage with configurable retention\n 16→// - Hierarchical data organization (selectors)\n 17→// - Concurrent checkpoint/archive workers\n 18→// - Support for sum and average aggregation\n 19→// - NATS integration for metric ingestion\n 20→package metricstore\n 21→\n 22→import (\n 23→\t\"bytes\"\n 24→\t\"context\"\n 25→\t\"encoding/json\"\n 26→\t\"errors\"\n 27→\t\"fmt\"\n 28→\t\"runtime\"\n 29→\t\"runtime/debug\"\n 30→\t\"slices\"\n 31→\t\"sync\"\n 32→\t\"time\"\n 33→\n 34→\t\"github.com/ClusterCockpit/cc-backend/internal/config\"\n 35→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 36→\t\"github.com/ClusterCockpit/cc-lib/v2/resampler\"\n 37→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 38→\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n 39→)\n 40→\n 41→// GlobalState holds the global state for the metric store with thread-safe access.\n 42→type GlobalState struct {\n 43→\tmu sync.RWMutex\n 44→\tlastRetentionTime int64\n 45→\tselectorsExcluded bool\n 46→}\n 47→\n 48→var (\n 49→\tsingleton sync.Once\n 50→\tmsInstance *MemoryStore\n 51→\t// shutdownFunc stores the context cancellation function created in Init\n 52→\t// and is called during Shutdown to cancel all background goroutines\n 53→\tshutdownFunc context.CancelFunc\n 54→\tshutdownFuncMu sync.Mutex // Protects shutdownFunc from concurrent access\n 55→\t// Create a global instance\n 56→\tstate = &GlobalState{}\n 57→)\n 58→\n 59→// NodeProvider provides information about nodes currently in use by running jobs.\n 60→//\n 61→// This interface allows metricstore to query job information without directly\n 62→// depending on the repository package, breaking the import cycle.\n 63→//\n 64→// Implementations should return nodes that are actively processing jobs started\n 65→// before the given timestamp. 
These nodes will be excluded from retention-based\n 66→// garbage collection to prevent data loss for jobs that are still running or\n 67→// recently completed.\n 68→type NodeProvider interface {\n 69→\t// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames\n 70→\t// that are currently in use by jobs that started before the given timestamp.\n 71→\t//\n 72→\t// Parameters:\n 73→\t// - ts: Unix timestamp threshold - returns nodes with jobs started before this time\n 74→\t//\n 75→\t// Returns:\n 76→\t// - Map of cluster names to lists of node hostnames that should be excluded from garbage collection\n 77→\t// - Error if the query fails\n 78→\tGetUsedNodes(ts int64) (map[string][]string, error)\n 79→}\n 80→\n 81→// Metric represents a single metric data point to be written to the store.\n 82→type Metric struct {\n 83→\tName string\n 84→\tValue schema.Float\n 85→\t// MetricConfig contains frequency and aggregation settings for this metric.\n 86→\t// If Frequency is 0, configuration will be looked up from MemoryStore.Metrics during Write().\n 87→\tMetricConfig MetricConfig\n 88→}\n 89→\n 90→// MemoryStore is the main in-memory time-series metric storage implementation.\n 91→//\n 92→// It organizes metrics in a hierarchical tree structure where each level represents\n 93→// a component of the system hierarchy (e.g., cluster → host → CPU). Each level can\n 94→// store multiple metrics as time-series buffers.\n 95→//\n 96→// The store is initialized as a singleton via InitMetrics() and accessed via GetMemoryStore().\n 97→// All public methods are safe for concurrent use.\n 98→type MemoryStore struct {\n 99→\tMetrics map[string]MetricConfig\n 100→\troot Level\n 101→\tnodeProvider NodeProvider\n 102→}\n 103→\n 104→// Init initializes the metric store from configuration and starts background workers.\n 105→//\n 106→// This function must be called exactly once before any other metricstore operations.\n 107→// It performs the following initialization steps:\n 108→// 1. Validates and decodes the metric store configuration\n 109→// 2. Configures worker pool size (defaults to NumCPU/2+1, max 10)\n 110→// 3. Loads metric configurations from all registered clusters\n 111→// 4. Restores checkpoints within the retention window\n 112→// 5. Starts background workers for retention, checkpointing, archiving, and monitoring\n 113→// 6. Optionally subscribes to NATS for real-time metric ingestion\n 114→//\n 115→// Parameters:\n 116→// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig)\n 117→// - wg: WaitGroup that will be incremented for each background goroutine started\n 118→//\n 119→// The function will call cclog.Fatal on critical errors during initialization.\n 120→// Use Shutdown() to cleanly stop all background workers started by Init().\n 121→//\n 122→// Note: Signal handling must be implemented by the caller. 
Call Shutdown() when\n 123→// receiving termination signals to ensure checkpoint data is persisted.\n 124→func Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.WaitGroup) {\n 125→\tstartupTime := time.Now()\n 126→\n 127→\tif rawConfig != nil {\n 128→\t\tconfig.Validate(configSchema, rawConfig)\n 129→\t\tdec := json.NewDecoder(bytes.NewReader(rawConfig))\n 130→\t\tdec.DisallowUnknownFields()\n 131→\t\tif err := dec.Decode(&Keys); err != nil {\n 132→\t\t\tcclog.Abortf(\"[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\\nError: %s\\n\", rawConfig, err.Error())\n 133→\t\t}\n 134→\t}\n 135→\n 136→\t// Set NumWorkers from config or use default\n 137→\tif Keys.NumWorkers <= 0 {\n 138→\t\tKeys.NumWorkers = min(runtime.NumCPU()/2+1, DefaultMaxWorkers)\n 139→\t}\n 140→\tcclog.Debugf(\"[METRICSTORE]> Using %d workers for checkpoint/archive operations\\n\", Keys.NumWorkers)\n 141→\n 142→\t// Pass the config.MetricStoreKeys\n 143→\tInitMetrics(metrics)\n 144→\n 145→\tms := GetMemoryStore()\n 146→\n 147→\td, err := time.ParseDuration(Keys.RetentionInMemory)\n 148→\tif err != nil {\n 149→\t\tcclog.Fatal(err)\n 150→\t}\n 151→\n 152→\trestoreFrom := startupTime.Add(-d)\n 153→\tcclog.Infof(\"[METRICSTORE]> Loading checkpoints newer than %s\\n\", restoreFrom.Format(time.RFC3339))\n 154→\tfiles, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())\n 155→\tloadedData := ms.SizeInBytes() / 1024 / 1024 // In MB\n 156→\tif err != nil {\n 157→\t\tcclog.Fatalf(\"[METRICSTORE]> Loading checkpoints failed: %s\\n\", err.Error())\n 158→\t} else {\n 159→\t\tcclog.Infof(\"[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\\n\", files, loadedData, time.Since(startupTime).Seconds())\n 160→\t}\n 161→\n 162→\t// Try to use less memory by forcing a GC run here and then\n 163→\t// lowering the target percentage. The default of 100 means\n 164→\t// that only once the ratio of new allocations execeds the\n 165→\t// previously active heap, a GC is triggered.\n 166→\t// Forcing a GC here will set the \"previously active heap\"\n 167→\t// to a minumum.\n 168→\t// runtime.GC()\n 169→\n 170→\tctx, shutdown := context.WithCancel(context.Background())\n 171→\n 172→\tRetention(wg, ctx)\n 173→\tCheckpointing(wg, ctx)\n 174→\tCleanUp(wg, ctx)\n 175→\tWALStaging(wg, ctx)\n 176→\tMemoryUsageTracker(wg, ctx)\n 177→\n 178→\t// Note: Signal handling has been removed from this function.\n 179→\t// The caller is responsible for handling shutdown signals and calling\n 180→\t// the shutdown() function when appropriate.\n 181→\t// Store the shutdown function for later use by Shutdown()\n 182→\tshutdownFuncMu.Lock()\n 183→\tshutdownFunc = shutdown\n 184→\tshutdownFuncMu.Unlock()\n 185→\n 186→\tif Keys.Subscriptions != nil {\n 187→\t\twg.Go(func() {\n 188→\t\t\tif err := ReceiveNats(ms, Keys.NumWorkers, ctx); err != nil {\n 189→\t\t\t\tcclog.Fatal(err)\n 190→\t\t\t}\n 191→\t\t})\n 192→\t}\n 193→}\n 194→\n 195→// InitMetrics initializes the singleton MemoryStore instance with the given metric configurations.\n 196→//\n 197→// This function must be called before GetMemoryStore() and can only be called once due to\n 198→// the singleton pattern. 
It assigns each metric an internal offset for efficient buffer indexing.\n 199→//\n 200→// Parameters:\n 201→// - metrics: Map of metric names to their configurations (frequency and aggregation strategy)\n 202→//\n 203→// Panics if any metric has Frequency == 0, which indicates an invalid configuration.\n 204→//\n 205→// After this call, the global msInstance is ready for use via GetMemoryStore().\n 206→func InitMetrics(metrics map[string]MetricConfig) {\n 207→\tsingleton.Do(func() {\n 208→\t\toffset := 0\n 209→\t\tfor key, cfg := range metrics {\n 210→\t\t\tif cfg.Frequency == 0 {\n 211→\t\t\t\tpanic(\"[METRICSTORE]> invalid frequency\")\n 212→\t\t\t}\n 213→\n 214→\t\t\tmetrics[key] = MetricConfig{\n 215→\t\t\t\tFrequency: cfg.Frequency,\n 216→\t\t\t\tAggregation: cfg.Aggregation,\n 217→\t\t\t\toffset: offset,\n 218→\t\t\t}\n 219→\t\t\toffset += 1\n 220→\t\t}\n 221→\n 222→\t\tmsInstance = &MemoryStore{\n 223→\t\t\troot: Level{\n 224→\t\t\t\tmetrics: make([]*buffer, len(metrics)),\n 225→\t\t\t\tchildren: make(map[string]*Level),\n 226→\t\t\t},\n 227→\t\t\tMetrics: metrics,\n 228→\t\t}\n 229→\t})\n 230→}\n 231→\n 232→// GetMemoryStore returns the singleton MemoryStore instance.\n 233→//\n 234→// Returns the initialized MemoryStore singleton. Calls cclog.Fatal if InitMetrics() was not called first.\n 235→//\n 236→// This function is safe for concurrent use after initialization.\n 237→func GetMemoryStore() *MemoryStore {\n 238→\tif msInstance == nil {\n 239→\t\tcclog.Warnf(\"[METRICSTORE]> MemoryStore not initialized!\")\n 240→\t}\n 241→\n 242→\treturn msInstance\n 243→}\n 244→\n 245→func (ms *MemoryStore) GetMetricFrequency(metricName string) (int64, error) {\n 246→\tif metric, ok := ms.Metrics[metricName]; ok {\n 247→\t\treturn metric.Frequency, nil\n 248→\t}\n 249→\treturn 0, fmt.Errorf(\"[METRICSTORE]> metric %s not found\", metricName)\n 250→}\n 251→\n 252→// SetNodeProvider sets the NodeProvider implementation for the MemoryStore.\n 253→// This must be called during initialization to provide job state information\n 254→// for selective buffer retention during Free operations.\n 255→// If not set, the Free function will fall back to freeing all buffers.\n 256→func (ms *MemoryStore) SetNodeProvider(provider NodeProvider) {\n 257→\tms.nodeProvider = provider\n 258→}\n 259→\n 260→// Shutdown performs a graceful shutdown of the metric store.\n 261→//\n 262→// This function cancels all background goroutines started by Init() and writes\n 263→// a final checkpoint to disk before returning. It should be called when the\n 264→// application receives a termination signal.\n 265→//\n 266→// The function will:\n 267→// 1. Cancel the context to stop all background workers\n 268→// 2. Close the WAL messages channel if using WAL format\n 269→// 3. Write a final checkpoint to preserve in-memory data\n 270→// 4. 
Log any errors encountered during shutdown\n 271→//\n 272→// Note: This function blocks until the final checkpoint is written.\n 273→func Shutdown() {\n 274→\ttotalStart := time.Now()\n 275→\n 276→\tshutdownFuncMu.Lock()\n 277→\tdefer shutdownFuncMu.Unlock()\n 278→\tif shutdownFunc != nil {\n 279→\t\tshutdownFunc()\n 280→\t}\n 281→\tcclog.Infof(\"[METRICSTORE]> Background workers cancelled (%v)\", time.Since(totalStart))\n 282→\n 283→\tif Keys.Checkpoints.FileFormat == \"wal\" {\n 284→\t\t// Signal producers to stop sending before closing channels,\n 285→\t\t// preventing send-on-closed-channel panics from in-flight NATS workers.\n 286→\t\twalShuttingDown.Store(true)\n 287→\t\t// Brief grace period for in-flight DecodeLine calls to complete.\n 288→\t\ttime.Sleep(100 * time.Millisecond)\n 289→\n 290→\t\tfor _, ch := range walShardChs {\n 291→\t\t\tclose(ch)\n 292→\t\t}\n 293→\t\tdrainStart := time.Now()\n 294→\t\tWaitForWALStagingDrain()\n 295→\t\tcclog.Infof(\"[METRICSTORE]> WAL staging goroutines exited (%v)\", time.Since(drainStart))\n 296→\t}\n 297→\n 298→\tcclog.Infof(\"[METRICSTORE]> Writing checkpoint to '%s'...\", Keys.Checkpoints.RootDir)\n 299→\tcheckpointStart := time.Now()\n 300→\tvar files int\n 301→\tvar err error\n 302→\n 303→\tms := GetMemoryStore()\n 304→\n 305→\tlastCheckpointMu.Lock()\n 306→\tfrom := lastCheckpoint\n 307→\tlastCheckpointMu.Unlock()\n 308→\n 309→\tif Keys.Checkpoints.FileFormat == \"wal\" {\n 310→\t\t// WAL files are deleted per-host inside ToCheckpointWAL workers.\n 311→\t\tfiles, _, err = ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n 312→\t} else {\n 313→\t\tfiles, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n 314→\t}\n 315→\n 316→\tif err != nil {\n 317→\t\tcclog.Errorf(\"[METRICSTORE]> Writing checkpoint failed: %s\", err.Error())\n 318→\t}\n 319→\tcclog.Infof(\"[METRICSTORE]> Done! (%d files written in %v, total shutdown: %v)\", files, time.Since(checkpointStart), time.Since(totalStart))\n 320→}\n 321→\n 322→// Retention starts a background goroutine that periodically frees old metric data.\n 323→//\n 324→// This worker runs at half the retention interval and calls Free() to remove buffers\n 325→// older than the configured retention time. 
It respects the NodeProvider to preserve\n 326→// data for nodes with active jobs.\n 327→//\n 328→// Parameters:\n 329→// - wg: WaitGroup to signal completion when context is cancelled\n 330→// - ctx: Context for cancellation signal\n 331→//\n 332→// The goroutine exits when ctx is cancelled.\n 333→func Retention(wg *sync.WaitGroup, ctx context.Context) {\n 334→\tms := GetMemoryStore()\n 335→\n 336→\twg.Go(func() {\n 337→\t\td, err := time.ParseDuration(Keys.RetentionInMemory)\n 338→\t\tif err != nil {\n 339→\t\t\tcclog.Fatal(err)\n 340→\t\t}\n 341→\t\tif d <= 0 {\n 342→\t\t\treturn\n 343→\t\t}\n 344→\n 345→\t\ttickInterval := d / 2\n 346→\t\tif tickInterval <= 0 {\n 347→\t\t\treturn\n 348→\t\t}\n 349→\t\tticker := time.NewTicker(tickInterval)\n 350→\t\tdefer ticker.Stop()\n 351→\n 352→\t\tfor {\n 353→\t\t\tselect {\n 354→\t\t\tcase <-ctx.Done():\n 355→\t\t\t\treturn\n 356→\t\t\tcase <-ticker.C:\n 357→\t\t\t\tstate.mu.Lock()\n 358→\n 359→\t\t\t\tt := time.Now().Add(-d)\n 360→\n 361→\t\t\t\tstate.lastRetentionTime = t.Unix()\n 362→\n 363→\t\t\t\tcclog.Infof(\"[METRICSTORE]> start freeing buffers (older than %s)...\\n\", t.Format(time.RFC3339))\n 364→\n 365→\t\t\t\tfreed, err := Free(ms, t)\n 366→\t\t\t\tif err != nil {\n 367→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> freeing up buffers failed: %s\\n\", err.Error())\n 368→\t\t\t\t} else {\n 369→\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d buffers freed\\n\", freed)\n 370→\t\t\t\t}\n 371→\n 372→\t\t\t\tstate.mu.Unlock()\n 373→\n 374→\t\t\t\t// Clean up the buffer pool\n 375→\t\t\t\tbufferPool.Clean(state.lastRetentionTime)\n 376→\t\t\t}\n 377→\t\t}\n 378→\t})\n 379→}\n 380→\n 381→// MemoryUsageTracker starts a background goroutine that monitors memory usage.\n 382→//\n 383→// This worker checks actual process memory usage (via runtime.MemStats) periodically\n 384→// and force-frees buffers if memory exceeds the configured cap. 
It uses FreeOSMemory()\n 385→// to return memory to the OS after freeing buffers, avoiding aggressive GC that causes\n 386→// performance issues.\n 387→//\n 388→// The tracker logs both actual memory usage (heap allocated) and metric data size for\n 389→// visibility into memory overhead from Go runtime structures and allocations.\n 390→//\n 391→// Parameters:\n 392→// - wg: WaitGroup to signal completion when context is cancelled\n 393→// - ctx: Context for cancellation signal\n 394→//\n 395→// The goroutine exits when ctx is cancelled.\n 396→func MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {\n 397→\tms := GetMemoryStore()\n 398→\n 399→\twg.Go(func() {\n 400→\t\tnormalInterval := DefaultMemoryUsageTrackerInterval\n 401→\t\tfastInterval := 30 * time.Second\n 402→\n 403→\t\tif normalInterval <= 0 {\n 404→\t\t\treturn\n 405→\t\t}\n 406→\n 407→\t\tticker := time.NewTicker(normalInterval)\n 408→\t\tdefer ticker.Stop()\n 409→\t\tcurrentInterval := normalInterval\n 410→\n 411→\t\tfor {\n 412→\t\t\tselect {\n 413→\t\t\tcase <-ctx.Done():\n 414→\t\t\t\treturn\n 415→\t\t\tcase <-ticker.C:\n 416→\t\t\t\tvar mem runtime.MemStats\n 417→\t\t\t\truntime.ReadMemStats(&mem)\n 418→\t\t\t\tactualMemoryGB := float64(mem.Alloc) / 1e9\n 419→\t\t\t\tmetricDataGB := ms.SizeInGB()\n 420→\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory usage: %.2f GB actual (%.2f GB metric data)\", actualMemoryGB, metricDataGB)\n 421→\n 422→\t\t\t\tfreedExcluded := 0\n 423→\t\t\t\tfreedEmergency := 0\n 424→\t\t\t\tvar err error\n 425→\n 426→\t\t\t\tstate.mu.RLock()\n 427→\t\t\t\tlastRetention := state.lastRetentionTime\n 428→\t\t\t\tselectorsExcluded := state.selectorsExcluded\n 429→\t\t\t\tstate.mu.RUnlock()\n 430→\n 431→\t\t\t\tif lastRetention != 0 && selectorsExcluded {\n 432→\t\t\t\t\tfreedExcluded, err = ms.Free(nil, lastRetention)\n 433→\t\t\t\t\tif err != nil {\n 434→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while force-freeing the excluded buffers: %s\", err)\n 435→\t\t\t\t\t}\n 436→\n 437→\t\t\t\t\tif freedExcluded > 0 {\n 438→\t\t\t\t\t\tdebug.FreeOSMemory()\n 439→\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d excluded buffers force-freed\", freedExcluded)\n 440→\t\t\t\t\t}\n 441→\t\t\t\t}\n 442→\n 443→\t\t\t\truntime.ReadMemStats(&mem)\n 444→\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n 445→\n 446→\t\t\t\tif actualMemoryGB > float64(Keys.MemoryCap) {\n 447→\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing\", actualMemoryGB, Keys.MemoryCap)\n 448→\n 449→\t\t\t\t\t// Use progressive time-based Free with increasing threshold\n 450→\t\t\t\t\t// instead of ForceFree loop — fewer tree traversals, more effective\n 451→\t\t\t\t\td, parseErr := time.ParseDuration(Keys.RetentionInMemory)\n 452→\t\t\t\t\tif parseErr != nil {\n 453→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> cannot parse retention duration: %s\", parseErr)\n 454→\t\t\t\t\t} else {\n 455→\t\t\t\t\t\tthresholds := []float64{0.75, 0.5, 0.25}\n 456→\t\t\t\t\t\tfor _, fraction := range thresholds {\n 457→\t\t\t\t\t\t\tthreshold := time.Now().Add(-time.Duration(float64(d) * fraction))\n 458→\t\t\t\t\t\t\tfreed, freeErr := ms.Free(nil, threshold.Unix())\n 459→\t\t\t\t\t\t\tif freeErr != nil {\n 460→\t\t\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while freeing buffers at %.0f%% retention: %s\", fraction*100, freeErr)\n 461→\t\t\t\t\t\t\t}\n 462→\t\t\t\t\t\t\tfreedEmergency += freed\n 463→\n 464→\t\t\t\t\t\t\tbufferPool.Clear()\n 465→\t\t\t\t\t\t\truntime.GC()\n 
466→\t\t\t\t\t\t\truntime.ReadMemStats(&mem)\n 467→\t\t\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n 468→\n 469→\t\t\t\t\t\t\tif actualMemoryGB < float64(Keys.MemoryCap) {\n 470→\t\t\t\t\t\t\t\tbreak\n 471→\t\t\t\t\t\t\t}\n 472→\t\t\t\t\t\t}\n 473→\t\t\t\t\t}\n 474→\n 475→\t\t\t\t\tbufferPool.Clear()\n 476→\t\t\t\t\tdebug.FreeOSMemory()\n 477→\n 478→\t\t\t\t\truntime.ReadMemStats(&mem)\n 479→\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n 480→\n 481→\t\t\t\t\tif actualMemoryGB >= float64(Keys.MemoryCap) {\n 482→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> after emergency frees (%d buffers), memory usage %.2f GB still at/above cap %d GB\", freedEmergency, actualMemoryGB, Keys.MemoryCap)\n 483→\t\t\t\t\t} else {\n 484→\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> emergency freeing complete: %d buffers freed, memory now %.2f GB\", freedEmergency, actualMemoryGB)\n 485→\t\t\t\t\t}\n 486→\t\t\t\t}\n 487→\n 488→\t\t\t\t// Adaptive ticker: check more frequently when memory is high\n 489→\t\t\t\tmemoryRatio := actualMemoryGB / float64(Keys.MemoryCap)\n 490→\t\t\t\tif memoryRatio > 0.8 && currentInterval != fastInterval {\n 491→\t\t\t\t\tticker.Reset(fastInterval)\n 492→\t\t\t\t\tcurrentInterval = fastInterval\n 493→\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to fast check interval (30s)\", memoryRatio*100)\n 494→\t\t\t\t} else if memoryRatio <= 0.8 && currentInterval != normalInterval {\n 495→\t\t\t\t\tticker.Reset(normalInterval)\n 496→\t\t\t\t\tcurrentInterval = normalInterval\n 497→\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to normal check interval\", memoryRatio*100)\n 498→\t\t\t\t}\n 499→\t\t\t}\n 500→\t\t}\n 501→\t})\n 502→}\n 503→\n 504→// Free removes metric data older than the given time while preserving data for active nodes.\n 505→//\n 506→// This function implements intelligent retention by consulting the NodeProvider (if configured)\n 507→// to determine which nodes are currently in use by running jobs. 
Data for these nodes is\n 508→// preserved even if older than the retention time.\n 509→//\n 510→// Parameters:\n 511→// - ms: The MemoryStore instance\n 512→// - t: Time threshold - buffers with data older than this will be freed\n 513→//\n 514→// Returns:\n 515→// - Number of buffers freed\n 516→// - Error if NodeProvider query fails\n 517→//\n 518→// Behavior:\n 519→// - If no NodeProvider is set: frees all buffers older than t\n 520→// - If NodeProvider returns empty map: frees all buffers older than t\n 521→// - Otherwise: preserves buffers for nodes returned by GetUsedNodes(), frees others\n 522→func Free(ms *MemoryStore, t time.Time) (int, error) {\n 523→\t// If no NodeProvider is configured, free all buffers older than t\n 524→\tif ms.nodeProvider == nil {\n 525→\t\treturn ms.Free(nil, t.Unix())\n 526→\t}\n 527→\n 528→\texcludeSelectors, err := ms.nodeProvider.GetUsedNodes(t.Unix())\n 529→\tif err != nil {\n 530→\t\treturn 0, err\n 531→\t}\n 532→\n 533→\tswitch lenMap := len(excludeSelectors); lenMap {\n 534→\n 535→\t// If the length of the map returned by GetUsedNodes() is 0,\n 536→\t// then use default Free method with nil selector\n 537→\tcase 0:\n 538→\t\tstate.selectorsExcluded = false\n 539→\t\treturn ms.Free(nil, t.Unix())\n 540→\n 541→\t// Else formulate selectors, exclude those from the map\n 542→\t// and free the rest of the selectors\n 543→\tdefault:\n 544→\t\tstate.selectorsExcluded = true\n 545→\t\tselectors := GetSelectors(ms, excludeSelectors)\n 546→\t\treturn FreeSelected(ms, selectors, t)\n 547→\t}\n 548→}\n 549→\n 550→// FreeSelected frees buffers for specific selectors while preserving others.\n 551→//\n 552→// This function is used when we want to retain some specific nodes beyond the retention time.\n 553→// It iterates through the provided selectors and frees their associated buffers.\n 554→//\n 555→// Parameters:\n 556→// - ms: The MemoryStore instance\n 557→// - selectors: List of selector paths to free (e.g., [[\"cluster1\", \"node1\"], [\"cluster2\", \"node2\"]])\n 558→// - t: Time threshold for freeing buffers\n 559→//\n 560→// Returns the total number of buffers freed and any error encountered.\n 561→func FreeSelected(ms *MemoryStore, selectors [][]string, t time.Time) (int, error) {\n 562→\tfreed := 0\n 563→\n 564→\tfor _, selector := range selectors {\n 565→\n 566→\t\tfreedBuffers, err := ms.Free(selector, t.Unix())\n 567→\t\tif err != nil {\n 568→\t\t\tcclog.Errorf(\"error while freeing selected buffers: %#v\", err)\n 569→\t\t}\n 570→\t\tfreed += freedBuffers\n 571→\n 572→\t}\n 573→\n 574→\treturn freed, nil\n 575→}\n 576→\n 577→// GetSelectors returns all selectors at depth 2 (cluster/node level) that are NOT in the exclusion map.\n 578→//\n 579→// This function generates a list of selectors whose buffers should be freed by excluding\n 580→// selectors that correspond to nodes currently in use by running jobs.\n 581→//\n 582→// Parameters:\n 583→// - ms: The MemoryStore instance\n 584→// - excludeSelectors: Map of cluster names to node hostnames that should NOT be freed\n 585→//\n 586→// Returns a list of selectors ([]string paths) that can be safely freed.\n 587→//\n 588→// Example:\n 589→//\n 590→//\tIf the tree has paths [\"emmy\", \"node001\"] and [\"emmy\", \"node002\"],\n 591→//\tand excludeSelectors contains {\"emmy\": [\"node001\"]},\n 592→//\tthen only [[\"emmy\", \"node002\"]] is returned.\n 593→func GetSelectors(ms *MemoryStore, excludeSelectors map[string][]string) [][]string {\n 594→\tallSelectors := ms.GetPaths(2)\n 595→\n 
596→\tfilteredSelectors := make([][]string, 0, len(allSelectors))\n 597→\n 598→\tfor _, path := range allSelectors {\n 599→\t\tif len(path) < 2 {\n 600→\t\t\tcontinue\n 601→\t\t}\n 602→\n 603→\t\tkey := path[0] // The \"Key\" (Level 1)\n 604→\t\tvalue := path[1] // The \"Value\" (Level 2)\n 605→\n 606→\t\texclude := false\n 607→\n 608→\t\t// Check if the key exists in our exclusion map\n 609→\t\tif excludedValues, exists := excludeSelectors[key]; exists {\n 610→\t\t\t// The key exists, now check if the specific value is in the exclusion list\n 611→\t\t\tif slices.Contains(excludedValues, value) {\n 612→\t\t\t\texclude = true\n 613→\t\t\t}\n 614→\t\t}\n 615→\n 616→\t\tif !exclude {\n 617→\t\t\tfilteredSelectors = append(filteredSelectors, path)\n 618→\t\t}\n 619→\t}\n 620→\n 621→\treturn filteredSelectors\n 622→}\n 623→\n 624→// GetPaths returns a list of lists (paths) to the specified depth.\n 625→func (ms *MemoryStore) GetPaths(targetDepth int) [][]string {\n 626→\tvar results [][]string\n 627→\n 628→\t// Start recursion. Initial path is empty.\n 629→\t// We treat Root as depth 0.\n 630→\tms.root.collectPaths(0, targetDepth, []string{}, &results)\n 631→\n 632→\treturn results\n 633→}\n 634→\n 635→// Write all values in `metrics` to the level specified by `selector` for time `ts`.\n 636→// Look at `findLevelOrCreate` for how selectors work.\n 637→func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {\n 638→\tvar ok bool\n 639→\tfor i, metric := range metrics {\n 640→\t\tif metric.MetricConfig.Frequency == 0 {\n 641→\t\t\tmetric.MetricConfig, ok = m.Metrics[metric.Name]\n 642→\t\t\tif !ok {\n 643→\t\t\t\tcclog.Debugf(\"[METRICSTORE]> Unknown metric '%s' in Write() - skipping\", metric.Name)\n 644→\t\t\t\tmetric.MetricConfig.Frequency = 0\n 645→\t\t\t}\n 646→\t\t\tmetrics[i] = metric\n 647→\t\t}\n 648→\t}\n 649→\n 650→\treturn m.WriteToLevel(&m.root, selector, ts, metrics)\n 651→}\n 652→\n 653→func (m *MemoryStore) GetLevel(selector []string) *Level {\n 654→\treturn m.root.findLevelOrCreate(selector, len(m.Metrics))\n 655→}\n 656→\n 657→// WriteToLevel assumes that `minfo` in `metrics` is filled in\n 658→func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {\n 659→\tl = l.findLevelOrCreate(selector, len(m.Metrics))\n 660→\tl.lock.Lock()\n 661→\tdefer l.lock.Unlock()\n 662→\n 663→\tfor _, metric := range metrics {\n 664→\t\tif metric.MetricConfig.Frequency == 0 {\n 665→\t\t\tcontinue\n 666→\t\t}\n 667→\n 668→\t\tb := l.metrics[metric.MetricConfig.offset]\n 669→\t\tif b == nil {\n 670→\t\t\t// First write to this metric and level\n 671→\t\t\tb = newBuffer(ts, metric.MetricConfig.Frequency)\n 672→\t\t\tl.metrics[metric.MetricConfig.offset] = b\n 673→\t\t}\n 674→\n 675→\t\tnb, err := b.write(ts, metric.Value)\n 676→\t\tif err != nil {\n 677→\t\t\treturn err\n 678→\t\t}\n 679→\n 680→\t\t// Last write created a new buffer...\n 681→\t\tif b != nb {\n 682→\t\t\tl.metrics[metric.MetricConfig.offset] = nb\n 683→\t\t}\n 684→\t}\n 685→\treturn nil\n 686→}\n 687→\n 688→// Read returns all values for metric `metric` from `from` to `to` for the selected level(s).\n 689→// If the level does not hold the metric itself, the data will be aggregated recursively from the children.\n 690→// The second and third return value are the actual from/to for the data. 
Those can be different from\n 691→// the range asked for if no data was available.\n 692→func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {\n 693→\tif from > to {\n 694→\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid time range\")\n 695→\t}\n 696→\n 697→\tminfo, ok := m.Metrics[metric]\n 698→\tif !ok {\n 699→\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> unknown metric: \" + metric)\n 700→\t}\n 701→\n 702→\tn, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)\n 703→\n 704→\terr := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {\n 705→\t\tcdata, cfrom, cto, err := b.read(from, to, data)\n 706→\t\tif err != nil {\n 707→\t\t\treturn err\n 708→\t\t}\n 709→\n 710→\t\tif n == 0 {\n 711→\t\t\tfrom, to = cfrom, cto\n 712→\t\t} else if from != cfrom || to != cto || len(data) != len(cdata) {\n 713→\t\t\tmissingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)\n 714→\t\t\tif missingfront != 0 {\n 715→\t\t\t\treturn ErrDataDoesNotAlignMissingFront\n 716→\t\t\t}\n 717→\n 718→\t\t\tnewlen := len(cdata) - missingback\n 719→\t\t\tif newlen < 1 {\n 720→\t\t\t\treturn ErrDataDoesNotAlignMissingBack\n 721→\t\t\t}\n 722→\t\t\tcdata = cdata[0:newlen]\n 723→\t\t\tif len(cdata) != len(data) {\n 724→\t\t\t\treturn ErrDataDoesNotAlignDataLenMismatch\n 725→\t\t\t}\n 726→\n 727→\t\t\tfrom, to = cfrom, cto\n 728→\t\t}\n 729→\n 730→\t\tdata = cdata\n 731→\t\tn += 1\n 732→\t\treturn nil\n 733→\t})\n 734→\n 735→\tif err != nil {\n 736→\t\treturn nil, 0, 0, 0, err\n 737→\t} else if n == 0 {\n 738→\t\treturn nil, 0, 0, 0, ErrNoHostOrMetric\n 739→\t} else if n > 1 {\n 740→\t\tif minfo.Aggregation == AvgAggregation {\n 741→\t\t\tnormalize := 1. 
/ schema.Float(n)\n 742→\t\t\tfor i := 0; i < len(data); i++ {\n 743→\t\t\t\tdata[i] *= normalize\n 744→\t\t\t}\n 745→\t\t} else if minfo.Aggregation != SumAggregation {\n 746→\t\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid aggregation\")\n 747→\t\t}\n 748→\t}\n 749→\n 750→\tdata, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)\n 751→\tif err != nil {\n 752→\t\treturn nil, 0, 0, 0, err\n 753→\t}\n 754→\n 755→\treturn data, from, to, resolution, nil\n 756→}\n 757→\n 758→// Free releases all buffers for the selected level and all its children that\n 759→// contain only values older than `t`.\n 760→func (m *MemoryStore) Free(selector []string, t int64) (int, error) {\n 761→\treturn m.GetLevel(selector).free(t)\n 762→}\n 763→\n 764→// ForceFree unconditionally removes the oldest buffer from each metric chain.\n 765→func (m *MemoryStore) ForceFree() (int, error) {\n 766→\treturn m.GetLevel(nil).forceFree()\n 767→}\n 768→\n 769→func (m *MemoryStore) FreeAll() error {\n 770→\tm.root.lock.Lock()\n 771→\tdefer m.root.lock.Unlock()\n 772→\n 773→\tfor k := range m.root.children {\n 774→\t\tdelete(m.root.children, k)\n 775→\t}\n 776→\n 777→\treturn nil\n 778→}\n 779→\n 780→func (m *MemoryStore) SizeInBytes() int64 {\n 781→\treturn m.root.sizeInBytes()\n 782→}\n 783→\n 784→func (m *MemoryStore) SizeInGB() float64 {\n 785→\treturn float64(m.root.sizeInBytes()) / 1e9\n 786→}\n 787→\n 788→// ListChildren , given a selector, returns a list of all children of the level\n 789→// selected.\n 790→func (m *MemoryStore) ListChildren(selector []string) []string {\n 791→\tlvl := &m.root\n 792→\tfor lvl != nil && len(selector) != 0 {\n 793→\t\tlvl.lock.RLock()\n 794→\t\tnext := lvl.children[selector[0]]\n 795→\t\tlvl.lock.RUnlock()\n 796→\t\tlvl = next\n 797→\t\tselector = selector[1:]\n 798→\t}\n 799→\n 800→\tif lvl == nil {\n 801→\t\treturn nil\n 802→\t}\n 803→\n 804→\tlvl.lock.RLock()\n 805→\tdefer lvl.lock.RUnlock()\n 806→\n 807→\tchildren := make([]string, 0, len(lvl.children))\n 808→\tfor child := range lvl.children {\n 809→\t\tchildren = append(children, child)\n 810→\t}\n 811→\n 812→\treturn children\n 813→}\n 814→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"f6351653-c703-42cc-9ec1-77eb3d2afff9","timestamp":"2026-03-29T19:34:26.879Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. 
This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides an efficient in-memory time-series metric storage system\n// with support for hierarchical data organization, checkpointing, and archiving.\n//\n// The package organizes metrics in a tree structure (cluster → host → component) and\n// provides concurrent read/write access to metric data with configurable aggregation strategies.\n// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,\n// and enforcing retention policies.\n//\n// Key features:\n// - In-memory metric storage with configurable retention\n// - Hierarchical data organization (selectors)\n// - Concurrent checkpoint/archive workers\n// - Support for sum and average aggregation\n// - NATS integration for metric ingestion\npackage metricstore\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"encoding/json\"\n\t\"errors\"\n\t\"fmt\"\n\t\"runtime\"\n\t\"runtime/debug\"\n\t\"slices\"\n\t\"sync\"\n\t\"time\"\n\n\t\"github.com/ClusterCockpit/cc-backend/internal/config\"\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/resampler\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n)\n\n// GlobalState holds the global state for the metric store with thread-safe access.\ntype GlobalState struct {\n\tmu sync.RWMutex\n\tlastRetentionTime int64\n\tselectorsExcluded bool\n}\n\nvar (\n\tsingleton sync.Once\n\tmsInstance *MemoryStore\n\t// shutdownFunc stores the context cancellation function created in Init\n\t// and is called during Shutdown to cancel all background goroutines\n\tshutdownFunc context.CancelFunc\n\tshutdownFuncMu sync.Mutex // Protects shutdownFunc from concurrent access\n\t// Create a global instance\n\tstate = &GlobalState{}\n)\n\n// NodeProvider provides information about nodes currently in use by running jobs.\n//\n// This interface allows metricstore to query job information without directly\n// depending on the repository package, breaking the import cycle.\n//\n// Implementations should return nodes that are actively processing jobs started\n// before the given timestamp. These nodes will be excluded from retention-based\n// garbage collection to prevent data loss for jobs that are still running or\n// recently completed.\ntype NodeProvider interface {\n\t// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames\n\t// that are currently in use by jobs that started before the given timestamp.\n\t//\n\t// Parameters:\n\t// - ts: Unix timestamp threshold - returns nodes with jobs started before this time\n\t//\n\t// Returns:\n\t// - Map of cluster names to lists of node hostnames that should be excluded from garbage collection\n\t// - Error if the query fails\n\tGetUsedNodes(ts int64) (map[string][]string, error)\n}\n\n// Metric represents a single metric data point to be written to the store.\ntype Metric struct {\n\tName string\n\tValue schema.Float\n\t// MetricConfig contains frequency and aggregation settings for this metric.\n\t// If Frequency is 0, configuration will be looked up from MemoryStore.Metrics during Write().\n\tMetricConfig MetricConfig\n}\n\n// MemoryStore is the main in-memory time-series metric storage implementation.\n//\n// It organizes metrics in a hierarchical tree structure where each level represents\n// a component of the system hierarchy (e.g., cluster → host → CPU). 
Each level can\n// store multiple metrics as time-series buffers.\n//\n// The store is initialized as a singleton via InitMetrics() and accessed via GetMemoryStore().\n// All public methods are safe for concurrent use.\ntype MemoryStore struct {\n\tMetrics map[string]MetricConfig\n\troot Level\n\tnodeProvider NodeProvider\n}\n\n// Init initializes the metric store from configuration and starts background workers.\n//\n// This function must be called exactly once before any other metricstore operations.\n// It performs the following initialization steps:\n// 1. Validates and decodes the metric store configuration\n// 2. Configures worker pool size (defaults to NumCPU/2+1, max 10)\n// 3. Loads metric configurations from all registered clusters\n// 4. Restores checkpoints within the retention window\n// 5. Starts background workers for retention, checkpointing, archiving, and monitoring\n// 6. Optionally subscribes to NATS for real-time metric ingestion\n//\n// Parameters:\n// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig)\n// - wg: WaitGroup that will be incremented for each background goroutine started\n//\n// The function will call cclog.Fatal on critical errors during initialization.\n// Use Shutdown() to cleanly stop all background workers started by Init().\n//\n// Note: Signal handling must be implemented by the caller. Call Shutdown() when\n// receiving termination signals to ensure checkpoint data is persisted.\nfunc Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.WaitGroup) {\n\tstartupTime := time.Now()\n\n\tif rawConfig != nil {\n\t\tconfig.Validate(configSchema, rawConfig)\n\t\tdec := json.NewDecoder(bytes.NewReader(rawConfig))\n\t\tdec.DisallowUnknownFields()\n\t\tif err := dec.Decode(&Keys); err != nil {\n\t\t\tcclog.Abortf(\"[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\\nError: %s\\n\", rawConfig, err.Error())\n\t\t}\n\t}\n\n\t// Set NumWorkers from config or use default\n\tif Keys.NumWorkers <= 0 {\n\t\tKeys.NumWorkers = min(runtime.NumCPU()/2+1, DefaultMaxWorkers)\n\t}\n\tcclog.Debugf(\"[METRICSTORE]> Using %d workers for checkpoint/archive operations\\n\", Keys.NumWorkers)\n\n\t// Pass the config.MetricStoreKeys\n\tInitMetrics(metrics)\n\n\tms := GetMemoryStore()\n\n\td, err := time.ParseDuration(Keys.RetentionInMemory)\n\tif err != nil {\n\t\tcclog.Fatal(err)\n\t}\n\n\trestoreFrom := startupTime.Add(-d)\n\tcclog.Infof(\"[METRICSTORE]> Loading checkpoints newer than %s\\n\", restoreFrom.Format(time.RFC3339))\n\tfiles, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())\n\tloadedData := ms.SizeInBytes() / 1024 / 1024 // In MB\n\tif err != nil {\n\t\tcclog.Fatalf(\"[METRICSTORE]> Loading checkpoints failed: %s\\n\", err.Error())\n\t} else {\n\t\tcclog.Infof(\"[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\\n\", files, loadedData, time.Since(startupTime).Seconds())\n\t}\n\n\t// Try to use less memory by forcing a GC run here and then\n\t// lowering the target percentage. 
The default of 100 means\n\t// that only once the ratio of new allocations execeds the\n\t// previously active heap, a GC is triggered.\n\t// Forcing a GC here will set the \"previously active heap\"\n\t// to a minumum.\n\t// runtime.GC()\n\n\tctx, shutdown := context.WithCancel(context.Background())\n\n\tRetention(wg, ctx)\n\tCheckpointing(wg, ctx)\n\tCleanUp(wg, ctx)\n\tWALStaging(wg, ctx)\n\tMemoryUsageTracker(wg, ctx)\n\n\t// Note: Signal handling has been removed from this function.\n\t// The caller is responsible for handling shutdown signals and calling\n\t// the shutdown() function when appropriate.\n\t// Store the shutdown function for later use by Shutdown()\n\tshutdownFuncMu.Lock()\n\tshutdownFunc = shutdown\n\tshutdownFuncMu.Unlock()\n\n\tif Keys.Subscriptions != nil {\n\t\twg.Go(func() {\n\t\t\tif err := ReceiveNats(ms, Keys.NumWorkers, ctx); err != nil {\n\t\t\t\tcclog.Fatal(err)\n\t\t\t}\n\t\t})\n\t}\n}\n\n// InitMetrics initializes the singleton MemoryStore instance with the given metric configurations.\n//\n// This function must be called before GetMemoryStore() and can only be called once due to\n// the singleton pattern. It assigns each metric an internal offset for efficient buffer indexing.\n//\n// Parameters:\n// - metrics: Map of metric names to their configurations (frequency and aggregation strategy)\n//\n// Panics if any metric has Frequency == 0, which indicates an invalid configuration.\n//\n// After this call, the global msInstance is ready for use via GetMemoryStore().\nfunc InitMetrics(metrics map[string]MetricConfig) {\n\tsingleton.Do(func() {\n\t\toffset := 0\n\t\tfor key, cfg := range metrics {\n\t\t\tif cfg.Frequency == 0 {\n\t\t\t\tpanic(\"[METRICSTORE]> invalid frequency\")\n\t\t\t}\n\n\t\t\tmetrics[key] = MetricConfig{\n\t\t\t\tFrequency: cfg.Frequency,\n\t\t\t\tAggregation: cfg.Aggregation,\n\t\t\t\toffset: offset,\n\t\t\t}\n\t\t\toffset += 1\n\t\t}\n\n\t\tmsInstance = &MemoryStore{\n\t\t\troot: Level{\n\t\t\t\tmetrics: make([]*buffer, len(metrics)),\n\t\t\t\tchildren: make(map[string]*Level),\n\t\t\t},\n\t\t\tMetrics: metrics,\n\t\t}\n\t})\n}\n\n// GetMemoryStore returns the singleton MemoryStore instance.\n//\n// Returns the initialized MemoryStore singleton. Calls cclog.Fatal if InitMetrics() was not called first.\n//\n// This function is safe for concurrent use after initialization.\nfunc GetMemoryStore() *MemoryStore {\n\tif msInstance == nil {\n\t\tcclog.Warnf(\"[METRICSTORE]> MemoryStore not initialized!\")\n\t}\n\n\treturn msInstance\n}\n\nfunc (ms *MemoryStore) GetMetricFrequency(metricName string) (int64, error) {\n\tif metric, ok := ms.Metrics[metricName]; ok {\n\t\treturn metric.Frequency, nil\n\t}\n\treturn 0, fmt.Errorf(\"[METRICSTORE]> metric %s not found\", metricName)\n}\n\n// SetNodeProvider sets the NodeProvider implementation for the MemoryStore.\n// This must be called during initialization to provide job state information\n// for selective buffer retention during Free operations.\n// If not set, the Free function will fall back to freeing all buffers.\nfunc (ms *MemoryStore) SetNodeProvider(provider NodeProvider) {\n\tms.nodeProvider = provider\n}\n\n// Shutdown performs a graceful shutdown of the metric store.\n//\n// This function cancels all background goroutines started by Init() and writes\n// a final checkpoint to disk before returning. It should be called when the\n// application receives a termination signal.\n//\n// The function will:\n// 1. Cancel the context to stop all background workers\n// 2. 
Close the WAL messages channel if using WAL format\n// 3. Write a final checkpoint to preserve in-memory data\n// 4. Log any errors encountered during shutdown\n//\n// Note: This function blocks until the final checkpoint is written.\nfunc Shutdown() {\n\ttotalStart := time.Now()\n\n\tshutdownFuncMu.Lock()\n\tdefer shutdownFuncMu.Unlock()\n\tif shutdownFunc != nil {\n\t\tshutdownFunc()\n\t}\n\tcclog.Infof(\"[METRICSTORE]> Background workers cancelled (%v)\", time.Since(totalStart))\n\n\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t// Signal producers to stop sending before closing channels,\n\t\t// preventing send-on-closed-channel panics from in-flight NATS workers.\n\t\twalShuttingDown.Store(true)\n\t\t// Brief grace period for in-flight DecodeLine calls to complete.\n\t\ttime.Sleep(100 * time.Millisecond)\n\n\t\tfor _, ch := range walShardChs {\n\t\t\tclose(ch)\n\t\t}\n\t\tdrainStart := time.Now()\n\t\tWaitForWALStagingDrain()\n\t\tcclog.Infof(\"[METRICSTORE]> WAL staging goroutines exited (%v)\", time.Since(drainStart))\n\t}\n\n\tcclog.Infof(\"[METRICSTORE]> Writing checkpoint to '%s'...\", Keys.Checkpoints.RootDir)\n\tcheckpointStart := time.Now()\n\tvar files int\n\tvar err error\n\n\tms := GetMemoryStore()\n\n\tlastCheckpointMu.Lock()\n\tfrom := lastCheckpoint\n\tlastCheckpointMu.Unlock()\n\n\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t// WAL files are deleted per-host inside ToCheckpointWAL workers.\n\t\tfiles, _, err = ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n\t} else {\n\t\tfiles, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n\t}\n\n\tif err != nil {\n\t\tcclog.Errorf(\"[METRICSTORE]> Writing checkpoint failed: %s\", err.Error())\n\t}\n\tcclog.Infof(\"[METRICSTORE]> Done! (%d files written in %v, total shutdown: %v)\", files, time.Since(checkpointStart), time.Since(totalStart))\n}\n\n// Retention starts a background goroutine that periodically frees old metric data.\n//\n// This worker runs at half the retention interval and calls Free() to remove buffers\n// older than the configured retention time. 
It respects the NodeProvider to preserve\n// data for nodes with active jobs.\n//\n// Parameters:\n// - wg: WaitGroup to signal completion when context is cancelled\n// - ctx: Context for cancellation signal\n//\n// The goroutine exits when ctx is cancelled.\nfunc Retention(wg *sync.WaitGroup, ctx context.Context) {\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\t\td, err := time.ParseDuration(Keys.RetentionInMemory)\n\t\tif err != nil {\n\t\t\tcclog.Fatal(err)\n\t\t}\n\t\tif d <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\ttickInterval := d / 2\n\t\tif tickInterval <= 0 {\n\t\t\treturn\n\t\t}\n\t\tticker := time.NewTicker(tickInterval)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tstate.mu.Lock()\n\n\t\t\t\tt := time.Now().Add(-d)\n\n\t\t\t\tstate.lastRetentionTime = t.Unix()\n\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start freeing buffers (older than %s)...\\n\", t.Format(time.RFC3339))\n\n\t\t\t\tfreed, err := Free(ms, t)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> freeing up buffers failed: %s\\n\", err.Error())\n\t\t\t\t} else {\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d buffers freed\\n\", freed)\n\t\t\t\t}\n\n\t\t\t\tstate.mu.Unlock()\n\n\t\t\t\t// Clean up the buffer pool\n\t\t\t\tbufferPool.Clean(state.lastRetentionTime)\n\t\t\t}\n\t\t}\n\t})\n}\n\n// MemoryUsageTracker starts a background goroutine that monitors memory usage.\n//\n// This worker checks actual process memory usage (via runtime.MemStats) periodically\n// and force-frees buffers if memory exceeds the configured cap. It uses FreeOSMemory()\n// to return memory to the OS after freeing buffers, avoiding aggressive GC that causes\n// performance issues.\n//\n// The tracker logs both actual memory usage (heap allocated) and metric data size for\n// visibility into memory overhead from Go runtime structures and allocations.\n//\n// Parameters:\n// - wg: WaitGroup to signal completion when context is cancelled\n// - ctx: Context for cancellation signal\n//\n// The goroutine exits when ctx is cancelled.\nfunc MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\t\tnormalInterval := DefaultMemoryUsageTrackerInterval\n\t\tfastInterval := 30 * time.Second\n\n\t\tif normalInterval <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\tticker := time.NewTicker(normalInterval)\n\t\tdefer ticker.Stop()\n\t\tcurrentInterval := normalInterval\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tvar mem runtime.MemStats\n\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\tactualMemoryGB := float64(mem.Alloc) / 1e9\n\t\t\t\tmetricDataGB := ms.SizeInGB()\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory usage: %.2f GB actual (%.2f GB metric data)\", actualMemoryGB, metricDataGB)\n\n\t\t\t\tfreedExcluded := 0\n\t\t\t\tfreedEmergency := 0\n\t\t\t\tvar err error\n\n\t\t\t\tstate.mu.RLock()\n\t\t\t\tlastRetention := state.lastRetentionTime\n\t\t\t\tselectorsExcluded := state.selectorsExcluded\n\t\t\t\tstate.mu.RUnlock()\n\n\t\t\t\tif lastRetention != 0 && selectorsExcluded {\n\t\t\t\t\tfreedExcluded, err = ms.Free(nil, lastRetention)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while force-freeing the excluded buffers: %s\", err)\n\t\t\t\t\t}\n\n\t\t\t\t\tif freedExcluded > 0 {\n\t\t\t\t\t\tdebug.FreeOSMemory()\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d excluded buffers force-freed\", 
freedExcluded)\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\tif actualMemoryGB > float64(Keys.MemoryCap) {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing\", actualMemoryGB, Keys.MemoryCap)\n\n\t\t\t\t\t// Use progressive time-based Free with increasing threshold\n\t\t\t\t\t// instead of ForceFree loop — fewer tree traversals, more effective\n\t\t\t\t\td, parseErr := time.ParseDuration(Keys.RetentionInMemory)\n\t\t\t\t\tif parseErr != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> cannot parse retention duration: %s\", parseErr)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tthresholds := []float64{0.75, 0.5, 0.25}\n\t\t\t\t\t\tfor _, fraction := range thresholds {\n\t\t\t\t\t\t\tthreshold := time.Now().Add(-time.Duration(float64(d) * fraction))\n\t\t\t\t\t\t\tfreed, freeErr := ms.Free(nil, threshold.Unix())\n\t\t\t\t\t\t\tif freeErr != nil {\n\t\t\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while freeing buffers at %.0f%% retention: %s\", fraction*100, freeErr)\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tfreedEmergency += freed\n\n\t\t\t\t\t\t\tbufferPool.Clear()\n\t\t\t\t\t\t\truntime.GC()\n\t\t\t\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\t\t\t\tif actualMemoryGB < float64(Keys.MemoryCap) {\n\t\t\t\t\t\t\t\tbreak\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tbufferPool.Clear()\n\t\t\t\t\tdebug.FreeOSMemory()\n\n\t\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\t\tif actualMemoryGB >= float64(Keys.MemoryCap) {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> after emergency frees (%d buffers), memory usage %.2f GB still at/above cap %d GB\", freedEmergency, actualMemoryGB, Keys.MemoryCap)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> emergency freeing complete: %d buffers freed, memory now %.2f GB\", freedEmergency, actualMemoryGB)\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// Adaptive ticker: check more frequently when memory is high\n\t\t\t\tmemoryRatio := actualMemoryGB / float64(Keys.MemoryCap)\n\t\t\t\tif memoryRatio > 0.8 && currentInterval != fastInterval {\n\t\t\t\t\tticker.Reset(fastInterval)\n\t\t\t\t\tcurrentInterval = fastInterval\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to fast check interval (30s)\", memoryRatio*100)\n\t\t\t\t} else if memoryRatio <= 0.8 && currentInterval != normalInterval {\n\t\t\t\t\tticker.Reset(normalInterval)\n\t\t\t\t\tcurrentInterval = normalInterval\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to normal check interval\", memoryRatio*100)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\n// Free removes metric data older than the given time while preserving data for active nodes.\n//\n// This function implements intelligent retention by consulting the NodeProvider (if configured)\n// to determine which nodes are currently in use by running jobs. 
Data for these nodes is\n// preserved even if older than the retention time.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - t: Time threshold - buffers with data older than this will be freed\n//\n// Returns:\n// - Number of buffers freed\n// - Error if NodeProvider query fails\n//\n// Behavior:\n// - If no NodeProvider is set: frees all buffers older than t\n// - If NodeProvider returns empty map: frees all buffers older than t\n// - Otherwise: preserves buffers for nodes returned by GetUsedNodes(), frees others\nfunc Free(ms *MemoryStore, t time.Time) (int, error) {\n\t// If no NodeProvider is configured, free all buffers older than t\n\tif ms.nodeProvider == nil {\n\t\treturn ms.Free(nil, t.Unix())\n\t}\n\n\texcludeSelectors, err := ms.nodeProvider.GetUsedNodes(t.Unix())\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\tswitch lenMap := len(excludeSelectors); lenMap {\n\n\t// If the length of the map returned by GetUsedNodes() is 0,\n\t// then use default Free method with nil selector\n\tcase 0:\n\t\tstate.selectorsExcluded = false\n\t\treturn ms.Free(nil, t.Unix())\n\n\t// Else formulate selectors, exclude those from the map\n\t// and free the rest of the selectors\n\tdefault:\n\t\tstate.selectorsExcluded = true\n\t\tselectors := GetSelectors(ms, excludeSelectors)\n\t\treturn FreeSelected(ms, selectors, t)\n\t}\n}\n\n// FreeSelected frees buffers for specific selectors while preserving others.\n//\n// This function is used when we want to retain some specific nodes beyond the retention time.\n// It iterates through the provided selectors and frees their associated buffers.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - selectors: List of selector paths to free (e.g., [[\"cluster1\", \"node1\"], [\"cluster2\", \"node2\"]])\n// - t: Time threshold for freeing buffers\n//\n// Returns the total number of buffers freed and any error encountered.\nfunc FreeSelected(ms *MemoryStore, selectors [][]string, t time.Time) (int, error) {\n\tfreed := 0\n\n\tfor _, selector := range selectors {\n\n\t\tfreedBuffers, err := ms.Free(selector, t.Unix())\n\t\tif err != nil {\n\t\t\tcclog.Errorf(\"error while freeing selected buffers: %#v\", err)\n\t\t}\n\t\tfreed += freedBuffers\n\n\t}\n\n\treturn freed, nil\n}\n\n// GetSelectors returns all selectors at depth 2 (cluster/node level) that are NOT in the exclusion map.\n//\n// This function generates a list of selectors whose buffers should be freed by excluding\n// selectors that correspond to nodes currently in use by running jobs.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - excludeSelectors: Map of cluster names to node hostnames that should NOT be freed\n//\n// Returns a list of selectors ([]string paths) that can be safely freed.\n//\n// Example:\n//\n//\tIf the tree has paths [\"emmy\", \"node001\"] and [\"emmy\", \"node002\"],\n//\tand excludeSelectors contains {\"emmy\": [\"node001\"]},\n//\tthen only [[\"emmy\", \"node002\"]] is returned.\nfunc GetSelectors(ms *MemoryStore, excludeSelectors map[string][]string) [][]string {\n\tallSelectors := ms.GetPaths(2)\n\n\tfilteredSelectors := make([][]string, 0, len(allSelectors))\n\n\tfor _, path := range allSelectors {\n\t\tif len(path) < 2 {\n\t\t\tcontinue\n\t\t}\n\n\t\tkey := path[0] // The \"Key\" (Level 1)\n\t\tvalue := path[1] // The \"Value\" (Level 2)\n\n\t\texclude := false\n\n\t\t// Check if the key exists in our exclusion map\n\t\tif excludedValues, exists := excludeSelectors[key]; exists {\n\t\t\t// The key exists, now check if the specific value is in 
the exclusion list\n\t\t\tif slices.Contains(excludedValues, value) {\n\t\t\t\texclude = true\n\t\t\t}\n\t\t}\n\n\t\tif !exclude {\n\t\t\tfilteredSelectors = append(filteredSelectors, path)\n\t\t}\n\t}\n\n\treturn filteredSelectors\n}\n\n// GetPaths returns a list of lists (paths) to the specified depth.\nfunc (ms *MemoryStore) GetPaths(targetDepth int) [][]string {\n\tvar results [][]string\n\n\t// Start recursion. Initial path is empty.\n\t// We treat Root as depth 0.\n\tms.root.collectPaths(0, targetDepth, []string{}, &results)\n\n\treturn results\n}\n\n// Write all values in `metrics` to the level specified by `selector` for time `ts`.\n// Look at `findLevelOrCreate` for how selectors work.\nfunc (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {\n\tvar ok bool\n\tfor i, metric := range metrics {\n\t\tif metric.MetricConfig.Frequency == 0 {\n\t\t\tmetric.MetricConfig, ok = m.Metrics[metric.Name]\n\t\t\tif !ok {\n\t\t\t\tcclog.Debugf(\"[METRICSTORE]> Unknown metric '%s' in Write() - skipping\", metric.Name)\n\t\t\t\tmetric.MetricConfig.Frequency = 0\n\t\t\t}\n\t\t\tmetrics[i] = metric\n\t\t}\n\t}\n\n\treturn m.WriteToLevel(&m.root, selector, ts, metrics)\n}\n\nfunc (m *MemoryStore) GetLevel(selector []string) *Level {\n\treturn m.root.findLevelOrCreate(selector, len(m.Metrics))\n}\n\n// WriteToLevel assumes that `minfo` in `metrics` is filled in\nfunc (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {\n\tl = l.findLevelOrCreate(selector, len(m.Metrics))\n\tl.lock.Lock()\n\tdefer l.lock.Unlock()\n\n\tfor _, metric := range metrics {\n\t\tif metric.MetricConfig.Frequency == 0 {\n\t\t\tcontinue\n\t\t}\n\n\t\tb := l.metrics[metric.MetricConfig.offset]\n\t\tif b == nil {\n\t\t\t// First write to this metric and level\n\t\t\tb = newBuffer(ts, metric.MetricConfig.Frequency)\n\t\t\tl.metrics[metric.MetricConfig.offset] = b\n\t\t}\n\n\t\tnb, err := b.write(ts, metric.Value)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\t// Last write created a new buffer...\n\t\tif b != nb {\n\t\t\tl.metrics[metric.MetricConfig.offset] = nb\n\t\t}\n\t}\n\treturn nil\n}\n\n// Read returns all values for metric `metric` from `from` to `to` for the selected level(s).\n// If the level does not hold the metric itself, the data will be aggregated recursively from the children.\n// The second and third return value are the actual from/to for the data. 
Those can be different from\n// the range asked for if no data was available.\nfunc (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {\n\tif from > to {\n\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid time range\")\n\t}\n\n\tminfo, ok := m.Metrics[metric]\n\tif !ok {\n\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> unknown metric: \" + metric)\n\t}\n\n\tn, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)\n\n\terr := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {\n\t\tcdata, cfrom, cto, err := b.read(from, to, data)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\tif n == 0 {\n\t\t\tfrom, to = cfrom, cto\n\t\t} else if from != cfrom || to != cto || len(data) != len(cdata) {\n\t\t\tmissingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)\n\t\t\tif missingfront != 0 {\n\t\t\t\treturn ErrDataDoesNotAlignMissingFront\n\t\t\t}\n\n\t\t\tnewlen := len(cdata) - missingback\n\t\t\tif newlen < 1 {\n\t\t\t\treturn ErrDataDoesNotAlignMissingBack\n\t\t\t}\n\t\t\tcdata = cdata[0:newlen]\n\t\t\tif len(cdata) != len(data) {\n\t\t\t\treturn ErrDataDoesNotAlignDataLenMismatch\n\t\t\t}\n\n\t\t\tfrom, to = cfrom, cto\n\t\t}\n\n\t\tdata = cdata\n\t\tn += 1\n\t\treturn nil\n\t})\n\n\tif err != nil {\n\t\treturn nil, 0, 0, 0, err\n\t} else if n == 0 {\n\t\treturn nil, 0, 0, 0, ErrNoHostOrMetric\n\t} else if n > 1 {\n\t\tif minfo.Aggregation == AvgAggregation {\n\t\t\tnormalize := 1. / schema.Float(n)\n\t\t\tfor i := 0; i < len(data); i++ {\n\t\t\t\tdata[i] *= normalize\n\t\t\t}\n\t\t} else if minfo.Aggregation != SumAggregation {\n\t\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid aggregation\")\n\t\t}\n\t}\n\n\tdata, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)\n\tif err != nil {\n\t\treturn nil, 0, 0, 0, err\n\t}\n\n\treturn data, from, to, resolution, nil\n}\n\n// Free releases all buffers for the selected level and all its children that\n// contain only values older than `t`.\nfunc (m *MemoryStore) Free(selector []string, t int64) (int, error) {\n\treturn m.GetLevel(selector).free(t)\n}\n\n// ForceFree unconditionally removes the oldest buffer from each metric chain.\nfunc (m *MemoryStore) ForceFree() (int, error) {\n\treturn m.GetLevel(nil).forceFree()\n}\n\nfunc (m *MemoryStore) FreeAll() error {\n\tm.root.lock.Lock()\n\tdefer m.root.lock.Unlock()\n\n\tfor k := range m.root.children {\n\t\tdelete(m.root.children, k)\n\t}\n\n\treturn nil\n}\n\nfunc (m *MemoryStore) SizeInBytes() int64 {\n\treturn m.root.sizeInBytes()\n}\n\nfunc (m *MemoryStore) SizeInGB() float64 {\n\treturn float64(m.root.sizeInBytes()) / 1e9\n}\n\n// ListChildren , given a selector, returns a list of all children of the level\n// selected.\nfunc (m *MemoryStore) ListChildren(selector []string) []string {\n\tlvl := &m.root\n\tfor lvl != nil && len(selector) != 0 {\n\t\tlvl.lock.RLock()\n\t\tnext := lvl.children[selector[0]]\n\t\tlvl.lock.RUnlock()\n\t\tlvl = next\n\t\tselector = selector[1:]\n\t}\n\n\tif lvl == nil {\n\t\treturn nil\n\t}\n\n\tlvl.lock.RLock()\n\tdefer lvl.lock.RUnlock()\n\n\tchildren := make([]string, 0, len(lvl.children))\n\tfor child := range lvl.children {\n\t\tchildren = append(children, child)\n\t}\n\n\treturn children\n}\n","numLines":814,"startLine":1,"totalLines":814}},"sourceToolAssistantUUID":"a0e838c9-c432-4d6d-a3be-dd6d5d6490cb"}
{"parentUuid":"f6351653-c703-42cc-9ec1-77eb3d2afff9","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01VCK2d7dHwMHgU82JkSjG3b","toolUseID":"toolu_01VCK2d7dHwMHgU82JkSjG3b","timestamp":"2026-03-29T19:34:26.979Z","uuid":"aaf76296-5b2f-4b35-ad6f-261526fa9037"}
{"parentUuid":"770a0e0c-edd3-4d77-a367-6e6a6a0efed7","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01VCK2d7dHwMHgU82JkSjG3b","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// This file contains the API types and data fetching logic for querying metric data\n 7→// from the in-memory metric store. It provides structures for building complex queries\n 8→// with support for aggregation, scaling, padding, and statistics computation.\n 9→package metricstore\n 10→\n 11→import (\n 12→\t\"errors\"\n 13→\t\"fmt\"\n 14→\t\"math\"\n 15→\n 16→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 17→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 18→\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n 19→)\n 20→\n 21→var (\n 22→\t// ErrNoHostOrMetric is returned when the metric store does not find the host or the metric\n 23→\tErrNoHostOrMetric error = errors.New(\"[METRICSTORE]> metric or host not found\")\n 24→\t// ErrInvalidTimeRange is returned when a query has 'from' >= 'to'\n 25→\tErrInvalidTimeRange = errors.New(\"[METRICSTORE]> invalid time range: 'from' must be before 'to'\")\n 26→\t// ErrEmptyCluster is returned when a query with ForAllNodes has no cluster specified\n 27→\tErrEmptyCluster = errors.New(\"[METRICSTORE]> cluster name cannot be empty\")\n 28→)\n 29→\n 30→// APIMetricData represents the response data for a single metric query.\n 31→//\n 32→// It contains both the time-series data points and computed statistics (avg, min, max).\n 33→// If an error occurred during data retrieval, the Error field will be set and other\n 34→// fields may be incomplete.\n 35→type APIMetricData struct {\n 36→\tError *string `json:\"error,omitempty\"`\n 37→\tData schema.FloatArray `json:\"data,omitempty\"`\n 38→\tFrom int64 `json:\"from\"`\n 39→\tTo int64 `json:\"to\"`\n 40→\tResolution int64 `json:\"resolution\"`\n 41→\tAvg schema.Float `json:\"avg\"`\n 42→\tMin schema.Float `json:\"min\"`\n 43→\tMax schema.Float `json:\"max\"`\n 44→}\n 45→\n 46→// APIQueryRequest represents a batch query request for metric data.\n 47→//\n 48→// It supports two modes of operation:\n 49→// 1. Explicit queries via the Queries field\n 50→// 2. 
Automatic query generation via ForAllNodes (queries all specified metrics for all nodes in the cluster)\n 51→//\n 52→// The request can be customized with flags to include/exclude statistics, raw data, and padding.\n 53→type APIQueryRequest struct {\n 54→\tCluster string `json:\"cluster\"`\n 55→\tQueries []APIQuery `json:\"queries\"`\n 56→\tForAllNodes []string `json:\"for-all-nodes\"`\n 57→\tFrom int64 `json:\"from\"`\n 58→\tTo int64 `json:\"to\"`\n 59→\tWithStats bool `json:\"with-stats\"`\n 60→\tWithData bool `json:\"with-data\"`\n 61→\tWithPadding bool `json:\"with-padding\"`\n 62→}\n 63→\n 64→// APIQueryResponse represents the response to an APIQueryRequest.\n 65→//\n 66→// Results is a 2D array where each outer element corresponds to a query,\n 67→// and each inner element corresponds to a selector within that query\n 68→// (e.g., multiple CPUs or cores).\n 69→type APIQueryResponse struct {\n 70→\tQueries []APIQuery `json:\"queries,omitempty\"`\n 71→\tResults [][]APIMetricData `json:\"results\"`\n 72→}\n 73→\n 74→// APIQuery represents a single metric query with optional hierarchical selectors.\n 75→//\n 76→// The hierarchical selection works as follows:\n 77→// - Hostname: The node to query\n 78→// - Type + TypeIds: First level of hierarchy (e.g., \"cpu\" + [\"0\", \"1\", \"2\"])\n 79→// - SubType + SubTypeIds: Second level of hierarchy (e.g., \"core\" + [\"0\", \"1\"])\n 80→//\n 81→// If Aggregate is true, data from multiple type/subtype IDs will be aggregated according\n 82→// to the metric's aggregation strategy. Otherwise, separate results are returned for each combination.\n 83→type APIQuery struct {\n 84→\tType *string `json:\"type,omitempty\"`\n 85→\tSubType *string `json:\"subtype,omitempty\"`\n 86→\tMetric string `json:\"metric\"`\n 87→\tHostname string `json:\"host\"`\n 88→\tResolution int64 `json:\"resolution\"`\n 89→\tTypeIds []string `json:\"type-ids,omitempty\"`\n 90→\tSubTypeIds []string `json:\"subtype-ids,omitempty\"`\n 91→\tScaleFactor schema.Float `json:\"scale-by,omitempty\"`\n 92→\tAggregate bool `json:\"aggreg\"`\n 93→}\n 94→\n 95→// AddStats computes and populates the Avg, Min, and Max fields from the Data array.\n 96→//\n 97→// NaN values in the data are ignored during computation. 
If all values are NaN,\n 98→// the statistics fields will be set to NaN.\n 99→//\n 100→// TODO: Optimize this, just like the stats endpoint!\n 101→func (data *APIMetricData) AddStats() {\n 102→\tn := 0\n 103→\tsum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64\n 104→\tfor _, x := range data.Data {\n 105→\t\tif x.IsNaN() {\n 106→\t\t\tcontinue\n 107→\t\t}\n 108→\n 109→\t\tn += 1\n 110→\t\tsum += float64(x)\n 111→\t\tmin = math.Min(min, float64(x))\n 112→\t\tmax = math.Max(max, float64(x))\n 113→\t}\n 114→\n 115→\tif n > 0 {\n 116→\t\tavg := sum / float64(n)\n 117→\t\tdata.Avg = schema.Float(avg)\n 118→\t\tdata.Min = schema.Float(min)\n 119→\t\tdata.Max = schema.Float(max)\n 120→\t} else {\n 121→\t\tdata.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN\n 122→\t}\n 123→}\n 124→\n 125→// ScaleBy multiplies all data points and statistics by the given factor.\n 126→//\n 127→// This is commonly used for unit conversion (e.g., bytes to gigabytes).\n 128→// Scaling by 0 or 1 is a no-op for performance reasons.\n 129→func (data *APIMetricData) ScaleBy(f schema.Float) {\n 130→\tif f == 1 {\n 131→\t\treturn\n 132→\t}\n 133→\n 134→\tdata.Avg *= f\n 135→\tdata.Min *= f\n 136→\tdata.Max *= f\n 137→\tfor i := 0; i < len(data.Data); i++ {\n 138→\t\tdata.Data[i] *= f\n 139→\t}\n 140→}\n 141→\n 142→// PadDataWithNull pads the beginning of the data array with NaN values if needed.\n 143→//\n 144→// This ensures that the data aligns with the requested 'from' timestamp, even if\n 145→// the metric store doesn't have data for the earliest time points. This is useful\n 146→// for maintaining consistent array indexing across multiple queries.\n 147→//\n 148→// Parameters:\n 149→// - ms: MemoryStore instance to lookup metric configuration\n 150→// - from: The requested start timestamp\n 151→// - to: The requested end timestamp (unused but kept for API consistency)\n 152→// - metric: The metric name to lookup frequency information\n 153→func (data *APIMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {\n 154→\tminfo, ok := ms.Metrics[metric]\n 155→\tif !ok {\n 156→\t\treturn\n 157→\t}\n 158→\n 159→\tif (data.From / minfo.Frequency) > (from / minfo.Frequency) {\n 160→\t\tpadfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))\n 161→\t\tndata := make([]schema.Float, 0, padfront+len(data.Data))\n 162→\t\tfor range padfront {\n 163→\t\t\tndata = append(ndata, schema.NaN)\n 164→\t\t}\n 165→\t\tfor j := 0; j < len(data.Data); j++ {\n 166→\t\t\tndata = append(ndata, data.Data[j])\n 167→\t\t}\n 168→\t\tdata.Data = ndata\n 169→\t}\n 170→}\n 171→\n 172→// FetchData executes a batch metric query request and returns the results.\n 173→//\n 174→// This is the primary API for retrieving metric data from the memory store. It supports:\n 175→// - Individual queries via req.Queries\n 176→// - Batch queries for all nodes via req.ForAllNodes\n 177→// - Hierarchical selector construction (cluster → host → type → subtype)\n 178→// - Optional statistics computation (avg, min, max)\n 179→// - Optional data scaling\n 180→// - Optional data padding with NaN values\n 181→//\n 182→// The function constructs selectors based on the query parameters and calls MemoryStore.Read()\n 183→// for each selector. 
If a query specifies Aggregate=false with multiple type/subtype IDs,\n 184→// separate results are returned for each combination.\n 185→//\n 186→// Parameters:\n 187→// - req: The query request containing queries, time range, and options\n 188→//\n 189→// Returns:\n 190→// - APIQueryResponse containing results for each query, or error if validation fails\n 191→//\n 192→// Errors:\n 193→// - ErrInvalidTimeRange if req.From > req.To\n 194→// - ErrEmptyCluster if req.ForAllNodes is used without specifying a cluster\n 195→// - Error if MemoryStore is not initialized\n 196→// - Individual query errors are stored in APIMetricData.Error field\n 197→func FetchData(req APIQueryRequest) (*APIQueryResponse, error) {\n 198→\tif req.From > req.To {\n 199→\t\treturn nil, ErrInvalidTimeRange\n 200→\t}\n 201→\tif req.Cluster == \"\" && req.ForAllNodes != nil {\n 202→\t\treturn nil, ErrEmptyCluster\n 203→\t}\n 204→\n 205→\treq.WithData = true\n 206→\tms := GetMemoryStore()\n 207→\tif ms == nil {\n 208→\t\treturn nil, fmt.Errorf(\"[METRICSTORE]> memorystore not initialized\")\n 209→\t}\n 210→\n 211→\tresponse := APIQueryResponse{\n 212→\t\tResults: make([][]APIMetricData, 0, len(req.Queries)),\n 213→\t}\n 214→\tif req.ForAllNodes != nil {\n 215→\t\tnodes := ms.ListChildren([]string{req.Cluster})\n 216→\t\tfor _, node := range nodes {\n 217→\t\t\tfor _, metric := range req.ForAllNodes {\n 218→\t\t\t\tq := APIQuery{\n 219→\t\t\t\t\tMetric: metric,\n 220→\t\t\t\t\tHostname: node,\n 221→\t\t\t\t}\n 222→\t\t\t\treq.Queries = append(req.Queries, q)\n 223→\t\t\t\tresponse.Queries = append(response.Queries, q)\n 224→\t\t\t}\n 225→\t\t}\n 226→\t}\n 227→\n 228→\tfor _, query := range req.Queries {\n 229→\t\tsels := make([]util.Selector, 0, 1)\n 230→\t\tif query.Aggregate || query.Type == nil {\n 231→\t\t\tsel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}\n 232→\t\t\tif query.Type != nil {\n 233→\t\t\t\tif len(query.TypeIds) == 1 {\n 234→\t\t\t\t\tsel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})\n 235→\t\t\t\t} else {\n 236→\t\t\t\t\tids := make([]string, len(query.TypeIds))\n 237→\t\t\t\t\tfor i, id := range query.TypeIds {\n 238→\t\t\t\t\t\tids[i] = *query.Type + id\n 239→\t\t\t\t\t}\n 240→\t\t\t\t\tsel = append(sel, util.SelectorElement{Group: ids})\n 241→\t\t\t\t}\n 242→\n 243→\t\t\t\tif query.SubType != nil {\n 244→\t\t\t\t\tif len(query.SubTypeIds) == 1 {\n 245→\t\t\t\t\t\tsel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})\n 246→\t\t\t\t\t} else {\n 247→\t\t\t\t\t\tids := make([]string, len(query.SubTypeIds))\n 248→\t\t\t\t\t\tfor i, id := range query.SubTypeIds {\n 249→\t\t\t\t\t\t\tids[i] = *query.SubType + id\n 250→\t\t\t\t\t\t}\n 251→\t\t\t\t\t\tsel = append(sel, util.SelectorElement{Group: ids})\n 252→\t\t\t\t\t}\n 253→\t\t\t\t}\n 254→\t\t\t}\n 255→\t\t\tsels = append(sels, sel)\n 256→\t\t} else {\n 257→\t\t\tfor _, typeID := range query.TypeIds {\n 258→\t\t\t\tif query.SubType != nil {\n 259→\t\t\t\t\tfor _, subTypeID := range query.SubTypeIds {\n 260→\t\t\t\t\t\tsels = append(sels, util.Selector{\n 261→\t\t\t\t\t\t\t{String: req.Cluster},\n 262→\t\t\t\t\t\t\t{String: query.Hostname},\n 263→\t\t\t\t\t\t\t{String: *query.Type + typeID},\n 264→\t\t\t\t\t\t\t{String: *query.SubType + subTypeID},\n 265→\t\t\t\t\t\t})\n 266→\t\t\t\t\t}\n 267→\t\t\t\t} else {\n 268→\t\t\t\t\tsels = append(sels, util.Selector{\n 269→\t\t\t\t\t\t{String: req.Cluster},\n 270→\t\t\t\t\t\t{String: query.Hostname},\n 271→\t\t\t\t\t\t{String: 
*query.Type + typeID},\n 272→\t\t\t\t\t})\n 273→\t\t\t\t}\n 274→\t\t\t}\n 275→\t\t}\n 276→\n 277→\t\tvar err error\n 278→\t\tres := make([]APIMetricData, 0, len(sels))\n 279→\t\tfor _, sel := range sels {\n 280→\t\t\tdata := APIMetricData{}\n 281→\n 282→\t\t\tdata.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)\n 283→\t\t\tif err != nil {\n 284→\t\t\t\t// Skip Error If Just Missing Host or Metric, Continue\n 285→\t\t\t\t// Empty Return For Metric Handled Gracefully By Frontend\n 286→\t\t\t\tif err != ErrNoHostOrMetric {\n 287→\t\t\t\t\tmsg := err.Error()\n 288→\t\t\t\t\tdata.Error = &msg\n 289→\t\t\t\t\tres = append(res, data)\n 290→\t\t\t\t} else {\n 291→\t\t\t\t\tcclog.Debugf(\"failed to fetch '%s' from host '%s' (cluster: %s): %s\", query.Metric, query.Hostname, req.Cluster, err.Error())\n 292→\t\t\t\t}\n 293→\t\t\t\tcontinue\n 294→\t\t\t}\n 295→\n 296→\t\t\tif req.WithStats {\n 297→\t\t\t\tdata.AddStats()\n 298→\t\t\t}\n 299→\t\t\tif query.ScaleFactor != 0 {\n 300→\t\t\t\tdata.ScaleBy(query.ScaleFactor)\n 301→\t\t\t}\n 302→\t\t\tif req.WithPadding {\n 303→\t\t\t\tdata.PadDataWithNull(ms, req.From, req.To, query.Metric)\n 304→\t\t\t}\n 305→\t\t\tif !req.WithData {\n 306→\t\t\t\tdata.Data = nil\n 307→\t\t\t}\n 308→\t\t\tres = append(res, data)\n 309→\t\t}\n 310→\t\tresponse.Results = append(response.Results, res)\n 311→\t}\n 312→\n 313→\treturn &response, nil\n 314→}\n 315→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"3817af14-e03e-43b1-b12b-5ac63f143f80","timestamp":"2026-03-29T19:34:26.975Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/api.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file contains the API types and data fetching logic for querying metric data\n// from the in-memory metric store. 
It provides structures for building complex queries\n// with support for aggregation, scaling, padding, and statistics computation.\npackage metricstore\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"math\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n)\n\nvar (\n\t// ErrNoHostOrMetric is returned when the metric store does not find the host or the metric\n\tErrNoHostOrMetric error = errors.New(\"[METRICSTORE]> metric or host not found\")\n\t// ErrInvalidTimeRange is returned when a query has 'from' >= 'to'\n\tErrInvalidTimeRange = errors.New(\"[METRICSTORE]> invalid time range: 'from' must be before 'to'\")\n\t// ErrEmptyCluster is returned when a query with ForAllNodes has no cluster specified\n\tErrEmptyCluster = errors.New(\"[METRICSTORE]> cluster name cannot be empty\")\n)\n\n// APIMetricData represents the response data for a single metric query.\n//\n// It contains both the time-series data points and computed statistics (avg, min, max).\n// If an error occurred during data retrieval, the Error field will be set and other\n// fields may be incomplete.\ntype APIMetricData struct {\n\tError *string `json:\"error,omitempty\"`\n\tData schema.FloatArray `json:\"data,omitempty\"`\n\tFrom int64 `json:\"from\"`\n\tTo int64 `json:\"to\"`\n\tResolution int64 `json:\"resolution\"`\n\tAvg schema.Float `json:\"avg\"`\n\tMin schema.Float `json:\"min\"`\n\tMax schema.Float `json:\"max\"`\n}\n\n// APIQueryRequest represents a batch query request for metric data.\n//\n// It supports two modes of operation:\n// 1. Explicit queries via the Queries field\n// 2. Automatic query generation via ForAllNodes (queries all specified metrics for all nodes in the cluster)\n//\n// The request can be customized with flags to include/exclude statistics, raw data, and padding.\ntype APIQueryRequest struct {\n\tCluster string `json:\"cluster\"`\n\tQueries []APIQuery `json:\"queries\"`\n\tForAllNodes []string `json:\"for-all-nodes\"`\n\tFrom int64 `json:\"from\"`\n\tTo int64 `json:\"to\"`\n\tWithStats bool `json:\"with-stats\"`\n\tWithData bool `json:\"with-data\"`\n\tWithPadding bool `json:\"with-padding\"`\n}\n\n// APIQueryResponse represents the response to an APIQueryRequest.\n//\n// Results is a 2D array where each outer element corresponds to a query,\n// and each inner element corresponds to a selector within that query\n// (e.g., multiple CPUs or cores).\ntype APIQueryResponse struct {\n\tQueries []APIQuery `json:\"queries,omitempty\"`\n\tResults [][]APIMetricData `json:\"results\"`\n}\n\n// APIQuery represents a single metric query with optional hierarchical selectors.\n//\n// The hierarchical selection works as follows:\n// - Hostname: The node to query\n// - Type + TypeIds: First level of hierarchy (e.g., \"cpu\" + [\"0\", \"1\", \"2\"])\n// - SubType + SubTypeIds: Second level of hierarchy (e.g., \"core\" + [\"0\", \"1\"])\n//\n// If Aggregate is true, data from multiple type/subtype IDs will be aggregated according\n// to the metric's aggregation strategy. 
Otherwise, separate results are returned for each combination.\ntype APIQuery struct {\n\tType *string `json:\"type,omitempty\"`\n\tSubType *string `json:\"subtype,omitempty\"`\n\tMetric string `json:\"metric\"`\n\tHostname string `json:\"host\"`\n\tResolution int64 `json:\"resolution\"`\n\tTypeIds []string `json:\"type-ids,omitempty\"`\n\tSubTypeIds []string `json:\"subtype-ids,omitempty\"`\n\tScaleFactor schema.Float `json:\"scale-by,omitempty\"`\n\tAggregate bool `json:\"aggreg\"`\n}\n\n// AddStats computes and populates the Avg, Min, and Max fields from the Data array.\n//\n// NaN values in the data are ignored during computation. If all values are NaN,\n// the statistics fields will be set to NaN.\n//\n// TODO: Optimize this, just like the stats endpoint!\nfunc (data *APIMetricData) AddStats() {\n\tn := 0\n\tsum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64\n\tfor _, x := range data.Data {\n\t\tif x.IsNaN() {\n\t\t\tcontinue\n\t\t}\n\n\t\tn += 1\n\t\tsum += float64(x)\n\t\tmin = math.Min(min, float64(x))\n\t\tmax = math.Max(max, float64(x))\n\t}\n\n\tif n > 0 {\n\t\tavg := sum / float64(n)\n\t\tdata.Avg = schema.Float(avg)\n\t\tdata.Min = schema.Float(min)\n\t\tdata.Max = schema.Float(max)\n\t} else {\n\t\tdata.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN\n\t}\n}\n\n// ScaleBy multiplies all data points and statistics by the given factor.\n//\n// This is commonly used for unit conversion (e.g., bytes to gigabytes).\n// Scaling by 0 or 1 is a no-op for performance reasons.\nfunc (data *APIMetricData) ScaleBy(f schema.Float) {\n\tif f == 1 {\n\t\treturn\n\t}\n\n\tdata.Avg *= f\n\tdata.Min *= f\n\tdata.Max *= f\n\tfor i := 0; i < len(data.Data); i++ {\n\t\tdata.Data[i] *= f\n\t}\n}\n\n// PadDataWithNull pads the beginning of the data array with NaN values if needed.\n//\n// This ensures that the data aligns with the requested 'from' timestamp, even if\n// the metric store doesn't have data for the earliest time points. This is useful\n// for maintaining consistent array indexing across multiple queries.\n//\n// Parameters:\n// - ms: MemoryStore instance to lookup metric configuration\n// - from: The requested start timestamp\n// - to: The requested end timestamp (unused but kept for API consistency)\n// - metric: The metric name to lookup frequency information\nfunc (data *APIMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {\n\tminfo, ok := ms.Metrics[metric]\n\tif !ok {\n\t\treturn\n\t}\n\n\tif (data.From / minfo.Frequency) > (from / minfo.Frequency) {\n\t\tpadfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))\n\t\tndata := make([]schema.Float, 0, padfront+len(data.Data))\n\t\tfor range padfront {\n\t\t\tndata = append(ndata, schema.NaN)\n\t\t}\n\t\tfor j := 0; j < len(data.Data); j++ {\n\t\t\tndata = append(ndata, data.Data[j])\n\t\t}\n\t\tdata.Data = ndata\n\t}\n}\n\n// FetchData executes a batch metric query request and returns the results.\n//\n// This is the primary API for retrieving metric data from the memory store. It supports:\n// - Individual queries via req.Queries\n// - Batch queries for all nodes via req.ForAllNodes\n// - Hierarchical selector construction (cluster → host → type → subtype)\n// - Optional statistics computation (avg, min, max)\n// - Optional data scaling\n// - Optional data padding with NaN values\n//\n// The function constructs selectors based on the query parameters and calls MemoryStore.Read()\n// for each selector. 
If a query specifies Aggregate=false with multiple type/subtype IDs,\n// separate results are returned for each combination.\n//\n// Parameters:\n// - req: The query request containing queries, time range, and options\n//\n// Returns:\n// - APIQueryResponse containing results for each query, or error if validation fails\n//\n// Errors:\n// - ErrInvalidTimeRange if req.From > req.To\n// - ErrEmptyCluster if req.ForAllNodes is used without specifying a cluster\n// - Error if MemoryStore is not initialized\n// - Individual query errors are stored in APIMetricData.Error field\nfunc FetchData(req APIQueryRequest) (*APIQueryResponse, error) {\n\tif req.From > req.To {\n\t\treturn nil, ErrInvalidTimeRange\n\t}\n\tif req.Cluster == \"\" && req.ForAllNodes != nil {\n\t\treturn nil, ErrEmptyCluster\n\t}\n\n\treq.WithData = true\n\tms := GetMemoryStore()\n\tif ms == nil {\n\t\treturn nil, fmt.Errorf(\"[METRICSTORE]> memorystore not initialized\")\n\t}\n\n\tresponse := APIQueryResponse{\n\t\tResults: make([][]APIMetricData, 0, len(req.Queries)),\n\t}\n\tif req.ForAllNodes != nil {\n\t\tnodes := ms.ListChildren([]string{req.Cluster})\n\t\tfor _, node := range nodes {\n\t\t\tfor _, metric := range req.ForAllNodes {\n\t\t\t\tq := APIQuery{\n\t\t\t\t\tMetric: metric,\n\t\t\t\t\tHostname: node,\n\t\t\t\t}\n\t\t\t\treq.Queries = append(req.Queries, q)\n\t\t\t\tresponse.Queries = append(response.Queries, q)\n\t\t\t}\n\t\t}\n\t}\n\n\tfor _, query := range req.Queries {\n\t\tsels := make([]util.Selector, 0, 1)\n\t\tif query.Aggregate || query.Type == nil {\n\t\t\tsel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}\n\t\t\tif query.Type != nil {\n\t\t\t\tif len(query.TypeIds) == 1 {\n\t\t\t\t\tsel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})\n\t\t\t\t} else {\n\t\t\t\t\tids := make([]string, len(query.TypeIds))\n\t\t\t\t\tfor i, id := range query.TypeIds {\n\t\t\t\t\t\tids[i] = *query.Type + id\n\t\t\t\t\t}\n\t\t\t\t\tsel = append(sel, util.SelectorElement{Group: ids})\n\t\t\t\t}\n\n\t\t\t\tif query.SubType != nil {\n\t\t\t\t\tif len(query.SubTypeIds) == 1 {\n\t\t\t\t\t\tsel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})\n\t\t\t\t\t} else {\n\t\t\t\t\t\tids := make([]string, len(query.SubTypeIds))\n\t\t\t\t\t\tfor i, id := range query.SubTypeIds {\n\t\t\t\t\t\t\tids[i] = *query.SubType + id\n\t\t\t\t\t\t}\n\t\t\t\t\t\tsel = append(sel, util.SelectorElement{Group: ids})\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\tsels = append(sels, sel)\n\t\t} else {\n\t\t\tfor _, typeID := range query.TypeIds {\n\t\t\t\tif query.SubType != nil {\n\t\t\t\t\tfor _, subTypeID := range query.SubTypeIds {\n\t\t\t\t\t\tsels = append(sels, util.Selector{\n\t\t\t\t\t\t\t{String: req.Cluster},\n\t\t\t\t\t\t\t{String: query.Hostname},\n\t\t\t\t\t\t\t{String: *query.Type + typeID},\n\t\t\t\t\t\t\t{String: *query.SubType + subTypeID},\n\t\t\t\t\t\t})\n\t\t\t\t\t}\n\t\t\t\t} else {\n\t\t\t\t\tsels = append(sels, util.Selector{\n\t\t\t\t\t\t{String: req.Cluster},\n\t\t\t\t\t\t{String: query.Hostname},\n\t\t\t\t\t\t{String: *query.Type + typeID},\n\t\t\t\t\t})\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tvar err error\n\t\tres := make([]APIMetricData, 0, len(sels))\n\t\tfor _, sel := range sels {\n\t\t\tdata := APIMetricData{}\n\n\t\t\tdata.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)\n\t\t\tif err != nil {\n\t\t\t\t// Skip Error If Just Missing Host or Metric, Continue\n\t\t\t\t// Empty Return For Metric 
Handled Gracefully By Frontend\n\t\t\t\tif err != ErrNoHostOrMetric {\n\t\t\t\t\tmsg := err.Error()\n\t\t\t\t\tdata.Error = &msg\n\t\t\t\t\tres = append(res, data)\n\t\t\t\t} else {\n\t\t\t\t\tcclog.Debugf(\"failed to fetch '%s' from host '%s' (cluster: %s): %s\", query.Metric, query.Hostname, req.Cluster, err.Error())\n\t\t\t\t}\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tif req.WithStats {\n\t\t\t\tdata.AddStats()\n\t\t\t}\n\t\t\tif query.ScaleFactor != 0 {\n\t\t\t\tdata.ScaleBy(query.ScaleFactor)\n\t\t\t}\n\t\t\tif req.WithPadding {\n\t\t\t\tdata.PadDataWithNull(ms, req.From, req.To, query.Metric)\n\t\t\t}\n\t\t\tif !req.WithData {\n\t\t\t\tdata.Data = nil\n\t\t\t}\n\t\t\tres = append(res, data)\n\t\t}\n\t\tresponse.Results = append(response.Results, res)\n\t}\n\n\treturn &response, nil\n}\n","numLines":315,"startLine":1,"totalLines":315}},"sourceToolAssistantUUID":"770a0e0c-edd3-4d77-a367-6e6a6a0efed7"}
{"parentUuid":"3817af14-e03e-43b1-b12b-5ac63f143f80","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01LtgGFtH91tDuHvidxukNFL","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01RKN3a4cFK1gCKxrYsbcwiG","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/query.go"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":668,"cache_read_input_tokens":23808,"output_tokens":205,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":668,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXvz1o3wmRUN8MJsqWbJ","type":"assistant","uuid":"44c0db41-7816-4ca1-b2dd-95377289616f","timestamp":"2026-03-29T19:34:27.168Z"}
{"parentUuid":"44c0db41-7816-4ca1-b2dd-95377289616f","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01RKN3a4cFK1gCKxrYsbcwiG","toolUseID":"toolu_01RKN3a4cFK1gCKxrYsbcwiG","timestamp":"2026-03-29T19:34:27.576Z","uuid":"835e2afd-e846-479f-be2a-af9b40936b52"}
{"parentUuid":"44c0db41-7816-4ca1-b2dd-95377289616f","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01RKN3a4cFK1gCKxrYsbcwiG","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// This file implements high-level query functions for loading job metric data\n 7→// with automatic scope transformation and aggregation.\n 8→//\n 9→// Key Concepts:\n 10→//\n 11→// Metric Scopes: Metrics are collected at different granularities (native scope):\n 12→// - HWThread: Per hardware thread\n 13→// - Core: Per CPU core\n 14→// - Socket: Per CPU socket\n 15→// - MemoryDomain: Per memory domain (NUMA)\n 16→// - Accelerator: Per GPU/accelerator\n 17→// - Node: Per compute node\n 18→//\n 19→// Scope Transformation: The buildQueries functions transform between native scope\n 20→// and requested scope by:\n 21→// - Aggregating finer-grained data (e.g., HWThread → Core → Socket → Node)\n 22→// - Rejecting requests for finer granularity than available\n 23→// - Handling special cases (e.g., Accelerator metrics)\n 24→//\n 25→// Query Building: Constructs APIQuery structures with proper selectors (Type, TypeIds)\n 26→// based on cluster topology and job resources.\n 27→package metricstore\n 28→\n 29→import (\n 30→\t\"context\"\n 31→\t\"fmt\"\n 32→\t\"strings\"\n 33→\t\"time\"\n 34→\n 35→\t\"github.com/ClusterCockpit/cc-backend/pkg/archive\"\n 36→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 37→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 38→)\n 39→\n 40→type InternalMetricStore struct{}\n 41→\n 42→var MetricStoreHandle *InternalMetricStore\n 43→\n 44→// HealthCheck delegates to the internal MemoryStore's HealthCheck.\n 45→func (ccms *InternalMetricStore) HealthCheck(cluster string,\n 46→\tnodes []string, metrics []string,\n 47→) (map[string]HealthCheckResult, error) {\n 48→\treturn GetMemoryStore().HealthCheck(cluster, nodes, metrics)\n 49→}\n 50→\n 51→// TestLoadDataCallback allows tests to override LoadData behavior for testing purposes.\n 52→// When set to a non-nil function, LoadData will call this function instead of the default implementation.\n 53→var TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)\n 54→\n 55→// LoadData loads metric data for a specific job with automatic scope transformation.\n 56→//\n 57→// This is the primary function for retrieving job metric data. 
It handles:\n 58→// - Building queries with scope transformation via buildQueries\n 59→// - Fetching data from the metric store\n 60→// - Organizing results by metric and scope\n 61→// - Converting NaN statistics to 0 for JSON compatibility\n 62→// - Partial error handling (returns data for successful queries even if some fail)\n 63→//\n 64→// Parameters:\n 65→// - job: Job metadata including cluster, resources, and time range\n 66→// - metrics: List of metric names to load\n 67→// - scopes: Requested metric scopes (will be transformed to match native scopes)\n 68→// - ctx: Context for cancellation (currently unused but reserved for future use)\n 69→// - resolution: Data resolution in seconds (0 for native resolution)\n 70→//\n 71→// Returns:\n 72→// - JobData: Map of metric → scope → JobMetric with time-series data and statistics\n 73→// - Error: Returns error if query building or fetching fails, or partial error listing failed hosts\n 74→//\n 75→// Example:\n 76→//\n 77→//\tjobData, err := LoadData(job, []string{\"cpu_load\", \"mem_used\"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 60)\n 78→func (ccms *InternalMetricStore) LoadData(\n 79→\tjob *schema.Job,\n 80→\tmetrics []string,\n 81→\tscopes []schema.MetricScope,\n 82→\tctx context.Context,\n 83→\tresolution int,\n 84→) (schema.JobData, error) {\n 85→\tif TestLoadDataCallback != nil {\n 86→\t\treturn TestLoadDataCallback(job, metrics, scopes, ctx, resolution)\n 87→\t}\n 88→\n 89→\tqueries, assignedScope, err := buildQueries(job, metrics, scopes, int64(resolution))\n 90→\tif err != nil {\n 91→\t\tcclog.Errorf(\"Error while building queries for jobId %d, Metrics %v, Scopes %v: %s\", job.JobID, metrics, scopes, err.Error())\n 92→\t\treturn nil, err\n 93→\t}\n 94→\n 95→\t// Verify assignment is correct - log any inconsistencies for debugging\n 96→\tif len(queries) != len(assignedScope) {\n 97→\t\tcclog.Errorf(\"Critical error: queries and assignedScope have different lengths after buildQueries: %d vs %d\",\n 98→\t\t\tlen(queries), len(assignedScope))\n 99→\t}\n 100→\n 101→\treq := APIQueryRequest{\n 102→\t\tCluster: job.Cluster,\n 103→\t\tFrom: job.StartTime,\n 104→\t\tTo: job.StartTime + int64(job.Duration),\n 105→\t\tQueries: queries,\n 106→\t\tWithStats: true,\n 107→\t\tWithData: true,\n 108→\t}\n 109→\n 110→\tresBody, err := FetchData(req)\n 111→\tif err != nil {\n 112→\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n 113→\t\treturn nil, err\n 114→\t}\n 115→\n 116→\tvar errors []string\n 117→\tjobData := make(schema.JobData)\n 118→\n 119→\t// Add safety check for potential index out of range errors\n 120→\tif len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {\n 121→\t\tcclog.Warnf(\"Mismatch in query results count: queries=%d, results=%d, assignedScope=%d\",\n 122→\t\t\tlen(req.Queries), len(resBody.Results), len(assignedScope))\n 123→\t\tif len(resBody.Results) > len(req.Queries) {\n 124→\t\t\tresBody.Results = resBody.Results[:len(req.Queries)]\n 125→\t\t}\n 126→\t\tif len(assignedScope) > len(req.Queries) {\n 127→\t\t\tassignedScope = assignedScope[:len(req.Queries)]\n 128→\t\t}\n 129→\t}\n 130→\n 131→\tfor i, row := range resBody.Results {\n 132→\t\tquery := req.Queries[i]\n 133→\t\tmetric := query.Metric\n 134→\t\tscope := assignedScope[i]\n 135→\t\tmc := archive.GetMetricConfig(job.Cluster, metric)\n 136→\n 137→\t\tif mc == nil {\n 138→\t\t\tcclog.Warnf(\"Metric config not found for %s on cluster %s\", metric, job.Cluster)\n 139→\t\t\tcontinue\n 140→\t\t}\n 141→\n 
142→\t\tif _, ok := jobData[metric]; !ok {\n 143→\t\t\tjobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)\n 144→\t\t}\n 145→\n 146→\t\tres := mc.Timestep\n 147→\t\tif len(row) > 0 {\n 148→\t\t\tres = int(row[0].Resolution)\n 149→\t\t}\n 150→\n 151→\t\tjobMetric, ok := jobData[metric][scope]\n 152→\t\tif !ok {\n 153→\t\t\tjobMetric = &schema.JobMetric{\n 154→\t\t\t\tUnit: mc.Unit,\n 155→\t\t\t\tTimestep: res,\n 156→\t\t\t\tSeries: make([]schema.Series, 0),\n 157→\t\t\t}\n 158→\t\t\tjobData[metric][scope] = jobMetric\n 159→\t\t}\n 160→\n 161→\t\tfor ndx, res := range row {\n 162→\t\t\tif res.Error != nil {\n 163→\t\t\t\t/* Build list for \"partial errors\", if any */\n 164→\t\t\t\terrors = append(errors, fmt.Sprintf(\"failed to fetch '%s' from host '%s': %s\", query.Metric, query.Hostname, *res.Error))\n 165→\t\t\t\tcontinue\n 166→\t\t\t}\n 167→\n 168→\t\t\tid := ExtractTypeID(query.Type, query.TypeIds, ndx, query.Metric, query.Hostname)\n 169→\n 170→\t\t\tSanitizeStats(&res.Avg, &res.Min, &res.Max)\n 171→\n 172→\t\t\tjobMetric.Series = append(jobMetric.Series, schema.Series{\n 173→\t\t\t\tHostname: query.Hostname,\n 174→\t\t\t\tID: id,\n 175→\t\t\t\tStatistics: schema.MetricStatistics{\n 176→\t\t\t\t\tAvg: float64(res.Avg),\n 177→\t\t\t\t\tMin: float64(res.Min),\n 178→\t\t\t\t\tMax: float64(res.Max),\n 179→\t\t\t\t},\n 180→\t\t\t\tData: res.Data,\n 181→\t\t\t})\n 182→\t\t}\n 183→\n 184→\t\t// So that one can later check len(jobData):\n 185→\t\tif len(jobMetric.Series) == 0 {\n 186→\t\t\tdelete(jobData[metric], scope)\n 187→\t\t\tif len(jobData[metric]) == 0 {\n 188→\t\t\t\tdelete(jobData, metric)\n 189→\t\t\t}\n 190→\t\t}\n 191→\t}\n 192→\n 193→\tif len(errors) != 0 {\n 194→\t\t/* Returns list for \"partial errors\" */\n 195→\t\treturn jobData, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > Errors: %s\", strings.Join(errors, \", \"))\n 196→\t}\n 197→\treturn jobData, nil\n 198→}\n 199→\n 200→// buildQueries constructs APIQuery structures with automatic scope transformation for a job.\n 201→//\n 202→// This function implements the core scope transformation logic, handling all combinations of\n 203→// native metric scopes and requested scopes. 
It uses the cluster topology to determine which\n 204→// hardware IDs to include in each query.\n 205→//\n 206→// Scope Transformation Rules:\n 207→// - If native scope >= requested scope: Aggregates data (Aggregate=true in APIQuery)\n 208→// - If native scope < requested scope: Returns error (cannot increase granularity)\n 209→// - Special handling for Accelerator scope (independent of CPU hierarchy)\n 210→//\n 211→// The function generates one or more APIQuery per (metric, scope, host) combination:\n 212→// - For non-aggregated queries: One query with all relevant IDs\n 213→// - For aggregated queries: May generate multiple queries (e.g., one per socket/core)\n 214→//\n 215→// Parameters:\n 216→// - job: Job metadata including cluster, subcluster, and resource allocation\n 217→// - metrics: List of metrics to query\n 218→// - scopes: Requested scopes for each metric\n 219→// - resolution: Data resolution in seconds\n 220→//\n 221→// Returns:\n 222→// - []APIQuery: List of queries to execute\n 223→// - []schema.MetricScope: Assigned scope for each query (after transformation)\n 224→// - error: Returns error if topology lookup fails or unhandled scope combination encountered\n 225→func buildQueries(\n 226→\tjob *schema.Job,\n 227→\tmetrics []string,\n 228→\tscopes []schema.MetricScope,\n 229→\tresolution int64,\n 230→) ([]APIQuery, []schema.MetricScope, error) {\n 231→\tif len(job.Resources) == 0 {\n 232→\t\treturn nil, nil, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > no resources allocated for job %d\", job.JobID)\n 233→\t}\n 234→\n 235→\tqueries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(job.Resources))\n 236→\tassignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(job.Resources))\n 237→\n 238→\tsubcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)\n 239→\tif scerr != nil {\n 240→\t\treturn nil, nil, scerr\n 241→\t}\n 242→\ttopology := subcluster.Topology\n 243→\n 244→\tfor _, metric := range metrics {\n 245→\t\tmc := archive.GetMetricConfig(job.Cluster, metric)\n 246→\t\tif mc == nil {\n 247→\t\t\tcclog.Warnf(\"metric '%s' is not specified for cluster '%s'\", metric, job.Cluster)\n 248→\t\t\tcontinue\n 249→\t\t}\n 250→\n 251→\t\t// Skip if metric is removed for subcluster\n 252→\t\tif len(mc.SubClusters) != 0 && IsMetricRemovedForSubCluster(mc, job.SubCluster) {\n 253→\t\t\tcontinue\n 254→\t\t}\n 255→\n 256→\t\t// Avoid duplicates...\n 257→\t\thandledScopes := make([]schema.MetricScope, 0, 3)\n 258→\n 259→\tscopesLoop:\n 260→\t\tfor _, requestedScope := range scopes {\n 261→\t\t\tnativeScope := mc.Scope\n 262→\t\t\tif nativeScope == schema.MetricScopeAccelerator && job.NumAcc == 0 {\n 263→\t\t\t\tcontinue\n 264→\t\t\t}\n 265→\n 266→\t\t\tscope := nativeScope.Max(requestedScope)\n 267→\t\t\tfor _, s := range handledScopes {\n 268→\t\t\t\tif scope == s {\n 269→\t\t\t\t\tcontinue scopesLoop\n 270→\t\t\t\t}\n 271→\t\t\t}\n 272→\t\t\thandledScopes = append(handledScopes, scope)\n 273→\n 274→\t\t\tfor _, host := range job.Resources {\n 275→\t\t\t\thwthreads := host.HWThreads\n 276→\t\t\t\tif hwthreads == nil {\n 277→\t\t\t\t\thwthreads = topology.Node\n 278→\t\t\t\t}\n 279→\n 280→\t\t\t\tscopeResults, ok := BuildScopeQueries(\n 281→\t\t\t\t\tnativeScope, requestedScope,\n 282→\t\t\t\t\tmetric, host.Hostname,\n 283→\t\t\t\t\t&topology, hwthreads, host.Accelerators,\n 284→\t\t\t\t)\n 285→\n 286→\t\t\t\tif !ok {\n 287→\t\t\t\t\treturn nil, nil, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > unsupported scope transformation: native-scope=%s, 
requested-scope=%s\", nativeScope, requestedScope)\n 288→\t\t\t\t}\n 289→\n 290→\t\t\t\tfor _, sr := range scopeResults {\n 291→\t\t\t\t\tqueries = append(queries, APIQuery{\n 292→\t\t\t\t\t\tMetric: sr.Metric,\n 293→\t\t\t\t\t\tHostname: sr.Hostname,\n 294→\t\t\t\t\t\tAggregate: sr.Aggregate,\n 295→\t\t\t\t\t\tType: sr.Type,\n 296→\t\t\t\t\t\tTypeIds: sr.TypeIds,\n 297→\t\t\t\t\t\tResolution: resolution,\n 298→\t\t\t\t\t})\n 299→\t\t\t\t\tassignedScope = append(assignedScope, sr.Scope)\n 300→\t\t\t\t}\n 301→\t\t\t}\n 302→\t\t}\n 303→\t}\n 304→\n 305→\treturn queries, assignedScope, nil\n 306→}\n 307→\n 308→// LoadStats loads only metric statistics (avg/min/max) for a job at node scope.\n 309→//\n 310→// This is an optimized version of LoadData that fetches only statistics without\n 311→// time-series data, reducing bandwidth and memory usage. Always queries at node scope.\n 312→//\n 313→// Parameters:\n 314→// - job: Job metadata\n 315→// - metrics: List of metric names\n 316→// - ctx: Context (currently unused)\n 317→//\n 318→// Returns:\n 319→// - Map of metric → hostname → statistics\n 320→// - Error on query building or fetching failure\n 321→func (ccms *InternalMetricStore) LoadStats(\n 322→\tjob *schema.Job,\n 323→\tmetrics []string,\n 324→\tctx context.Context,\n 325→) (map[string]map[string]schema.MetricStatistics, error) {\n 326→\t// TODO(#166): Add scope parameter for analysis view accelerator normalization\n 327→\tqueries, _, err := buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0)\n 328→\tif err != nil {\n 329→\t\tcclog.Errorf(\"Error while building queries for jobId %d, Metrics %v: %s\", job.JobID, metrics, err.Error())\n 330→\t\treturn nil, err\n 331→\t}\n 332→\n 333→\treq := APIQueryRequest{\n 334→\t\tCluster: job.Cluster,\n 335→\t\tFrom: job.StartTime,\n 336→\t\tTo: job.StartTime + int64(job.Duration),\n 337→\t\tQueries: queries,\n 338→\t\tWithStats: true,\n 339→\t\tWithData: false,\n 340→\t}\n 341→\n 342→\tresBody, err := FetchData(req)\n 343→\tif err != nil {\n 344→\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n 345→\t\treturn nil, err\n 346→\t}\n 347→\n 348→\tstats := make(map[string]map[string]schema.MetricStatistics, len(metrics))\n 349→\tfor i, res := range resBody.Results {\n 350→\t\tif i >= len(req.Queries) {\n 351→\t\t\tcclog.Warnf(\"LoadStats: result index %d exceeds queries length %d\", i, len(req.Queries))\n 352→\t\t\tbreak\n 353→\t\t}\n 354→\t\tif len(res) == 0 {\n 355→\t\t\t// No Data Found For Metric, Logged in FetchData to Warn\n 356→\t\t\tcontinue\n 357→\t\t}\n 358→\t\tquery := req.Queries[i]\n 359→\t\tmetric := query.Metric\n 360→\t\tdata := res[0]\n 361→\t\tif data.Error != nil {\n 362→\t\t\tcclog.Warnf(\"fetching %s for node %s failed: %s\", metric, query.Hostname, *data.Error)\n 363→\t\t\tcontinue\n 364→\t\t}\n 365→\n 366→\t\tmetricdata, ok := stats[metric]\n 367→\t\tif !ok {\n 368→\t\t\tmetricdata = make(map[string]schema.MetricStatistics, job.NumNodes)\n 369→\t\t\tstats[metric] = metricdata\n 370→\t\t}\n 371→\n 372→\t\tif data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {\n 373→\t\t\tcclog.Warnf(\"fetching %s for node %s failed: one of avg/min/max is NaN\", metric, query.Hostname)\n 374→\t\t\tcontinue\n 375→\t\t}\n 376→\n 377→\t\tmetricdata[query.Hostname] = schema.MetricStatistics{\n 378→\t\t\tAvg: float64(data.Avg),\n 379→\t\t\tMin: float64(data.Min),\n 380→\t\t\tMax: float64(data.Max),\n 381→\t\t}\n 382→\t}\n 383→\n 384→\treturn stats, nil\n 385→}\n 386→\n 387→// LoadScopedStats loads metric 
statistics for a job with scope-aware grouping.\n 388→//\n 389→// Similar to LoadStats but supports multiple scopes and returns statistics grouped\n 390→// by scope with hardware IDs (e.g., per-core, per-socket statistics).\n 391→//\n 392→// Parameters:\n 393→// - job: Job metadata\n 394→// - metrics: List of metric names\n 395→// - scopes: Requested metric scopes\n 396→// - ctx: Context (currently unused)\n 397→//\n 398→// Returns:\n 399→// - ScopedJobStats: Map of metric → scope → []ScopedStats (with hostname and ID)\n 400→// - Error or partial error listing failed queries\n 401→func (ccms *InternalMetricStore) LoadScopedStats(\n 402→\tjob *schema.Job,\n 403→\tmetrics []string,\n 404→\tscopes []schema.MetricScope,\n 405→\tctx context.Context,\n 406→) (schema.ScopedJobStats, error) {\n 407→\tqueries, assignedScope, err := buildQueries(job, metrics, scopes, 0)\n 408→\tif err != nil {\n 409→\t\tcclog.Errorf(\"Error while building queries for jobId %d, Metrics %v, Scopes %v: %s\", job.JobID, metrics, scopes, err.Error())\n 410→\t\treturn nil, err\n 411→\t}\n 412→\n 413→\treq := APIQueryRequest{\n 414→\t\tCluster: job.Cluster,\n 415→\t\tFrom: job.StartTime,\n 416→\t\tTo: job.StartTime + int64(job.Duration),\n 417→\t\tQueries: queries,\n 418→\t\tWithStats: true,\n 419→\t\tWithData: false,\n 420→\t}\n 421→\n 422→\tresBody, err := FetchData(req)\n 423→\tif err != nil {\n 424→\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n 425→\t\treturn nil, err\n 426→\t}\n 427→\n 428→\tvar errors []string\n 429→\tscopedJobStats := make(schema.ScopedJobStats)\n 430→\n 431→\tfor i, row := range resBody.Results {\n 432→\t\tif len(row) == 0 {\n 433→\t\t\t// No Data Found For Metric, Logged in FetchData to Warn\n 434→\t\t\tcontinue\n 435→\t\t}\n 436→\t\tquery := req.Queries[i]\n 437→\t\tmetric := query.Metric\n 438→\t\tscope := assignedScope[i]\n 439→\n 440→\t\tif _, ok := scopedJobStats[metric]; !ok {\n 441→\t\t\tscopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)\n 442→\t\t}\n 443→\n 444→\t\tif _, ok := scopedJobStats[metric][scope]; !ok {\n 445→\t\t\tscopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)\n 446→\t\t}\n 447→\n 448→\t\tfor ndx, res := range row {\n 449→\t\t\tif res.Error != nil {\n 450→\t\t\t\t/* Build list for \"partial errors\", if any */\n 451→\t\t\t\terrors = append(errors, fmt.Sprintf(\"failed to fetch '%s' from host '%s': %s\", query.Metric, query.Hostname, *res.Error))\n 452→\t\t\t\tcontinue\n 453→\t\t\t}\n 454→\n 455→\t\t\tid := ExtractTypeID(query.Type, query.TypeIds, ndx, query.Metric, query.Hostname)\n 456→\n 457→\t\t\tSanitizeStats(&res.Avg, &res.Min, &res.Max)\n 458→\n 459→\t\t\tscopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{\n 460→\t\t\t\tHostname: query.Hostname,\n 461→\t\t\t\tID: id,\n 462→\t\t\t\tData: &schema.MetricStatistics{\n 463→\t\t\t\t\tAvg: float64(res.Avg),\n 464→\t\t\t\t\tMin: float64(res.Min),\n 465→\t\t\t\t\tMax: float64(res.Max),\n 466→\t\t\t\t},\n 467→\t\t\t})\n 468→\t\t}\n 469→\n 470→\t\t// So that one can later check len(scopedJobStats[metric][scope]): Remove from map if empty\n 471→\t\tif len(scopedJobStats[metric][scope]) == 0 {\n 472→\t\t\tdelete(scopedJobStats[metric], scope)\n 473→\t\t\tif len(scopedJobStats[metric]) == 0 {\n 474→\t\t\t\tdelete(scopedJobStats, metric)\n 475→\t\t\t}\n 476→\t\t}\n 477→\t}\n 478→\n 479→\tif len(errors) != 0 {\n 480→\t\t/* Returns list for \"partial errors\" */\n 481→\t\treturn scopedJobStats, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > 
Errors: %s\", strings.Join(errors, \", \"))\n 482→\t}\n 483→\treturn scopedJobStats, nil\n 484→}\n 485→\n 486→// LoadNodeData loads metric data for specific nodes in a cluster over a time range.\n 487→//\n 488→// Unlike LoadData which operates on job resources, this function queries arbitrary nodes\n 489→// directly. Useful for system monitoring and node status views.\n 490→//\n 491→// Parameters:\n 492→// - cluster: Cluster name\n 493→// - metrics: List of metric names\n 494→// - nodes: List of node hostnames (nil = all nodes in cluster via ForAllNodes)\n 495→// - scopes: Requested metric scopes (currently unused - always node scope)\n 496→// - from, to: Time range\n 497→// - ctx: Context (currently unused)\n 498→//\n 499→// Returns:\n 500→// - Map of hostname → metric → []JobMetric\n 501→// - Error or partial error listing failed queries\n 502→func (ccms *InternalMetricStore) LoadNodeData(\n 503→\tcluster string,\n 504→\tmetrics, nodes []string,\n 505→\tscopes []schema.MetricScope,\n 506→\tfrom, to time.Time,\n 507→\tctx context.Context,\n 508→) (map[string]map[string][]*schema.JobMetric, error) {\n 509→\treq := APIQueryRequest{\n 510→\t\tCluster: cluster,\n 511→\t\tFrom: from.Unix(),\n 512→\t\tTo: to.Unix(),\n 513→\t\tWithStats: true,\n 514→\t\tWithData: true,\n 515→\t}\n 516→\n 517→\tif nodes == nil {\n 518→\t\treq.ForAllNodes = append(req.ForAllNodes, metrics...)\n 519→\t} else {\n 520→\t\tfor _, node := range nodes {\n 521→\t\t\tfor _, metric := range metrics {\n 522→\t\t\t\treq.Queries = append(req.Queries, APIQuery{\n 523→\t\t\t\t\tHostname: node,\n 524→\t\t\t\t\tMetric: metric,\n 525→\t\t\t\t\tResolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution\n 526→\t\t\t\t})\n 527→\t\t\t}\n 528→\t\t}\n 529→\t}\n 530→\n 531→\tresBody, err := FetchData(req)\n 532→\tif err != nil {\n 533→\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n 534→\t\treturn nil, err\n 535→\t}\n 536→\n 537→\tvar errors []string\n 538→\tdata := make(map[string]map[string][]*schema.JobMetric)\n 539→\tfor i, res := range resBody.Results {\n 540→\t\tif len(res) == 0 {\n 541→\t\t\t// No Data Found For Metric, Logged in FetchData to Warn\n 542→\t\t\tcontinue\n 543→\t\t}\n 544→\n 545→\t\tvar query APIQuery\n 546→\t\tif resBody.Queries != nil {\n 547→\t\t\tquery = resBody.Queries[i]\n 548→\t\t} else {\n 549→\t\t\tquery = req.Queries[i]\n 550→\t\t}\n 551→\n 552→\t\tmetric := query.Metric\n 553→\t\tqdata := res[0]\n 554→\t\tif qdata.Error != nil {\n 555→\t\t\terrors = append(errors, fmt.Sprintf(\"fetching %s for node %s failed: %s\", metric, query.Hostname, *qdata.Error))\n 556→\t\t\tcontinue\n 557→\t\t}\n 558→\n 559→\t\tmc := archive.GetMetricConfig(cluster, metric)\n 560→\t\tif mc == nil {\n 561→\t\t\tcclog.Warnf(\"Metric config not found for %s on cluster %s\", metric, cluster)\n 562→\t\t\tcontinue\n 563→\t\t}\n 564→\n 565→\t\tSanitizeStats(&qdata.Avg, &qdata.Min, &qdata.Max)\n 566→\n 567→\t\thostdata, ok := data[query.Hostname]\n 568→\t\tif !ok {\n 569→\t\t\thostdata = make(map[string][]*schema.JobMetric)\n 570→\t\t\tdata[query.Hostname] = hostdata\n 571→\t\t}\n 572→\n 573→\t\thostdata[metric] = append(hostdata[metric], &schema.JobMetric{\n 574→\t\t\tUnit: mc.Unit,\n 575→\t\t\tTimestep: mc.Timestep,\n 576→\t\t\tSeries: []schema.Series{\n 577→\t\t\t\t{\n 578→\t\t\t\t\tHostname: query.Hostname,\n 579→\t\t\t\t\tData: qdata.Data,\n 580→\t\t\t\t\tStatistics: schema.MetricStatistics{\n 581→\t\t\t\t\t\tAvg: float64(qdata.Avg),\n 582→\t\t\t\t\t\tMin: float64(qdata.Min),\n 
583→\t\t\t\t\t\tMax: float64(qdata.Max),\n 584→\t\t\t\t\t},\n 585→\t\t\t\t},\n 586→\t\t\t},\n 587→\t\t})\n 588→\t}\n 589→\n 590→\tif len(errors) != 0 {\n 591→\t\t/* Returns list of \"partial errors\" */\n 592→\t\treturn data, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > Errors: %s\", strings.Join(errors, \", \"))\n 593→\t}\n 594→\n 595→\treturn data, nil\n 596→}\n 597→\n 598→// LoadNodeListData loads metric data for a list of nodes with full scope transformation support.\n 599→//\n 600→// This is the most flexible node data loading function, supporting arbitrary scopes and\n 601→// resolution. Uses buildNodeQueries for proper scope transformation based on topology.\n 602→//\n 603→// Parameters:\n 604→// - cluster: Cluster name\n 605→// - subCluster: SubCluster name (empty string to infer from node names)\n 606→// - nodes: List of node hostnames\n 607→// - metrics: List of metric names\n 608→// - scopes: Requested metric scopes\n 609→// - resolution: Data resolution in seconds\n 610→// - from, to: Time range\n 611→// - ctx: Context (currently unused)\n 612→//\n 613→// Returns:\n 614→// - Map of hostname → JobData (metric → scope → JobMetric)\n 615→// - Error or partial error listing failed queries\n 616→func (ccms *InternalMetricStore) LoadNodeListData(\n 617→\tcluster, subCluster string,\n 618→\tnodes []string,\n 619→\tmetrics []string,\n 620→\tscopes []schema.MetricScope,\n 621→\tresolution int,\n 622→\tfrom, to time.Time,\n 623→\tctx context.Context,\n 624→) (map[string]schema.JobData, error) {\n 625→\t// Note: Order of node data is not guaranteed after this point\n 626→\tqueries, assignedScope, err := buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution))\n 627→\tif err != nil {\n 628→\t\tcclog.Errorf(\"Error while building node queries for Cluster %s, SubCluster %s, Metrics %v, Scopes %v: %s\", cluster, subCluster, metrics, scopes, err.Error())\n 629→\t\treturn nil, err\n 630→\t}\n 631→\n 632→\t// Verify assignment is correct - log any inconsistencies for debugging\n 633→\tif len(queries) != len(assignedScope) {\n 634→\t\tcclog.Errorf(\"Critical error: queries and assignedScope have different lengths after buildNodeQueries: %d vs %d\",\n 635→\t\t\tlen(queries), len(assignedScope))\n 636→\t}\n 637→\n 638→\treq := APIQueryRequest{\n 639→\t\tCluster: cluster,\n 640→\t\tQueries: queries,\n 641→\t\tFrom: from.Unix(),\n 642→\t\tTo: to.Unix(),\n 643→\t\tWithStats: true,\n 644→\t\tWithData: true,\n 645→\t}\n 646→\n 647→\tresBody, err := FetchData(req)\n 648→\tif err != nil {\n 649→\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n 650→\t\treturn nil, err\n 651→\t}\n 652→\n 653→\tvar errors []string\n 654→\tdata := make(map[string]schema.JobData)\n 655→\n 656→\t// Add safety check for index out of range issues\n 657→\tif len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {\n 658→\t\tcclog.Warnf(\"Mismatch in query results count: queries=%d, results=%d, assignedScope=%d\",\n 659→\t\t\tlen(req.Queries), len(resBody.Results), len(assignedScope))\n 660→\t\tif len(resBody.Results) > len(req.Queries) {\n 661→\t\t\tresBody.Results = resBody.Results[:len(req.Queries)]\n 662→\t\t}\n 663→\t\tif len(assignedScope) > len(req.Queries) {\n 664→\t\t\tassignedScope = assignedScope[:len(req.Queries)]\n 665→\t\t}\n 666→\t}\n 667→\n 668→\tfor i, row := range resBody.Results {\n 669→\t\tvar query APIQuery\n 670→\t\tif resBody.Queries != nil {\n 671→\t\t\tif i < len(resBody.Queries) {\n 672→\t\t\t\tquery = resBody.Queries[i]\n 673→\t\t\t} else 
{\n 674→\t\t\t\tcclog.Warnf(\"Index out of range prevented for resBody.Queries: i=%d, len=%d\",\n 675→\t\t\t\t\ti, len(resBody.Queries))\n 676→\t\t\t\tcontinue\n 677→\t\t\t}\n 678→\t\t} else {\n 679→\t\t\tquery = req.Queries[i]\n 680→\t\t}\n 681→\n 682→\t\tmetric := query.Metric\n 683→\t\tscope := assignedScope[i]\n 684→\t\tmc := archive.GetMetricConfig(cluster, metric)\n 685→\t\tif mc == nil {\n 686→\t\t\tcclog.Warnf(\"Metric config not found for %s on cluster %s\", metric, cluster)\n 687→\t\t\tcontinue\n 688→\t\t}\n 689→\n 690→\t\tres := mc.Timestep\n 691→\t\tif len(row) > 0 {\n 692→\t\t\tres = int(row[0].Resolution)\n 693→\t\t}\n 694→\n 695→\t\t// Init Nested Map Data Structures If Not Found\n 696→\t\thostData, ok := data[query.Hostname]\n 697→\t\tif !ok {\n 698→\t\t\thostData = make(schema.JobData)\n 699→\t\t\tdata[query.Hostname] = hostData\n 700→\t\t}\n 701→\n 702→\t\tmetricData, ok := hostData[metric]\n 703→\t\tif !ok {\n 704→\t\t\tmetricData = make(map[schema.MetricScope]*schema.JobMetric)\n 705→\t\t\tdata[query.Hostname][metric] = metricData\n 706→\t\t}\n 707→\n 708→\t\tscopeData, ok := metricData[scope]\n 709→\t\tif !ok {\n 710→\t\t\tscopeData = &schema.JobMetric{\n 711→\t\t\t\tUnit: mc.Unit,\n 712→\t\t\t\tTimestep: res,\n 713→\t\t\t\tSeries: make([]schema.Series, 0),\n 714→\t\t\t}\n 715→\t\t\tdata[query.Hostname][metric][scope] = scopeData\n 716→\t\t}\n 717→\n 718→\t\tfor ndx, res := range row {\n 719→\t\t\tif res.Error != nil {\n 720→\t\t\t\t/* Build list for \"partial errors\", if any */\n 721→\t\t\t\terrors = append(errors, fmt.Sprintf(\"failed to fetch '%s' from host '%s': %s\", query.Metric, query.Hostname, *res.Error))\n 722→\t\t\t\tcontinue\n 723→\t\t\t}\n 724→\n 725→\t\t\tid := ExtractTypeID(query.Type, query.TypeIds, ndx, query.Metric, query.Hostname)\n 726→\n 727→\t\t\tSanitizeStats(&res.Avg, &res.Min, &res.Max)\n 728→\n 729→\t\t\tscopeData.Series = append(scopeData.Series, schema.Series{\n 730→\t\t\t\tHostname: query.Hostname,\n 731→\t\t\t\tID: id,\n 732→\t\t\t\tStatistics: schema.MetricStatistics{\n 733→\t\t\t\t\tAvg: float64(res.Avg),\n 734→\t\t\t\t\tMin: float64(res.Min),\n 735→\t\t\t\t\tMax: float64(res.Max),\n 736→\t\t\t\t},\n 737→\t\t\t\tData: res.Data,\n 738→\t\t\t})\n 739→\t\t}\n 740→\t}\n 741→\n 742→\tif len(errors) != 0 {\n 743→\t\t/* Returns list of \"partial errors\" */\n 744→\t\treturn data, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > Errors: %s\", strings.Join(errors, \", \"))\n 745→\t}\n 746→\n 747→\treturn data, nil\n 748→}\n 749→\n 750→// buildNodeQueries constructs APIQuery structures for node-based queries with scope transformation.\n 751→//\n 752→// Similar to buildQueries but operates on node lists rather than job resources.\n 753→// Supports dynamic subcluster lookup when subCluster parameter is empty.\n 754→//\n 755→// Parameters:\n 756→// - cluster: Cluster name\n 757→// - subCluster: SubCluster name (empty = infer from node hostnames)\n 758→// - nodes: List of node hostnames\n 759→// - metrics: List of metric names\n 760→// - scopes: Requested metric scopes\n 761→// - resolution: Data resolution in seconds\n 762→//\n 763→// Returns:\n 764→// - []APIQuery: List of queries to execute\n 765→// - []schema.MetricScope: Assigned scope for each query\n 766→// - error: Returns error if topology lookup fails or unhandled scope combination\n 767→func buildNodeQueries(\n 768→\tcluster string,\n 769→\tsubCluster string,\n 770→\tnodes []string,\n 771→\tmetrics []string,\n 772→\tscopes []schema.MetricScope,\n 773→\tresolution int64,\n 774→) ([]APIQuery, 
[]schema.MetricScope, error) {\n 775→\tqueries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(nodes))\n 776→\tassignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(nodes))\n 777→\n 778→\t// Get topology before loop if subCluster given\n 779→\tvar subClusterTopol *schema.SubCluster\n 780→\tvar scterr error\n 781→\tif subCluster != \"\" {\n 782→\t\tsubClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)\n 783→\t\tif scterr != nil {\n 784→\t\t\tcclog.Errorf(\"could not load cluster %s subCluster %s topology: %s\", cluster, subCluster, scterr.Error())\n 785→\t\t\treturn nil, nil, scterr\n 786→\t\t}\n 787→\t}\n 788→\n 789→\tfor _, metric := range metrics {\n 790→\t\tmc := archive.GetMetricConfig(cluster, metric)\n 791→\t\tif mc == nil {\n 792→\t\t\tcclog.Warnf(\"metric '%s' is not specified for cluster '%s'\", metric, cluster)\n 793→\t\t\tcontinue\n 794→\t\t}\n 795→\n 796→\t\t// Skip if metric is removed for subcluster\n 797→\t\tif mc.SubClusters != nil && IsMetricRemovedForSubCluster(mc, subCluster) {\n 798→\t\t\tcontinue\n 799→\t\t}\n 800→\n 801→\t\t// Avoid duplicates...\n 802→\t\thandledScopes := make([]schema.MetricScope, 0, 3)\n 803→\n 804→\tnodeScopesLoop:\n 805→\t\tfor _, requestedScope := range scopes {\n 806→\t\t\tnativeScope := mc.Scope\n 807→\n 808→\t\t\tscope := nativeScope.Max(requestedScope)\n 809→\t\t\tfor _, s := range handledScopes {\n 810→\t\t\t\tif scope == s {\n 811→\t\t\t\t\tcontinue nodeScopesLoop\n 812→\t\t\t\t}\n 813→\t\t\t}\n 814→\t\t\thandledScopes = append(handledScopes, scope)\n 815→\n 816→\t\t\tfor _, hostname := range nodes {\n 817→\n 818→\t\t\t\t// If no subCluster given, get it by node\n 819→\t\t\t\tif subCluster == \"\" {\n 820→\t\t\t\t\tsubClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)\n 821→\t\t\t\t\tif scnerr != nil {\n 822→\t\t\t\t\t\treturn nil, nil, scnerr\n 823→\t\t\t\t\t}\n 824→\t\t\t\t\tsubClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)\n 825→\t\t\t\t\tif scterr != nil {\n 826→\t\t\t\t\t\treturn nil, nil, scterr\n 827→\t\t\t\t\t}\n 828→\t\t\t\t}\n 829→\n 830→\t\t\t\t// Always use the full node hwthread ID list; no partial queries expected\n 831→\t\t\t\ttopology := subClusterTopol.Topology\n 832→\t\t\t\tacceleratorIds := topology.GetAcceleratorIDs()\n 833→\n 834→\t\t\t\t// Check that the metric matches the node's hardware: accelerator metrics need accelerators\n 835→\t\t\t\tif nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 {\n 836→\t\t\t\t\tcontinue\n 837→\t\t\t\t}\n 838→\n 839→\t\t\t\tscopeResults, ok := BuildScopeQueries(\n 840→\t\t\t\t\tnativeScope, requestedScope,\n 841→\t\t\t\t\tmetric, hostname,\n 842→\t\t\t\t\t&topology, topology.Node, acceleratorIds,\n 843→\t\t\t\t)\n 844→\n 845→\t\t\t\tif !ok {\n 846→\t\t\t\t\treturn nil, nil, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > unsupported scope transformation: native-scope=%s, requested-scope=%s\", nativeScope, requestedScope)\n 847→\t\t\t\t}\n 848→\n 849→\t\t\t\tfor _, sr := range scopeResults {\n 850→\t\t\t\t\tqueries = append(queries, APIQuery{\n 851→\t\t\t\t\t\tMetric: sr.Metric,\n 852→\t\t\t\t\t\tHostname: sr.Hostname,\n 853→\t\t\t\t\t\tAggregate: sr.Aggregate,\n 854→\t\t\t\t\t\tType: sr.Type,\n 855→\t\t\t\t\t\tTypeIds: sr.TypeIds,\n 856→\t\t\t\t\t\tResolution: resolution,\n 857→\t\t\t\t\t})\n 858→\t\t\t\t\tassignedScope = append(assignedScope, sr.Scope)\n 859→\t\t\t\t}\n 860→\t\t\t}\n 861→\t\t}\n 862→\t}\n 863→\n 864→\treturn queries, assignedScope, nil\n 865→}\n 866→\n\n\nWhenever you read a file, you should consider whether it would be 
considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"563b2508-b017-44ad-9ac7-5a323a7e1acd","timestamp":"2026-03-29T19:34:27.573Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/query.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file implements high-level query functions for loading job metric data\n// with automatic scope transformation and aggregation.\n//\n// Key Concepts:\n//\n// Metric Scopes: Metrics are collected at different granularities (native scope):\n// - HWThread: Per hardware thread\n// - Core: Per CPU core\n// - Socket: Per CPU socket\n// - MemoryDomain: Per memory domain (NUMA)\n// - Accelerator: Per GPU/accelerator\n// - Node: Per compute node\n//\n// Scope Transformation: The buildQueries functions transform between native scope\n// and requested scope by:\n// - Aggregating finer-grained data (e.g., HWThread → Core → Socket → Node)\n// - Rejecting requests for finer granularity than available\n// - Handling special cases (e.g., Accelerator metrics)\n//\n// Query Building: Constructs APIQuery structures with proper selectors (Type, TypeIds)\n// based on cluster topology and job resources.\npackage metricstore\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\t\"strings\"\n\t\"time\"\n\n\t\"github.com/ClusterCockpit/cc-backend/pkg/archive\"\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\ntype InternalMetricStore struct{}\n\nvar MetricStoreHandle *InternalMetricStore\n\n// HealthCheck delegates to the internal MemoryStore's HealthCheck.\nfunc (ccms *InternalMetricStore) HealthCheck(cluster string,\n\tnodes []string, metrics []string,\n) (map[string]HealthCheckResult, error) {\n\treturn GetMemoryStore().HealthCheck(cluster, nodes, metrics)\n}\n\n// TestLoadDataCallback allows tests to override LoadData behavior for testing purposes.\n// When set to a non-nil function, LoadData will call this function instead of the default implementation.\nvar TestLoadDataCallback func(job *schema.Job, metrics []string, scopes []schema.MetricScope, ctx context.Context, resolution int) (schema.JobData, error)\n\n// LoadData loads metric data for a specific job with automatic scope transformation.\n//\n// This is the primary function for retrieving job metric data. 
It handles:\n// - Building queries with scope transformation via buildQueries\n// - Fetching data from the metric store\n// - Organizing results by metric and scope\n// - Converting NaN statistics to 0 for JSON compatibility\n// - Partial error handling (returns data for successful queries even if some fail)\n//\n// Parameters:\n// - job: Job metadata including cluster, resources, and time range\n// - metrics: List of metric names to load\n// - scopes: Requested metric scopes (will be transformed to match native scopes)\n// - ctx: Context for cancellation (currently unused but reserved for future use)\n// - resolution: Data resolution in seconds (0 for native resolution)\n//\n// Returns:\n// - JobData: Map of metric → scope → JobMetric with time-series data and statistics\n// - Error: Returns error if query building or fetching fails, or partial error listing failed hosts\n//\n// Example:\n//\n//\tjobData, err := LoadData(job, []string{\"cpu_load\", \"mem_used\"}, []schema.MetricScope{schema.MetricScopeNode}, ctx, 60)\nfunc (ccms *InternalMetricStore) LoadData(\n\tjob *schema.Job,\n\tmetrics []string,\n\tscopes []schema.MetricScope,\n\tctx context.Context,\n\tresolution int,\n) (schema.JobData, error) {\n\tif TestLoadDataCallback != nil {\n\t\treturn TestLoadDataCallback(job, metrics, scopes, ctx, resolution)\n\t}\n\n\tqueries, assignedScope, err := buildQueries(job, metrics, scopes, int64(resolution))\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while building queries for jobId %d, Metrics %v, Scopes %v: %s\", job.JobID, metrics, scopes, err.Error())\n\t\treturn nil, err\n\t}\n\n\t// Verify assignment is correct - log any inconsistencies for debugging\n\tif len(queries) != len(assignedScope) {\n\t\tcclog.Errorf(\"Critical error: queries and assignedScope have different lengths after buildQueries: %d vs %d\",\n\t\t\tlen(queries), len(assignedScope))\n\t}\n\n\treq := APIQueryRequest{\n\t\tCluster: job.Cluster,\n\t\tFrom: job.StartTime,\n\t\tTo: job.StartTime + int64(job.Duration),\n\t\tQueries: queries,\n\t\tWithStats: true,\n\t\tWithData: true,\n\t}\n\n\tresBody, err := FetchData(req)\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n\t\treturn nil, err\n\t}\n\n\tvar errors []string\n\tjobData := make(schema.JobData)\n\n\t// Add safety check for potential index out of range errors\n\tif len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {\n\t\tcclog.Warnf(\"Mismatch in query results count: queries=%d, results=%d, assignedScope=%d\",\n\t\t\tlen(req.Queries), len(resBody.Results), len(assignedScope))\n\t\tif len(resBody.Results) > len(req.Queries) {\n\t\t\tresBody.Results = resBody.Results[:len(req.Queries)]\n\t\t}\n\t\tif len(assignedScope) > len(req.Queries) {\n\t\t\tassignedScope = assignedScope[:len(req.Queries)]\n\t\t}\n\t}\n\n\tfor i, row := range resBody.Results {\n\t\tquery := req.Queries[i]\n\t\tmetric := query.Metric\n\t\tscope := assignedScope[i]\n\t\tmc := archive.GetMetricConfig(job.Cluster, metric)\n\n\t\tif mc == nil {\n\t\t\tcclog.Warnf(\"Metric config not found for %s on cluster %s\", metric, job.Cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\tif _, ok := jobData[metric]; !ok {\n\t\t\tjobData[metric] = make(map[schema.MetricScope]*schema.JobMetric)\n\t\t}\n\n\t\tres := mc.Timestep\n\t\tif len(row) > 0 {\n\t\t\tres = int(row[0].Resolution)\n\t\t}\n\n\t\tjobMetric, ok := jobData[metric][scope]\n\t\tif !ok {\n\t\t\tjobMetric = &schema.JobMetric{\n\t\t\t\tUnit: mc.Unit,\n\t\t\t\tTimestep: res,\n\t\t\t\tSeries: 
make([]schema.Series, 0),\n\t\t\t}\n\t\t\tjobData[metric][scope] = jobMetric\n\t\t}\n\n\t\tfor ndx, res := range row {\n\t\t\tif res.Error != nil {\n\t\t\t\t/* Build list for \"partial errors\", if any */\n\t\t\t\terrors = append(errors, fmt.Sprintf(\"failed to fetch '%s' from host '%s': %s\", query.Metric, query.Hostname, *res.Error))\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tid := ExtractTypeID(query.Type, query.TypeIds, ndx, query.Metric, query.Hostname)\n\n\t\t\tSanitizeStats(&res.Avg, &res.Min, &res.Max)\n\n\t\t\tjobMetric.Series = append(jobMetric.Series, schema.Series{\n\t\t\t\tHostname: query.Hostname,\n\t\t\t\tID: id,\n\t\t\t\tStatistics: schema.MetricStatistics{\n\t\t\t\t\tAvg: float64(res.Avg),\n\t\t\t\t\tMin: float64(res.Min),\n\t\t\t\t\tMax: float64(res.Max),\n\t\t\t\t},\n\t\t\t\tData: res.Data,\n\t\t\t})\n\t\t}\n\n\t\t// So that one can later check len(jobData):\n\t\tif len(jobMetric.Series) == 0 {\n\t\t\tdelete(jobData[metric], scope)\n\t\t\tif len(jobData[metric]) == 0 {\n\t\t\t\tdelete(jobData, metric)\n\t\t\t}\n\t\t}\n\t}\n\n\tif len(errors) != 0 {\n\t\t/* Returns list for \"partial errors\" */\n\t\treturn jobData, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > Errors: %s\", strings.Join(errors, \", \"))\n\t}\n\treturn jobData, nil\n}\n\n// buildQueries constructs APIQuery structures with automatic scope transformation for a job.\n//\n// This function implements the core scope transformation logic, handling all combinations of\n// native metric scopes and requested scopes. It uses the cluster topology to determine which\n// hardware IDs to include in each query.\n//\n// Scope Transformation Rules:\n// - If native scope >= requested scope: Aggregates data (Aggregate=true in APIQuery)\n// - If native scope < requested scope: Returns error (cannot increase granularity)\n// - Special handling for Accelerator scope (independent of CPU hierarchy)\n//\n// The function generates one or more APIQuery per (metric, scope, host) combination:\n// - For non-aggregated queries: One query with all relevant IDs\n// - For aggregated queries: May generate multiple queries (e.g., one per socket/core)\n//\n// Parameters:\n// - job: Job metadata including cluster, subcluster, and resource allocation\n// - metrics: List of metrics to query\n// - scopes: Requested scopes for each metric\n// - resolution: Data resolution in seconds\n//\n// Returns:\n// - []APIQuery: List of queries to execute\n// - []schema.MetricScope: Assigned scope for each query (after transformation)\n// - error: Returns error if topology lookup fails or unhandled scope combination encountered\nfunc buildQueries(\n\tjob *schema.Job,\n\tmetrics []string,\n\tscopes []schema.MetricScope,\n\tresolution int64,\n) ([]APIQuery, []schema.MetricScope, error) {\n\tif len(job.Resources) == 0 {\n\t\treturn nil, nil, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > no resources allocated for job %d\", job.JobID)\n\t}\n\n\tqueries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(job.Resources))\n\tassignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(job.Resources))\n\n\tsubcluster, scerr := archive.GetSubCluster(job.Cluster, job.SubCluster)\n\tif scerr != nil {\n\t\treturn nil, nil, scerr\n\t}\n\ttopology := subcluster.Topology\n\n\tfor _, metric := range metrics {\n\t\tmc := archive.GetMetricConfig(job.Cluster, metric)\n\t\tif mc == nil {\n\t\t\tcclog.Warnf(\"metric '%s' is not specified for cluster '%s'\", metric, job.Cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Skip if metric is removed for subcluster\n\t\tif len(mc.SubClusters) != 0 
&& IsMetricRemovedForSubCluster(mc, job.SubCluster) {\n\t\t\tcontinue\n\t\t}\n\n\t\t// Avoid duplicates...\n\t\thandledScopes := make([]schema.MetricScope, 0, 3)\n\n\tscopesLoop:\n\t\tfor _, requestedScope := range scopes {\n\t\t\tnativeScope := mc.Scope\n\t\t\tif nativeScope == schema.MetricScopeAccelerator && job.NumAcc == 0 {\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tscope := nativeScope.Max(requestedScope)\n\t\t\tfor _, s := range handledScopes {\n\t\t\t\tif scope == s {\n\t\t\t\t\tcontinue scopesLoop\n\t\t\t\t}\n\t\t\t}\n\t\t\thandledScopes = append(handledScopes, scope)\n\n\t\t\tfor _, host := range job.Resources {\n\t\t\t\thwthreads := host.HWThreads\n\t\t\t\tif hwthreads == nil {\n\t\t\t\t\thwthreads = topology.Node\n\t\t\t\t}\n\n\t\t\t\tscopeResults, ok := BuildScopeQueries(\n\t\t\t\t\tnativeScope, requestedScope,\n\t\t\t\t\tmetric, host.Hostname,\n\t\t\t\t\t&topology, hwthreads, host.Accelerators,\n\t\t\t\t)\n\n\t\t\t\tif !ok {\n\t\t\t\t\treturn nil, nil, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > unsupported scope transformation: native-scope=%s, requested-scope=%s\", nativeScope, requestedScope)\n\t\t\t\t}\n\n\t\t\t\tfor _, sr := range scopeResults {\n\t\t\t\t\tqueries = append(queries, APIQuery{\n\t\t\t\t\t\tMetric: sr.Metric,\n\t\t\t\t\t\tHostname: sr.Hostname,\n\t\t\t\t\t\tAggregate: sr.Aggregate,\n\t\t\t\t\t\tType: sr.Type,\n\t\t\t\t\t\tTypeIds: sr.TypeIds,\n\t\t\t\t\t\tResolution: resolution,\n\t\t\t\t\t})\n\t\t\t\t\tassignedScope = append(assignedScope, sr.Scope)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\treturn queries, assignedScope, nil\n}\n\n// LoadStats loads only metric statistics (avg/min/max) for a job at node scope.\n//\n// This is an optimized version of LoadData that fetches only statistics without\n// time-series data, reducing bandwidth and memory usage. 
Always queries at node scope.\n//\n// Parameters:\n// - job: Job metadata\n// - metrics: List of metric names\n// - ctx: Context (currently unused)\n//\n// Returns:\n// - Map of metric → hostname → statistics\n// - Error on query building or fetching failure\nfunc (ccms *InternalMetricStore) LoadStats(\n\tjob *schema.Job,\n\tmetrics []string,\n\tctx context.Context,\n) (map[string]map[string]schema.MetricStatistics, error) {\n\t// TODO(#166): Add scope parameter for analysis view accelerator normalization\n\tqueries, _, err := buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0)\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while building queries for jobId %d, Metrics %v: %s\", job.JobID, metrics, err.Error())\n\t\treturn nil, err\n\t}\n\n\treq := APIQueryRequest{\n\t\tCluster: job.Cluster,\n\t\tFrom: job.StartTime,\n\t\tTo: job.StartTime + int64(job.Duration),\n\t\tQueries: queries,\n\t\tWithStats: true,\n\t\tWithData: false,\n\t}\n\n\tresBody, err := FetchData(req)\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n\t\treturn nil, err\n\t}\n\n\tstats := make(map[string]map[string]schema.MetricStatistics, len(metrics))\n\tfor i, res := range resBody.Results {\n\t\tif i >= len(req.Queries) {\n\t\t\tcclog.Warnf(\"LoadStats: result index %d exceeds queries length %d\", i, len(req.Queries))\n\t\t\tbreak\n\t\t}\n\t\tif len(res) == 0 {\n\t\t\t// No Data Found For Metric, Logged in FetchData to Warn\n\t\t\tcontinue\n\t\t}\n\t\tquery := req.Queries[i]\n\t\tmetric := query.Metric\n\t\tdata := res[0]\n\t\tif data.Error != nil {\n\t\t\tcclog.Warnf(\"fetching %s for node %s failed: %s\", metric, query.Hostname, *data.Error)\n\t\t\tcontinue\n\t\t}\n\n\t\tmetricdata, ok := stats[metric]\n\t\tif !ok {\n\t\t\tmetricdata = make(map[string]schema.MetricStatistics, job.NumNodes)\n\t\t\tstats[metric] = metricdata\n\t\t}\n\n\t\tif data.Avg.IsNaN() || data.Min.IsNaN() || data.Max.IsNaN() {\n\t\t\tcclog.Warnf(\"fetching %s for node %s failed: one of avg/min/max is NaN\", metric, query.Hostname)\n\t\t\tcontinue\n\t\t}\n\n\t\tmetricdata[query.Hostname] = schema.MetricStatistics{\n\t\t\tAvg: float64(data.Avg),\n\t\t\tMin: float64(data.Min),\n\t\t\tMax: float64(data.Max),\n\t\t}\n\t}\n\n\treturn stats, nil\n}\n\n// LoadScopedStats loads metric statistics for a job with scope-aware grouping.\n//\n// Similar to LoadStats but supports multiple scopes and returns statistics grouped\n// by scope with hardware IDs (e.g., per-core, per-socket statistics).\n//\n// Parameters:\n// - job: Job metadata\n// - metrics: List of metric names\n// - scopes: Requested metric scopes\n// - ctx: Context (currently unused)\n//\n// Returns:\n// - ScopedJobStats: Map of metric → scope → []ScopedStats (with hostname and ID)\n// - Error or partial error listing failed queries\nfunc (ccms *InternalMetricStore) LoadScopedStats(\n\tjob *schema.Job,\n\tmetrics []string,\n\tscopes []schema.MetricScope,\n\tctx context.Context,\n) (schema.ScopedJobStats, error) {\n\tqueries, assignedScope, err := buildQueries(job, metrics, scopes, 0)\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while building queries for jobId %d, Metrics %v, Scopes %v: %s\", job.JobID, metrics, scopes, err.Error())\n\t\treturn nil, err\n\t}\n\n\treq := APIQueryRequest{\n\t\tCluster: job.Cluster,\n\t\tFrom: job.StartTime,\n\t\tTo: job.StartTime + int64(job.Duration),\n\t\tQueries: queries,\n\t\tWithStats: true,\n\t\tWithData: false,\n\t}\n\n\tresBody, err := FetchData(req)\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while 
fetching data : %s\", err.Error())\n\t\treturn nil, err\n\t}\n\n\tvar errors []string\n\tscopedJobStats := make(schema.ScopedJobStats)\n\n\tfor i, row := range resBody.Results {\n\t\tif len(row) == 0 {\n\t\t\t// No Data Found For Metric, Logged in FetchData to Warn\n\t\t\tcontinue\n\t\t}\n\t\tquery := req.Queries[i]\n\t\tmetric := query.Metric\n\t\tscope := assignedScope[i]\n\n\t\tif _, ok := scopedJobStats[metric]; !ok {\n\t\t\tscopedJobStats[metric] = make(map[schema.MetricScope][]*schema.ScopedStats)\n\t\t}\n\n\t\tif _, ok := scopedJobStats[metric][scope]; !ok {\n\t\t\tscopedJobStats[metric][scope] = make([]*schema.ScopedStats, 0)\n\t\t}\n\n\t\tfor ndx, res := range row {\n\t\t\tif res.Error != nil {\n\t\t\t\t/* Build list for \"partial errors\", if any */\n\t\t\t\terrors = append(errors, fmt.Sprintf(\"failed to fetch '%s' from host '%s': %s\", query.Metric, query.Hostname, *res.Error))\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tid := ExtractTypeID(query.Type, query.TypeIds, ndx, query.Metric, query.Hostname)\n\n\t\t\tSanitizeStats(&res.Avg, &res.Min, &res.Max)\n\n\t\t\tscopedJobStats[metric][scope] = append(scopedJobStats[metric][scope], &schema.ScopedStats{\n\t\t\t\tHostname: query.Hostname,\n\t\t\t\tID: id,\n\t\t\t\tData: &schema.MetricStatistics{\n\t\t\t\t\tAvg: float64(res.Avg),\n\t\t\t\t\tMin: float64(res.Min),\n\t\t\t\t\tMax: float64(res.Max),\n\t\t\t\t},\n\t\t\t})\n\t\t}\n\n\t\t// So that one can later check len(scopedJobStats[metric][scope]): Remove from map if empty\n\t\tif len(scopedJobStats[metric][scope]) == 0 {\n\t\t\tdelete(scopedJobStats[metric], scope)\n\t\t\tif len(scopedJobStats[metric]) == 0 {\n\t\t\t\tdelete(scopedJobStats, metric)\n\t\t\t}\n\t\t}\n\t}\n\n\tif len(errors) != 0 {\n\t\t/* Returns list for \"partial errors\" */\n\t\treturn scopedJobStats, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > Errors: %s\", strings.Join(errors, \", \"))\n\t}\n\treturn scopedJobStats, nil\n}\n\n// LoadNodeData loads metric data for specific nodes in a cluster over a time range.\n//\n// Unlike LoadData which operates on job resources, this function queries arbitrary nodes\n// directly. 
Useful for system monitoring and node status views.\n//\n// Parameters:\n// - cluster: Cluster name\n// - metrics: List of metric names\n// - nodes: List of node hostnames (nil = all nodes in cluster via ForAllNodes)\n// - scopes: Requested metric scopes (currently unused - always node scope)\n// - from, to: Time range\n// - ctx: Context (currently unused)\n//\n// Returns:\n// - Map of hostname → metric → []JobMetric\n// - Error or partial error listing failed queries\nfunc (ccms *InternalMetricStore) LoadNodeData(\n\tcluster string,\n\tmetrics, nodes []string,\n\tscopes []schema.MetricScope,\n\tfrom, to time.Time,\n\tctx context.Context,\n) (map[string]map[string][]*schema.JobMetric, error) {\n\treq := APIQueryRequest{\n\t\tCluster: cluster,\n\t\tFrom: from.Unix(),\n\t\tTo: to.Unix(),\n\t\tWithStats: true,\n\t\tWithData: true,\n\t}\n\n\tif nodes == nil {\n\t\treq.ForAllNodes = append(req.ForAllNodes, metrics...)\n\t} else {\n\t\tfor _, node := range nodes {\n\t\t\tfor _, metric := range metrics {\n\t\t\t\treq.Queries = append(req.Queries, APIQuery{\n\t\t\t\t\tHostname: node,\n\t\t\t\t\tMetric: metric,\n\t\t\t\t\tResolution: 0, // Default for Node Queries: Will return metric $Timestep Resolution\n\t\t\t\t})\n\t\t\t}\n\t\t}\n\t}\n\n\tresBody, err := FetchData(req)\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n\t\treturn nil, err\n\t}\n\n\tvar errors []string\n\tdata := make(map[string]map[string][]*schema.JobMetric)\n\tfor i, res := range resBody.Results {\n\t\tif len(res) == 0 {\n\t\t\t// No Data Found For Metric, Logged in FetchData to Warn\n\t\t\tcontinue\n\t\t}\n\n\t\tvar query APIQuery\n\t\tif resBody.Queries != nil {\n\t\t\tquery = resBody.Queries[i]\n\t\t} else {\n\t\t\tquery = req.Queries[i]\n\t\t}\n\n\t\tmetric := query.Metric\n\t\tqdata := res[0]\n\t\tif qdata.Error != nil {\n\t\t\terrors = append(errors, fmt.Sprintf(\"fetching %s for node %s failed: %s\", metric, query.Hostname, *qdata.Error))\n\t\t\tcontinue\n\t\t}\n\n\t\tmc := archive.GetMetricConfig(cluster, metric)\n\t\tif mc == nil {\n\t\t\tcclog.Warnf(\"Metric config not found for %s on cluster %s\", metric, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\tSanitizeStats(&qdata.Avg, &qdata.Min, &qdata.Max)\n\n\t\thostdata, ok := data[query.Hostname]\n\t\tif !ok {\n\t\t\thostdata = make(map[string][]*schema.JobMetric)\n\t\t\tdata[query.Hostname] = hostdata\n\t\t}\n\n\t\thostdata[metric] = append(hostdata[metric], &schema.JobMetric{\n\t\t\tUnit: mc.Unit,\n\t\t\tTimestep: mc.Timestep,\n\t\t\tSeries: []schema.Series{\n\t\t\t\t{\n\t\t\t\t\tHostname: query.Hostname,\n\t\t\t\t\tData: qdata.Data,\n\t\t\t\t\tStatistics: schema.MetricStatistics{\n\t\t\t\t\t\tAvg: float64(qdata.Avg),\n\t\t\t\t\t\tMin: float64(qdata.Min),\n\t\t\t\t\t\tMax: float64(qdata.Max),\n\t\t\t\t\t},\n\t\t\t\t},\n\t\t\t},\n\t\t})\n\t}\n\n\tif len(errors) != 0 {\n\t\t/* Returns list of \"partial errors\" */\n\t\treturn data, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > Errors: %s\", strings.Join(errors, \", \"))\n\t}\n\n\treturn data, nil\n}\n\n// LoadNodeListData loads metric data for a list of nodes with full scope transformation support.\n//\n// This is the most flexible node data loading function, supporting arbitrary scopes and\n// resolution. 
Uses buildNodeQueries for proper scope transformation based on topology.\n//\n// Parameters:\n// - cluster: Cluster name\n// - subCluster: SubCluster name (empty string to infer from node names)\n// - nodes: List of node hostnames\n// - metrics: List of metric names\n// - scopes: Requested metric scopes\n// - resolution: Data resolution in seconds\n// - from, to: Time range\n// - ctx: Context (currently unused)\n//\n// Returns:\n// - Map of hostname → JobData (metric → scope → JobMetric)\n// - Error or partial error listing failed queries\nfunc (ccms *InternalMetricStore) LoadNodeListData(\n\tcluster, subCluster string,\n\tnodes []string,\n\tmetrics []string,\n\tscopes []schema.MetricScope,\n\tresolution int,\n\tfrom, to time.Time,\n\tctx context.Context,\n) (map[string]schema.JobData, error) {\n\t// Note: Order of node data is not guaranteed after this point\n\tqueries, assignedScope, err := buildNodeQueries(cluster, subCluster, nodes, metrics, scopes, int64(resolution))\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while building node queries for Cluster %s, SubCluster %s, Metrics %v, Scopes %v: %s\", cluster, subCluster, metrics, scopes, err.Error())\n\t\treturn nil, err\n\t}\n\n\t// Verify assignment is correct - log any inconsistencies for debugging\n\tif len(queries) != len(assignedScope) {\n\t\tcclog.Errorf(\"Critical error: queries and assignedScope have different lengths after buildNodeQueries: %d vs %d\",\n\t\t\tlen(queries), len(assignedScope))\n\t}\n\n\treq := APIQueryRequest{\n\t\tCluster: cluster,\n\t\tQueries: queries,\n\t\tFrom: from.Unix(),\n\t\tTo: to.Unix(),\n\t\tWithStats: true,\n\t\tWithData: true,\n\t}\n\n\tresBody, err := FetchData(req)\n\tif err != nil {\n\t\tcclog.Errorf(\"Error while fetching data : %s\", err.Error())\n\t\treturn nil, err\n\t}\n\n\tvar errors []string\n\tdata := make(map[string]schema.JobData)\n\n\t// Add safety check for index out of range issues\n\tif len(resBody.Results) != len(req.Queries) || len(assignedScope) != len(req.Queries) {\n\t\tcclog.Warnf(\"Mismatch in query results count: queries=%d, results=%d, assignedScope=%d\",\n\t\t\tlen(req.Queries), len(resBody.Results), len(assignedScope))\n\t\tif len(resBody.Results) > len(req.Queries) {\n\t\t\tresBody.Results = resBody.Results[:len(req.Queries)]\n\t\t}\n\t\tif len(assignedScope) > len(req.Queries) {\n\t\t\tassignedScope = assignedScope[:len(req.Queries)]\n\t\t}\n\t}\n\n\tfor i, row := range resBody.Results {\n\t\tvar query APIQuery\n\t\tif resBody.Queries != nil {\n\t\t\tif i < len(resBody.Queries) {\n\t\t\t\tquery = resBody.Queries[i]\n\t\t\t} else {\n\t\t\t\tcclog.Warnf(\"Index out of range prevented for resBody.Queries: i=%d, len=%d\",\n\t\t\t\t\ti, len(resBody.Queries))\n\t\t\t\tcontinue\n\t\t\t}\n\t\t} else {\n\t\t\tquery = req.Queries[i]\n\t\t}\n\n\t\tmetric := query.Metric\n\t\tscope := assignedScope[i]\n\t\tmc := archive.GetMetricConfig(cluster, metric)\n\t\tif mc == nil {\n\t\t\tcclog.Warnf(\"Metric config not found for %s on cluster %s\", metric, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\tres := mc.Timestep\n\t\tif len(row) > 0 {\n\t\t\tres = int(row[0].Resolution)\n\t\t}\n\n\t\t// Init Nested Map Data Structures If Not Found\n\t\thostData, ok := data[query.Hostname]\n\t\tif !ok {\n\t\t\thostData = make(schema.JobData)\n\t\t\tdata[query.Hostname] = hostData\n\t\t}\n\n\t\tmetricData, ok := hostData[metric]\n\t\tif !ok {\n\t\t\tmetricData = make(map[schema.MetricScope]*schema.JobMetric)\n\t\t\tdata[query.Hostname][metric] = metricData\n\t\t}\n\n\t\tscopeData, ok := 
metricData[scope]\n\t\tif !ok {\n\t\t\tscopeData = &schema.JobMetric{\n\t\t\t\tUnit: mc.Unit,\n\t\t\t\tTimestep: res,\n\t\t\t\tSeries: make([]schema.Series, 0),\n\t\t\t}\n\t\t\tdata[query.Hostname][metric][scope] = scopeData\n\t\t}\n\n\t\tfor ndx, res := range row {\n\t\t\tif res.Error != nil {\n\t\t\t\t/* Build list for \"partial errors\", if any */\n\t\t\t\terrors = append(errors, fmt.Sprintf(\"failed to fetch '%s' from host '%s': %s\", query.Metric, query.Hostname, *res.Error))\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tid := ExtractTypeID(query.Type, query.TypeIds, ndx, query.Metric, query.Hostname)\n\n\t\t\tSanitizeStats(&res.Avg, &res.Min, &res.Max)\n\n\t\t\tscopeData.Series = append(scopeData.Series, schema.Series{\n\t\t\t\tHostname: query.Hostname,\n\t\t\t\tID: id,\n\t\t\t\tStatistics: schema.MetricStatistics{\n\t\t\t\t\tAvg: float64(res.Avg),\n\t\t\t\t\tMin: float64(res.Min),\n\t\t\t\t\tMax: float64(res.Max),\n\t\t\t\t},\n\t\t\t\tData: res.Data,\n\t\t\t})\n\t\t}\n\t}\n\n\tif len(errors) != 0 {\n\t\t/* Returns list of \"partial errors\" */\n\t\treturn data, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > Errors: %s\", strings.Join(errors, \", \"))\n\t}\n\n\treturn data, nil\n}\n\n// buildNodeQueries constructs APIQuery structures for node-based queries with scope transformation.\n//\n// Similar to buildQueries but operates on node lists rather than job resources.\n// Supports dynamic subcluster lookup when subCluster parameter is empty.\n//\n// Parameters:\n// - cluster: Cluster name\n// - subCluster: SubCluster name (empty = infer from node hostnames)\n// - nodes: List of node hostnames\n// - metrics: List of metric names\n// - scopes: Requested metric scopes\n// - resolution: Data resolution in seconds\n//\n// Returns:\n// - []APIQuery: List of queries to execute\n// - []schema.MetricScope: Assigned scope for each query\n// - error: Returns error if topology lookup fails or unhandled scope combination\nfunc buildNodeQueries(\n\tcluster string,\n\tsubCluster string,\n\tnodes []string,\n\tmetrics []string,\n\tscopes []schema.MetricScope,\n\tresolution int64,\n) ([]APIQuery, []schema.MetricScope, error) {\n\tqueries := make([]APIQuery, 0, len(metrics)*len(scopes)*len(nodes))\n\tassignedScope := make([]schema.MetricScope, 0, len(metrics)*len(scopes)*len(nodes))\n\n\t// Get topology before loop if subCluster given\n\tvar subClusterTopol *schema.SubCluster\n\tvar scterr error\n\tif subCluster != \"\" {\n\t\tsubClusterTopol, scterr = archive.GetSubCluster(cluster, subCluster)\n\t\tif scterr != nil {\n\t\t\tcclog.Errorf(\"could not load cluster %s subCluster %s topology: %s\", cluster, subCluster, scterr.Error())\n\t\t\treturn nil, nil, scterr\n\t\t}\n\t}\n\n\tfor _, metric := range metrics {\n\t\tmc := archive.GetMetricConfig(cluster, metric)\n\t\tif mc == nil {\n\t\t\tcclog.Warnf(\"metric '%s' is not specified for cluster '%s'\", metric, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Skip if metric is removed for subcluster\n\t\tif mc.SubClusters != nil && IsMetricRemovedForSubCluster(mc, subCluster) {\n\t\t\tcontinue\n\t\t}\n\n\t\t// Avoid duplicates...\n\t\thandledScopes := make([]schema.MetricScope, 0, 3)\n\n\tnodeScopesLoop:\n\t\tfor _, requestedScope := range scopes {\n\t\t\tnativeScope := mc.Scope\n\n\t\t\tscope := nativeScope.Max(requestedScope)\n\t\t\tfor _, s := range handledScopes {\n\t\t\t\tif scope == s {\n\t\t\t\t\tcontinue nodeScopesLoop\n\t\t\t\t}\n\t\t\t}\n\t\t\thandledScopes = append(handledScopes, scope)\n\n\t\t\tfor _, hostname := range nodes {\n\n\t\t\t\t// If no subCluster given, get it 
by node\n\t\t\t\tif subCluster == \"\" {\n\t\t\t\t\tsubClusterName, scnerr := archive.GetSubClusterByNode(cluster, hostname)\n\t\t\t\t\tif scnerr != nil {\n\t\t\t\t\t\treturn nil, nil, scnerr\n\t\t\t\t\t}\n\t\t\t\t\tsubClusterTopol, scterr = archive.GetSubCluster(cluster, subClusterName)\n\t\t\t\t\tif scterr != nil {\n\t\t\t\t\t\treturn nil, nil, scterr\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// Always use the full node hwthread ID list; no partial queries expected\n\t\t\t\ttopology := subClusterTopol.Topology\n\t\t\t\tacceleratorIds := topology.GetAcceleratorIDs()\n\n\t\t\t\t// Check that the metric matches the node's hardware: accelerator metrics need accelerators\n\t\t\t\tif nativeScope == schema.MetricScopeAccelerator && len(acceleratorIds) == 0 {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tscopeResults, ok := BuildScopeQueries(\n\t\t\t\t\tnativeScope, requestedScope,\n\t\t\t\t\tmetric, hostname,\n\t\t\t\t\t&topology, topology.Node, acceleratorIds,\n\t\t\t\t)\n\n\t\t\t\tif !ok {\n\t\t\t\t\treturn nil, nil, fmt.Errorf(\"METRICDATA/INTERNAL-CCMS > unsupported scope transformation: native-scope=%s, requested-scope=%s\", nativeScope, requestedScope)\n\t\t\t\t}\n\n\t\t\t\tfor _, sr := range scopeResults {\n\t\t\t\t\tqueries = append(queries, APIQuery{\n\t\t\t\t\t\tMetric: sr.Metric,\n\t\t\t\t\t\tHostname: sr.Hostname,\n\t\t\t\t\t\tAggregate: sr.Aggregate,\n\t\t\t\t\t\tType: sr.Type,\n\t\t\t\t\t\tTypeIds: sr.TypeIds,\n\t\t\t\t\t\tResolution: resolution,\n\t\t\t\t\t})\n\t\t\t\t\tassignedScope = append(assignedScope, sr.Scope)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\treturn queries, assignedScope, nil\n}\n","numLines":866,"startLine":1,"totalLines":866}},"sourceToolAssistantUUID":"44c0db41-7816-4ca1-b2dd-95377289616f"}
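A minimal caller sketch for the loaders read above (assumptions: the memory store and archive are already initialized, job points at a valid *schema.Job, and the metric names and the printJobNodeStats helper are illustrative, not part of the package):

package main

import (
	"context"
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
	"github.com/ClusterCockpit/cc-lib/v2/schema"
)

// printJobNodeStats fetches node-scope avg/min/max for two illustrative
// metrics via LoadStats, which sends WithData=false and therefore skips
// the time-series payload that LoadData would also transfer.
func printJobNodeStats(job *schema.Job) error {
	ccms := &metricstore.InternalMetricStore{}

	stats, err := ccms.LoadStats(job, []string{"cpu_load", "mem_used"}, context.Background())
	if err != nil {
		return err
	}

	// stats maps metric → hostname → statistics; entries whose
	// avg/min/max came back as NaN were dropped inside LoadStats.
	for metric, hosts := range stats {
		for host, s := range hosts {
			fmt.Printf("%s@%s: avg=%.2f min=%.2f max=%.2f\n", metric, host, s.Avg, s.Min, s.Max)
		}
	}
	return nil
}

LoadNodeListData and LoadData differ in one respect worth noting for callers: on partial failure they return both a partially filled map and a non-nil error, so the returned map should be inspected even when err != nil.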
{"parentUuid":"563b2508-b017-44ad-9ac7-5a323a7e1acd","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01FP6GwNbJigmtD6kv2iF2tF","type":"message","role":"assistant","content":[{"type":"thinking","thinking":"Now let me read the remaining files to get a complete picture of the package.","signature":"EooCClsIDBgCKkClDp1nt0W29VbmwidJiD/F47EM+2p63uBAco6b//+Jd+h2ixOj1pTOD/C36dH2KHbqka2j98TVlic/+k1G7SNmMhFjbGF1ZGUtc29ubmV0LTQtNjgAEgypGnphjzcpINU2lPcaDF3eQH5/fFdSc/Mz4SIweVtyXSzEFQj8yk3UyG1OoThHz2U02gz98N8q0LVpbAYdIVJuMovi3yzowj9oKlpjKl06zE/ScyS1JaAzZ/+8rG9SVrPIS0vFnRSx9x7d+iZvPpkkC0BY192ZHfDn2VGDiISI051Nl+zyLNJWssgRLl+22sIZ4MJGJy2ctY3cEXeRi8phDiGargjj2t1M1m0YAQ=="}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":27304,"cache_read_input_tokens":24476,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":27304},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzHnyHeWih653uQT8w","type":"assistant","uuid":"3c60722b-9253-45a9-b9ba-a4c959ff1b98","timestamp":"2026-03-29T19:34:31.257Z"}
{"parentUuid":"3c60722b-9253-45a9-b9ba-a4c959ff1b98","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01FP6GwNbJigmtD6kv2iF2tF","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01AfCNq4gW1rME9agwV5zSZN","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/checkpoint.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":27304,"cache_read_input_tokens":24476,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":27304},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzHnyHeWih653uQT8w","type":"assistant","uuid":"d94c01ef-3ad8-483e-838b-2546190d48cd","timestamp":"2026-03-29T19:34:32.371Z"}
{"parentUuid":"d94c01ef-3ad8-483e-838b-2546190d48cd","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01AfCNq4gW1rME9agwV5zSZN","toolUseID":"toolu_01AfCNq4gW1rME9agwV5zSZN","timestamp":"2026-03-29T19:34:32.439Z","uuid":"82fd24b8-2aaa-4977-9f98-576c07567506"}
{"parentUuid":"d94c01ef-3ad8-483e-838b-2546190d48cd","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01AfCNq4gW1rME9agwV5zSZN","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// This file implements checkpoint persistence for the in-memory metric store.\n 7→//\n 8→// Checkpoints enable graceful restarts by periodically saving in-memory metric\n 9→// data to disk in JSON or binary format. The checkpoint system:\n 10→//\n 11→// Key Features:\n 12→// - Periodic background checkpointing via the Checkpointing() worker\n 13→// - Two format families: JSON (human-readable) and WAL+binary (compact, crash-safe)\n 14→// - Parallel checkpoint creation and loading using worker pools\n 15→// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{json|bin}\n 16→// - WAL file: checkpoint_dir/cluster/host/current.wal (append-only, per-entry)\n 17→// - Only saves unarchived data (archived data is already persisted elsewhere)\n 18→// - GC optimization during loading to prevent excessive heap growth\n 19→//\n 20→// Checkpoint Workflow:\n 21→// 1. Init() loads checkpoints within retention window at startup\n 22→// 2. Checkpointing() worker periodically saves new data\n 23→// 3. Shutdown() writes final checkpoint before exit\n 24→//\n 25→// File Organization:\n 26→//\n 27→//\tcheckpoints/\n 28→//\t cluster1/\n 29→//\t host001/\n 30→//\t 1234567890.json (JSON format: full subtree snapshot)\n 31→//\t 1234567890.bin (binary format: full subtree snapshot)\n 32→//\t current.wal (WAL format: append-only per-entry log)\n 33→//\t host002/\n 34→//\t ...\n 35→package metricstore\n 36→\n 37→import (\n 38→\t\"bufio\"\n 39→\t\"context\"\n 40→\t\"encoding/json\"\n 41→\t\"errors\"\n 42→\t\"fmt\"\n 43→\t\"io/fs\"\n 44→\t\"os\"\n 45→\t\"path\"\n 46→\t\"path/filepath\"\n 47→\t\"sort\"\n 48→\t\"strconv\"\n 49→\t\"strings\"\n 50→\t\"sync\"\n 51→\t\"sync/atomic\"\n 52→\t\"time\"\n 53→\n 54→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 55→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 56→)\n 57→\n 58→const (\n 59→\tCheckpointFilePerms = 0o644 // File permissions for checkpoint files\n 60→\tCheckpointDirPerms = 0o755 // Directory permissions for checkpoint directories\n 61→\tGCTriggerInterval = DefaultGCTriggerInterval // Interval for triggering GC during checkpoint loading\n 62→)\n 63→\n 64→// CheckpointMetrics represents metric data in a checkpoint file.\n 65→// Whenever the structure changes, update MarshalJSON as well!\n 66→type CheckpointMetrics struct {\n 67→\tData []schema.Float `json:\"data\"`\n 68→\tFrequency int64 `json:\"frequency\"`\n 69→\tStart int64 `json:\"start\"`\n 70→}\n 71→\n 72→// CheckpointFile represents the hierarchical structure of a checkpoint file.\n 73→// It mirrors the Level tree structure from the MemoryStore.\n 74→type CheckpointFile struct {\n 75→\tMetrics map[string]*CheckpointMetrics `json:\"metrics\"`\n 76→\tChildren map[string]*CheckpointFile `json:\"children\"`\n 77→\tFrom int64 `json:\"from\"`\n 78→\tTo int64 `json:\"to\"`\n 79→}\n 80→\n 81→// lastCheckpoint tracks the timestamp of the last checkpoint 
creation.\n 82→var (\n 83→\tlastCheckpoint time.Time\n 84→\tlastCheckpointMu sync.Mutex\n 85→)\n 86→\n 87→// Checkpointing starts a background worker that periodically saves metric data to disk.\n 88→//\n 89→// Checkpoints are written every 12 hours by default; Keys.CheckpointInterval overrides the interval.\n 90→//\n 91→// Format behaviour:\n 92→// - \"json\": Periodic checkpointing every checkpointInterval\n 93→// - \"wal\": Periodic binary snapshots + WAL rotation every checkpointInterval\n 94→func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {\n 95→\tlastCheckpointMu.Lock()\n 96→\tlastCheckpoint = time.Now()\n 97→\tlastCheckpointMu.Unlock()\n 98→\n 99→\tms := GetMemoryStore()\n 100→\n 101→\twg.Go(func() {\n 102→\n 103→\t\td := 12 * time.Hour // default checkpoint interval\n 104→\t\tif Keys.CheckpointInterval != \"\" {\n 105→\t\t\tparsed, err := time.ParseDuration(Keys.CheckpointInterval)\n 106→\t\t\tif err != nil {\n 107→\t\t\t\tcclog.Errorf(\"[METRICSTORE]> invalid checkpoint-interval %q: %s, using default 12h\", Keys.CheckpointInterval, err)\n 108→\t\t\t} else {\n 109→\t\t\t\td = parsed\n 110→\t\t\t}\n 111→\t\t}\n 112→\n 113→\t\tticker := time.NewTicker(d)\n 114→\t\tdefer ticker.Stop()\n 115→\n 116→\t\tfor {\n 117→\t\t\tselect {\n 118→\t\t\tcase <-ctx.Done():\n 119→\t\t\t\treturn\n 120→\t\t\tcase <-ticker.C:\n 121→\t\t\t\tlastCheckpointMu.Lock()\n 122→\t\t\t\tfrom := lastCheckpoint\n 123→\t\t\t\tlastCheckpointMu.Unlock()\n 124→\n 125→\t\t\t\tnow := time.Now()\n 126→\t\t\t\tcclog.Infof(\"[METRICSTORE]> start checkpointing (starting at %s)...\", from.Format(time.RFC3339))\n 127→\n 128→\t\t\t\tif Keys.Checkpoints.FileFormat == \"wal\" {\n 129→\t\t\t\t\t// Pause WAL writes: the binary snapshot captures all in-memory\n 130→\t\t\t\t\t// data, so WAL records written during checkpoint are redundant\n 131→\t\t\t\t\t// and would be deleted during rotation anyway.\n 132→\t\t\t\t\twalCheckpointActive.Store(true)\n 133→\t\t\t\t\tn, hostDirs, err := ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())\n 134→\t\t\t\t\tif err != nil {\n 135→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> binary checkpointing failed: %s\", err.Error())\n 136→\t\t\t\t\t}\n 137→\t\t\t\t\tif n > 0 {\n 138→\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d binary snapshot files created\", n)\n 139→\t\t\t\t\t\tlastCheckpointMu.Lock()\n 140→\t\t\t\t\t\tlastCheckpoint = now\n 141→\t\t\t\t\t\tlastCheckpointMu.Unlock()\n 142→\t\t\t\t\t\t// Rotate WAL files for successfully checkpointed hosts.\n 143→\t\t\t\t\t\tRotateWALFiles(hostDirs)\n 144→\t\t\t\t\t}\n 145→\t\t\t\t\twalCheckpointActive.Store(false)\n 146→\t\t\t\t\twalDropped.Store(0)\n 147→\t\t\t\t} else {\n 148→\t\t\t\t\tn, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())\n 149→\t\t\t\t\tif err != nil {\n 150→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> checkpointing failed: %s\", err.Error())\n 151→\t\t\t\t\t} else {\n 152→\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoint files created\", n)\n 153→\t\t\t\t\t\tlastCheckpointMu.Lock()\n 154→\t\t\t\t\t\tlastCheckpoint = now\n 155→\t\t\t\t\t\tlastCheckpointMu.Unlock()\n 156→\t\t\t\t\t}\n 157→\t\t\t\t}\n 158→\t\t\t}\n 159→\t\t}\n 160→\t})\n 161→}\n 162→\n 163→// MarshalJSON provides optimized JSON encoding for CheckpointMetrics.\n 164→//\n 165→// Since schema.Float has custom MarshalJSON, serializing []Float has significant overhead.\n 166→// This method manually constructs JSON to avoid allocations and interface conversions.\n 167→func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {\n 168→\tbuf := make([]byte, 
0, 128+len(cm.Data)*8)\n 169→\tbuf = append(buf, `{\"frequency\":`...)\n 170→\tbuf = strconv.AppendInt(buf, cm.Frequency, 10)\n 171→\tbuf = append(buf, `,\"start\":`...)\n 172→\tbuf = strconv.AppendInt(buf, cm.Start, 10)\n 173→\tbuf = append(buf, `,\"data\":[`...)\n 174→\tfor i, x := range cm.Data {\n 175→\t\tif i != 0 {\n 176→\t\t\tbuf = append(buf, ',')\n 177→\t\t}\n 178→\t\tif x.IsNaN() {\n 179→\t\t\tbuf = append(buf, `null`...)\n 180→\t\t} else {\n 181→\t\t\tbuf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)\n 182→\t\t}\n 183→\t}\n 184→\tbuf = append(buf, `]}`...)\n 185→\treturn buf, nil\n 186→}\n 187→\n 188→// ToCheckpoint writes metric data to checkpoint files in parallel (JSON format).\n 189→//\n 190→// Metrics at root and cluster levels are skipped. One file per host is created.\n 191→// Uses worker pool (Keys.NumWorkers) for parallel processing. Only locks one host\n 192→// at a time, allowing concurrent writes/reads to other hosts.\n 193→//\n 194→// Returns the number of checkpoint files created and any errors encountered.\n 195→func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {\n 196→\t// Pre-calculate capacity by counting cluster/host pairs\n 197→\tm.root.lock.RLock()\n 198→\ttotalHosts := 0\n 199→\tfor _, l1 := range m.root.children {\n 200→\t\tl1.lock.RLock()\n 201→\t\ttotalHosts += len(l1.children)\n 202→\t\tl1.lock.RUnlock()\n 203→\t}\n 204→\tm.root.lock.RUnlock()\n 205→\n 206→\tlevels := make([]*Level, 0, totalHosts)\n 207→\tselectors := make([][]string, 0, totalHosts)\n 208→\n 209→\tm.root.lock.RLock()\n 210→\tfor sel1, l1 := range m.root.children {\n 211→\t\tl1.lock.RLock()\n 212→\t\tfor sel2, l2 := range l1.children {\n 213→\t\t\tlevels = append(levels, l2)\n 214→\t\t\tselectors = append(selectors, []string{sel1, sel2})\n 215→\t\t}\n 216→\t\tl1.lock.RUnlock()\n 217→\t}\n 218→\tm.root.lock.RUnlock()\n 219→\n 220→\ttype workItem struct {\n 221→\t\tlevel *Level\n 222→\t\tdir string\n 223→\t\tselector []string\n 224→\t}\n 225→\n 226→\tn, errs := int32(0), int32(0)\n 227→\n 228→\tvar wg sync.WaitGroup\n 229→\twg.Add(Keys.NumWorkers)\n 230→\twork := make(chan workItem, Keys.NumWorkers*2)\n 231→\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n 232→\t\tgo func() {\n 233→\t\t\tdefer wg.Done()\n 234→\n 235→\t\t\tfor workItem := range work {\n 236→\t\t\t\tif err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {\n 237→\t\t\t\t\tif err == ErrNoNewArchiveData {\n 238→\t\t\t\t\t\tcontinue\n 239→\t\t\t\t\t}\n 240→\n 241→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while checkpointing %#v: %s\", workItem.selector, err.Error())\n 242→\t\t\t\t\tatomic.AddInt32(&errs, 1)\n 243→\t\t\t\t} else {\n 244→\t\t\t\t\tatomic.AddInt32(&n, 1)\n 245→\t\t\t\t}\n 246→\t\t\t}\n 247→\t\t}()\n 248→\t}\n 249→\n 250→\tfor i := 0; i < len(levels); i++ {\n 251→\t\tdir := path.Join(dir, path.Join(selectors[i]...))\n 252→\t\twork <- workItem{\n 253→\t\t\tlevel: levels[i],\n 254→\t\t\tdir: dir,\n 255→\t\t\tselector: selectors[i],\n 256→\t\t}\n 257→\t}\n 258→\n 259→\tclose(work)\n 260→\twg.Wait()\n 261→\n 262→\tif errs > 0 {\n 263→\t\treturn int(n), fmt.Errorf(\"[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)\", errs, n)\n 264→\t}\n 265→\treturn int(n), nil\n 266→}\n 267→\n 268→// toCheckpointFile recursively converts a Level tree to CheckpointFile structure.\n 269→// Skips metrics that are already archived. 
Returns nil if no unarchived data exists.\n 270→func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {\n 271→\tl.lock.RLock()\n 272→\tdefer l.lock.RUnlock()\n 273→\n 274→\tretval := &CheckpointFile{\n 275→\t\tFrom: from,\n 276→\t\tTo: to,\n 277→\t\tMetrics: make(map[string]*CheckpointMetrics),\n 278→\t\tChildren: make(map[string]*CheckpointFile),\n 279→\t}\n 280→\n 281→\tfor metric, minfo := range m.Metrics {\n 282→\t\tb := l.metrics[minfo.offset]\n 283→\t\tif b == nil {\n 284→\t\t\tcontinue\n 285→\t\t}\n 286→\n 287→\t\tallArchived := true\n 288→\t\tb.iterFromTo(from, to, func(b *buffer) error {\n 289→\t\t\tif !b.archived {\n 290→\t\t\t\tallArchived = false\n 291→\t\t\t\treturn fmt.Errorf(\"stop\") // Early termination signal\n 292→\t\t\t}\n 293→\t\t\treturn nil\n 294→\t\t})\n 295→\n 296→\t\tif allArchived {\n 297→\t\t\tcontinue\n 298→\t\t}\n 299→\n 300→\t\tdata := make([]schema.Float, (to-from)/b.frequency+1)\n 301→\t\tdata, start, end, err := b.read(from, to, data)\n 302→\t\tif err != nil {\n 303→\t\t\treturn nil, err\n 304→\t\t}\n 305→\n 306→\t\tfor i := int((end - start) / b.frequency); i < len(data); i++ {\n 307→\t\t\tdata[i] = schema.NaN\n 308→\t\t}\n 309→\n 310→\t\tretval.Metrics[metric] = &CheckpointMetrics{\n 311→\t\t\tFrequency: b.frequency,\n 312→\t\t\tStart: start,\n 313→\t\t\tData: data,\n 314→\t\t}\n 315→\t}\n 316→\n 317→\tfor name, child := range l.children {\n 318→\t\tval, err := child.toCheckpointFile(from, to, m)\n 319→\t\tif err != nil {\n 320→\t\t\treturn nil, err\n 321→\t\t}\n 322→\n 323→\t\tif val != nil {\n 324→\t\t\tretval.Children[name] = val\n 325→\t\t}\n 326→\t}\n 327→\n 328→\tif len(retval.Children) == 0 && len(retval.Metrics) == 0 {\n 329→\t\treturn nil, nil\n 330→\t}\n 331→\n 332→\treturn retval, nil\n 333→}\n 334→\n 335→// toCheckpoint writes a Level's data to a JSON checkpoint file.\n 336→// Creates directory if needed. 
Returns ErrNoNewArchiveData if nothing to save.\n 337→func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {\n 338→\tcf, err := l.toCheckpointFile(from, to, m)\n 339→\tif err != nil {\n 340→\t\treturn err\n 341→\t}\n 342→\n 343→\tif cf == nil {\n 344→\t\treturn ErrNoNewArchiveData\n 345→\t}\n 346→\n 347→\tfilepath := path.Join(dir, fmt.Sprintf(\"%d.json\", from))\n 348→\tf, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n 349→\tif err != nil && os.IsNotExist(err) {\n 350→\t\terr = os.MkdirAll(dir, CheckpointDirPerms)\n 351→\t\tif err == nil {\n 352→\t\t\tf, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n 353→\t\t}\n 354→\t}\n 355→\tif err != nil {\n 356→\t\treturn err\n 357→\t}\n 358→\tdefer f.Close()\n 359→\n 360→\tbw := bufio.NewWriter(f)\n 361→\tif err = json.NewEncoder(bw).Encode(cf); err != nil {\n 362→\t\treturn err\n 363→\t}\n 364→\n 365→\treturn bw.Flush()\n 366→}\n 367→\n 368→// enqueueCheckpointHosts traverses checkpoint directory and enqueues cluster/host pairs.\n 369→// Returns error if directory structure is invalid.\n 370→func enqueueCheckpointHosts(dir string, work chan<- [2]string) error {\n 371→\tclustersDir, err := os.ReadDir(dir)\n 372→\tif err != nil {\n 373→\t\treturn err\n 374→\t}\n 375→\n 376→\tfor _, clusterDir := range clustersDir {\n 377→\t\tif !clusterDir.IsDir() {\n 378→\t\t\treturn errors.New(\"[METRICSTORE]> expected only directories at first level of checkpoints/ directory\")\n 379→\t\t}\n 380→\n 381→\t\thostsDir, err := os.ReadDir(filepath.Join(dir, clusterDir.Name()))\n 382→\t\tif err != nil {\n 383→\t\t\treturn err\n 384→\t\t}\n 385→\n 386→\t\tfor _, hostDir := range hostsDir {\n 387→\t\t\tif !hostDir.IsDir() {\n 388→\t\t\t\treturn errors.New(\"[METRICSTORE]> expected only directories at second level of checkpoints/ directory\")\n 389→\t\t\t}\n 390→\n 391→\t\t\twork <- [2]string{clusterDir.Name(), hostDir.Name()}\n 392→\t\t}\n 393→\t}\n 394→\n 395→\treturn nil\n 396→}\n 397→\n 398→// FromCheckpoint loads checkpoint files from disk into memory in parallel.\n 399→//\n 400→// Uses worker pool to load cluster/host combinations. 
Returns number of files\n 401→// loaded and any errors.\n 402→func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {\n 403→\tvar wg sync.WaitGroup\n 404→\twork := make(chan [2]string, Keys.NumWorkers*4)\n 405→\tn, errs := int32(0), int32(0)\n 406→\n 407→\twg.Add(Keys.NumWorkers)\n 408→\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n 409→\t\tgo func() {\n 410→\t\t\tdefer wg.Done()\n 411→\t\t\tfor host := range work {\n 412→\t\t\t\tlvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))\n 413→\t\t\t\tnn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from)\n 414→\t\t\t\tif err != nil {\n 415→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while loading checkpoints for %s/%s: %s\", host[0], host[1], err.Error())\n 416→\t\t\t\t\tatomic.AddInt32(&errs, 1)\n 417→\t\t\t\t}\n 418→\t\t\t\tatomic.AddInt32(&n, int32(nn))\n 419→\t\t\t}\n 420→\t\t}()\n 421→\t}\n 422→\n 423→\terr := enqueueCheckpointHosts(dir, work)\n 424→\tclose(work)\n 425→\twg.Wait()\n 426→\n 427→\tif err != nil {\n 428→\t\treturn int(n), err\n 429→\t}\n 430→\n 431→\tif errs > 0 {\n 432→\t\treturn int(n), fmt.Errorf(\"[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)\", errs, n)\n 433→\t}\n 434→\treturn int(n), nil\n 435→}\n 436→\n 437→// FromCheckpointFiles is the main entry point for loading checkpoints at startup.\n 438→//\n 439→// Creates checkpoint directory if it doesn't exist. This function must be called\n 440→// before any writes or reads, and can only be called once.\n 441→func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {\n 442→\tif _, err := os.Stat(dir); os.IsNotExist(err) {\n 443→\t\terr := os.MkdirAll(dir, CheckpointDirPerms)\n 444→\t\tif err != nil {\n 445→\t\t\tcclog.Fatalf(\"[METRICSTORE]> Error creating directory: %#v\\n\", err)\n 446→\t\t}\n 447→\t\tcclog.Debugf(\"[METRICSTORE]> %#v Directory created successfully\", dir)\n 448→\t}\n 449→\n 450→\treturn m.FromCheckpoint(dir, from)\n 451→}\n 452→\n 453→func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {\n 454→\tbr := bufio.NewReader(f)\n 455→\tcf := &CheckpointFile{}\n 456→\tif err := json.NewDecoder(br).Decode(cf); err != nil {\n 457→\t\treturn err\n 458→\t}\n 459→\n 460→\tif cf.To != 0 && cf.To < from {\n 461→\t\treturn nil\n 462→\t}\n 463→\n 464→\tif err := l.loadFile(cf, m); err != nil {\n 465→\t\treturn err\n 466→\t}\n 467→\n 468→\treturn nil\n 469→}\n 470→\n 471→func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {\n 472→\tfor name, metric := range cf.Metrics {\n 473→\t\tn := len(metric.Data)\n 474→\t\tb := &buffer{\n 475→\t\t\tfrequency: metric.Frequency,\n 476→\t\t\tstart: metric.Start,\n 477→\t\t\tdata: metric.Data[0:n:n],\n 478→\t\t\tprev: nil,\n 479→\t\t\tnext: nil,\n 480→\t\t\tarchived: true,\n 481→\t\t}\n 482→\n 483→\t\tminfo, ok := m.Metrics[name]\n 484→\t\tif !ok {\n 485→\t\t\tcontinue\n 486→\t\t}\n 487→\n 488→\t\tprev := l.metrics[minfo.offset]\n 489→\t\tif prev == nil {\n 490→\t\t\tl.metrics[minfo.offset] = b\n 491→\t\t} else {\n 492→\t\t\tif prev.start > b.start {\n 493→\t\t\t\treturn fmt.Errorf(\"[METRICSTORE]> buffer start time %d is before previous buffer start %d\", b.start, prev.start)\n 494→\t\t\t}\n 495→\n 496→\t\t\tb.prev = prev\n 497→\t\t\tprev.next = b\n 498→\t\t}\n 499→\t\tl.metrics[minfo.offset] = b\n 500→\t}\n 501→\n 502→\tif len(cf.Children) > 0 && l.children == nil {\n 503→\t\tl.children = make(map[string]*Level)\n 504→\t}\n 505→\n 506→\tfor sel, childCf := range cf.Children {\n 
507→\t\tchild, ok := l.children[sel]\n 508→\t\tif !ok {\n 509→\t\t\tchild = &Level{\n 510→\t\t\t\tmetrics: make([]*buffer, len(m.Metrics)),\n 511→\t\t\t\tchildren: nil,\n 512→\t\t\t}\n 513→\t\t\tl.children[sel] = child\n 514→\t\t}\n 515→\n 516→\t\tif err := child.loadFile(childCf, m); err != nil {\n 517→\t\t\treturn err\n 518→\t\t}\n 519→\t}\n 520→\n 521→\treturn nil\n 522→}\n 523→\n 524→// fromCheckpoint loads all checkpoint files (JSON, binary snapshot, WAL) for a\n 525→// single host directory. Snapshot files are loaded first (sorted by timestamp),\n 526→// then current.wal is replayed on top.\n 527→func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, error) {\n 528→\tdirentries, err := os.ReadDir(dir)\n 529→\tif err != nil {\n 530→\t\tif os.IsNotExist(err) {\n 531→\t\t\treturn 0, nil\n 532→\t\t}\n 533→\t\treturn 0, err\n 534→\t}\n 535→\n 536→\tallFiles := make([]fs.DirEntry, 0)\n 537→\tvar walEntry fs.DirEntry\n 538→\tfilesLoaded := 0\n 539→\n 540→\tfor _, e := range direntries {\n 541→\t\tif e.IsDir() {\n 542→\t\t\t// Legacy: skip subdirectories (only used by old Avro format).\n 543→\t\t\t// These are ignored; their data is not loaded.\n 544→\t\t\tcclog.Debugf(\"[METRICSTORE]> skipping subdirectory %s in checkpoint dir %s\", e.Name(), dir)\n 545→\t\t\tcontinue\n 546→\t\t}\n 547→\n 548→\t\tname := e.Name()\n 549→\t\tif strings.HasSuffix(name, \".json\") || strings.HasSuffix(name, \".bin\") {\n 550→\t\t\tallFiles = append(allFiles, e)\n 551→\t\t} else if name == \"current.wal\" {\n 552→\t\t\twalEntry = e\n 553→\t\t}\n 554→\t\t// Silently ignore other files (e.g., .tmp, .bin.tmp from interrupted writes).\n 555→\t}\n 556→\n 557→\tfiles, err := findFiles(allFiles, from, true)\n 558→\tif err != nil {\n 559→\t\treturn filesLoaded, err\n 560→\t}\n 561→\n 562→\tloaders := map[string]func(*MemoryStore, *os.File, int64) error{\n 563→\t\t\".json\": l.loadJSONFile,\n 564→\t\t\".bin\": l.loadBinaryFile,\n 565→\t}\n 566→\n 567→\tfor _, filename := range files {\n 568→\t\text := filepath.Ext(filename)\n 569→\t\tloader := loaders[ext]\n 570→\t\tif loader == nil {\n 571→\t\t\tcclog.Warnf(\"[METRICSTORE]> unknown extension for checkpoint file %s\", filename)\n 572→\t\t\tcontinue\n 573→\t\t}\n 574→\n 575→\t\terr := func() error {\n 576→\t\t\tf, err := os.Open(path.Join(dir, filename))\n 577→\t\t\tif err != nil {\n 578→\t\t\t\treturn err\n 579→\t\t\t}\n 580→\t\t\tdefer f.Close()\n 581→\t\t\treturn loader(m, f, from)\n 582→\t\t}()\n 583→\t\tif err != nil {\n 584→\t\t\treturn filesLoaded, err\n 585→\t\t}\n 586→\t\tfilesLoaded++\n 587→\t}\n 588→\n 589→\t// Replay WAL after all snapshot files so it fills in data since the last snapshot.\n 590→\tif walEntry != nil {\n 591→\t\terr := func() error {\n 592→\t\t\tf, err := os.Open(path.Join(dir, walEntry.Name()))\n 593→\t\t\tif err != nil {\n 594→\t\t\t\treturn err\n 595→\t\t\t}\n 596→\t\t\tdefer f.Close()\n 597→\t\t\treturn l.loadWALFile(m, f, from)\n 598→\t\t}()\n 599→\t\tif err != nil {\n 600→\t\t\t// WAL errors are non-fatal: the snapshot already loaded the bulk of data.\n 601→\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL replay error for %s: %v (data since last snapshot may be missing)\", dir, err)\n 602→\t\t} else {\n 603→\t\t\tfilesLoaded++\n 604→\t\t}\n 605→\t}\n 606→\n 607→\treturn filesLoaded, nil\n 608→}\n 609→\n 610→// parseTimestampFromFilename extracts a Unix timestamp from a checkpoint filename.\n 611→// Supports \".json\" (format: \".json\") and \".bin\" (format: \".bin\").\n 612→func parseTimestampFromFilename(name string) (int64, 
error) {\n 613→\tswitch {\n 614→\tcase strings.HasSuffix(name, \".json\"):\n 615→\t\treturn strconv.ParseInt(name[:len(name)-5], 10, 64)\n 616→\tcase strings.HasSuffix(name, \".bin\"):\n 617→\t\treturn strconv.ParseInt(name[:len(name)-4], 10, 64)\n 618→\tdefault:\n 619→\t\treturn 0, fmt.Errorf(\"unknown checkpoint extension for file %q\", name)\n 620→\t}\n 621→}\n 622→\n 623→// findFiles returns filenames from direntries whose timestamps satisfy the filter.\n 624→// If findMoreRecentFiles is true, returns files with timestamps >= t (plus the\n 625→// last file before t if t falls between two files).\n 626→func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {\n 627→\tnums := map[string]int64{}\n 628→\tfor _, e := range direntries {\n 629→\t\tname := e.Name()\n 630→\t\tif !strings.HasSuffix(name, \".json\") && !strings.HasSuffix(name, \".bin\") {\n 631→\t\t\tcontinue\n 632→\t\t}\n 633→\n 634→\t\tts, err := parseTimestampFromFilename(name)\n 635→\t\tif err != nil {\n 636→\t\t\treturn nil, err\n 637→\t\t}\n 638→\t\tnums[name] = ts\n 639→\t}\n 640→\n 641→\tsort.Slice(direntries, func(i, j int) bool {\n 642→\t\ta, b := direntries[i], direntries[j]\n 643→\t\treturn nums[a.Name()] < nums[b.Name()]\n 644→\t})\n 645→\n 646→\tif len(nums) == 0 {\n 647→\t\treturn nil, nil\n 648→\t}\n 649→\n 650→\tfilenames := make([]string, 0)\n 651→\n 652→\tfor i, e := range direntries {\n 653→\t\tts1 := nums[e.Name()]\n 654→\n 655→\t\tif findMoreRecentFiles && t <= ts1 {\n 656→\t\t\tfilenames = append(filenames, e.Name())\n 657→\t\t} else if !findMoreRecentFiles && ts1 <= t && ts1 != 0 {\n 658→\t\t\tfilenames = append(filenames, e.Name())\n 659→\t\t}\n 660→\n 661→\t\tif i == len(direntries)-1 {\n 662→\t\t\tcontinue\n 663→\t\t}\n 664→\n 665→\t\tenext := direntries[i+1]\n 666→\t\tts2 := nums[enext.Name()]\n 667→\n 668→\t\tif findMoreRecentFiles {\n 669→\t\t\tif ts1 < t && t < ts2 {\n 670→\t\t\t\tfilenames = append(filenames, e.Name())\n 671→\t\t\t}\n 672→\t\t}\n 673→\t}\n 674→\n 675→\treturn filenames, nil\n 676→}\n 677→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"fc88719e-7baa-4c50-82e2-8c0d467422ee","timestamp":"2026-03-29T19:34:32.436Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/checkpoint.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file implements checkpoint persistence for the in-memory metric store.\n//\n// Checkpoints enable graceful restarts by periodically saving in-memory metric\n// data to disk in JSON or binary format. 
The checkpoint system:\n//\n// Key Features:\n// - Periodic background checkpointing via the Checkpointing() worker\n// - Two format families: JSON (human-readable) and WAL+binary (compact, crash-safe)\n// - Parallel checkpoint creation and loading using worker pools\n// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{json|bin}\n// - WAL file: checkpoint_dir/cluster/host/current.wal (append-only, per-entry)\n// - Only saves unarchived data (archived data is already persisted elsewhere)\n// - GC optimization during loading to prevent excessive heap growth\n//\n// Checkpoint Workflow:\n// 1. Init() loads checkpoints within retention window at startup\n// 2. Checkpointing() worker periodically saves new data\n// 3. Shutdown() writes final checkpoint before exit\n//\n// File Organization:\n//\n//\tcheckpoints/\n//\t cluster1/\n//\t host001/\n//\t 1234567890.json (JSON format: full subtree snapshot)\n//\t 1234567890.bin (binary format: full subtree snapshot)\n//\t current.wal (WAL format: append-only per-entry log)\n//\t host002/\n//\t ...\npackage metricstore\n\nimport (\n\t\"bufio\"\n\t\"context\"\n\t\"encoding/json\"\n\t\"errors\"\n\t\"fmt\"\n\t\"io/fs\"\n\t\"os\"\n\t\"path\"\n\t\"path/filepath\"\n\t\"sort\"\n\t\"strconv\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\nconst (\n\tCheckpointFilePerms = 0o644 // File permissions for checkpoint files\n\tCheckpointDirPerms = 0o755 // Directory permissions for checkpoint directories\n\tGCTriggerInterval = DefaultGCTriggerInterval // Interval for triggering GC during checkpoint loading\n)\n\n// CheckpointMetrics represents metric data in a checkpoint file.\n// Whenever the structure changes, update MarshalJSON as well!\ntype CheckpointMetrics struct {\n\tData []schema.Float `json:\"data\"`\n\tFrequency int64 `json:\"frequency\"`\n\tStart int64 `json:\"start\"`\n}\n\n// CheckpointFile represents the hierarchical structure of a checkpoint file.\n// It mirrors the Level tree structure from the MemoryStore.\ntype CheckpointFile struct {\n\tMetrics map[string]*CheckpointMetrics `json:\"metrics\"`\n\tChildren map[string]*CheckpointFile `json:\"children\"`\n\tFrom int64 `json:\"from\"`\n\tTo int64 `json:\"to\"`\n}\n\n// lastCheckpoint tracks the timestamp of the last checkpoint creation.\nvar (\n\tlastCheckpoint time.Time\n\tlastCheckpointMu sync.Mutex\n)\n\n// Checkpointing starts a background worker that periodically saves metric data to disk.\n//\n// Checkpoints are written every 12 hours (hardcoded).\n//\n// Format behaviour:\n// - \"json\": Periodic checkpointing every checkpointInterval\n// - \"wal\": Periodic binary snapshots + WAL rotation every checkpointInterval\nfunc Checkpointing(wg *sync.WaitGroup, ctx context.Context) {\n\tlastCheckpointMu.Lock()\n\tlastCheckpoint = time.Now()\n\tlastCheckpointMu.Unlock()\n\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\n\t\td := 12 * time.Hour // default checkpoint interval\n\t\tif Keys.CheckpointInterval != \"\" {\n\t\t\tparsed, err := time.ParseDuration(Keys.CheckpointInterval)\n\t\t\tif err != nil {\n\t\t\t\tcclog.Errorf(\"[METRICSTORE]> invalid checkpoint-interval %q: %s, using default 12h\", Keys.CheckpointInterval, err)\n\t\t\t} else {\n\t\t\t\td = parsed\n\t\t\t}\n\t\t}\n\n\t\tticker := time.NewTicker(d)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase 
<-ticker.C:\n\t\t\t\tlastCheckpointMu.Lock()\n\t\t\t\tfrom := lastCheckpoint\n\t\t\t\tlastCheckpointMu.Unlock()\n\n\t\t\t\tnow := time.Now()\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start checkpointing (starting at %s)...\", from.Format(time.RFC3339))\n\n\t\t\t\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t\t\t\t// Pause WAL writes: the binary snapshot captures all in-memory\n\t\t\t\t\t// data, so WAL records written during checkpoint are redundant\n\t\t\t\t\t// and would be deleted during rotation anyway.\n\t\t\t\t\twalCheckpointActive.Store(true)\n\t\t\t\t\tn, hostDirs, err := ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> binary checkpointing failed: %s\", err.Error())\n\t\t\t\t\t}\n\t\t\t\t\tif n > 0 {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d binary snapshot files created\", n)\n\t\t\t\t\t\tlastCheckpointMu.Lock()\n\t\t\t\t\t\tlastCheckpoint = now\n\t\t\t\t\t\tlastCheckpointMu.Unlock()\n\t\t\t\t\t\t// Rotate WAL files for successfully checkpointed hosts.\n\t\t\t\t\t\tRotateWALFiles(hostDirs)\n\t\t\t\t\t}\n\t\t\t\t\twalCheckpointActive.Store(false)\n\t\t\t\t\twalDropped.Store(0)\n\t\t\t\t} else {\n\t\t\t\t\tn, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> checkpointing failed: %s\", err.Error())\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoint files created\", n)\n\t\t\t\t\t\tlastCheckpointMu.Lock()\n\t\t\t\t\t\tlastCheckpoint = now\n\t\t\t\t\t\tlastCheckpointMu.Unlock()\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\n// MarshalJSON provides optimized JSON encoding for CheckpointMetrics.\n//\n// Since schema.Float has custom MarshalJSON, serializing []Float has significant overhead.\n// This method manually constructs JSON to avoid allocations and interface conversions.\nfunc (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {\n\tbuf := make([]byte, 0, 128+len(cm.Data)*8)\n\tbuf = append(buf, `{\"frequency\":`...)\n\tbuf = strconv.AppendInt(buf, cm.Frequency, 10)\n\tbuf = append(buf, `,\"start\":`...)\n\tbuf = strconv.AppendInt(buf, cm.Start, 10)\n\tbuf = append(buf, `,\"data\":[`...)\n\tfor i, x := range cm.Data {\n\t\tif i != 0 {\n\t\t\tbuf = append(buf, ',')\n\t\t}\n\t\tif x.IsNaN() {\n\t\t\tbuf = append(buf, `null`...)\n\t\t} else {\n\t\t\tbuf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)\n\t\t}\n\t}\n\tbuf = append(buf, `]}`...)\n\treturn buf, nil\n}\n\n// ToCheckpoint writes metric data to checkpoint files in parallel (JSON format).\n//\n// Metrics at root and cluster levels are skipped. One file per host is created.\n// Uses worker pool (Keys.NumWorkers) for parallel processing. 
Only locks one host\n// at a time, allowing concurrent writes/reads to other hosts.\n//\n// Returns the number of checkpoint files created and any errors encountered.\nfunc (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {\n\t// Pre-calculate capacity by counting cluster/host pairs\n\tm.root.lock.RLock()\n\ttotalHosts := 0\n\tfor _, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\ttotalHosts += len(l1.children)\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\tlevels := make([]*Level, 0, totalHosts)\n\tselectors := make([][]string, 0, totalHosts)\n\n\tm.root.lock.RLock()\n\tfor sel1, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\tfor sel2, l2 := range l1.children {\n\t\t\tlevels = append(levels, l2)\n\t\t\tselectors = append(selectors, []string{sel1, sel2})\n\t\t}\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\ttype workItem struct {\n\t\tlevel *Level\n\t\tdir string\n\t\tselector []string\n\t}\n\n\tn, errs := int32(0), int32(0)\n\n\tvar wg sync.WaitGroup\n\twg.Add(Keys.NumWorkers)\n\twork := make(chan workItem, Keys.NumWorkers*2)\n\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\n\t\t\tfor workItem := range work {\n\t\t\t\tif err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {\n\t\t\t\t\tif err == ErrNoNewArchiveData {\n\t\t\t\t\t\tcontinue\n\t\t\t\t\t}\n\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while checkpointing %#v: %s\", workItem.selector, err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t} else {\n\t\t\t\t\tatomic.AddInt32(&n, 1)\n\t\t\t\t}\n\t\t\t}\n\t\t}()\n\t}\n\n\tfor i := 0; i < len(levels); i++ {\n\t\tdir := path.Join(dir, path.Join(selectors[i]...))\n\t\twork <- workItem{\n\t\t\tlevel: levels[i],\n\t\t\tdir: dir,\n\t\t\tselector: selectors[i],\n\t\t}\n\t}\n\n\tclose(work)\n\twg.Wait()\n\n\tif errs > 0 {\n\t\treturn int(n), fmt.Errorf(\"[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)\", errs, n)\n\t}\n\treturn int(n), nil\n}\n\n// toCheckpointFile recursively converts a Level tree to CheckpointFile structure.\n// Skips metrics that are already archived. 
Returns nil if no unarchived data exists.\nfunc (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {\n\tl.lock.RLock()\n\tdefer l.lock.RUnlock()\n\n\tretval := &CheckpointFile{\n\t\tFrom: from,\n\t\tTo: to,\n\t\tMetrics: make(map[string]*CheckpointMetrics),\n\t\tChildren: make(map[string]*CheckpointFile),\n\t}\n\n\tfor metric, minfo := range m.Metrics {\n\t\tb := l.metrics[minfo.offset]\n\t\tif b == nil {\n\t\t\tcontinue\n\t\t}\n\n\t\tallArchived := true\n\t\tb.iterFromTo(from, to, func(b *buffer) error {\n\t\t\tif !b.archived {\n\t\t\t\tallArchived = false\n\t\t\t\treturn fmt.Errorf(\"stop\") // Early termination signal\n\t\t\t}\n\t\t\treturn nil\n\t\t})\n\n\t\tif allArchived {\n\t\t\tcontinue\n\t\t}\n\n\t\tdata := make([]schema.Float, (to-from)/b.frequency+1)\n\t\tdata, start, end, err := b.read(from, to, data)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\tfor i := int((end - start) / b.frequency); i < len(data); i++ {\n\t\t\tdata[i] = schema.NaN\n\t\t}\n\n\t\tretval.Metrics[metric] = &CheckpointMetrics{\n\t\t\tFrequency: b.frequency,\n\t\t\tStart: start,\n\t\t\tData: data,\n\t\t}\n\t}\n\n\tfor name, child := range l.children {\n\t\tval, err := child.toCheckpointFile(from, to, m)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\tif val != nil {\n\t\t\tretval.Children[name] = val\n\t\t}\n\t}\n\n\tif len(retval.Children) == 0 && len(retval.Metrics) == 0 {\n\t\treturn nil, nil\n\t}\n\n\treturn retval, nil\n}\n\n// toCheckpoint writes a Level's data to a JSON checkpoint file.\n// Creates directory if needed. Returns ErrNoNewArchiveData if nothing to save.\nfunc (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {\n\tcf, err := l.toCheckpointFile(from, to, m)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\tif cf == nil {\n\t\treturn ErrNoNewArchiveData\n\t}\n\n\tfilepath := path.Join(dir, fmt.Sprintf(\"%d.json\", from))\n\tf, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n\tif err != nil && os.IsNotExist(err) {\n\t\terr = os.MkdirAll(dir, CheckpointDirPerms)\n\t\tif err == nil {\n\t\t\tf, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n\t\t}\n\t}\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer f.Close()\n\n\tbw := bufio.NewWriter(f)\n\tif err = json.NewEncoder(bw).Encode(cf); err != nil {\n\t\treturn err\n\t}\n\n\treturn bw.Flush()\n}\n\n// enqueueCheckpointHosts traverses checkpoint directory and enqueues cluster/host pairs.\n// Returns error if directory structure is invalid.\nfunc enqueueCheckpointHosts(dir string, work chan<- [2]string) error {\n\tclustersDir, err := os.ReadDir(dir)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\tfor _, clusterDir := range clustersDir {\n\t\tif !clusterDir.IsDir() {\n\t\t\treturn errors.New(\"[METRICSTORE]> expected only directories at first level of checkpoints/ directory\")\n\t\t}\n\n\t\thostsDir, err := os.ReadDir(filepath.Join(dir, clusterDir.Name()))\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\tfor _, hostDir := range hostsDir {\n\t\t\tif !hostDir.IsDir() {\n\t\t\t\treturn errors.New(\"[METRICSTORE]> expected only directories at second level of checkpoints/ directory\")\n\t\t\t}\n\n\t\t\twork <- [2]string{clusterDir.Name(), hostDir.Name()}\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// FromCheckpoint loads checkpoint files from disk into memory in parallel.\n//\n// Uses worker pool to load cluster/host combinations. 
Returns number of files\n// loaded and any errors.\nfunc (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {\n\tvar wg sync.WaitGroup\n\twork := make(chan [2]string, Keys.NumWorkers*4)\n\tn, errs := int32(0), int32(0)\n\n\twg.Add(Keys.NumWorkers)\n\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tfor host := range work {\n\t\t\t\tlvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))\n\t\t\t\tnn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while loading checkpoints for %s/%s: %s\", host[0], host[1], err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t}\n\t\t\t\tatomic.AddInt32(&n, int32(nn))\n\t\t\t}\n\t\t}()\n\t}\n\n\terr := enqueueCheckpointHosts(dir, work)\n\tclose(work)\n\twg.Wait()\n\n\tif err != nil {\n\t\treturn int(n), err\n\t}\n\n\tif errs > 0 {\n\t\treturn int(n), fmt.Errorf(\"[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)\", errs, n)\n\t}\n\treturn int(n), nil\n}\n\n// FromCheckpointFiles is the main entry point for loading checkpoints at startup.\n//\n// Creates checkpoint directory if it doesn't exist. This function must be called\n// before any writes or reads, and can only be called once.\nfunc (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {\n\tif _, err := os.Stat(dir); os.IsNotExist(err) {\n\t\terr := os.MkdirAll(dir, CheckpointDirPerms)\n\t\tif err != nil {\n\t\t\tcclog.Fatalf(\"[METRICSTORE]> Error creating directory: %#v\\n\", err)\n\t\t}\n\t\tcclog.Debugf(\"[METRICSTORE]> %#v Directory created successfully\", dir)\n\t}\n\n\treturn m.FromCheckpoint(dir, from)\n}\n\nfunc (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {\n\tbr := bufio.NewReader(f)\n\tcf := &CheckpointFile{}\n\tif err := json.NewDecoder(br).Decode(cf); err != nil {\n\t\treturn err\n\t}\n\n\tif cf.To != 0 && cf.To < from {\n\t\treturn nil\n\t}\n\n\tif err := l.loadFile(cf, m); err != nil {\n\t\treturn err\n\t}\n\n\treturn nil\n}\n\nfunc (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {\n\tfor name, metric := range cf.Metrics {\n\t\tn := len(metric.Data)\n\t\tb := &buffer{\n\t\t\tfrequency: metric.Frequency,\n\t\t\tstart: metric.Start,\n\t\t\tdata: metric.Data[0:n:n],\n\t\t\tprev: nil,\n\t\t\tnext: nil,\n\t\t\tarchived: true,\n\t\t}\n\n\t\tminfo, ok := m.Metrics[name]\n\t\tif !ok {\n\t\t\tcontinue\n\t\t}\n\n\t\tprev := l.metrics[minfo.offset]\n\t\tif prev == nil {\n\t\t\tl.metrics[minfo.offset] = b\n\t\t} else {\n\t\t\tif prev.start > b.start {\n\t\t\t\treturn fmt.Errorf(\"[METRICSTORE]> buffer start time %d is before previous buffer start %d\", b.start, prev.start)\n\t\t\t}\n\n\t\t\tb.prev = prev\n\t\t\tprev.next = b\n\t\t}\n\t\tl.metrics[minfo.offset] = b\n\t}\n\n\tif len(cf.Children) > 0 && l.children == nil {\n\t\tl.children = make(map[string]*Level)\n\t}\n\n\tfor sel, childCf := range cf.Children {\n\t\tchild, ok := l.children[sel]\n\t\tif !ok {\n\t\t\tchild = &Level{\n\t\t\t\tmetrics: make([]*buffer, len(m.Metrics)),\n\t\t\t\tchildren: nil,\n\t\t\t}\n\t\t\tl.children[sel] = child\n\t\t}\n\n\t\tif err := child.loadFile(childCf, m); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// fromCheckpoint loads all checkpoint files (JSON, binary snapshot, WAL) for a\n// single host directory. 
Snapshot files are loaded first (sorted by timestamp),\n// then current.wal is replayed on top.\nfunc (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, error) {\n\tdirentries, err := os.ReadDir(dir)\n\tif err != nil {\n\t\tif os.IsNotExist(err) {\n\t\t\treturn 0, nil\n\t\t}\n\t\treturn 0, err\n\t}\n\n\tallFiles := make([]fs.DirEntry, 0)\n\tvar walEntry fs.DirEntry\n\tfilesLoaded := 0\n\n\tfor _, e := range direntries {\n\t\tif e.IsDir() {\n\t\t\t// Legacy: skip subdirectories (only used by old Avro format).\n\t\t\t// These are ignored; their data is not loaded.\n\t\t\tcclog.Debugf(\"[METRICSTORE]> skipping subdirectory %s in checkpoint dir %s\", e.Name(), dir)\n\t\t\tcontinue\n\t\t}\n\n\t\tname := e.Name()\n\t\tif strings.HasSuffix(name, \".json\") || strings.HasSuffix(name, \".bin\") {\n\t\t\tallFiles = append(allFiles, e)\n\t\t} else if name == \"current.wal\" {\n\t\t\twalEntry = e\n\t\t}\n\t\t// Silently ignore other files (e.g., .tmp, .bin.tmp from interrupted writes).\n\t}\n\n\tfiles, err := findFiles(allFiles, from, true)\n\tif err != nil {\n\t\treturn filesLoaded, err\n\t}\n\n\tloaders := map[string]func(*MemoryStore, *os.File, int64) error{\n\t\t\".json\": l.loadJSONFile,\n\t\t\".bin\": l.loadBinaryFile,\n\t}\n\n\tfor _, filename := range files {\n\t\text := filepath.Ext(filename)\n\t\tloader := loaders[ext]\n\t\tif loader == nil {\n\t\t\tcclog.Warnf(\"[METRICSTORE]> unknown extension for checkpoint file %s\", filename)\n\t\t\tcontinue\n\t\t}\n\n\t\terr := func() error {\n\t\t\tf, err := os.Open(path.Join(dir, filename))\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t\tdefer f.Close()\n\t\t\treturn loader(m, f, from)\n\t\t}()\n\t\tif err != nil {\n\t\t\treturn filesLoaded, err\n\t\t}\n\t\tfilesLoaded++\n\t}\n\n\t// Replay WAL after all snapshot files so it fills in data since the last snapshot.\n\tif walEntry != nil {\n\t\terr := func() error {\n\t\t\tf, err := os.Open(path.Join(dir, walEntry.Name()))\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t\tdefer f.Close()\n\t\t\treturn l.loadWALFile(m, f, from)\n\t\t}()\n\t\tif err != nil {\n\t\t\t// WAL errors are non-fatal: the snapshot already loaded the bulk of data.\n\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL replay error for %s: %v (data since last snapshot may be missing)\", dir, err)\n\t\t} else {\n\t\t\tfilesLoaded++\n\t\t}\n\t}\n\n\treturn filesLoaded, nil\n}\n\n// parseTimestampFromFilename extracts a Unix timestamp from a checkpoint filename.\n// Supports \".json\" (format: \".json\") and \".bin\" (format: \".bin\").\nfunc parseTimestampFromFilename(name string) (int64, error) {\n\tswitch {\n\tcase strings.HasSuffix(name, \".json\"):\n\t\treturn strconv.ParseInt(name[:len(name)-5], 10, 64)\n\tcase strings.HasSuffix(name, \".bin\"):\n\t\treturn strconv.ParseInt(name[:len(name)-4], 10, 64)\n\tdefault:\n\t\treturn 0, fmt.Errorf(\"unknown checkpoint extension for file %q\", name)\n\t}\n}\n\n// findFiles returns filenames from direntries whose timestamps satisfy the filter.\n// If findMoreRecentFiles is true, returns files with timestamps >= t (plus the\n// last file before t if t falls between two files).\nfunc findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {\n\tnums := map[string]int64{}\n\tfor _, e := range direntries {\n\t\tname := e.Name()\n\t\tif !strings.HasSuffix(name, \".json\") && !strings.HasSuffix(name, \".bin\") {\n\t\t\tcontinue\n\t\t}\n\n\t\tts, err := parseTimestampFromFilename(name)\n\t\tif err != nil {\n\t\t\treturn nil, 
err\n\t\t}\n\t\tnums[name] = ts\n\t}\n\n\tsort.Slice(direntries, func(i, j int) bool {\n\t\ta, b := direntries[i], direntries[j]\n\t\treturn nums[a.Name()] < nums[b.Name()]\n\t})\n\n\tif len(nums) == 0 {\n\t\treturn nil, nil\n\t}\n\n\tfilenames := make([]string, 0)\n\n\tfor i, e := range direntries {\n\t\tts1 := nums[e.Name()]\n\n\t\tif findMoreRecentFiles && t <= ts1 {\n\t\t\tfilenames = append(filenames, e.Name())\n\t\t} else if !findMoreRecentFiles && ts1 <= t && ts1 != 0 {\n\t\t\tfilenames = append(filenames, e.Name())\n\t\t}\n\n\t\tif i == len(direntries)-1 {\n\t\t\tcontinue\n\t\t}\n\n\t\tenext := direntries[i+1]\n\t\tts2 := nums[enext.Name()]\n\n\t\tif findMoreRecentFiles {\n\t\t\tif ts1 < t && t < ts2 {\n\t\t\t\tfilenames = append(filenames, e.Name())\n\t\t\t}\n\t\t}\n\t}\n\n\treturn filenames, nil\n}\n","numLines":677,"startLine":1,"totalLines":677}},"sourceToolAssistantUUID":"d94c01ef-3ad8-483e-838b-2546190d48cd"}
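The checkpoint.go source read above hand-rolls MarshalJSON for CheckpointMetrics rather than letting encoding/json reflect over []schema.Float. Below is a minimal, self-contained sketch of that technique; Float and Metrics are illustrative stand-ins for schema.Float and CheckpointMetrics, not the package's types.

```go
package main

import (
	"fmt"
	"math"
	"strconv"
)

// Float plays the role of schema.Float here: a float32 whose NaN
// values must serialize as JSON null.
type Float float32

func (f Float) IsNaN() bool { return math.IsNaN(float64(f)) }

// Metrics mirrors the three fields of CheckpointMetrics.
type Metrics struct {
	Data      []Float
	Frequency int64
	Start     int64
}

// MarshalJSON appends everything into one growing byte slice, as in
// checkpoint.go: strconv.Append* for numbers, a literal "null" for NaN
// samples, no reflection and no per-element MarshalJSON dispatch.
func (m *Metrics) MarshalJSON() ([]byte, error) {
	buf := make([]byte, 0, 128+len(m.Data)*8)
	buf = append(buf, `{"frequency":`...)
	buf = strconv.AppendInt(buf, m.Frequency, 10)
	buf = append(buf, `,"start":`...)
	buf = strconv.AppendInt(buf, m.Start, 10)
	buf = append(buf, `,"data":[`...)
	for i, x := range m.Data {
		if i != 0 {
			buf = append(buf, ',')
		}
		if x.IsNaN() {
			buf = append(buf, `null`...)
		} else {
			buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)
		}
	}
	buf = append(buf, `]}`...)
	return buf, nil
}

func main() {
	m := &Metrics{Data: []Float{1.5, Float(math.NaN()), 3}, Frequency: 60, Start: 1234567890}
	out, _ := m.MarshalJSON()
	fmt.Println(string(out)) // {"frequency":60,"start":1234567890,"data":[1.5,null,3.0]}
}
```

Writing NaN as null matters for the round trip: loadJSONFile decodes the array back into schema.Float values, and the padding loop in toCheckpointFile fills unread tail slots with NaN before encoding.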
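Further down, findFiles implements the selection rule its comment describes: for a load starting at from, keep every snapshot at or after from, plus the newest snapshot that begins before from, since its buffers may extend into the window. Here is a standalone paraphrase of that rule, assuming filenames of the form <unix_ts>.json or <unix_ts>.bin as written by toCheckpoint; this is a sketch, not the package's code.

```go
package main

import (
	"fmt"
	"sort"
	"strconv"
	"strings"
)

// pickCheckpointFiles reproduces the findMoreRecentFiles branch of
// findFiles: sort by the timestamp encoded in the filename, keep files
// with ts >= from, and also keep a file whose successor starts after
// from (it covers the beginning of the requested window).
func pickCheckpointFiles(names []string, from int64) []string {
	type file struct {
		name string
		ts   int64
	}
	files := make([]file, 0, len(names))
	for _, n := range names {
		base := strings.TrimSuffix(strings.TrimSuffix(n, ".json"), ".bin")
		ts, err := strconv.ParseInt(base, 10, 64)
		if err != nil {
			continue // current.wal, *.tmp and friends are handled elsewhere
		}
		files = append(files, file{n, ts})
	}
	sort.Slice(files, func(i, j int) bool { return files[i].ts < files[j].ts })

	var picked []string
	for i, f := range files {
		switch {
		case f.ts >= from:
			picked = append(picked, f.name)
		case i+1 < len(files) && files[i+1].ts > from:
			picked = append(picked, f.name) // last snapshot before the window
		}
	}
	return picked
}

func main() {
	names := []string{"100.json", "200.json", "300.bin", "current.wal"}
	fmt.Println(pickCheckpointFiles(names, 250)) // [200.json 300.bin]
}
```

fromCheckpoint then replays current.wal after these snapshots, so records written since the last rotation land on top of the loaded buffers.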
{"parentUuid":"fc88719e-7baa-4c50-82e2-8c0d467422ee","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01FP6GwNbJigmtD6kv2iF2tF","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01Ari9c62VwNixy2h3pVeCcz","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/walCheckpoint.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":27304,"cache_read_input_tokens":24476,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":27304},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzHnyHeWih653uQT8w","type":"assistant","uuid":"396aea8b-7eb7-4780-9dc2-677da33e7aa7","timestamp":"2026-03-29T19:34:32.692Z"}
{"parentUuid":"396aea8b-7eb7-4780-9dc2-677da33e7aa7","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01FP6GwNbJigmtD6kv2iF2tF","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01WSoaUYSv7iuBy3XFqoNfhH","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/level.go"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":27304,"cache_read_input_tokens":24476,"output_tokens":236,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":27304,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXvzHnyHeWih653uQT8w","type":"assistant","uuid":"07a3cbc6-6b63-4740-b1f3-8d06479467e0","timestamp":"2026-03-29T19:34:32.856Z"}
{"parentUuid":"07a3cbc6-6b63-4740-b1f3-8d06479467e0","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01WSoaUYSv7iuBy3XFqoNfhH","toolUseID":"toolu_01WSoaUYSv7iuBy3XFqoNfhH","timestamp":"2026-03-29T19:34:32.933Z","uuid":"200c96b3-3172-4228-9f9f-9c7984b274fc"}
{"parentUuid":"07a3cbc6-6b63-4740-b1f3-8d06479467e0","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01WSoaUYSv7iuBy3XFqoNfhH","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// Package metricstore provides level.go: Hierarchical tree structure for metric storage.\n 7→//\n 8→// # Level Architecture\n 9→//\n 10→// The Level type forms a tree structure where each node represents a level in the\n 11→// ClusterCockpit hierarchy: cluster → host → socket → core → hwthread, with special\n 12→// nodes for memory domains and accelerators.\n 13→//\n 14→// Structure:\n 15→//\n 16→//\tRoot Level (cluster=\"emmy\")\n 17→//\t├─ Level (host=\"node001\")\n 18→//\t│ ├─ Level (socket=\"0\")\n 19→//\t│ │ ├─ Level (core=\"0\") [stores cpu0 metrics]\n 20→//\t│ │ └─ Level (core=\"1\") [stores cpu1 metrics]\n 21→//\t│ └─ Level (socket=\"1\")\n 22→//\t│ └─ ...\n 23→//\t└─ Level (host=\"node002\")\n 24→//\t └─ ...\n 25→//\n 26→// Each Level can:\n 27→// - Hold data (metrics slice of buffer pointers)\n 28→// - Have child nodes (children map[string]*Level)\n 29→// - Both simultaneously (inner nodes can store aggregated metrics)\n 30→//\n 31→// # Selector Paths\n 32→//\n 33→// Selectors are hierarchical paths: []string{\"cluster\", \"host\", \"component\"}.\n 34→// Example: []string{\"emmy\", \"node001\", \"cpu0\"} navigates to the cpu0 core level.\n 35→//\n 36→// # Concurrency\n 37→//\n 38→// RWMutex protects children map and metrics slice. Read-heavy workload (metric reads)\n 39→// uses RLock. Writes (new levels, buffer updates) use Lock. Double-checked locking\n 40→// prevents races during level creation.\n 41→package metricstore\n 42→\n 43→import (\n 44→\t\"sync\"\n 45→\t\"time\"\n 46→\t\"unsafe\"\n 47→\n 48→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 49→\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n 50→)\n 51→\n 52→// Level represents a node in the hierarchical metric storage tree.\n 53→//\n 54→// Can be both a leaf or inner node. Inner nodes hold data in 'metrics' for aggregated\n 55→// values (e.g., socket-level metrics derived from core-level data). Named \"Level\"\n 56→// instead of \"node\" to avoid confusion with cluster nodes (hosts).\n 57→//\n 58→// Fields:\n 59→// - children: Map of child level names to Level pointers (e.g., \"cpu0\" → Level)\n 60→// - metrics: Slice of buffer pointers (one per metric, indexed by MetricConfig.offset)\n 61→// - lock: RWMutex for concurrent access (read-heavy, write-rare)\n 62→type Level struct {\n 63→\tchildren map[string]*Level\n 64→\tmetrics []*buffer\n 65→\tlock sync.RWMutex\n 66→}\n 67→\n 68→// findLevelOrCreate navigates to or creates the level specified by selector.\n 69→//\n 70→// Recursively descends the tree, creating missing levels as needed. 
Uses double-checked\n 71→// locking: RLock first (fast path), then Lock if creation needed (slow path), then\n 72→// re-check after acquiring Lock to handle races.\n 73→//\n 74→// Example selector: []string{\"emmy\", \"node001\", \"cpu0\"}\n 75→// Navigates: root → emmy → node001 → cpu0, creating levels as needed.\n 76→//\n 77→// Parameters:\n 78→// - selector: Hierarchical path (consumed recursively, decreasing depth)\n 79→// - nMetrics: Number of metric slots to allocate in new levels\n 80→//\n 81→// Returns:\n 82→// - *Level: The target level (existing or newly created)\n 83→//\n 84→// Note: sync.Map may improve performance for high-concurrency writes, but current\n 85→// approach suffices for read-heavy workload.\n 86→func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {\n 87→\tif len(selector) == 0 {\n 88→\t\treturn l\n 89→\t}\n 90→\n 91→\t// Allow concurrent reads:\n 92→\tl.lock.RLock()\n 93→\tvar child *Level\n 94→\tvar ok bool\n 95→\tif l.children == nil {\n 96→\t\t// Children map needs to be created...\n 97→\t\tl.lock.RUnlock()\n 98→\t} else {\n 99→\t\tchild, ok = l.children[selector[0]]\n 100→\t\tl.lock.RUnlock()\n 101→\t\tif ok {\n 102→\t\t\treturn child.findLevelOrCreate(selector[1:], nMetrics)\n 103→\t\t}\n 104→\t}\n 105→\n 106→\t// The level does not exist, take write lock for unique access:\n 107→\tl.lock.Lock()\n 108→\t// While this thread waited for the write lock, another thread\n 109→\t// could have created the child node.\n 110→\tif l.children != nil {\n 111→\t\tchild, ok = l.children[selector[0]]\n 112→\t\tif ok {\n 113→\t\t\tl.lock.Unlock()\n 114→\t\t\treturn child.findLevelOrCreate(selector[1:], nMetrics)\n 115→\t\t}\n 116→\t}\n 117→\n 118→\tchild = &Level{\n 119→\t\tmetrics: make([]*buffer, nMetrics),\n 120→\t\tchildren: nil,\n 121→\t}\n 122→\n 123→\tif l.children != nil {\n 124→\t\tl.children[selector[0]] = child\n 125→\t} else {\n 126→\t\tl.children = map[string]*Level{selector[0]: child}\n 127→\t}\n 128→\tl.lock.Unlock()\n 129→\treturn child.findLevelOrCreate(selector[1:], nMetrics)\n 130→}\n 131→\n 132→// collectPaths gathers all selector paths at the specified depth in the tree.\n 133→//\n 134→// Recursively traverses children, collecting paths when currentDepth+1 == targetDepth.\n 135→// Each path is a selector that can be used with findLevel() or findBuffers().\n 136→//\n 137→// Explicitly copies slices to avoid shared underlying arrays between siblings, preventing\n 138→// unintended mutations.\n 139→//\n 140→// Parameters:\n 141→// - currentDepth: Depth of current level (0 = root)\n 142→// - targetDepth: Depth to collect paths from\n 143→// - currentPath: Path accumulated so far\n 144→// - results: Output slice (appended to)\n 145→//\n 146→// Example: collectPaths(0, 2, []string{}, &results) collects all 2-level paths\n 147→// like []string{\"emmy\", \"node001\"}, []string{\"emmy\", \"node002\"}, etc.\n 148→func (l *Level) collectPaths(currentDepth, targetDepth int, currentPath []string, results *[][]string) {\n 149→\tl.lock.RLock()\n 150→\tdefer l.lock.RUnlock()\n 151→\n 152→\tfor key, child := range l.children {\n 153→\t\tif child == nil {\n 154→\t\t\tcontinue\n 155→\t\t}\n 156→\n 157→\t\t// We explicitly make a new slice and copy data to avoid sharing underlying arrays between siblings\n 158→\t\tnewPath := make([]string, len(currentPath))\n 159→\t\tcopy(newPath, currentPath)\n 160→\t\tnewPath = append(newPath, key)\n 161→\n 162→\t\t// Check depth, and just return if depth reached\n 163→\t\tif currentDepth+1 == targetDepth {\n 
164→\t\t\t*results = append(*results, newPath)\n 165→\t\t} else {\n 166→\t\t\tchild.collectPaths(currentDepth+1, targetDepth, newPath, results)\n 167→\t\t}\n 168→\t}\n 169→}\n 170→\n 171→// free removes buffers older than the retention threshold from the entire subtree.\n 172→//\n 173→// Recursively frees buffers in this level's metrics and all child levels. Buffers\n 174→// with standard capacity (BufferCap) are returned to the pool. Called by the\n 175→// retention worker to enforce retention policies.\n 176→//\n 177→// Parameters:\n 178→// - t: Retention threshold timestamp (Unix seconds)\n 179→//\n 180→// Returns:\n 181→// - int: Total number of buffers freed in this subtree\n 182→// - error: Non-nil on failure (propagated from children)\n 183→func (l *Level) free(t int64) (int, error) {\n 184→\tn, _, err := l.freeAndCheckEmpty(t)\n 185→\treturn n, err\n 186→}\n 187→\n 188→// freeAndCheckEmpty performs free() and atomically checks if the level is empty\n 189→// while still holding its own lock, avoiding a TOCTOU race between free() and\n 190→// a separate isEmpty() call.\n 191→func (l *Level) freeAndCheckEmpty(t int64) (int, bool, error) {\n 192→\tl.lock.Lock()\n 193→\tdefer l.lock.Unlock()\n 194→\n 195→\tn := 0\n 196→\tfor i, b := range l.metrics {\n 197→\t\tif b != nil {\n 198→\t\t\tdelme, m := b.free(t)\n 199→\t\t\tn += m\n 200→\t\t\tif delme {\n 201→\t\t\t\tif cap(b.data) != BufferCap {\n 202→\t\t\t\t\tb.data = make([]schema.Float, 0, BufferCap)\n 203→\t\t\t\t}\n 204→\t\t\t\tb.lastUsed = time.Now().Unix()\n 205→\t\t\t\tbufferPool.Put(b)\n 206→\t\t\t\tl.metrics[i] = nil\n 207→\t\t\t}\n 208→\t\t}\n 209→\t}\n 210→\n 211→\tfor key, child := range l.children {\n 212→\t\tm, empty, err := child.freeAndCheckEmpty(t)\n 213→\t\tn += m\n 214→\t\tif err != nil {\n 215→\t\t\treturn n, false, err\n 216→\t\t}\n 217→\t\tif empty {\n 218→\t\t\tdelete(l.children, key)\n 219→\t\t}\n 220→\t}\n 221→\n 222→\t// Check emptiness while still holding the lock\n 223→\tempty := len(l.children) == 0\n 224→\tif empty {\n 225→\t\tfor _, b := range l.metrics {\n 226→\t\t\tif b != nil {\n 227→\t\t\t\tempty = false\n 228→\t\t\t\tbreak\n 229→\t\t\t}\n 230→\t\t}\n 231→\t}\n 232→\n 233→\treturn n, empty, nil\n 234→}\n 235→\n 236→// forceFree removes the oldest buffer from each metric chain in the subtree.\n 237→//\n 238→// Unlike free(), which removes based on time threshold, this unconditionally removes\n 239→// the oldest buffer in each chain. 
Used by MemoryUsageTracker when memory cap is\n 240→// exceeded and time-based retention is insufficient.\n 241→//\n 242→// Recursively processes current level's metrics and all child levels.\n 243→//\n 244→// Returns:\n 245→// - int: Total number of buffers freed in this subtree\n 246→// - error: Non-nil on failure (propagated from children)\n 247→func (l *Level) forceFree() (int, error) {\n 248→\tl.lock.Lock()\n 249→\tdefer l.lock.Unlock()\n 250→\n 251→\tn := 0\n 252→\n 253→\t// Iterate over metrics in the current level\n 254→\tfor i, b := range l.metrics {\n 255→\t\tif b != nil {\n 256→\t\t\t// Attempt to free the oldest buffer in this chain\n 257→\t\t\tdelme, freedCount := b.forceFreeOldest()\n 258→\t\t\tn += freedCount\n 259→\n 260→\t\t\t// If delme is true, it means 'b' itself (the head) was the oldest\n 261→\t\t\t// and needs to be removed from the slice.\n 262→\t\t\tif delme {\n 263→\t\t\t\tb.next = nil\n 264→\t\t\t\tb.prev = nil\n 265→\t\t\t\tif cap(b.data) != BufferCap {\n 266→\t\t\t\t\tb.data = make([]schema.Float, 0, BufferCap)\n 267→\t\t\t\t}\n 268→\t\t\t\tb.lastUsed = time.Now().Unix()\n 269→\t\t\t\tbufferPool.Put(b)\n 270→\t\t\t\tl.metrics[i] = nil\n 271→\t\t\t}\n 272→\t\t}\n 273→\t}\n 274→\n 275→\t// Recursively traverse children\n 276→\tfor _, child := range l.children {\n 277→\t\tm, err := child.forceFree()\n 278→\t\tn += m\n 279→\t\tif err != nil {\n 280→\t\t\treturn n, err\n 281→\t\t}\n 282→\t}\n 283→\n 284→\treturn n, nil\n 285→}\n 286→\n 287→// sizeInBytes calculates the total memory usage of all buffers in the subtree.\n 288→//\n 289→// Recursively sums buffer data sizes (count of Float values × sizeof(Float)) across\n 290→// this level's metrics and all child levels. Used by MemoryUsageTracker to enforce\n 291→// memory cap limits.\n 292→//\n 293→// Returns:\n 294→// - int64: Total bytes used by buffer data in this subtree\n 295→func (l *Level) sizeInBytes() int64 {\n 296→\tl.lock.RLock()\n 297→\tdefer l.lock.RUnlock()\n 298→\tsize := int64(0)\n 299→\n 300→\tfor _, b := range l.metrics {\n 301→\t\tif b != nil {\n 302→\t\t\tsize += b.count() * int64(unsafe.Sizeof(schema.Float(0)))\n 303→\t\t\tsize += b.bufferCount() * int64(unsafe.Sizeof(buffer{}))\n 304→\t\t}\n 305→\t}\n 306→\n 307→\tfor _, child := range l.children {\n 308→\t\tsize += child.sizeInBytes()\n 309→\t}\n 310→\n 311→\treturn size\n 312→}\n 313→\n 314→// findLevel navigates to the level specified by selector, returning nil if not found.\n 315→//\n 316→// Read-only variant of findLevelOrCreate. 
Does not create missing levels.\n 317→// Recursively descends the tree following the selector path.\n 318→//\n 319→// Parameters:\n 320→// - selector: Hierarchical path (e.g., []string{\"emmy\", \"node001\", \"cpu0\"})\n 321→//\n 322→// Returns:\n 323→// - *Level: The target level, or nil if any component in the path does not exist\n 324→func (l *Level) findLevel(selector []string) *Level {\n 325→\tif len(selector) == 0 {\n 326→\t\treturn l\n 327→\t}\n 328→\n 329→\tl.lock.RLock()\n 330→\tdefer l.lock.RUnlock()\n 331→\n 332→\tlvl := l.children[selector[0]]\n 333→\tif lvl == nil {\n 334→\t\treturn nil\n 335→\t}\n 336→\n 337→\treturn lvl.findLevel(selector[1:])\n 338→}\n 339→\n 340→// findBuffers invokes callback on all buffers matching the selector pattern.\n 341→//\n 342→// Supports flexible selector patterns (from cc-lib/util.Selector):\n 343→// - Exact match: Selector element with String set (e.g., \"node001\")\n 344→// - Group match: Selector element with Group set (e.g., [\"cpu0\", \"cpu2\", \"cpu4\"])\n 345→// - Wildcard: Selector element with Any=true (matches all children)\n 346→//\n 347→// Empty selector (len==0) matches current level's buffer at 'offset' and recursively\n 348→// all descendant buffers at the same offset (used for aggregation queries).\n 349→//\n 350→// Parameters:\n 351→// - selector: Pattern to match (consumed recursively)\n 352→// - offset: Metric index in metrics slice (from MetricConfig.offset)\n 353→// - f: Callback invoked on each matching buffer\n 354→//\n 355→// Returns:\n 356→// - error: First error returned by callback, or nil if all succeeded\n 357→//\n 358→// Example:\n 359→//\n 360→//\t// Find all cpu0 buffers across all hosts:\n 361→//\tfindBuffers([]Selector{{Any: true}, {String: \"cpu0\"}}, metricOffset, callback)\n 362→func (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error {\n 363→\tl.lock.RLock()\n 364→\tdefer l.lock.RUnlock()\n 365→\n 366→\tif len(selector) == 0 {\n 367→\t\tb := l.metrics[offset]\n 368→\t\tif b != nil {\n 369→\t\t\treturn f(b)\n 370→\t\t}\n 371→\n 372→\t\tfor _, lvl := range l.children {\n 373→\t\t\terr := lvl.findBuffers(nil, offset, f)\n 374→\t\t\tif err != nil {\n 375→\t\t\t\treturn err\n 376→\t\t\t}\n 377→\t\t}\n 378→\t\treturn nil\n 379→\t}\n 380→\n 381→\tsel := selector[0]\n 382→\tif len(sel.String) != 0 && l.children != nil {\n 383→\t\tlvl, ok := l.children[sel.String]\n 384→\t\tif ok {\n 385→\t\t\terr := lvl.findBuffers(selector[1:], offset, f)\n 386→\t\t\tif err != nil {\n 387→\t\t\t\treturn err\n 388→\t\t\t}\n 389→\t\t}\n 390→\t\treturn nil\n 391→\t}\n 392→\n 393→\tif sel.Group != nil && l.children != nil {\n 394→\t\tfor _, key := range sel.Group {\n 395→\t\t\tlvl, ok := l.children[key]\n 396→\t\t\tif ok {\n 397→\t\t\t\terr := lvl.findBuffers(selector[1:], offset, f)\n 398→\t\t\t\tif err != nil {\n 399→\t\t\t\t\treturn err\n 400→\t\t\t\t}\n 401→\t\t\t}\n 402→\t\t}\n 403→\t\treturn nil\n 404→\t}\n 405→\n 406→\tif sel.Any && l.children != nil {\n 407→\t\tfor _, lvl := range l.children {\n 408→\t\t\tif err := lvl.findBuffers(selector[1:], offset, f); err != nil {\n 409→\t\t\t\treturn err\n 410→\t\t\t}\n 411→\t\t}\n 412→\t\treturn nil\n 413→\t}\n 414→\n 415→\treturn nil\n 416→}\n 417→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. 
You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"018bdf68-a2be-40e3-8b78-9d74baff7465","timestamp":"2026-03-29T19:34:32.931Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/level.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides level.go: Hierarchical tree structure for metric storage.\n//\n// # Level Architecture\n//\n// The Level type forms a tree structure where each node represents a level in the\n// ClusterCockpit hierarchy: cluster → host → socket → core → hwthread, with special\n// nodes for memory domains and accelerators.\n//\n// Structure:\n//\n//\tRoot Level (cluster=\"emmy\")\n//\t├─ Level (host=\"node001\")\n//\t│ ├─ Level (socket=\"0\")\n//\t│ │ ├─ Level (core=\"0\") [stores cpu0 metrics]\n//\t│ │ └─ Level (core=\"1\") [stores cpu1 metrics]\n//\t│ └─ Level (socket=\"1\")\n//\t│ └─ ...\n//\t└─ Level (host=\"node002\")\n//\t └─ ...\n//\n// Each Level can:\n// - Hold data (metrics slice of buffer pointers)\n// - Have child nodes (children map[string]*Level)\n// - Both simultaneously (inner nodes can store aggregated metrics)\n//\n// # Selector Paths\n//\n// Selectors are hierarchical paths: []string{\"cluster\", \"host\", \"component\"}.\n// Example: []string{\"emmy\", \"node001\", \"cpu0\"} navigates to the cpu0 core level.\n//\n// # Concurrency\n//\n// RWMutex protects children map and metrics slice. Read-heavy workload (metric reads)\n// uses RLock. Writes (new levels, buffer updates) use Lock. Double-checked locking\n// prevents races during level creation.\npackage metricstore\n\nimport (\n\t\"sync\"\n\t\"time\"\n\t\"unsafe\"\n\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n)\n\n// Level represents a node in the hierarchical metric storage tree.\n//\n// Can be both a leaf or inner node. Inner nodes hold data in 'metrics' for aggregated\n// values (e.g., socket-level metrics derived from core-level data). Named \"Level\"\n// instead of \"node\" to avoid confusion with cluster nodes (hosts).\n//\n// Fields:\n// - children: Map of child level names to Level pointers (e.g., \"cpu0\" → Level)\n// - metrics: Slice of buffer pointers (one per metric, indexed by MetricConfig.offset)\n// - lock: RWMutex for concurrent access (read-heavy, write-rare)\ntype Level struct {\n\tchildren map[string]*Level\n\tmetrics []*buffer\n\tlock sync.RWMutex\n}\n\n// findLevelOrCreate navigates to or creates the level specified by selector.\n//\n// Recursively descends the tree, creating missing levels as needed. 
Uses double-checked\n// locking: RLock first (fast path), then Lock if creation needed (slow path), then\n// re-check after acquiring Lock to handle races.\n//\n// Example selector: []string{\"emmy\", \"node001\", \"cpu0\"}\n// Navigates: root → emmy → node001 → cpu0, creating levels as needed.\n//\n// Parameters:\n// - selector: Hierarchical path (consumed recursively, decreasing depth)\n// - nMetrics: Number of metric slots to allocate in new levels\n//\n// Returns:\n// - *Level: The target level (existing or newly created)\n//\n// Note: sync.Map may improve performance for high-concurrency writes, but current\n// approach suffices for read-heavy workload.\nfunc (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {\n\tif len(selector) == 0 {\n\t\treturn l\n\t}\n\n\t// Allow concurrent reads:\n\tl.lock.RLock()\n\tvar child *Level\n\tvar ok bool\n\tif l.children == nil {\n\t\t// Children map needs to be created...\n\t\tl.lock.RUnlock()\n\t} else {\n\t\tchild, ok = l.children[selector[0]]\n\t\tl.lock.RUnlock()\n\t\tif ok {\n\t\t\treturn child.findLevelOrCreate(selector[1:], nMetrics)\n\t\t}\n\t}\n\n\t// The level does not exist, take write lock for unique access:\n\tl.lock.Lock()\n\t// While this thread waited for the write lock, another thread\n\t// could have created the child node.\n\tif l.children != nil {\n\t\tchild, ok = l.children[selector[0]]\n\t\tif ok {\n\t\t\tl.lock.Unlock()\n\t\t\treturn child.findLevelOrCreate(selector[1:], nMetrics)\n\t\t}\n\t}\n\n\tchild = &Level{\n\t\tmetrics: make([]*buffer, nMetrics),\n\t\tchildren: nil,\n\t}\n\n\tif l.children != nil {\n\t\tl.children[selector[0]] = child\n\t} else {\n\t\tl.children = map[string]*Level{selector[0]: child}\n\t}\n\tl.lock.Unlock()\n\treturn child.findLevelOrCreate(selector[1:], nMetrics)\n}\n\n// collectPaths gathers all selector paths at the specified depth in the tree.\n//\n// Recursively traverses children, collecting paths when currentDepth+1 == targetDepth.\n// Each path is a selector that can be used with findLevel() or findBuffers().\n//\n// Explicitly copies slices to avoid shared underlying arrays between siblings, preventing\n// unintended mutations.\n//\n// Parameters:\n// - currentDepth: Depth of current level (0 = root)\n// - targetDepth: Depth to collect paths from\n// - currentPath: Path accumulated so far\n// - results: Output slice (appended to)\n//\n// Example: collectPaths(0, 2, []string{}, &results) collects all 2-level paths\n// like []string{\"emmy\", \"node001\"}, []string{\"emmy\", \"node002\"}, etc.\nfunc (l *Level) collectPaths(currentDepth, targetDepth int, currentPath []string, results *[][]string) {\n\tl.lock.RLock()\n\tdefer l.lock.RUnlock()\n\n\tfor key, child := range l.children {\n\t\tif child == nil {\n\t\t\tcontinue\n\t\t}\n\n\t\t// We explicitly make a new slice and copy data to avoid sharing underlying arrays between siblings\n\t\tnewPath := make([]string, len(currentPath))\n\t\tcopy(newPath, currentPath)\n\t\tnewPath = append(newPath, key)\n\n\t\t// Check depth, and just return if depth reached\n\t\tif currentDepth+1 == targetDepth {\n\t\t\t*results = append(*results, newPath)\n\t\t} else {\n\t\t\tchild.collectPaths(currentDepth+1, targetDepth, newPath, results)\n\t\t}\n\t}\n}\n\n// free removes buffers older than the retention threshold from the entire subtree.\n//\n// Recursively frees buffers in this level's metrics and all child levels. Buffers\n// with standard capacity (BufferCap) are returned to the pool. 
Called by the\n// retention worker to enforce retention policies.\n//\n// Parameters:\n// - t: Retention threshold timestamp (Unix seconds)\n//\n// Returns:\n// - int: Total number of buffers freed in this subtree\n// - error: Non-nil on failure (propagated from children)\nfunc (l *Level) free(t int64) (int, error) {\n\tn, _, err := l.freeAndCheckEmpty(t)\n\treturn n, err\n}\n\n// freeAndCheckEmpty performs free() and atomically checks if the level is empty\n// while still holding its own lock, avoiding a TOCTOU race between free() and\n// a separate isEmpty() call.\nfunc (l *Level) freeAndCheckEmpty(t int64) (int, bool, error) {\n\tl.lock.Lock()\n\tdefer l.lock.Unlock()\n\n\tn := 0\n\tfor i, b := range l.metrics {\n\t\tif b != nil {\n\t\t\tdelme, m := b.free(t)\n\t\t\tn += m\n\t\t\tif delme {\n\t\t\t\tif cap(b.data) != BufferCap {\n\t\t\t\t\tb.data = make([]schema.Float, 0, BufferCap)\n\t\t\t\t}\n\t\t\t\tb.lastUsed = time.Now().Unix()\n\t\t\t\tbufferPool.Put(b)\n\t\t\t\tl.metrics[i] = nil\n\t\t\t}\n\t\t}\n\t}\n\n\tfor key, child := range l.children {\n\t\tm, empty, err := child.freeAndCheckEmpty(t)\n\t\tn += m\n\t\tif err != nil {\n\t\t\treturn n, false, err\n\t\t}\n\t\tif empty {\n\t\t\tdelete(l.children, key)\n\t\t}\n\t}\n\n\t// Check emptiness while still holding the lock\n\tempty := len(l.children) == 0\n\tif empty {\n\t\tfor _, b := range l.metrics {\n\t\t\tif b != nil {\n\t\t\t\tempty = false\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t}\n\n\treturn n, empty, nil\n}\n\n// forceFree removes the oldest buffer from each metric chain in the subtree.\n//\n// Unlike free(), which removes based on time threshold, this unconditionally removes\n// the oldest buffer in each chain. Used by MemoryUsageTracker when memory cap is\n// exceeded and time-based retention is insufficient.\n//\n// Recursively processes current level's metrics and all child levels.\n//\n// Returns:\n// - int: Total number of buffers freed in this subtree\n// - error: Non-nil on failure (propagated from children)\nfunc (l *Level) forceFree() (int, error) {\n\tl.lock.Lock()\n\tdefer l.lock.Unlock()\n\n\tn := 0\n\n\t// Iterate over metrics in the current level\n\tfor i, b := range l.metrics {\n\t\tif b != nil {\n\t\t\t// Attempt to free the oldest buffer in this chain\n\t\t\tdelme, freedCount := b.forceFreeOldest()\n\t\t\tn += freedCount\n\n\t\t\t// If delme is true, it means 'b' itself (the head) was the oldest\n\t\t\t// and needs to be removed from the slice.\n\t\t\tif delme {\n\t\t\t\tb.next = nil\n\t\t\t\tb.prev = nil\n\t\t\t\tif cap(b.data) != BufferCap {\n\t\t\t\t\tb.data = make([]schema.Float, 0, BufferCap)\n\t\t\t\t}\n\t\t\t\tb.lastUsed = time.Now().Unix()\n\t\t\t\tbufferPool.Put(b)\n\t\t\t\tl.metrics[i] = nil\n\t\t\t}\n\t\t}\n\t}\n\n\t// Recursively traverse children\n\tfor _, child := range l.children {\n\t\tm, err := child.forceFree()\n\t\tn += m\n\t\tif err != nil {\n\t\t\treturn n, err\n\t\t}\n\t}\n\n\treturn n, nil\n}\n\n// sizeInBytes calculates the total memory usage of all buffers in the subtree.\n//\n// Recursively sums buffer data sizes (count of Float values × sizeof(Float)), plus\n// per-buffer struct overhead, across this level's metrics and all child levels. 
Used by MemoryUsageTracker to enforce\n// memory cap limits.\n//\n// Returns:\n// - int64: Total bytes used by buffer data in this subtree\nfunc (l *Level) sizeInBytes() int64 {\n\tl.lock.RLock()\n\tdefer l.lock.RUnlock()\n\tsize := int64(0)\n\n\tfor _, b := range l.metrics {\n\t\tif b != nil {\n\t\t\tsize += b.count() * int64(unsafe.Sizeof(schema.Float(0)))\n\t\t\tsize += b.bufferCount() * int64(unsafe.Sizeof(buffer{}))\n\t\t}\n\t}\n\n\tfor _, child := range l.children {\n\t\tsize += child.sizeInBytes()\n\t}\n\n\treturn size\n}\n\n// findLevel navigates to the level specified by selector, returning nil if not found.\n//\n// Read-only variant of findLevelOrCreate. Does not create missing levels.\n// Recursively descends the tree following the selector path.\n//\n// Parameters:\n// - selector: Hierarchical path (e.g., []string{\"emmy\", \"node001\", \"cpu0\"})\n//\n// Returns:\n// - *Level: The target level, or nil if any component in the path does not exist\nfunc (l *Level) findLevel(selector []string) *Level {\n\tif len(selector) == 0 {\n\t\treturn l\n\t}\n\n\tl.lock.RLock()\n\tdefer l.lock.RUnlock()\n\n\tlvl := l.children[selector[0]]\n\tif lvl == nil {\n\t\treturn nil\n\t}\n\n\treturn lvl.findLevel(selector[1:])\n}\n\n// findBuffers invokes callback on all buffers matching the selector pattern.\n//\n// Supports flexible selector patterns (from cc-lib/util.Selector):\n// - Exact match: Selector element with String set (e.g., \"node001\")\n// - Group match: Selector element with Group set (e.g., [\"cpu0\", \"cpu2\", \"cpu4\"])\n// - Wildcard: Selector element with Any=true (matches all children)\n//\n// Empty selector (len==0) matches current level's buffer at 'offset' and recursively\n// all descendant buffers at the same offset (used for aggregation queries).\n//\n// Parameters:\n// - selector: Pattern to match (consumed recursively)\n// - offset: Metric index in metrics slice (from MetricConfig.offset)\n// - f: Callback invoked on each matching buffer\n//\n// Returns:\n// - error: First error returned by callback, or nil if all succeeded\n//\n// Example:\n//\n//\t// Find all cpu0 buffers across all hosts:\n//\tfindBuffers([]Selector{{Any: true}, {String: \"cpu0\"}}, metricOffset, callback)\nfunc (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error {\n\tl.lock.RLock()\n\tdefer l.lock.RUnlock()\n\n\tif len(selector) == 0 {\n\t\tb := l.metrics[offset]\n\t\tif b != nil {\n\t\t\treturn f(b)\n\t\t}\n\n\t\tfor _, lvl := range l.children {\n\t\t\terr := lvl.findBuffers(nil, offset, f)\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t}\n\t\treturn nil\n\t}\n\n\tsel := selector[0]\n\tif len(sel.String) != 0 && l.children != nil {\n\t\tlvl, ok := l.children[sel.String]\n\t\tif ok {\n\t\t\terr := lvl.findBuffers(selector[1:], offset, f)\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t}\n\t\treturn nil\n\t}\n\n\tif sel.Group != nil && l.children != nil {\n\t\tfor _, key := range sel.Group {\n\t\t\tlvl, ok := l.children[key]\n\t\t\tif ok {\n\t\t\t\terr := lvl.findBuffers(selector[1:], offset, f)\n\t\t\t\tif err != nil {\n\t\t\t\t\treturn err\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\treturn nil\n\t}\n\n\tif sel.Any && l.children != nil {\n\t\tfor _, lvl := range l.children {\n\t\t\tif err := lvl.findBuffers(selector[1:], offset, f); err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t}\n\t\treturn nil\n\t}\n\n\treturn nil\n}\n","numLines":417,"startLine":1,"totalLines":417}},"sourceToolAssistantUUID":"07a3cbc6-6b63-4740-b1f3-8d06479467e0"}
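The findLevelOrCreate doc comment in the level.go listing above describes a double-checked locking scheme: an RLock lookup on the fast path, then Lock plus a re-check on the slow path. Below is a minimal, self-contained Go sketch of that pattern; the node type and childOrCreate helper are simplified stand-ins for illustration, not the package's actual Level API.

package main

import (
	"fmt"
	"sync"
)

type node struct {
	mu       sync.RWMutex
	children map[string]*node
}

// childOrCreate returns the child for key, creating it if missing.
// Fast path: RLock + map lookup. Slow path: take the write lock and
// re-check, since another goroutine may have created the child while
// this one waited for the lock.
func (n *node) childOrCreate(key string) *node {
	n.mu.RLock()
	c, ok := n.children[key]
	n.mu.RUnlock()
	if ok {
		return c
	}

	n.mu.Lock()
	defer n.mu.Unlock()
	if c, ok := n.children[key]; ok {
		return c // Lost the race: another goroutine created it first.
	}
	c = &node{}
	if n.children == nil {
		n.children = map[string]*node{}
	}
	n.children[key] = c
	return c
}

func main() {
	root := &node{}
	var wg sync.WaitGroup
	for i := 0; i < 8; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			root.childOrCreate("node001").childOrCreate("cpu0")
		}()
	}
	wg.Wait()
	fmt.Println(len(root.children)) // 1: all goroutines shared one child
}

The re-check under the write lock is what keeps creation race-free while the common lookup stays on the cheap read lock.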
{"parentUuid":"018bdf68-a2be-40e3-8b78-9d74baff7465","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01Ari9c62VwNixy2h3pVeCcz","toolUseID":"toolu_01Ari9c62VwNixy2h3pVeCcz","timestamp":"2026-03-29T19:34:33.002Z","uuid":"b8757618-f379-4047-b56b-d740fa0f1aa9"}
{"parentUuid":"396aea8b-7eb7-4780-9dc2-677da33e7aa7","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01Ari9c62VwNixy2h3pVeCcz","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// Package metricstore provides walCheckpoint.go: WAL-based checkpoint implementation.\n 7→//\n 8→// This replaces the Avro shadow tree with an append-only Write-Ahead Log (WAL)\n 9→// per host, eliminating the extra memory overhead of the AvroStore and providing\n 10→// truly continuous (per-write) crash safety.\n 11→//\n 12→// # Architecture\n 13→//\n 14→//\tMetric write (DecodeLine)\n 15→//\t │\n 16→//\t ├─► WriteToLevel() → main MemoryStore (unchanged)\n 17→//\t │\n 18→//\t └─► WALMessages channel\n 19→//\t │\n 20→//\t ▼\n 21→//\t WALStaging goroutine\n 22→//\t │\n 23→//\t ▼\n 24→//\t checkpoints/cluster/host/current.wal (append-only, binary)\n 25→//\n 26→//\tPeriodic checkpoint (Checkpointing goroutine):\n 27→//\t 1. Write .bin snapshot (column-oriented, from main tree)\n 28→//\t 2. Signal WALStaging to truncate current.wal per host\n 29→//\n 30→//\tOn restart (FromCheckpoint):\n 31→//\t 1. Load most recent .bin snapshot\n 32→//\t 2. Replay current.wal (overwrite-safe: buffer.write handles duplicate timestamps)\n 33→//\n 34→// # WAL Record Format\n 35→//\n 36→//\t[4B magic 0xCC1DA7A1][4B payload_len][payload][4B CRC32]\n 37→//\n 38→//\tpayload:\n 39→//\t [8B timestamp int64]\n 40→//\t [2B metric_name_len uint16][N metric name bytes]\n 41→//\t [1B selector_count uint8]\n 42→//\t per selector: [1B selector_len uint8][M selector bytes]\n 43→//\t [4B value float32 bits]\n 44→//\n 45→// # Binary Snapshot Format\n 46→//\n 47→//\t[4B magic 0xCC5B0001][8B from int64][8B to int64]\n 48→//\tLevel tree (recursive):\n 49→//\t [4B num_metrics uint32]\n 50→//\t per metric:\n 51→//\t [2B name_len uint16][N name bytes]\n 52→//\t [8B frequency int64][8B start int64]\n 53→//\t [4B num_values uint32][num_values × 4B float32]\n 54→//\t [4B num_children uint32]\n 55→//\t per child: [2B name_len uint16][N name bytes] + Level (recursive)\n 56→package metricstore\n 57→\n 58→import (\n 59→\t\"bufio\"\n 60→\t\"context\"\n 61→\t\"encoding/binary\"\n 62→\t\"fmt\"\n 63→\t\"hash/crc32\"\n 64→\t\"hash/fnv\"\n 65→\t\"io\"\n 66→\t\"math\"\n 67→\t\"os\"\n 68→\t\"path\"\n 69→\t\"strings\"\n 70→\t\"sync\"\n 71→\t\"sync/atomic\"\n 72→\t\"time\"\n 73→\n 74→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 75→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 76→)\n 77→\n 78→// Magic numbers for binary formats.\n 79→const (\n 80→\twalFileMagic = uint32(0xCC1DA701) // WAL file header magic\n 81→\twalRecordMagic = uint32(0xCC1DA7A1) // WAL record magic\n 82→\tsnapFileMagic = uint32(0xCC5B0001) // Binary snapshot magic\n 83→)\n 84→\n 85→// walShardChs holds per-shard channels for WAL messages.\n 86→// Initialized by WALStaging; nil when WAL is not active.\n 87→var walShardChs []chan *WALMessage\n 88→\n 89→// walShardRotateChs holds per-shard channels for WAL rotation requests.\n 90→var walShardRotateChs []chan walRotateReq\n 91→\n 92→// walNumShards stores the number of shards (set during 
WALStaging init).\n 93→var walNumShards int\n 94→\n 95→// walStagingWg tracks WALStaging goroutine exits for shutdown synchronization.\n 96→var walStagingWg sync.WaitGroup\n 97→\n 98→// walShuttingDown is set before closing shard channels to prevent\n 99→// SendWALMessage from sending on a closed channel (which panics in Go).\n 100→var walShuttingDown atomic.Bool\n 101→\n 102→// walCheckpointActive is set during binary checkpoint writes.\n 103→// While active, SendWALMessage skips sending (returns true) because the\n 104→// snapshot captures all in-memory data, making WAL writes redundant.\n 105→var walCheckpointActive atomic.Bool\n 106→\n 107→// WALMessage represents a single metric write to be appended to the WAL.\n 108→// Cluster and Node are NOT stored in the WAL record (inferred from file path).\n 109→type WALMessage struct {\n 110→\tMetricName string\n 111→\tCluster string\n 112→\tNode string\n 113→\tSelector []string\n 114→\tValue schema.Float\n 115→\tTimestamp int64\n 116→}\n 117→\n 118→// walRotateReq requests WAL file rotation for a specific host directory.\n 119→// The done channel is closed by the WAL goroutine when rotation is complete.\n 120→type walRotateReq struct {\n 121→\thostDir string\n 122→\tdone chan struct{}\n 123→}\n 124→\n 125→// walFileState holds an open WAL file handle and buffered writer for one host directory.\n 126→type walFileState struct {\n 127→\tf *os.File\n 128→\tw *bufio.Writer\n 129→\tdirty bool\n 130→\tsize int64 // approximate bytes written (tracked from open + writes)\n 131→}\n 132→\n 133→// walFlushInterval controls how often dirty WAL files are flushed to disk.\n 134→// Decoupling flushes from message processing lets the consumer run at memory\n 135→// speed, amortizing syscall overhead across many writes.\n 136→const walFlushInterval = 1 * time.Second\n 137→\n 138→// walShardIndex computes which shard a message belongs to based on cluster+node.\n 139→// Uses FNV-1a hash for fast, well-distributed mapping.\n 140→func walShardIndex(cluster, node string) int {\n 141→\th := fnv.New32a()\n 142→\th.Write([]byte(cluster))\n 143→\th.Write([]byte{0})\n 144→\th.Write([]byte(node))\n 145→\treturn int(h.Sum32()) % walNumShards\n 146→}\n 147→\n 148→// SendWALMessage routes a WAL message to the appropriate shard channel.\n 149→// Returns false if the channel is full or shutdown is in progress.\n 150→func SendWALMessage(msg *WALMessage) bool {\n 151→\tif walShardChs == nil || walShuttingDown.Load() {\n 152→\t\treturn false\n 153→\t}\n 154→\tif walCheckpointActive.Load() {\n 155→\t\treturn true // Data safe in memory; snapshot will capture it\n 156→\t}\n 157→\tshard := walShardIndex(msg.Cluster, msg.Node)\n 158→\tselect {\n 159→\tcase walShardChs[shard] <- msg:\n 160→\t\treturn true\n 161→\tdefault:\n 162→\t\treturn false\n 163→\t}\n 164→}\n 165→\n 166→// WALStaging starts N sharded background goroutines that receive WALMessage items\n 167→// and append binary WAL records to per-host current.wal files.\n 168→// Each shard owns a subset of hosts (determined by hash of cluster+node),\n 169→// parallelizing serialization and I/O while keeping per-host writes serial.\n 170→func WALStaging(wg *sync.WaitGroup, ctx context.Context) {\n 171→\tif Keys.Checkpoints.FileFormat == \"json\" {\n 172→\t\treturn\n 173→\t}\n 174→\n 175→\twalNumShards = max(Keys.NumWorkers, 1)\n 176→\tchBufSize := max(65536/walNumShards, 1024)\n 177→\n 178→\twalShardChs = make([]chan *WALMessage, walNumShards)\n 179→\twalShardRotateChs = make([]chan walRotateReq, walNumShards)\n 180→\n 181→\tfor i 
:= range walNumShards {\n 182→\t\twalShardChs[i] = make(chan *WALMessage, chBufSize)\n 183→\t\twalShardRotateChs[i] = make(chan walRotateReq, 64)\n 184→\t}\n 185→\n 186→\tfor i := range walNumShards {\n 187→\t\tmsgCh := walShardChs[i]\n 188→\t\trotateCh := walShardRotateChs[i]\n 189→\n 190→\t\twalStagingWg.Add(1)\n 191→\t\twg.Go(func() {\n 192→\t\t\tdefer walStagingWg.Done()\n 193→\t\t\thostFiles := make(map[string]*walFileState)\n 194→\n 195→\t\t\tdefer func() {\n 196→\t\t\t\tfor _, ws := range hostFiles {\n 197→\t\t\t\t\tif ws.f != nil {\n 198→\t\t\t\t\t\tws.w.Flush()\n 199→\t\t\t\t\t\tws.f.Close()\n 200→\t\t\t\t\t}\n 201→\t\t\t\t}\n 202→\t\t\t}()\n 203→\n 204→\t\t\tgetOrOpenWAL := func(hostDir string) *walFileState {\n 205→\t\t\t\tws, ok := hostFiles[hostDir]\n 206→\t\t\t\tif ok {\n 207→\t\t\t\t\treturn ws\n 208→\t\t\t\t}\n 209→\n 210→\t\t\t\tif err := os.MkdirAll(hostDir, CheckpointDirPerms); err != nil {\n 211→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: mkdir %s: %v\", hostDir, err)\n 212→\t\t\t\t\treturn nil\n 213→\t\t\t\t}\n 214→\n 215→\t\t\t\twalPath := path.Join(hostDir, \"current.wal\")\n 216→\t\t\t\tf, err := os.OpenFile(walPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, CheckpointFilePerms)\n 217→\t\t\t\tif err != nil {\n 218→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: open %s: %v\", walPath, err)\n 219→\t\t\t\t\treturn nil\n 220→\t\t\t\t}\n 221→\n 222→\t\t\t\tw := bufio.NewWriter(f)\n 223→\n 224→\t\t\t\t// Write file header magic if file is new (empty).\n 225→\t\t\t\tinfo, err := f.Stat()\n 226→\t\t\t\tvar fileSize int64\n 227→\t\t\t\tif err == nil {\n 228→\t\t\t\t\tfileSize = info.Size()\n 229→\t\t\t\t}\n 230→\t\t\t\tif err == nil && fileSize == 0 {\n 231→\t\t\t\t\tvar hdr [4]byte\n 232→\t\t\t\t\tbinary.LittleEndian.PutUint32(hdr[:], walFileMagic)\n 233→\t\t\t\t\tif _, err := w.Write(hdr[:]); err != nil {\n 234→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: write header %s: %v\", walPath, err)\n 235→\t\t\t\t\t\tf.Close()\n 236→\t\t\t\t\t\treturn nil\n 237→\t\t\t\t\t}\n 238→\t\t\t\t\tfileSize = 4\n 239→\t\t\t\t}\n 240→\n 241→\t\t\t\tws = &walFileState{f: f, w: w, size: fileSize}\n 242→\t\t\t\thostFiles[hostDir] = ws\n 243→\t\t\t\treturn ws\n 244→\t\t\t}\n 245→\n 246→\t\t\tprocessMsg := func(msg *WALMessage) {\n 247→\t\t\t\thostDir := path.Join(Keys.Checkpoints.RootDir, msg.Cluster, msg.Node)\n 248→\t\t\t\tws := getOrOpenWAL(hostDir)\n 249→\t\t\t\tif ws == nil {\n 250→\t\t\t\t\treturn\n 251→\t\t\t\t}\n 252→\n 253→\t\t\t\t// Enforce max WAL size: force-rotate before writing if limit is exceeded.\n 254→\t\t\t\t// The in-memory store still holds the data; only crash-recovery coverage is lost.\n 255→\t\t\t\tif maxSize := Keys.Checkpoints.MaxWALSize; maxSize > 0 && ws.size >= maxSize {\n 256→\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL: force-rotating %s (size %d >= limit %d)\",\n 257→\t\t\t\t\t\thostDir, ws.size, maxSize)\n 258→\t\t\t\t\tws.w.Flush()\n 259→\t\t\t\t\tws.f.Close()\n 260→\t\t\t\t\twalPath := path.Join(hostDir, \"current.wal\")\n 261→\t\t\t\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n 262→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n 263→\t\t\t\t\t}\n 264→\t\t\t\t\tdelete(hostFiles, hostDir)\n 265→\t\t\t\t\tws = getOrOpenWAL(hostDir)\n 266→\t\t\t\t\tif ws == nil {\n 267→\t\t\t\t\t\treturn\n 268→\t\t\t\t\t}\n 269→\t\t\t\t}\n 270→\n 271→\t\t\t\tn, err := writeWALRecordDirect(ws.w, msg)\n 272→\t\t\t\tif err != nil {\n 273→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: write record: %v\", err)\n 274→\t\t\t\t}\n 
275→\t\t\t\tws.size += int64(n)\n 276→\t\t\t\tws.dirty = true\n 277→\t\t\t}\n 278→\n 279→\t\t\tprocessRotate := func(req walRotateReq) {\n 280→\t\t\t\tws, ok := hostFiles[req.hostDir]\n 281→\t\t\t\tif ok && ws.f != nil {\n 282→\t\t\t\t\tws.w.Flush()\n 283→\t\t\t\t\tws.f.Close()\n 284→\t\t\t\t\twalPath := path.Join(req.hostDir, \"current.wal\")\n 285→\t\t\t\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n 286→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n 287→\t\t\t\t\t}\n 288→\t\t\t\t\tdelete(hostFiles, req.hostDir)\n 289→\t\t\t\t}\n 290→\t\t\t\tclose(req.done)\n 291→\t\t\t}\n 292→\n 293→\t\t\tflushDirty := func() {\n 294→\t\t\t\tfor _, ws := range hostFiles {\n 295→\t\t\t\t\tif ws.dirty {\n 296→\t\t\t\t\t\tws.w.Flush()\n 297→\t\t\t\t\t\tws.dirty = false\n 298→\t\t\t\t\t}\n 299→\t\t\t\t}\n 300→\t\t\t}\n 301→\n 302→\t\t\tticker := time.NewTicker(walFlushInterval)\n 303→\t\t\tdefer ticker.Stop()\n 304→\n 305→\t\t\t// drainBatch processes up to 4096 pending messages without blocking.\n 306→\t\t\t// Returns false if the channel was closed.\n 307→\t\t\tdrainBatch := func() bool {\n 308→\t\t\t\tfor range 4096 {\n 309→\t\t\t\t\tselect {\n 310→\t\t\t\t\tcase msg, ok := <-msgCh:\n 311→\t\t\t\t\t\tif !ok {\n 312→\t\t\t\t\t\t\tflushDirty()\n 313→\t\t\t\t\t\t\treturn false\n 314→\t\t\t\t\t\t}\n 315→\t\t\t\t\t\tprocessMsg(msg)\n 316→\t\t\t\t\tcase req := <-rotateCh:\n 317→\t\t\t\t\t\tprocessRotate(req)\n 318→\t\t\t\t\tdefault:\n 319→\t\t\t\t\t\treturn true\n 320→\t\t\t\t\t}\n 321→\t\t\t\t}\n 322→\t\t\t\treturn true\n 323→\t\t\t}\n 324→\n 325→\t\t\tfor {\n 326→\t\t\t\tselect {\n 327→\t\t\t\tcase <-ctx.Done():\n 328→\t\t\t\t\t// On shutdown, skip draining buffered messages — a full binary\n 329→\t\t\t\t\t// checkpoint will be written from in-memory state, making\n 330→\t\t\t\t\t// buffered WAL records redundant.\n 331→\t\t\t\t\tflushDirty()\n 332→\t\t\t\t\treturn\n 333→\t\t\t\tcase msg, ok := <-msgCh:\n 334→\t\t\t\t\tif !ok {\n 335→\t\t\t\t\t\treturn\n 336→\t\t\t\t\t}\n 337→\t\t\t\t\tprocessMsg(msg)\n 338→\t\t\t\t\tif !drainBatch() {\n 339→\t\t\t\t\t\treturn\n 340→\t\t\t\t\t}\n 341→\t\t\t\t\t// No flush here — timer handles periodic flushing.\n 342→\t\t\t\tcase <-ticker.C:\n 343→\t\t\t\t\tflushDirty()\n 344→\t\t\t\tcase req := <-rotateCh:\n 345→\t\t\t\t\tprocessRotate(req)\n 346→\t\t\t\t}\n 347→\t\t\t}\n 348→\t\t})\n 349→\t}\n 350→}\n 351→\n 352→// WaitForWALStagingDrain blocks until all WALStaging goroutines have exited.\n 353→// Must be called after closing walShardChs to ensure all file handles are\n 354→// flushed and closed before checkpoint writes begin.\n 355→func WaitForWALStagingDrain() {\n 356→\twalStagingWg.Wait()\n 357→}\n 358→\n 359→// RotateWALFiles sends rotation requests for the given host directories\n 360→// and blocks until all rotations complete. 
Each request is routed to the\n 361→// shard that owns the host directory.\n 362→//\n 363→// If shutdown is in progress (WAL staging goroutines may have exited),\n 364→// rotation is skipped to avoid deadlocking on abandoned channels.\n 365→func RotateWALFiles(hostDirs []string) {\n 366→\tif walShardRotateChs == nil || walShuttingDown.Load() {\n 367→\t\treturn\n 368→\t}\n 369→\tdeadline := time.After(2 * time.Minute)\n 370→\tdones := make([]chan struct{}, 0, len(hostDirs))\n 371→\tfor _, dir := range hostDirs {\n 372→\t\tdone := make(chan struct{})\n 373→\t\tshard := walShardIndexFromDir(dir)\n 374→\t\tselect {\n 375→\t\tcase walShardRotateChs[shard] <- walRotateReq{hostDir: dir, done: done}:\n 376→\t\t\tdones = append(dones, done)\n 377→\t\tcase <-deadline:\n 378→\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL rotation send timed out, %d of %d hosts remaining\",\n 379→\t\t\t\tlen(hostDirs)-len(dones), len(hostDirs))\n 380→\t\t\tgoto waitDones\n 381→\t\t}\n 382→\t}\n 383→waitDones:\n 384→\tfor _, done := range dones {\n 385→\t\tselect {\n 386→\t\tcase <-done:\n 387→\t\tcase <-time.After(30 * time.Second):\n 388→\t\t\tcclog.Warn(\"[METRICSTORE]> WAL rotation completion timed out, continuing\")\n 389→\t\t\treturn\n 390→\t\t}\n 391→\t}\n 392→}\n 393→\n 394→// walShardIndexFromDir extracts cluster and node from a host directory path\n 395→// and returns the shard index. The path format is: rootDir/cluster/node\n 396→func walShardIndexFromDir(hostDir string) int {\n 397→\t// hostDir = .../cluster/node — extract last two path components\n 398→\tnode := path.Base(hostDir)\n 399→\tcluster := path.Base(path.Dir(hostDir))\n 400→\treturn walShardIndex(cluster, node)\n 401→}\n 402→\n 403→// RotateWALFilesAfterShutdown removes the current.wal files for the given\n 404→// host directories directly; the WAL staging goroutines have already exited.\n 405→func RotateWALFilesAfterShutdown(hostDirs []string) {\n 406→\tfor _, dir := range hostDirs {\n 407→\t\twalPath := path.Join(dir, \"current.wal\")\n 408→\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n 409→\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n 410→\t\t}\n 411→\t}\n 412→}\n 413→\n 414→// writeWALRecordDirect encodes a WAL record into a contiguous buffer first,\n 415→// then writes it to the bufio.Writer in a single call. This prevents partial\n 416→// records in the write buffer if a write error occurs mid-record (e.g. 
disk full).\n 417→// Returns the number of bytes written and any error.\n 418→func writeWALRecordDirect(w *bufio.Writer, msg *WALMessage) (int, error) {\n 419→\t// Compute payload and total record size.\n 420→\tpayloadSize := 8 + 2 + len(msg.MetricName) + 1 + 4\n 421→\tfor _, s := range msg.Selector {\n 422→\t\tpayloadSize += 1 + len(s)\n 423→\t}\n 424→\t// Total: 8 (header) + payload + 4 (CRC).\n 425→\ttotalSize := 8 + payloadSize + 4\n 426→\n 427→\t// Use stack buffer for typical small records, heap-allocate only for large ones.\n 428→\tvar stackBuf [256]byte\n 429→\tvar buf []byte\n 430→\tif totalSize <= len(stackBuf) {\n 431→\t\tbuf = stackBuf[:totalSize]\n 432→\t} else {\n 433→\t\tbuf = make([]byte, totalSize)\n 434→\t}\n 435→\n 436→\t// Header: magic + payload length.\n 437→\tbinary.LittleEndian.PutUint32(buf[0:4], walRecordMagic)\n 438→\tbinary.LittleEndian.PutUint32(buf[4:8], uint32(payloadSize))\n 439→\n 440→\t// Payload starts at offset 8.\n 441→\tp := 8\n 442→\n 443→\t// Timestamp (8 bytes).\n 444→\tbinary.LittleEndian.PutUint64(buf[p:p+8], uint64(msg.Timestamp))\n 445→\tp += 8\n 446→\n 447→\t// Metric name length (2 bytes) + metric name.\n 448→\tbinary.LittleEndian.PutUint16(buf[p:p+2], uint16(len(msg.MetricName)))\n 449→\tp += 2\n 450→\tp += copy(buf[p:], msg.MetricName)\n 451→\n 452→\t// Selector count (1 byte).\n 453→\tbuf[p] = byte(len(msg.Selector))\n 454→\tp++\n 455→\n 456→\t// Selectors (1-byte length + bytes each).\n 457→\tfor _, sel := range msg.Selector {\n 458→\t\tbuf[p] = byte(len(sel))\n 459→\t\tp++\n 460→\t\tp += copy(buf[p:], sel)\n 461→\t}\n 462→\n 463→\t// Value (4 bytes, float32 bits).\n 464→\tbinary.LittleEndian.PutUint32(buf[p:p+4], math.Float32bits(float32(msg.Value)))\n 465→\tp += 4\n 466→\n 467→\t// CRC32 over payload (bytes 8..8+payloadSize).\n 468→\tcrc := crc32.ChecksumIEEE(buf[8 : 8+payloadSize])\n 469→\tbinary.LittleEndian.PutUint32(buf[p:p+4], crc)\n 470→\n 471→\t// Single atomic write to the buffered writer.\n 472→\tn, err := w.Write(buf)\n 473→\treturn n, err\n 474→}\n 475→\n 476→// readWALRecord reads one WAL record from the reader.\n 477→// Returns (nil, nil) on clean EOF. 
Returns error on data corruption.\n 478→// A CRC mismatch indicates a truncated trailing record (expected on crash).\n 479→func readWALRecord(r io.Reader) (*WALMessage, error) {\n 480→\tvar magic uint32\n 481→\tif err := binary.Read(r, binary.LittleEndian, &magic); err != nil {\n 482→\t\tif err == io.EOF {\n 483→\t\t\treturn nil, nil // Clean EOF\n 484→\t\t}\n 485→\t\treturn nil, fmt.Errorf(\"read record magic: %w\", err)\n 486→\t}\n 487→\n 488→\tif magic != walRecordMagic {\n 489→\t\treturn nil, fmt.Errorf(\"invalid record magic 0x%08X (expected 0x%08X)\", magic, walRecordMagic)\n 490→\t}\n 491→\n 492→\tvar payloadLen uint32\n 493→\tif err := binary.Read(r, binary.LittleEndian, &payloadLen); err != nil {\n 494→\t\treturn nil, fmt.Errorf(\"read payload length: %w\", err)\n 495→\t}\n 496→\n 497→\tif payloadLen > 1<<20 { // 1 MB sanity limit\n 498→\t\treturn nil, fmt.Errorf(\"record payload too large: %d bytes\", payloadLen)\n 499→\t}\n 500→\n 501→\tpayload := make([]byte, payloadLen)\n 502→\tif _, err := io.ReadFull(r, payload); err != nil {\n 503→\t\treturn nil, fmt.Errorf(\"read payload: %w\", err)\n 504→\t}\n 505→\n 506→\tvar storedCRC uint32\n 507→\tif err := binary.Read(r, binary.LittleEndian, &storedCRC); err != nil {\n 508→\t\treturn nil, fmt.Errorf(\"read CRC: %w\", err)\n 509→\t}\n 510→\n 511→\tif crc32.ChecksumIEEE(payload) != storedCRC {\n 512→\t\treturn nil, fmt.Errorf(\"CRC mismatch (truncated write or corruption)\")\n 513→\t}\n 514→\n 515→\treturn parseWALPayload(payload)\n 516→}\n 517→\n 518→// parseWALPayload decodes a binary payload into a WALMessage.\n 519→func parseWALPayload(payload []byte) (*WALMessage, error) {\n 520→\tif len(payload) < 8+2+1+4 {\n 521→\t\treturn nil, fmt.Errorf(\"payload too short: %d bytes\", len(payload))\n 522→\t}\n 523→\n 524→\toffset := 0\n 525→\n 526→\t// Timestamp (8 bytes)\n 527→\tts := int64(binary.LittleEndian.Uint64(payload[offset : offset+8]))\n 528→\toffset += 8\n 529→\n 530→\t// Metric name (2-byte length + bytes)\n 531→\tif offset+2 > len(payload) {\n 532→\t\treturn nil, fmt.Errorf(\"metric name length overflows payload\")\n 533→\t}\n 534→\tmLen := int(binary.LittleEndian.Uint16(payload[offset : offset+2]))\n 535→\toffset += 2\n 536→\n 537→\tif offset+mLen > len(payload) {\n 538→\t\treturn nil, fmt.Errorf(\"metric name overflows payload\")\n 539→\t}\n 540→\tmetricName := string(payload[offset : offset+mLen])\n 541→\toffset += mLen\n 542→\n 543→\t// Selector count (1 byte)\n 544→\tif offset >= len(payload) {\n 545→\t\treturn nil, fmt.Errorf(\"selector count overflows payload\")\n 546→\t}\n 547→\tselCount := int(payload[offset])\n 548→\toffset++\n 549→\n 550→\tselectors := make([]string, selCount)\n 551→\tfor i := range selCount {\n 552→\t\tif offset >= len(payload) {\n 553→\t\t\treturn nil, fmt.Errorf(\"selector[%d] length overflows payload\", i)\n 554→\t\t}\n 555→\t\tsLen := int(payload[offset])\n 556→\t\toffset++\n 557→\n 558→\t\tif offset+sLen > len(payload) {\n 559→\t\t\treturn nil, fmt.Errorf(\"selector[%d] data overflows payload\", i)\n 560→\t\t}\n 561→\t\tselectors[i] = string(payload[offset : offset+sLen])\n 562→\t\toffset += sLen\n 563→\t}\n 564→\n 565→\t// Value (4 bytes, float32 bits)\n 566→\tif offset+4 > len(payload) {\n 567→\t\treturn nil, fmt.Errorf(\"value overflows payload\")\n 568→\t}\n 569→\tbits := binary.LittleEndian.Uint32(payload[offset : offset+4])\n 570→\tvalue := schema.Float(math.Float32frombits(bits))\n 571→\n 572→\treturn &WALMessage{\n 573→\t\tMetricName: metricName,\n 574→\t\tTimestamp: ts,\n 
575→\t\tSelector: selectors,\n 576→\t\tValue: value,\n 577→\t}, nil\n 578→}\n 579→\n 580→// loadWALFile reads a WAL file and replays all valid records into the Level tree.\n 581→// l is the host-level node. Corrupt or partial trailing records are silently skipped\n 582→// (expected on crash). Records older than 'from' are skipped.\n 583→func (l *Level) loadWALFile(m *MemoryStore, f *os.File, from int64) error {\n 584→\tbr := bufio.NewReader(f)\n 585→\n 586→\t// Verify file header magic.\n 587→\tvar fileMagic uint32\n 588→\tif err := binary.Read(br, binary.LittleEndian, &fileMagic); err != nil {\n 589→\t\tif err == io.EOF {\n 590→\t\t\treturn nil // Empty file, no data\n 591→\t\t}\n 592→\t\treturn fmt.Errorf(\"[METRICSTORE]> WAL: read file header: %w\", err)\n 593→\t}\n 594→\n 595→\tif fileMagic != walFileMagic {\n 596→\t\treturn fmt.Errorf(\"[METRICSTORE]> WAL: invalid file magic 0x%08X (expected 0x%08X)\", fileMagic, walFileMagic)\n 597→\t}\n 598→\n 599→\t// Cache level lookups to avoid repeated tree traversal.\n 600→\tlvlCache := make(map[string]*Level)\n 601→\n 602→\tfor {\n 603→\t\tmsg, err := readWALRecord(br)\n 604→\t\tif err != nil {\n 605→\t\t\t// Truncated trailing record is expected after a crash; stop replaying.\n 606→\t\t\tcclog.Debugf(\"[METRICSTORE]> WAL: stopping replay at corrupted/partial record: %v\", err)\n 607→\t\t\tbreak\n 608→\t\t}\n 609→\t\tif msg == nil {\n 610→\t\t\tbreak // Clean EOF\n 611→\t\t}\n 612→\n 613→\t\tif msg.Timestamp < from {\n 614→\t\t\tcontinue // Older than retention window\n 615→\t\t}\n 616→\n 617→\t\tminfo, ok := m.Metrics[msg.MetricName]\n 618→\t\tif !ok {\n 619→\t\t\tcontinue // Unknown metric (config may have changed)\n 620→\t\t}\n 621→\n 622→\t\t// Cache key is the null-separated selector path.\n 623→\t\tcacheKey := joinSelector(msg.Selector)\n 624→\t\tlvl, ok := lvlCache[cacheKey]\n 625→\t\tif !ok {\n 626→\t\t\tlvl = l.findLevelOrCreate(msg.Selector, len(m.Metrics))\n 627→\t\t\tlvlCache[cacheKey] = lvl\n 628→\t\t}\n 629→\n 630→\t\t// Write directly to the buffer, same as WriteToLevel but without the\n 631→\t\t// global level lookup (we already have the right level).\n 632→\t\tlvl.lock.Lock()\n 633→\t\tb := lvl.metrics[minfo.offset]\n 634→\t\tif b == nil {\n 635→\t\t\tb = newBuffer(msg.Timestamp, minfo.Frequency)\n 636→\t\t\tlvl.metrics[minfo.offset] = b\n 637→\t\t}\n 638→\t\tnb, writeErr := b.write(msg.Timestamp, msg.Value)\n 639→\t\tif writeErr == nil && b != nb {\n 640→\t\t\tlvl.metrics[minfo.offset] = nb\n 641→\t\t}\n 642→\t\t// Ignore write errors for timestamps before buffer start (can happen when\n 643→\t\t// replaying WAL entries that predate a loaded snapshot's start time).\n 644→\t\tlvl.lock.Unlock()\n 645→\t}\n 646→\n 647→\treturn nil\n 648→}\n 649→\n 650→// joinSelector builds a cache key from a selector slice using null bytes as separators.\n 651→func joinSelector(sel []string) string {\n 652→\tif len(sel) == 0 {\n 653→\t\treturn \"\"\n 654→\t}\n 655→\tvar result strings.Builder\n 656→\tresult.WriteString(sel[0])\n 657→\tfor i := 1; i < len(sel); i++ {\n 658→\t\tresult.WriteString(\"\\x00\" + sel[i])\n 659→\t}\n 660→\treturn result.String()\n 661→}\n 662→\n 663→// ToCheckpointWAL writes binary snapshot files for all hosts in parallel.\n 664→// Returns the number of files written, the list of host directories that were\n 665→// successfully checkpointed (for WAL rotation), and any errors.\n 666→func (m *MemoryStore) ToCheckpointWAL(dir string, from, to int64) (int, []string, error) {\n 667→\t// Collect all cluster/host pairs.\n 
668→\tm.root.lock.RLock()\n 669→\ttotalHosts := 0\n 670→\tfor _, l1 := range m.root.children {\n 671→\t\tl1.lock.RLock()\n 672→\t\ttotalHosts += len(l1.children)\n 673→\t\tl1.lock.RUnlock()\n 674→\t}\n 675→\tm.root.lock.RUnlock()\n 676→\n 677→\tlevels := make([]*Level, 0, totalHosts)\n 678→\tselectors := make([][]string, 0, totalHosts)\n 679→\n 680→\tm.root.lock.RLock()\n 681→\tfor sel1, l1 := range m.root.children {\n 682→\t\tl1.lock.RLock()\n 683→\t\tfor sel2, l2 := range l1.children {\n 684→\t\t\tlevels = append(levels, l2)\n 685→\t\t\tselectors = append(selectors, []string{sel1, sel2})\n 686→\t\t}\n 687→\t\tl1.lock.RUnlock()\n 688→\t}\n 689→\tm.root.lock.RUnlock()\n 690→\n 691→\ttype workItem struct {\n 692→\t\tlevel *Level\n 693→\t\thostDir string\n 694→\t\tselector []string\n 695→\t}\n 696→\n 697→\ttotalWork := len(levels)\n 698→\tcclog.Infof(\"[METRICSTORE]> Starting binary checkpoint for %d hosts with %d workers\", totalWork, Keys.NumWorkers)\n 699→\n 700→\tn, errs, completed := int32(0), int32(0), int32(0)\n 701→\tvar successDirs []string\n 702→\tvar successMu sync.Mutex\n 703→\n 704→\tvar wg sync.WaitGroup\n 705→\twg.Add(Keys.NumWorkers)\n 706→\twork := make(chan workItem, Keys.NumWorkers*2)\n 707→\n 708→\t// Progress logging goroutine.\n 709→\tstopProgress := make(chan struct{})\n 710→\tgo func() {\n 711→\t\tticker := time.NewTicker(10 * time.Second)\n 712→\t\tdefer ticker.Stop()\n 713→\t\tfor {\n 714→\t\t\tselect {\n 715→\t\t\tcase <-ticker.C:\n 716→\t\t\t\tcclog.Infof(\"[METRICSTORE]> Checkpoint progress: %d/%d hosts (%d written, %d errors)\",\n 717→\t\t\t\t\tatomic.LoadInt32(&completed), totalWork, atomic.LoadInt32(&n), atomic.LoadInt32(&errs))\n 718→\t\t\tcase <-stopProgress:\n 719→\t\t\t\treturn\n 720→\t\t\t}\n 721→\t\t}\n 722→\t}()\n 723→\n 724→\tfor range Keys.NumWorkers {\n 725→\t\tgo func() {\n 726→\t\t\tdefer wg.Done()\n 727→\t\t\tfor wi := range work {\n 728→\t\t\t\terr := wi.level.toCheckpointBinary(wi.hostDir, from, to, m)\n 729→\t\t\t\tif err != nil {\n 730→\t\t\t\t\tif err == ErrNoNewArchiveData {\n 731→\t\t\t\t\t\tatomic.AddInt32(&completed, 1)\n 732→\t\t\t\t\t\tcontinue\n 733→\t\t\t\t\t}\n 734→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> binary checkpoint error for %s: %v\", wi.hostDir, err)\n 735→\t\t\t\t\tatomic.AddInt32(&errs, 1)\n 736→\t\t\t\t} else {\n 737→\t\t\t\t\tatomic.AddInt32(&n, 1)\n 738→\t\t\t\t\tsuccessMu.Lock()\n 739→\t\t\t\t\tsuccessDirs = append(successDirs, wi.hostDir)\n 740→\t\t\t\t\tsuccessMu.Unlock()\n 741→\t\t\t\t}\n 742→\t\t\t\tatomic.AddInt32(&completed, 1)\n 743→\t\t\t}\n 744→\t\t}()\n 745→\t}\n 746→\n 747→\tfor i := range levels {\n 748→\t\thostDir := path.Join(dir, path.Join(selectors[i]...))\n 749→\t\twork <- workItem{\n 750→\t\t\tlevel: levels[i],\n 751→\t\t\thostDir: hostDir,\n 752→\t\t\tselector: selectors[i],\n 753→\t\t}\n 754→\t}\n 755→\tclose(work)\n 756→\twg.Wait()\n 757→\tclose(stopProgress)\n 758→\n 759→\tif errs > 0 {\n 760→\t\treturn int(n), successDirs, fmt.Errorf(\"[METRICSTORE]> %d errors during binary checkpoint (%d successes)\", errs, n)\n 761→\t}\n 762→\treturn int(n), successDirs, nil\n 763→}\n 764→\n 765→// toCheckpointBinary writes a binary snapshot file for a single host-level node.\n 766→// Uses atomic rename (write to .tmp then rename) to avoid partial reads on crash.\n 767→func (l *Level) toCheckpointBinary(dir string, from, to int64, m *MemoryStore) error {\n 768→\tcf, err := l.toCheckpointFile(from, to, m)\n 769→\tif err != nil {\n 770→\t\treturn err\n 771→\t}\n 772→\tif cf == nil {\n 773→\t\treturn 
ErrNoNewArchiveData\n 774→\t}\n 775→\n 776→\tif err := os.MkdirAll(dir, CheckpointDirPerms); err != nil {\n 777→\t\treturn fmt.Errorf(\"mkdir %s: %w\", dir, err)\n 778→\t}\n 779→\n 780→\t// Write to a temp file first, then rename (atomic on POSIX).\n 781→\ttmpPath := path.Join(dir, fmt.Sprintf(\"%d.bin.tmp\", from))\n 782→\tfinalPath := path.Join(dir, fmt.Sprintf(\"%d.bin\", from))\n 783→\n 784→\tf, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n 785→\tif err != nil {\n 786→\t\treturn fmt.Errorf(\"open binary snapshot %s: %w\", tmpPath, err)\n 787→\t}\n 788→\n 789→\tbw := bufio.NewWriter(f)\n 790→\tif err := writeBinarySnapshotFile(bw, cf); err != nil {\n 791→\t\tf.Close()\n 792→\t\tos.Remove(tmpPath)\n 793→\t\treturn fmt.Errorf(\"write binary snapshot: %w\", err)\n 794→\t}\n 795→\tif err := bw.Flush(); err != nil {\n 796→\t\tf.Close()\n 797→\t\tos.Remove(tmpPath)\n 798→\t\treturn err\n 799→\t}\n 800→\tf.Close()\n 801→\n 802→\treturn os.Rename(tmpPath, finalPath)\n 803→}\n 804→\n 805→// writeBinarySnapshotFile writes the binary snapshot file header and level tree.\n 806→func writeBinarySnapshotFile(w io.Writer, cf *CheckpointFile) error {\n 807→\tif err := binary.Write(w, binary.LittleEndian, snapFileMagic); err != nil {\n 808→\t\treturn err\n 809→\t}\n 810→\tif err := binary.Write(w, binary.LittleEndian, cf.From); err != nil {\n 811→\t\treturn err\n 812→\t}\n 813→\tif err := binary.Write(w, binary.LittleEndian, cf.To); err != nil {\n 814→\t\treturn err\n 815→\t}\n 816→\treturn writeBinaryLevel(w, cf)\n 817→}\n 818→\n 819→// writeBinaryLevel recursively writes a CheckpointFile level in binary format.\n 820→func writeBinaryLevel(w io.Writer, cf *CheckpointFile) error {\n 821→\tif err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Metrics))); err != nil {\n 822→\t\treturn err\n 823→\t}\n 824→\n 825→\tfor name, metric := range cf.Metrics {\n 826→\t\tif err := writeString16(w, name); err != nil {\n 827→\t\t\treturn err\n 828→\t\t}\n 829→\t\tif err := binary.Write(w, binary.LittleEndian, metric.Frequency); err != nil {\n 830→\t\t\treturn err\n 831→\t\t}\n 832→\t\tif err := binary.Write(w, binary.LittleEndian, metric.Start); err != nil {\n 833→\t\t\treturn err\n 834→\t\t}\n 835→\t\tif err := binary.Write(w, binary.LittleEndian, uint32(len(metric.Data))); err != nil {\n 836→\t\t\treturn err\n 837→\t\t}\n 838→\t\tfor _, v := range metric.Data {\n 839→\t\t\tif err := binary.Write(w, binary.LittleEndian, math.Float32bits(float32(v))); err != nil {\n 840→\t\t\t\treturn err\n 841→\t\t\t}\n 842→\t\t}\n 843→\t}\n 844→\n 845→\tif err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Children))); err != nil {\n 846→\t\treturn err\n 847→\t}\n 848→\n 849→\tfor name, child := range cf.Children {\n 850→\t\tif err := writeString16(w, name); err != nil {\n 851→\t\t\treturn err\n 852→\t\t}\n 853→\t\tif err := writeBinaryLevel(w, child); err != nil {\n 854→\t\t\treturn err\n 855→\t\t}\n 856→\t}\n 857→\n 858→\treturn nil\n 859→}\n 860→\n 861→// writeString16 writes a 2-byte length-prefixed string to w.\n 862→func writeString16(w io.Writer, s string) error {\n 863→\tif err := binary.Write(w, binary.LittleEndian, uint16(len(s))); err != nil {\n 864→\t\treturn err\n 865→\t}\n 866→\t_, err := io.WriteString(w, s)\n 867→\treturn err\n 868→}\n 869→\n 870→// loadBinaryFile reads a binary snapshot file and loads data into the Level tree.\n 871→// The retention check (from) is applied to the file's 'to' timestamp.\n 872→func (l *Level) loadBinaryFile(m *MemoryStore, f 
*os.File, from int64) error {\n 873→\tbr := bufio.NewReader(f)\n 874→\n 875→\tvar magic uint32\n 876→\tif err := binary.Read(br, binary.LittleEndian, &magic); err != nil {\n 877→\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read magic: %w\", err)\n 878→\t}\n 879→\tif magic != snapFileMagic {\n 880→\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: invalid magic 0x%08X (expected 0x%08X)\", magic, snapFileMagic)\n 881→\t}\n 882→\n 883→\tvar fileFrom, fileTo int64\n 884→\tif err := binary.Read(br, binary.LittleEndian, &fileFrom); err != nil {\n 885→\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read from: %w\", err)\n 886→\t}\n 887→\tif err := binary.Read(br, binary.LittleEndian, &fileTo); err != nil {\n 888→\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read to: %w\", err)\n 889→\t}\n 890→\n 891→\tif fileTo != 0 && fileTo < from {\n 892→\t\treturn nil // File is older than retention window, skip it\n 893→\t}\n 894→\n 895→\tcf, err := readBinaryLevel(br)\n 896→\tif err != nil {\n 897→\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read level tree: %w\", err)\n 898→\t}\n 899→\tcf.From = fileFrom\n 900→\tcf.To = fileTo\n 901→\n 902→\treturn l.loadFile(cf, m)\n 903→}\n 904→\n 905→// readBinaryLevel recursively reads a level from the binary snapshot format.\n 906→func readBinaryLevel(r io.Reader) (*CheckpointFile, error) {\n 907→\tcf := &CheckpointFile{\n 908→\t\tMetrics: make(map[string]*CheckpointMetrics),\n 909→\t\tChildren: make(map[string]*CheckpointFile),\n 910→\t}\n 911→\n 912→\tvar numMetrics uint32\n 913→\tif err := binary.Read(r, binary.LittleEndian, &numMetrics); err != nil {\n 914→\t\treturn nil, fmt.Errorf(\"read num_metrics: %w\", err)\n 915→\t}\n 916→\n 917→\tfor range numMetrics {\n 918→\t\tname, err := readString16(r)\n 919→\t\tif err != nil {\n 920→\t\t\treturn nil, fmt.Errorf(\"read metric name: %w\", err)\n 921→\t\t}\n 922→\n 923→\t\tvar freq, start int64\n 924→\t\tif err := binary.Read(r, binary.LittleEndian, &freq); err != nil {\n 925→\t\t\treturn nil, fmt.Errorf(\"read frequency for %s: %w\", name, err)\n 926→\t\t}\n 927→\t\tif err := binary.Read(r, binary.LittleEndian, &start); err != nil {\n 928→\t\t\treturn nil, fmt.Errorf(\"read start for %s: %w\", name, err)\n 929→\t\t}\n 930→\n 931→\t\tvar numValues uint32\n 932→\t\tif err := binary.Read(r, binary.LittleEndian, &numValues); err != nil {\n 933→\t\t\treturn nil, fmt.Errorf(\"read num_values for %s: %w\", name, err)\n 934→\t\t}\n 935→\n 936→\t\tdata := make([]schema.Float, numValues)\n 937→\t\tfor i := range numValues {\n 938→\t\t\tvar bits uint32\n 939→\t\t\tif err := binary.Read(r, binary.LittleEndian, &bits); err != nil {\n 940→\t\t\t\treturn nil, fmt.Errorf(\"read value[%d] for %s: %w\", i, name, err)\n 941→\t\t\t}\n 942→\t\t\tdata[i] = schema.Float(math.Float32frombits(bits))\n 943→\t\t}\n 944→\n 945→\t\tcf.Metrics[name] = &CheckpointMetrics{\n 946→\t\t\tFrequency: freq,\n 947→\t\t\tStart: start,\n 948→\t\t\tData: data,\n 949→\t\t}\n 950→\t}\n 951→\n 952→\tvar numChildren uint32\n 953→\tif err := binary.Read(r, binary.LittleEndian, &numChildren); err != nil {\n 954→\t\treturn nil, fmt.Errorf(\"read num_children: %w\", err)\n 955→\t}\n 956→\n 957→\tfor range numChildren {\n 958→\t\tchildName, err := readString16(r)\n 959→\t\tif err != nil {\n 960→\t\t\treturn nil, fmt.Errorf(\"read child name: %w\", err)\n 961→\t\t}\n 962→\n 963→\t\tchild, err := readBinaryLevel(r)\n 964→\t\tif err != nil {\n 965→\t\t\treturn nil, fmt.Errorf(\"read child %s: %w\", childName, err)\n 966→\t\t}\n 
967→\t\tcf.Children[childName] = child\n 968→\t}\n 969→\n 970→\treturn cf, nil\n 971→}\n 972→\n 973→// readString16 reads a 2-byte length-prefixed string from r.\n 974→func readString16(r io.Reader) (string, error) {\n 975→\tvar sLen uint16\n 976→\tif err := binary.Read(r, binary.LittleEndian, &sLen); err != nil {\n 977→\t\treturn \"\", err\n 978→\t}\n 979→\tbuf := make([]byte, sLen)\n 980→\tif _, err := io.ReadFull(r, buf); err != nil {\n 981→\t\treturn \"\", err\n 982→\t}\n 983→\treturn string(buf), nil\n 984→}\n 985→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"282ac3cc-b4b9-423c-a521-9d77b53d60ac","timestamp":"2026-03-29T19:34:33.000Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/walCheckpoint.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides walCheckpoint.go: WAL-based checkpoint implementation.\n//\n// This replaces the Avro shadow tree with an append-only Write-Ahead Log (WAL)\n// per host, eliminating the extra memory overhead of the AvroStore and providing\n// truly continuous (per-write) crash safety.\n//\n// # Architecture\n//\n//\tMetric write (DecodeLine)\n//\t │\n//\t ├─► WriteToLevel() → main MemoryStore (unchanged)\n//\t │\n//\t └─► WALMessages channel\n//\t │\n//\t ▼\n//\t WALStaging goroutine\n//\t │\n//\t ▼\n//\t checkpoints/cluster/host/current.wal (append-only, binary)\n//\n//\tPeriodic checkpoint (Checkpointing goroutine):\n//\t 1. Write .bin snapshot (column-oriented, from main tree)\n//\t 2. Signal WALStaging to truncate current.wal per host\n//\n//\tOn restart (FromCheckpoint):\n//\t 1. Load most recent .bin snapshot\n//\t 2. 
Replay current.wal (overwrite-safe: buffer.write handles duplicate timestamps)\n//\n// # WAL Record Format\n//\n//\t[4B magic 0xCC1DA7A1][4B payload_len][payload][4B CRC32]\n//\n//\tpayload:\n//\t [8B timestamp int64]\n//\t [2B metric_name_len uint16][N metric name bytes]\n//\t [1B selector_count uint8]\n//\t per selector: [1B selector_len uint8][M selector bytes]\n//\t [4B value float32 bits]\n//\n// # Binary Snapshot Format\n//\n//\t[4B magic 0xCC5B0001][8B from int64][8B to int64]\n//\tLevel tree (recursive):\n//\t [4B num_metrics uint32]\n//\t per metric:\n//\t [2B name_len uint16][N name bytes]\n//\t [8B frequency int64][8B start int64]\n//\t [4B num_values uint32][num_values × 4B float32]\n//\t [4B num_children uint32]\n//\t per child: [2B name_len uint16][N name bytes] + Level (recursive)\npackage metricstore\n\nimport (\n\t\"bufio\"\n\t\"context\"\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"hash/crc32\"\n\t\"hash/fnv\"\n\t\"io\"\n\t\"math\"\n\t\"os\"\n\t\"path\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\n// Magic numbers for binary formats.\nconst (\n\twalFileMagic = uint32(0xCC1DA701) // WAL file header magic\n\twalRecordMagic = uint32(0xCC1DA7A1) // WAL record magic\n\tsnapFileMagic = uint32(0xCC5B0001) // Binary snapshot magic\n)\n\n// walShardChs holds per-shard channels for WAL messages.\n// Initialized by WALStaging; nil when WAL is not active.\nvar walShardChs []chan *WALMessage\n\n// walShardRotateChs holds per-shard channels for WAL rotation requests.\nvar walShardRotateChs []chan walRotateReq\n\n// walNumShards stores the number of shards (set during WALStaging init).\nvar walNumShards int\n\n// walStagingWg tracks WALStaging goroutine exits for shutdown synchronization.\nvar walStagingWg sync.WaitGroup\n\n// walShuttingDown is set before closing shard channels to prevent\n// SendWALMessage from sending on a closed channel (which panics in Go).\nvar walShuttingDown atomic.Bool\n\n// walCheckpointActive is set during binary checkpoint writes.\n// While active, SendWALMessage skips sending (returns true) because the\n// snapshot captures all in-memory data, making WAL writes redundant.\nvar walCheckpointActive atomic.Bool\n\n// WALMessage represents a single metric write to be appended to the WAL.\n// Cluster and Node are NOT stored in the WAL record (inferred from file path).\ntype WALMessage struct {\n\tMetricName string\n\tCluster string\n\tNode string\n\tSelector []string\n\tValue schema.Float\n\tTimestamp int64\n}\n\n// walRotateReq requests WAL file rotation for a specific host directory.\n// The done channel is closed by the WAL goroutine when rotation is complete.\ntype walRotateReq struct {\n\thostDir string\n\tdone chan struct{}\n}\n\n// walFileState holds an open WAL file handle and buffered writer for one host directory.\ntype walFileState struct {\n\tf *os.File\n\tw *bufio.Writer\n\tdirty bool\n\tsize int64 // approximate bytes written (tracked from open + writes)\n}\n\n// walFlushInterval controls how often dirty WAL files are flushed to disk.\n// Decoupling flushes from message processing lets the consumer run at memory\n// speed, amortizing syscall overhead across many writes.\nconst walFlushInterval = 1 * time.Second\n\n// walShardIndex computes which shard a message belongs to based on cluster+node.\n// Uses FNV-1a hash for fast, well-distributed mapping.\nfunc walShardIndex(cluster, node string) int {\n\th := 
fnv.New32a()\n\th.Write([]byte(cluster))\n\th.Write([]byte{0})\n\th.Write([]byte(node))\n\treturn int(h.Sum32()) % walNumShards\n}\n\n// SendWALMessage routes a WAL message to the appropriate shard channel.\n// Returns false if the channel is full or shutdown is in progress.\nfunc SendWALMessage(msg *WALMessage) bool {\n\tif walShardChs == nil || walShuttingDown.Load() {\n\t\treturn false\n\t}\n\tif walCheckpointActive.Load() {\n\t\treturn true // Data safe in memory; snapshot will capture it\n\t}\n\tshard := walShardIndex(msg.Cluster, msg.Node)\n\tselect {\n\tcase walShardChs[shard] <- msg:\n\t\treturn true\n\tdefault:\n\t\treturn false\n\t}\n}\n\n// WALStaging starts N sharded background goroutines that receive WALMessage items\n// and append binary WAL records to per-host current.wal files.\n// Each shard owns a subset of hosts (determined by hash of cluster+node),\n// parallelizing serialization and I/O while keeping per-host writes serial.\nfunc WALStaging(wg *sync.WaitGroup, ctx context.Context) {\n\tif Keys.Checkpoints.FileFormat == \"json\" {\n\t\treturn\n\t}\n\n\twalNumShards = max(Keys.NumWorkers, 1)\n\tchBufSize := max(65536/walNumShards, 1024)\n\n\twalShardChs = make([]chan *WALMessage, walNumShards)\n\twalShardRotateChs = make([]chan walRotateReq, walNumShards)\n\n\tfor i := range walNumShards {\n\t\twalShardChs[i] = make(chan *WALMessage, chBufSize)\n\t\twalShardRotateChs[i] = make(chan walRotateReq, 64)\n\t}\n\n\tfor i := range walNumShards {\n\t\tmsgCh := walShardChs[i]\n\t\trotateCh := walShardRotateChs[i]\n\n\t\twalStagingWg.Add(1)\n\t\twg.Go(func() {\n\t\t\tdefer walStagingWg.Done()\n\t\t\thostFiles := make(map[string]*walFileState)\n\n\t\t\tdefer func() {\n\t\t\t\tfor _, ws := range hostFiles {\n\t\t\t\t\tif ws.f != nil {\n\t\t\t\t\t\tws.w.Flush()\n\t\t\t\t\t\tws.f.Close()\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}()\n\n\t\t\tgetOrOpenWAL := func(hostDir string) *walFileState {\n\t\t\t\tws, ok := hostFiles[hostDir]\n\t\t\t\tif ok {\n\t\t\t\t\treturn ws\n\t\t\t\t}\n\n\t\t\t\tif err := os.MkdirAll(hostDir, CheckpointDirPerms); err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: mkdir %s: %v\", hostDir, err)\n\t\t\t\t\treturn nil\n\t\t\t\t}\n\n\t\t\t\twalPath := path.Join(hostDir, \"current.wal\")\n\t\t\t\tf, err := os.OpenFile(walPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, CheckpointFilePerms)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: open %s: %v\", walPath, err)\n\t\t\t\t\treturn nil\n\t\t\t\t}\n\n\t\t\t\tw := bufio.NewWriter(f)\n\n\t\t\t\t// Write file header magic if file is new (empty).\n\t\t\t\tinfo, err := f.Stat()\n\t\t\t\tvar fileSize int64\n\t\t\t\tif err == nil {\n\t\t\t\t\tfileSize = info.Size()\n\t\t\t\t}\n\t\t\t\tif err == nil && fileSize == 0 {\n\t\t\t\t\tvar hdr [4]byte\n\t\t\t\t\tbinary.LittleEndian.PutUint32(hdr[:], walFileMagic)\n\t\t\t\t\tif _, err := w.Write(hdr[:]); err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: write header %s: %v\", walPath, err)\n\t\t\t\t\t\tf.Close()\n\t\t\t\t\t\treturn nil\n\t\t\t\t\t}\n\t\t\t\t\tfileSize = 4\n\t\t\t\t}\n\n\t\t\t\tws = &walFileState{f: f, w: w, size: fileSize}\n\t\t\t\thostFiles[hostDir] = ws\n\t\t\t\treturn ws\n\t\t\t}\n\n\t\t\tprocessMsg := func(msg *WALMessage) {\n\t\t\t\thostDir := path.Join(Keys.Checkpoints.RootDir, msg.Cluster, msg.Node)\n\t\t\t\tws := getOrOpenWAL(hostDir)\n\t\t\t\tif ws == nil {\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\t// Enforce max WAL size: force-rotate before writing if limit is exceeded.\n\t\t\t\t// The in-memory store still holds the data; only 
crash-recovery coverage is lost.\n\t\t\t\tif maxSize := Keys.Checkpoints.MaxWALSize; maxSize > 0 && ws.size >= maxSize {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL: force-rotating %s (size %d >= limit %d)\",\n\t\t\t\t\t\thostDir, ws.size, maxSize)\n\t\t\t\t\tws.w.Flush()\n\t\t\t\t\tws.f.Close()\n\t\t\t\t\twalPath := path.Join(hostDir, \"current.wal\")\n\t\t\t\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n\t\t\t\t\t}\n\t\t\t\t\tdelete(hostFiles, hostDir)\n\t\t\t\t\tws = getOrOpenWAL(hostDir)\n\t\t\t\t\tif ws == nil {\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tn, err := writeWALRecordDirect(ws.w, msg)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: write record: %v\", err)\n\t\t\t\t}\n\t\t\t\tws.size += int64(n)\n\t\t\t\tws.dirty = true\n\t\t\t}\n\n\t\t\tprocessRotate := func(req walRotateReq) {\n\t\t\t\tws, ok := hostFiles[req.hostDir]\n\t\t\t\tif ok && ws.f != nil {\n\t\t\t\t\tws.w.Flush()\n\t\t\t\t\tws.f.Close()\n\t\t\t\t\twalPath := path.Join(req.hostDir, \"current.wal\")\n\t\t\t\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n\t\t\t\t\t}\n\t\t\t\t\tdelete(hostFiles, req.hostDir)\n\t\t\t\t}\n\t\t\t\tclose(req.done)\n\t\t\t}\n\n\t\t\tflushDirty := func() {\n\t\t\t\tfor _, ws := range hostFiles {\n\t\t\t\t\tif ws.dirty {\n\t\t\t\t\t\tws.w.Flush()\n\t\t\t\t\t\tws.dirty = false\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tticker := time.NewTicker(walFlushInterval)\n\t\t\tdefer ticker.Stop()\n\n\t\t\t// drainBatch processes up to 4096 pending messages without blocking.\n\t\t\t// Returns false if the channel was closed.\n\t\t\tdrainBatch := func() bool {\n\t\t\t\tfor range 4096 {\n\t\t\t\t\tselect {\n\t\t\t\t\tcase msg, ok := <-msgCh:\n\t\t\t\t\t\tif !ok {\n\t\t\t\t\t\t\tflushDirty()\n\t\t\t\t\t\t\treturn false\n\t\t\t\t\t\t}\n\t\t\t\t\t\tprocessMsg(msg)\n\t\t\t\t\tcase req := <-rotateCh:\n\t\t\t\t\t\tprocessRotate(req)\n\t\t\t\t\tdefault:\n\t\t\t\t\t\treturn true\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\treturn true\n\t\t\t}\n\n\t\t\tfor {\n\t\t\t\tselect {\n\t\t\t\tcase <-ctx.Done():\n\t\t\t\t\t// On shutdown, skip draining buffered messages — a full binary\n\t\t\t\t\t// checkpoint will be written from in-memory state, making\n\t\t\t\t\t// buffered WAL records redundant.\n\t\t\t\t\tflushDirty()\n\t\t\t\t\treturn\n\t\t\t\tcase msg, ok := <-msgCh:\n\t\t\t\t\tif !ok {\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t\tprocessMsg(msg)\n\t\t\t\t\tif !drainBatch() {\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t\t// No flush here — timer handles periodic flushing.\n\t\t\t\tcase <-ticker.C:\n\t\t\t\t\tflushDirty()\n\t\t\t\tcase req := <-rotateCh:\n\t\t\t\t\tprocessRotate(req)\n\t\t\t\t}\n\t\t\t}\n\t\t})\n\t}\n}\n\n// WaitForWALStagingDrain blocks until all WALStaging goroutines have exited.\n// Must be called after closing walShardChs to ensure all file handles are\n// flushed and closed before checkpoint writes begin.\nfunc WaitForWALStagingDrain() {\n\twalStagingWg.Wait()\n}\n\n// RotateWALFiles sends rotation requests for the given host directories\n// and blocks until all rotations complete. 
Each request is routed to the\n// shard that owns the host directory.\n//\n// If shutdown is in progress (WAL staging goroutines may have exited),\n// rotation is skipped to avoid deadlocking on abandoned channels.\nfunc RotateWALFiles(hostDirs []string) {\n\tif walShardRotateChs == nil || walShuttingDown.Load() {\n\t\treturn\n\t}\n\tdeadline := time.After(2 * time.Minute)\n\tdones := make([]chan struct{}, 0, len(hostDirs))\n\tfor _, dir := range hostDirs {\n\t\tdone := make(chan struct{})\n\t\tshard := walShardIndexFromDir(dir)\n\t\tselect {\n\t\tcase walShardRotateChs[shard] <- walRotateReq{hostDir: dir, done: done}:\n\t\t\tdones = append(dones, done)\n\t\tcase <-deadline:\n\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL rotation send timed out, %d of %d hosts remaining\",\n\t\t\t\tlen(hostDirs)-len(dones), len(hostDirs))\n\t\t\tgoto waitDones\n\t\t}\n\t}\nwaitDones:\n\tfor _, done := range dones {\n\t\tselect {\n\t\tcase <-done:\n\t\tcase <-time.After(30 * time.Second):\n\t\t\tcclog.Warn(\"[METRICSTORE]> WAL rotation completion timed out, continuing\")\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// walShardIndexFromDir extracts cluster and node from a host directory path\n// and returns the shard index. The path format is: rootDir/cluster/node\nfunc walShardIndexFromDir(hostDir string) int {\n\t// hostDir = .../cluster/node — extract last two path components\n\tnode := path.Base(hostDir)\n\tcluster := path.Base(path.Dir(hostDir))\n\treturn walShardIndex(cluster, node)\n}\n\n// RotateWALFilesAfterShutdown removes the current.wal files for the given\n// host directories directly; the WAL staging goroutines have already exited.\nfunc RotateWALFilesAfterShutdown(hostDirs []string) {\n\tfor _, dir := range hostDirs {\n\t\twalPath := path.Join(dir, \"current.wal\")\n\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n\t\t}\n\t}\n}\n\n// writeWALRecordDirect encodes a WAL record into a contiguous buffer first,\n// then writes it to the bufio.Writer in a single call. This prevents partial\n// records in the write buffer if a write error occurs mid-record (e.g. 
disk full).\n// Returns the number of bytes written and any error.\nfunc writeWALRecordDirect(w *bufio.Writer, msg *WALMessage) (int, error) {\n\t// Compute payload and total record size.\n\tpayloadSize := 8 + 2 + len(msg.MetricName) + 1 + 4\n\tfor _, s := range msg.Selector {\n\t\tpayloadSize += 1 + len(s)\n\t}\n\t// Total: 8 (header) + payload + 4 (CRC).\n\ttotalSize := 8 + payloadSize + 4\n\n\t// Use stack buffer for typical small records, heap-allocate only for large ones.\n\tvar stackBuf [256]byte\n\tvar buf []byte\n\tif totalSize <= len(stackBuf) {\n\t\tbuf = stackBuf[:totalSize]\n\t} else {\n\t\tbuf = make([]byte, totalSize)\n\t}\n\n\t// Header: magic + payload length.\n\tbinary.LittleEndian.PutUint32(buf[0:4], walRecordMagic)\n\tbinary.LittleEndian.PutUint32(buf[4:8], uint32(payloadSize))\n\n\t// Payload starts at offset 8.\n\tp := 8\n\n\t// Timestamp (8 bytes).\n\tbinary.LittleEndian.PutUint64(buf[p:p+8], uint64(msg.Timestamp))\n\tp += 8\n\n\t// Metric name length (2 bytes) + metric name.\n\tbinary.LittleEndian.PutUint16(buf[p:p+2], uint16(len(msg.MetricName)))\n\tp += 2\n\tp += copy(buf[p:], msg.MetricName)\n\n\t// Selector count (1 byte).\n\tbuf[p] = byte(len(msg.Selector))\n\tp++\n\n\t// Selectors (1-byte length + bytes each).\n\tfor _, sel := range msg.Selector {\n\t\tbuf[p] = byte(len(sel))\n\t\tp++\n\t\tp += copy(buf[p:], sel)\n\t}\n\n\t// Value (4 bytes, float32 bits).\n\tbinary.LittleEndian.PutUint32(buf[p:p+4], math.Float32bits(float32(msg.Value)))\n\tp += 4\n\n\t// CRC32 over payload (bytes 8..8+payloadSize).\n\tcrc := crc32.ChecksumIEEE(buf[8 : 8+payloadSize])\n\tbinary.LittleEndian.PutUint32(buf[p:p+4], crc)\n\n\t// Single atomic write to the buffered writer.\n\tn, err := w.Write(buf)\n\treturn n, err\n}\n\n// readWALRecord reads one WAL record from the reader.\n// Returns (nil, nil) on clean EOF. 
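The record layout matches\n// what writeWALRecordDirect above encodes (all integers little-endian):\n//\n//\tmagic(4) | payloadLen(4) | payload(payloadLen) | crc32(payload)(4)\n//\tpayload = ts(8) | nameLen(2) | name | selCount(1) | { selLen(1) | sel }... | f32bits(4)\n//\n// 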
Returns error on data corruption.\n// A CRC mismatch indicates a truncated trailing record (expected on crash).\nfunc readWALRecord(r io.Reader) (*WALMessage, error) {\n\tvar magic uint32\n\tif err := binary.Read(r, binary.LittleEndian, &magic); err != nil {\n\t\tif err == io.EOF {\n\t\t\treturn nil, nil // Clean EOF\n\t\t}\n\t\treturn nil, fmt.Errorf(\"read record magic: %w\", err)\n\t}\n\n\tif magic != walRecordMagic {\n\t\treturn nil, fmt.Errorf(\"invalid record magic 0x%08X (expected 0x%08X)\", magic, walRecordMagic)\n\t}\n\n\tvar payloadLen uint32\n\tif err := binary.Read(r, binary.LittleEndian, &payloadLen); err != nil {\n\t\treturn nil, fmt.Errorf(\"read payload length: %w\", err)\n\t}\n\n\tif payloadLen > 1<<20 { // 1 MB sanity limit\n\t\treturn nil, fmt.Errorf(\"record payload too large: %d bytes\", payloadLen)\n\t}\n\n\tpayload := make([]byte, payloadLen)\n\tif _, err := io.ReadFull(r, payload); err != nil {\n\t\treturn nil, fmt.Errorf(\"read payload: %w\", err)\n\t}\n\n\tvar storedCRC uint32\n\tif err := binary.Read(r, binary.LittleEndian, &storedCRC); err != nil {\n\t\treturn nil, fmt.Errorf(\"read CRC: %w\", err)\n\t}\n\n\tif crc32.ChecksumIEEE(payload) != storedCRC {\n\t\treturn nil, fmt.Errorf(\"CRC mismatch (truncated write or corruption)\")\n\t}\n\n\treturn parseWALPayload(payload)\n}\n\n// parseWALPayload decodes a binary payload into a WALMessage.\nfunc parseWALPayload(payload []byte) (*WALMessage, error) {\n\tif len(payload) < 8+2+1+4 {\n\t\treturn nil, fmt.Errorf(\"payload too short: %d bytes\", len(payload))\n\t}\n\n\toffset := 0\n\n\t// Timestamp (8 bytes)\n\tts := int64(binary.LittleEndian.Uint64(payload[offset : offset+8]))\n\toffset += 8\n\n\t// Metric name (2-byte length + bytes)\n\tif offset+2 > len(payload) {\n\t\treturn nil, fmt.Errorf(\"metric name length overflows payload\")\n\t}\n\tmLen := int(binary.LittleEndian.Uint16(payload[offset : offset+2]))\n\toffset += 2\n\n\tif offset+mLen > len(payload) {\n\t\treturn nil, fmt.Errorf(\"metric name overflows payload\")\n\t}\n\tmetricName := string(payload[offset : offset+mLen])\n\toffset += mLen\n\n\t// Selector count (1 byte)\n\tif offset >= len(payload) {\n\t\treturn nil, fmt.Errorf(\"selector count overflows payload\")\n\t}\n\tselCount := int(payload[offset])\n\toffset++\n\n\tselectors := make([]string, selCount)\n\tfor i := range selCount {\n\t\tif offset >= len(payload) {\n\t\t\treturn nil, fmt.Errorf(\"selector[%d] length overflows payload\", i)\n\t\t}\n\t\tsLen := int(payload[offset])\n\t\toffset++\n\n\t\tif offset+sLen > len(payload) {\n\t\t\treturn nil, fmt.Errorf(\"selector[%d] data overflows payload\", i)\n\t\t}\n\t\tselectors[i] = string(payload[offset : offset+sLen])\n\t\toffset += sLen\n\t}\n\n\t// Value (4 bytes, float32 bits)\n\tif offset+4 > len(payload) {\n\t\treturn nil, fmt.Errorf(\"value overflows payload\")\n\t}\n\tbits := binary.LittleEndian.Uint32(payload[offset : offset+4])\n\tvalue := schema.Float(math.Float32frombits(bits))\n\n\treturn &WALMessage{\n\t\tMetricName: metricName,\n\t\tTimestamp: ts,\n\t\tSelector: selectors,\n\t\tValue: value,\n\t}, nil\n}\n\n// loadWALFile reads a WAL file and replays all valid records into the Level tree.\n// l is the host-level node. Corrupt or partial trailing records are silently skipped\n// (expected on crash). 
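Replay is intended to run after any binary\n// snapshot for the host has been loaded, which is why write errors for\n// records that predate the snapshot's buffer start are ignored (see the\n// in-loop comment below). 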
Records older than 'from' are skipped.\nfunc (l *Level) loadWALFile(m *MemoryStore, f *os.File, from int64) error {\n\tbr := bufio.NewReader(f)\n\n\t// Verify file header magic.\n\tvar fileMagic uint32\n\tif err := binary.Read(br, binary.LittleEndian, &fileMagic); err != nil {\n\t\tif err == io.EOF {\n\t\t\treturn nil // Empty file, no data\n\t\t}\n\t\treturn fmt.Errorf(\"[METRICSTORE]> WAL: read file header: %w\", err)\n\t}\n\n\tif fileMagic != walFileMagic {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> WAL: invalid file magic 0x%08X (expected 0x%08X)\", fileMagic, walFileMagic)\n\t}\n\n\t// Cache level lookups to avoid repeated tree traversal.\n\tlvlCache := make(map[string]*Level)\n\n\tfor {\n\t\tmsg, err := readWALRecord(br)\n\t\tif err != nil {\n\t\t\t// Truncated trailing record is expected after a crash; stop replaying.\n\t\t\tcclog.Debugf(\"[METRICSTORE]> WAL: stopping replay at corrupted/partial record: %v\", err)\n\t\t\tbreak\n\t\t}\n\t\tif msg == nil {\n\t\t\tbreak // Clean EOF\n\t\t}\n\n\t\tif msg.Timestamp < from {\n\t\t\tcontinue // Older than retention window\n\t\t}\n\n\t\tminfo, ok := m.Metrics[msg.MetricName]\n\t\tif !ok {\n\t\t\tcontinue // Unknown metric (config may have changed)\n\t\t}\n\n\t\t// Cache key is the null-separated selector path.\n\t\tcacheKey := joinSelector(msg.Selector)\n\t\tlvl, ok := lvlCache[cacheKey]\n\t\tif !ok {\n\t\t\tlvl = l.findLevelOrCreate(msg.Selector, len(m.Metrics))\n\t\t\tlvlCache[cacheKey] = lvl\n\t\t}\n\n\t\t// Write directly to the buffer, same as WriteToLevel but without the\n\t\t// global level lookup (we already have the right level).\n\t\tlvl.lock.Lock()\n\t\tb := lvl.metrics[minfo.offset]\n\t\tif b == nil {\n\t\t\tb = newBuffer(msg.Timestamp, minfo.Frequency)\n\t\t\tlvl.metrics[minfo.offset] = b\n\t\t}\n\t\tnb, writeErr := b.write(msg.Timestamp, msg.Value)\n\t\tif writeErr == nil && b != nb {\n\t\t\tlvl.metrics[minfo.offset] = nb\n\t\t}\n\t\t// Ignore write errors for timestamps before buffer start (can happen when\n\t\t// replaying WAL entries that predate a loaded snapshot's start time).\n\t\tlvl.lock.Unlock()\n\t}\n\n\treturn nil\n}\n\n// joinSelector builds a cache key from a selector slice using null bytes as separators.\nfunc joinSelector(sel []string) string {\n\tif len(sel) == 0 {\n\t\treturn \"\"\n\t}\n\tvar result strings.Builder\n\tresult.WriteString(sel[0])\n\tfor i := 1; i < len(sel); i++ {\n\t\tresult.WriteString(\"\\x00\" + sel[i])\n\t}\n\treturn result.String()\n}\n\n// ToCheckpointWAL writes binary snapshot files for all hosts in parallel.\n// Returns the number of files written, the list of host directories that were\n// successfully checkpointed (for WAL rotation), and any errors.\nfunc (m *MemoryStore) ToCheckpointWAL(dir string, from, to int64) (int, []string, error) {\n\t// Collect all cluster/host pairs.\n\tm.root.lock.RLock()\n\ttotalHosts := 0\n\tfor _, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\ttotalHosts += len(l1.children)\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\tlevels := make([]*Level, 0, totalHosts)\n\tselectors := make([][]string, 0, totalHosts)\n\n\tm.root.lock.RLock()\n\tfor sel1, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\tfor sel2, l2 := range l1.children {\n\t\t\tlevels = append(levels, l2)\n\t\t\tselectors = append(selectors, []string{sel1, sel2})\n\t\t}\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\ttype workItem struct {\n\t\tlevel *Level\n\t\thostDir string\n\t\tselector []string\n\t}\n\n\ttotalWork := len(levels)\n\tcclog.Infof(\"[METRICSTORE]> 
Starting binary checkpoint for %d hosts with %d workers\", totalWork, Keys.NumWorkers)\n\n\tn, errs, completed := int32(0), int32(0), int32(0)\n\tvar successDirs []string\n\tvar successMu sync.Mutex\n\n\tvar wg sync.WaitGroup\n\twg.Add(Keys.NumWorkers)\n\twork := make(chan workItem, Keys.NumWorkers*2)\n\n\t// Progress logging goroutine.\n\tstopProgress := make(chan struct{})\n\tgo func() {\n\t\tticker := time.NewTicker(10 * time.Second)\n\t\tdefer ticker.Stop()\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ticker.C:\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> Checkpoint progress: %d/%d hosts (%d written, %d errors)\",\n\t\t\t\t\tatomic.LoadInt32(&completed), totalWork, atomic.LoadInt32(&n), atomic.LoadInt32(&errs))\n\t\t\tcase <-stopProgress:\n\t\t\t\treturn\n\t\t\t}\n\t\t}\n\t}()\n\n\tfor range Keys.NumWorkers {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tfor wi := range work {\n\t\t\t\terr := wi.level.toCheckpointBinary(wi.hostDir, from, to, m)\n\t\t\t\tif err != nil {\n\t\t\t\t\tif err == ErrNoNewArchiveData {\n\t\t\t\t\t\tatomic.AddInt32(&completed, 1)\n\t\t\t\t\t\tcontinue\n\t\t\t\t\t}\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> binary checkpoint error for %s: %v\", wi.hostDir, err)\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t} else {\n\t\t\t\t\tatomic.AddInt32(&n, 1)\n\t\t\t\t\tsuccessMu.Lock()\n\t\t\t\t\tsuccessDirs = append(successDirs, wi.hostDir)\n\t\t\t\t\tsuccessMu.Unlock()\n\t\t\t\t}\n\t\t\t\tatomic.AddInt32(&completed, 1)\n\t\t\t}\n\t\t}()\n\t}\n\n\tfor i := range levels {\n\t\thostDir := path.Join(dir, path.Join(selectors[i]...))\n\t\twork <- workItem{\n\t\t\tlevel: levels[i],\n\t\t\thostDir: hostDir,\n\t\t\tselector: selectors[i],\n\t\t}\n\t}\n\tclose(work)\n\twg.Wait()\n\tclose(stopProgress)\n\n\tif errs > 0 {\n\t\treturn int(n), successDirs, fmt.Errorf(\"[METRICSTORE]> %d errors during binary checkpoint (%d successes)\", errs, n)\n\t}\n\treturn int(n), successDirs, nil\n}\n\n// toCheckpointBinary writes a binary snapshot file for a single host-level node.\n// Uses atomic rename (write to .tmp then rename) to avoid partial reads on crash.\nfunc (l *Level) toCheckpointBinary(dir string, from, to int64, m *MemoryStore) error {\n\tcf, err := l.toCheckpointFile(from, to, m)\n\tif err != nil {\n\t\treturn err\n\t}\n\tif cf == nil {\n\t\treturn ErrNoNewArchiveData\n\t}\n\n\tif err := os.MkdirAll(dir, CheckpointDirPerms); err != nil {\n\t\treturn fmt.Errorf(\"mkdir %s: %w\", dir, err)\n\t}\n\n\t// Write to a temp file first, then rename (atomic on POSIX).\n\ttmpPath := path.Join(dir, fmt.Sprintf(\"%d.bin.tmp\", from))\n\tfinalPath := path.Join(dir, fmt.Sprintf(\"%d.bin\", from))\n\n\tf, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"open binary snapshot %s: %w\", tmpPath, err)\n\t}\n\n\tbw := bufio.NewWriter(f)\n\tif err := writeBinarySnapshotFile(bw, cf); err != nil {\n\t\tf.Close()\n\t\tos.Remove(tmpPath)\n\t\treturn fmt.Errorf(\"write binary snapshot: %w\", err)\n\t}\n\tif err := bw.Flush(); err != nil {\n\t\tf.Close()\n\t\tos.Remove(tmpPath)\n\t\treturn err\n\t}\n\tf.Close()\n\n\treturn os.Rename(tmpPath, finalPath)\n}\n\n// writeBinarySnapshotFile writes the binary snapshot file header and level tree.\nfunc writeBinarySnapshotFile(w io.Writer, cf *CheckpointFile) error {\n\tif err := binary.Write(w, binary.LittleEndian, snapFileMagic); err != nil {\n\t\treturn err\n\t}\n\tif err := binary.Write(w, binary.LittleEndian, cf.From); err != nil {\n\t\treturn err\n\t}\n\tif err := binary.Write(w, 
binary.LittleEndian, cf.To); err != nil {\n\t\treturn err\n\t}\n\treturn writeBinaryLevel(w, cf)\n}\n\n// writeBinaryLevel recursively writes a CheckpointFile level in binary format.\nfunc writeBinaryLevel(w io.Writer, cf *CheckpointFile) error {\n\tif err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Metrics))); err != nil {\n\t\treturn err\n\t}\n\n\tfor name, metric := range cf.Metrics {\n\t\tif err := writeString16(w, name); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := binary.Write(w, binary.LittleEndian, metric.Frequency); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := binary.Write(w, binary.LittleEndian, metric.Start); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := binary.Write(w, binary.LittleEndian, uint32(len(metric.Data))); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tfor _, v := range metric.Data {\n\t\t\tif err := binary.Write(w, binary.LittleEndian, math.Float32bits(float32(v))); err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t}\n\t}\n\n\tif err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Children))); err != nil {\n\t\treturn err\n\t}\n\n\tfor name, child := range cf.Children {\n\t\tif err := writeString16(w, name); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := writeBinaryLevel(w, child); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// writeString16 writes a 2-byte length-prefixed string to w.\nfunc writeString16(w io.Writer, s string) error {\n\tif err := binary.Write(w, binary.LittleEndian, uint16(len(s))); err != nil {\n\t\treturn err\n\t}\n\t_, err := io.WriteString(w, s)\n\treturn err\n}\n\n// loadBinaryFile reads a binary snapshot file and loads data into the Level tree.\n// The retention check (from) is applied to the file's 'to' timestamp.\nfunc (l *Level) loadBinaryFile(m *MemoryStore, f *os.File, from int64) error {\n\tbr := bufio.NewReader(f)\n\n\tvar magic uint32\n\tif err := binary.Read(br, binary.LittleEndian, &magic); err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read magic: %w\", err)\n\t}\n\tif magic != snapFileMagic {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: invalid magic 0x%08X (expected 0x%08X)\", magic, snapFileMagic)\n\t}\n\n\tvar fileFrom, fileTo int64\n\tif err := binary.Read(br, binary.LittleEndian, &fileFrom); err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read from: %w\", err)\n\t}\n\tif err := binary.Read(br, binary.LittleEndian, &fileTo); err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read to: %w\", err)\n\t}\n\n\tif fileTo != 0 && fileTo < from {\n\t\treturn nil // File is older than retention window, skip it\n\t}\n\n\tcf, err := readBinaryLevel(br)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read level tree: %w\", err)\n\t}\n\tcf.From = fileFrom\n\tcf.To = fileTo\n\n\treturn l.loadFile(cf, m)\n}\n\n// readBinaryLevel recursively reads a level from the binary snapshot format.\nfunc readBinaryLevel(r io.Reader) (*CheckpointFile, error) {\n\tcf := &CheckpointFile{\n\t\tMetrics: make(map[string]*CheckpointMetrics),\n\t\tChildren: make(map[string]*CheckpointFile),\n\t}\n\n\tvar numMetrics uint32\n\tif err := binary.Read(r, binary.LittleEndian, &numMetrics); err != nil {\n\t\treturn nil, fmt.Errorf(\"read num_metrics: %w\", err)\n\t}\n\n\tfor range numMetrics {\n\t\tname, err := readString16(r)\n\t\tif err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read metric name: %w\", err)\n\t\t}\n\n\t\tvar freq, start int64\n\t\tif err := binary.Read(r, binary.LittleEndian, &freq); err != 
nil {\n\t\t\treturn nil, fmt.Errorf(\"read frequency for %s: %w\", name, err)\n\t\t}\n\t\tif err := binary.Read(r, binary.LittleEndian, &start); err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read start for %s: %w\", name, err)\n\t\t}\n\n\t\tvar numValues uint32\n\t\tif err := binary.Read(r, binary.LittleEndian, &numValues); err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read num_values for %s: %w\", name, err)\n\t\t}\n\n\t\tdata := make([]schema.Float, numValues)\n\t\tfor i := range numValues {\n\t\t\tvar bits uint32\n\t\t\tif err := binary.Read(r, binary.LittleEndian, &bits); err != nil {\n\t\t\t\treturn nil, fmt.Errorf(\"read value[%d] for %s: %w\", i, name, err)\n\t\t\t}\n\t\t\tdata[i] = schema.Float(math.Float32frombits(bits))\n\t\t}\n\n\t\tcf.Metrics[name] = &CheckpointMetrics{\n\t\t\tFrequency: freq,\n\t\t\tStart: start,\n\t\t\tData: data,\n\t\t}\n\t}\n\n\tvar numChildren uint32\n\tif err := binary.Read(r, binary.LittleEndian, &numChildren); err != nil {\n\t\treturn nil, fmt.Errorf(\"read num_children: %w\", err)\n\t}\n\n\tfor range numChildren {\n\t\tchildName, err := readString16(r)\n\t\tif err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read child name: %w\", err)\n\t\t}\n\n\t\tchild, err := readBinaryLevel(r)\n\t\tif err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read child %s: %w\", childName, err)\n\t\t}\n\t\tcf.Children[childName] = child\n\t}\n\n\treturn cf, nil\n}\n\n// readString16 reads a 2-byte length-prefixed string from r.\nfunc readString16(r io.Reader) (string, error) {\n\tvar sLen uint16\n\tif err := binary.Read(r, binary.LittleEndian, &sLen); err != nil {\n\t\treturn \"\", err\n\t}\n\tbuf := make([]byte, sLen)\n\tif _, err := io.ReadFull(r, buf); err != nil {\n\t\treturn \"\", err\n\t}\n\treturn string(buf), nil\n}\n","numLines":985,"startLine":1,"totalLines":985}},"sourceToolAssistantUUID":"396aea8b-7eb7-4780-9dc2-677da33e7aa7"}
{"parentUuid":"282ac3cc-b4b9-423c-a521-9d77b53d60ac","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"thinking","thinking":"Now let me read the remaining files I haven't seen yet.","signature":"EvQBClsIDBgCKkDnxjSen21wCGi0p5+tQcyrWkInL63mua4/fVZziKelpsTcKv1+6z1PP8kzB1lqp+a2b0gNQT2T0Ek4wCEduz4zMhFjbGF1ZGUtc29ubmV0LTQtNjgAEgxbj971k/ujvpUyhOoaDNAMALmLRQSDDF0T1iIwlPAI2oRen5obrUqwE5xDjScuukKQv1yUQBxW3FxWGtWyIgWYklgbFDV+9bSV5FDTKkeajHKRloQmvwhUdGDEJMqFJ7xz8UMhqY9RbbZc2l89Af0+r2tVYC9xst9zh8DpTKWMhMIo2OxQb1yufwOjc/CPfmGTq33IpBgB"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"9994deed-a775-431e-ab61-3b764dfee761","timestamp":"2026-03-29T19:34:36.687Z"}
{"parentUuid":"9994deed-a775-431e-ab61-3b764dfee761","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"text","text":"Let me read the remaining files."}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"31927e27-0640-4678-8ec8-ee189150b953","timestamp":"2026-03-29T19:34:36.991Z"}
{"parentUuid":"31927e27-0640-4678-8ec8-ee189150b953","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01TCPGRfXh1d2hbpicL6Em8v","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/buffer.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"28653467-4421-47dc-9cd7-2bb906296767","timestamp":"2026-03-29T19:34:38.120Z"}
{"parentUuid":"28653467-4421-47dc-9cd7-2bb906296767","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01TCPGRfXh1d2hbpicL6Em8v","toolUseID":"toolu_01TCPGRfXh1d2hbpicL6Em8v","timestamp":"2026-03-29T19:34:38.233Z","uuid":"1b683384-05c1-49be-ad8f-301daa859516"}
{"parentUuid":"28653467-4421-47dc-9cd7-2bb906296767","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01TCPGRfXh1d2hbpicL6Em8v","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// Package metricstore provides buffer.go: Time-series data buffer implementation.\n 7→//\n 8→// # Buffer Architecture\n 9→//\n 10→// Each metric at each hierarchical level (cluster/host/cpu/etc.) uses a linked-list\n 11→// chain of fixed-size buffers to store time-series data. This design:\n 12→//\n 13→// - Avoids reallocation/copying when growing (new links added instead)\n 14→// - Enables efficient pooling (buffers returned to sync.Pool)\n 15→// - Supports traversal back in time (via prev pointers)\n 16→// - Maintains temporal ordering (newer data in later buffers)\n 17→//\n 18→// # Buffer Chain Example\n 19→//\n 20→//\t[oldest buffer] <- prev -- [older] <- prev -- [newest buffer (head)]\n 21→//\t start=1000 start=1512 start=2024\n 22→//\t data=[v0...v511] data=[v0...v511] data=[v0...v42]\n 23→//\n 24→// When the head buffer reaches capacity (BufferCap = 512), a new buffer becomes\n 25→// the new head and the old head is linked via prev.\n 26→//\n 27→// # Pooling Strategy\n 28→//\n 29→// sync.Pool reduces GC pressure for the common case (BufferCap-sized allocations).\n 30→// Non-standard capacity buffers are not pooled (e.g., from checkpoint deserialization).\n 31→//\n 32→// # Time Alignment\n 33→//\n 34→// Timestamps are aligned to measurement frequency intervals:\n 35→//\n 36→//\tindex = (timestamp - buffer.start) / buffer.frequency\n 37→//\tactualTime = buffer.start + (frequency / 2) + (index * frequency)\n 38→//\n 39→// Missing data points are represented as NaN values. 
The read() function propagates\n 40→// NaN for gaps in the data rather than interpolating.\n 41→package metricstore\n 42→\n 43→import (\n 44→\t\"errors\"\n 45→\t\"sync\"\n 46→\t\"time\"\n 47→\n 48→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 49→)\n 50→\n 51→// BufferCap is the default buffer capacity.\n 52→// buffer.data will only ever grow up to its capacity and a new link\n 53→// in the buffer chain will be created if needed so that no copying\n 54→// of data or reallocation needs to happen on writes.\n 55→const BufferCap int = DefaultBufferCapacity\n 56→\n 57→// bufferPool is the global pool instance.\n 58→// It is initialized immediately when the package loads.\n 59→var bufferPool = NewPersistentBufferPool()\n 60→\n 61→type PersistentBufferPool struct {\n 62→\tpool []*buffer\n 63→\tmu sync.Mutex\n 64→}\n 65→\n 66→// NewPersistentBufferPool creates a dynamic pool for buffers.\n 67→func NewPersistentBufferPool() *PersistentBufferPool {\n 68→\treturn &PersistentBufferPool{\n 69→\t\tpool: make([]*buffer, 0),\n 70→\t}\n 71→}\n 72→\n 73→func (p *PersistentBufferPool) Get() *buffer {\n 74→\tp.mu.Lock()\n 75→\tdefer p.mu.Unlock()\n 76→\n 77→\tn := len(p.pool)\n 78→\tif n == 0 {\n 79→\t\t// Pool is empty, allocate a new one\n 80→\t\treturn &buffer{\n 81→\t\t\tdata: make([]schema.Float, 0, BufferCap),\n 82→\t\t}\n 83→\t}\n 84→\n 85→\t// Reuse existing buffer from the pool\n 86→\tb := p.pool[n-1]\n 87→\tp.pool[n-1] = nil // Avoid memory leak\n 88→\tp.pool = p.pool[:n-1]\n 89→\treturn b\n 90→}\n 91→\n 92→// Put returns b to the pool. The caller must set b.lastUsed = time.Now().Unix()\n 93→// before calling Put so that Clean() can evict idle entries correctly.\n 94→func (p *PersistentBufferPool) Put(b *buffer) {\n 95→\t// Reset the buffer before putting it back\n 96→\tb.data = b.data[:0]\n 97→\n 98→\tp.mu.Lock()\n 99→\tdefer p.mu.Unlock()\n 100→\n 101→\tp.pool = append(p.pool, b)\n 102→}\n 103→\n 104→// GetSize returns the exact number of buffers currently sitting in the pool.\n 105→func (p *PersistentBufferPool) GetSize() int {\n 106→\tp.mu.Lock()\n 107→\tdefer p.mu.Unlock()\n 108→\treturn len(p.pool)\n 109→}\n 110→\n 111→// Clear drains all buffers currently in the pool, allowing the GC to collect them.\n 112→func (p *PersistentBufferPool) Clear() {\n 113→\tp.mu.Lock()\n 114→\tdefer p.mu.Unlock()\n 115→\tfor i := range p.pool {\n 116→\t\tp.pool[i] = nil\n 117→\t}\n 118→\tp.pool = p.pool[:0]\n 119→}\n 120→\n 121→// Clean removes buffers from the pool that haven't been used in the given duration.\n 122→// It uses a simple LRU approach based on the lastUsed timestamp.\n 123→func (p *PersistentBufferPool) Clean(threshold int64) {\n 124→\tp.mu.Lock()\n 125→\tdefer p.mu.Unlock()\n 126→\n 127→\t// Filter in place, retaining only buffers returned to the pool recently enough.\n 128→\tactive := p.pool[:0]\n 129→\tfor _, b := range p.pool {\n 130→\t\tif b.lastUsed >= threshold {\n 131→\t\t\tactive = append(active, b)\n 132→\t\t}\n 133→\t}\n 134→\n 135→\t// Nullify the rest to prevent memory leaks\n 136→\tfor i := len(active); i < len(p.pool); i++ {\n 137→\t\tp.pool[i] = nil\n 138→\t}\n 139→\n 140→\tp.pool = active\n 141→}\n 142→\n 143→var (\n 144→\t// ErrNoData indicates no time-series data exists for the requested metric/level.\n 145→\tErrNoData error = errors.New(\"[METRICSTORE]> no data for this metric/level\")\n 146→\n 147→\t// ErrDataDoesNotAlign indicates that aggregated data from child scopes\n 148→\t// does not align with the parent scope's expected timestamps/intervals.\n 149→\tErrDataDoesNotAlignMissingFront 
error = errors.New(\"[METRICSTORE]> data from lower granularities does not align (missing data prior to start of the buffers)\")\n 150→\tErrDataDoesNotAlignMissingBack error = errors.New(\"[METRICSTORE]> data from lower granularities does not align (missing data after the end of the buffers)\")\n 151→\tErrDataDoesNotAlignDataLenMismatch error = errors.New(\"[METRICSTORE]> data from lower granularities does not align (collected data length is different than expected data length)\")\n 152→)\n 153→\n 154→// buffer stores time-series data for a single metric at a specific hierarchical level.\n 155→//\n 156→// Buffers form doubly-linked chains ordered by time. When capacity is reached,\n 157→// a new buffer becomes the head and the old head is linked via prev/next.\n 158→//\n 159→// Fields:\n 160→// - prev: Link to older buffer in the chain (nil if this is oldest)\n 161→// - next: Link to newer buffer in the chain (nil if this is newest/head)\n 162→// - data: Time-series values (schema.Float supports NaN for missing data)\n 163→// - frequency: Measurement interval in seconds\n 164→// - start: Start timestamp (adjusted by -frequency/2 for alignment)\n 165→// - archived: True if data has been persisted to disk archive\n 166→// - closed: True if buffer is no longer accepting writes\n 167→//\n 168→// Index calculation: index = (timestamp - start) / frequency\n 169→// Actual data timestamp: start + (frequency / 2) + (index * frequency)\n 170→type buffer struct {\n 171→\tprev *buffer\n 172→\tnext *buffer\n 173→\tdata []schema.Float\n 174→\tfrequency int64\n 175→\tstart int64\n 176→\tarchived bool\n 177→\tclosed bool\n 178→\tlastUsed int64\n 179→}\n 180→\n 181→func newBuffer(ts, freq int64) *buffer {\n 182→\tb := bufferPool.Get()\n 183→\tb.frequency = freq\n 184→\tb.start = ts - (freq / 2)\n 185→\tb.prev = nil\n 186→\tb.next = nil\n 187→\tb.archived = false\n 188→\tb.closed = false\n 189→\tb.data = b.data[:0]\n 190→\treturn b\n 191→}\n 192→\n 193→// write appends a timestamped value to the buffer chain.\n 194→//\n 195→// Returns the head buffer (which may be newly created if capacity was reached).\n 196→// Timestamps older than the buffer's start are rejected. 
If the calculated index\n 197→// exceeds capacity, a new buffer is allocated and linked as the new head.\n 198→//\n 199→// Missing timestamps are automatically filled with NaN values to maintain alignment.\n 200→// Overwrites are allowed if the index is already within the existing data slice.\n 201→//\n 202→// Parameters:\n 203→// - ts: Unix timestamp in seconds\n 204→// - value: Metric value (can be schema.NaN for missing data)\n 205→//\n 206→// Returns:\n 207→// - *buffer: The new head buffer (same as b if no new buffer created)\n 208→// - error: Non-nil if timestamp is before buffer start\n 209→func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {\n 210→\tif ts < b.start {\n 211→\t\treturn nil, errors.New(\"[METRICSTORE]> cannot write value to buffer from past\")\n 212→\t}\n 213→\n 214→\t// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)\n 215→\tidx := int((ts - b.start) / b.frequency)\n 216→\tif idx >= cap(b.data) {\n 217→\t\tnewbuf := newBuffer(ts, b.frequency)\n 218→\t\tnewbuf.prev = b\n 219→\t\tb.next = newbuf\n 220→\t\tb = newbuf\n 221→\t\tidx = 0\n 222→\t}\n 223→\n 224→\t// Overwriting value or writing value from past\n 225→\tif idx < len(b.data) {\n 226→\t\tb.data[idx] = value\n 227→\t\treturn b, nil\n 228→\t}\n 229→\n 230→\t// Fill up unwritten slots with NaN\n 231→\tfor i := len(b.data); i < idx; i++ {\n 232→\t\tb.data = append(b.data, schema.NaN)\n 233→\t}\n 234→\n 235→\tb.data = append(b.data, value)\n 236→\treturn b, nil\n 237→}\n 238→\n 239→func (b *buffer) end() int64 {\n 240→\treturn b.firstWrite() + int64(len(b.data))*b.frequency\n 241→}\n 242→\n 243→func (b *buffer) firstWrite() int64 {\n 244→\treturn b.start + (b.frequency / 2)\n 245→}\n 246→\n 247→// read retrieves time-series data from the buffer chain for the specified time range.\n 248→//\n 249→// Traverses the buffer chain backwards (via prev links) if 'from' precedes the current\n 250→// buffer's start. Missing data points are represented as NaN. 
Values are accumulated\n 251→// into the provided 'data' slice (using +=, so caller must zero-initialize if needed).\n 252→//\n 253→// The function adjusts the actual time range returned if data is unavailable at the\n 254→// boundaries (returned via adjusted from/to timestamps).\n 255→//\n 256→// Parameters:\n 257→// - from: Start timestamp (Unix seconds)\n 258→// - to: End timestamp (Unix seconds, exclusive)\n 259→// - data: Pre-allocated slice to accumulate results (must be large enough)\n 260→//\n 261→// Returns:\n 262→// - []schema.Float: Slice of data (may be shorter than input 'data' slice)\n 263→// - int64: Actual start timestamp with available data\n 264→// - int64: Actual end timestamp (exclusive)\n 265→// - error: Non-nil on failure\n 266→//\n 267→// Panics if 'data' slice is too small to hold all values in [from, to).\n 268→func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {\n 269→\t// Walk back to the buffer that covers 'from', adjusting if we hit the oldest.\n 270→\tfor from < b.firstWrite() {\n 271→\t\tif b.prev == nil {\n 272→\t\t\tfrom = b.firstWrite()\n 273→\t\t\tbreak\n 274→\t\t}\n 275→\t\tb = b.prev\n 276→\t}\n 277→\n 278→\ti := 0\n 279→\tt := from\n 280→\tfor ; t < to; t += b.frequency {\n 281→\t\tidx := int((t - b.start) / b.frequency)\n 282→\t\tif idx >= cap(b.data) {\n 283→\t\t\tif b.next == nil {\n 284→\t\t\t\tbreak\n 285→\t\t\t}\n 286→\t\t\tb = b.next\n 287→\t\t\t// Recalculate idx in the new buffer; a gap between buffers may exist.\n 288→\t\t\tidx = int((t - b.start) / b.frequency)\n 289→\t\t}\n 290→\n 291→\t\tif idx >= len(b.data) {\n 292→\t\t\tif b.next == nil || to <= b.next.start {\n 293→\t\t\t\tbreak\n 294→\t\t\t}\n 295→\t\t\tdata[i] += schema.NaN // NaN + anything = NaN; propagates missing data\n 296→\t\t} else if t < b.start {\n 297→\t\t\tdata[i] += schema.NaN // gap before this buffer's first write\n 298→\t\t} else {\n 299→\t\t\tdata[i] += b.data[idx]\n 300→\t\t}\n 301→\t\ti++\n 302→\t}\n 303→\n 304→\treturn data[:i], from, t, nil\n 305→}\n 306→\n 307→// free removes buffers older than the specified timestamp from the chain.\n 308→//\n 309→// Recursively traverses backwards (via prev) and unlinks buffers whose end time\n 310→// is before the retention threshold. 
Freed buffers are always returned to the pool;\n 311→// those with non-standard capacity get a fresh BufferCap-sized data slice first.\n 312→//\n 313→// Parameters:\n 314→// - t: Retention threshold timestamp (Unix seconds)\n 315→//\n 316→// Returns:\n 317→// - delme: True if the current buffer itself should be deleted by caller\n 318→// - n: Number of buffers freed in this subtree\n 319→func (b *buffer) free(t int64) (delme bool, n int) {\n 320→\tif b.prev != nil {\n 321→\t\tdelme, m := b.prev.free(t)\n 322→\t\tn += m\n 323→\t\tif delme {\n 324→\t\t\tb.prev.next = nil\n 325→\t\t\tif cap(b.prev.data) != BufferCap {\n 326→\t\t\t\tb.prev.data = make([]schema.Float, 0, BufferCap)\n 327→\t\t\t}\n 328→\t\t\tb.prev.lastUsed = time.Now().Unix()\n 329→\t\t\tbufferPool.Put(b.prev)\n 330→\t\t\tb.prev = nil\n 331→\t\t}\n 332→\t}\n 333→\n 334→\tend := b.end()\n 335→\tif end < t {\n 336→\t\treturn true, n + 1\n 337→\t}\n 338→\n 339→\treturn false, n\n 340→}\n 341→\n 342→// forceFreeOldest recursively finds the end of the linked list (the oldest buffer)\n 343→// and removes it.\n 344→// Returns:\n 345→//\n 346→//\tdelme: true if 'b' itself is the oldest and should be removed by the caller\n 347→//\tn: the number of buffers freed (will be 1 or 0)\n 348→func (b *buffer) forceFreeOldest() (delme bool, n int) {\n 349→\t// If there is a previous buffer, recurse down to find the oldest\n 350→\tif b.prev != nil {\n 351→\t\tdelPrev, freed := b.prev.forceFreeOldest()\n 352→\n 353→\t\t// If the previous buffer signals it should be deleted:\n 354→\t\tif delPrev {\n 355→\t\t\tb.prev.next = nil\n 356→\t\t\tif cap(b.prev.data) != BufferCap {\n 357→\t\t\t\tb.prev.data = make([]schema.Float, 0, BufferCap)\n 358→\t\t\t}\n 359→\t\t\tb.prev.lastUsed = time.Now().Unix()\n 360→\t\t\tbufferPool.Put(b.prev)\n 361→\t\t\tb.prev = nil\n 362→\t\t}\n 363→\t\treturn false, freed\n 364→\t}\n 365→\n 366→\t// If b.prev is nil, THIS buffer is the oldest.\n 367→\t// We return true so the parent (or the Level loop) knows to delete reference to 'b'.\n 368→\treturn true, 1\n 369→}\n 370→\n 371→// iterFromTo invokes callback on every buffer in the chain that overlaps [from, to].\n 372→//\n 373→// Collects overlapping buffers (walking newest → oldest via prev), then invokes the\n 374→// callback in chronological order. 
Used for checkpoint/archive operations that need to serialize buffers\n 375→// within a specific time window.\n 376→//\n 377→// Parameters:\n 378→// - from: Start timestamp (Unix seconds, inclusive)\n 379→// - to: End timestamp (Unix seconds, inclusive)\n 380→// - callback: Function to invoke on each overlapping buffer\n 381→//\n 382→// Returns:\n 383→// - error: First error returned by callback, or nil if all succeeded\n 384→func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {\n 385→\tif b == nil {\n 386→\t\treturn nil\n 387→\t}\n 388→\n 389→\t// Collect overlapping buffers walking backwards (newest → oldest).\n 390→\tvar matching []*buffer\n 391→\tfor cur := b; cur != nil; cur = cur.prev {\n 392→\t\tif from <= cur.end() && cur.start <= to {\n 393→\t\t\tmatching = append(matching, cur)\n 394→\t\t}\n 395→\t}\n 396→\n 397→\t// Invoke callback in chronological order (oldest → newest).\n 398→\tfor i := len(matching) - 1; i >= 0; i-- {\n 399→\t\tif err := callback(matching[i]); err != nil {\n 400→\t\t\treturn err\n 401→\t\t}\n 402→\t}\n 403→\treturn nil\n 404→}\n 405→\n 406→func (b *buffer) count() int64 {\n 407→\tvar res int64\n 408→\tfor ; b != nil; b = b.prev {\n 409→\t\tres += int64(len(b.data))\n 410→\t}\n 411→\treturn res\n 412→}\n 413→\n 414→// bufferCount returns the number of buffer nodes in the linked list.\n 415→func (b *buffer) bufferCount() int64 {\n 416→\tvar n int64\n 417→\tfor ; b != nil; b = b.prev {\n 418→\t\tn++\n 419→\t}\n 420→\treturn n\n 421→}\n 422→\n"}]},"uuid":"844c403d-f522-47e4-ba0f-c7392fafb3b4","timestamp":"2026-03-29T19:34:38.230Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/buffer.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides buffer.go: Time-series data buffer implementation.\n//\n// # Buffer Architecture\n//\n// Each metric at each hierarchical level (cluster/host/cpu/etc.) uses a linked-list\n// chain of fixed-size buffers to store time-series data. 
This design:\n//\n// - Avoids reallocation/copying when growing (new links added instead)\n// - Enables efficient pooling (buffers returned to a package-global pool)\n// - Supports traversal back in time (via prev pointers)\n// - Maintains temporal ordering (newer data in later buffers)\n//\n// # Buffer Chain Example\n//\n//\t[oldest buffer] <- prev -- [older] <- prev -- [newest buffer (head)]\n//\t start=1000 start=1512 start=2024\n//\t data=[v0...v511] data=[v0...v511] data=[v0...v42]\n//\n// When the head buffer reaches capacity (BufferCap = 512), a new buffer becomes\n// the new head and the old head is linked via prev.\n//\n// # Pooling Strategy\n//\n// A mutex-protected PersistentBufferPool reduces GC pressure for the common case.\n// Buffers with non-standard capacity (e.g., from checkpoint deserialization) get a fresh BufferCap-sized data slice before pooling.\n//\n// # Time Alignment\n//\n// Timestamps are aligned to measurement frequency intervals:\n//\n//\tindex = (timestamp - buffer.start) / buffer.frequency\n//\tactualTime = buffer.start + (frequency / 2) + (index * frequency)\n//\n// Missing data points are represented as NaN values. The read() function propagates\n// NaN for gaps in the data rather than interpolating.\npackage metricstore\n\nimport (\n\t\"errors\"\n\t\"sync\"\n\t\"time\"\n\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\n// BufferCap is the default buffer capacity.\n// buffer.data will only ever grow up to its capacity and a new link\n// in the buffer chain will be created if needed so that no copying\n// of data or reallocation needs to happen on writes.\nconst BufferCap int = DefaultBufferCapacity\n\n// bufferPool is the global pool instance.\n// It is initialized immediately when the package loads.\nvar bufferPool = NewPersistentBufferPool()\n\ntype PersistentBufferPool struct {\n\tpool []*buffer\n\tmu sync.Mutex\n}\n\n// NewPersistentBufferPool creates a dynamic pool for buffers.\nfunc NewPersistentBufferPool() *PersistentBufferPool {\n\treturn &PersistentBufferPool{\n\t\tpool: make([]*buffer, 0),\n\t}\n}\n\nfunc (p *PersistentBufferPool) Get() *buffer {\n\tp.mu.Lock()\n\tdefer p.mu.Unlock()\n\n\tn := len(p.pool)\n\tif n == 0 {\n\t\t// Pool is empty, allocate a new one\n\t\treturn &buffer{\n\t\t\tdata: make([]schema.Float, 0, BufferCap),\n\t\t}\n\t}\n\n\t// Reuse existing buffer from the pool\n\tb := p.pool[n-1]\n\tp.pool[n-1] = nil // Avoid memory leak\n\tp.pool = p.pool[:n-1]\n\treturn b\n}\n\n// Put returns b to the pool. 
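As used by the current callers\n// (free and forceFreeOldest):\n//\n//\tb.lastUsed = time.Now().Unix()\n//\tbufferPool.Put(b)\n//\n// 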
The caller must set b.lastUsed = time.Now().Unix()\n// before calling Put so that Clean() can evict idle entries correctly.\nfunc (p *PersistentBufferPool) Put(b *buffer) {\n\t// Reset the buffer before putting it back\n\tb.data = b.data[:0]\n\n\tp.mu.Lock()\n\tdefer p.mu.Unlock()\n\n\tp.pool = append(p.pool, b)\n}\n\n// GetSize returns the exact number of buffers currently sitting in the pool.\nfunc (p *PersistentBufferPool) GetSize() int {\n\tp.mu.Lock()\n\tdefer p.mu.Unlock()\n\treturn len(p.pool)\n}\n\n// Clear drains all buffers currently in the pool, allowing the GC to collect them.\nfunc (p *PersistentBufferPool) Clear() {\n\tp.mu.Lock()\n\tdefer p.mu.Unlock()\n\tfor i := range p.pool {\n\t\tp.pool[i] = nil\n\t}\n\tp.pool = p.pool[:0]\n}\n\n// Clean removes buffers from the pool that haven't been used in the given duration.\n// It uses a simple LRU approach based on the lastUsed timestamp.\nfunc (p *PersistentBufferPool) Clean(threshold int64) {\n\tp.mu.Lock()\n\tdefer p.mu.Unlock()\n\n\t// Filter in place, retaining only buffers returned to the pool recently enough.\n\tactive := p.pool[:0]\n\tfor _, b := range p.pool {\n\t\tif b.lastUsed >= threshold {\n\t\t\tactive = append(active, b)\n\t\t}\n\t}\n\n\t// Nullify the rest to prevent memory leaks\n\tfor i := len(active); i < len(p.pool); i++ {\n\t\tp.pool[i] = nil\n\t}\n\n\tp.pool = active\n}\n\nvar (\n\t// ErrNoData indicates no time-series data exists for the requested metric/level.\n\tErrNoData error = errors.New(\"[METRICSTORE]> no data for this metric/level\")\n\n\t// ErrDataDoesNotAlign indicates that aggregated data from child scopes\n\t// does not align with the parent scope's expected timestamps/intervals.\n\tErrDataDoesNotAlignMissingFront error = errors.New(\"[METRICSTORE]> data from lower granularities does not align (missing data prior to start of the buffers)\")\n\tErrDataDoesNotAlignMissingBack error = errors.New(\"[METRICSTORE]> data from lower granularities does not align (missing data after the end of the buffers)\")\n\tErrDataDoesNotAlignDataLenMismatch error = errors.New(\"[METRICSTORE]> data from lower granularities does not align (collected data length is different than expected data length)\")\n)\n\n// buffer stores time-series data for a single metric at a specific hierarchical level.\n//\n// Buffers form doubly-linked chains ordered by time. 
When capacity is reached,\n// a new buffer becomes the head and the old head is linked via prev/next.\n//\n// Fields:\n// - prev: Link to older buffer in the chain (nil if this is oldest)\n// - next: Link to newer buffer in the chain (nil if this is newest/head)\n// - data: Time-series values (schema.Float supports NaN for missing data)\n// - frequency: Measurement interval in seconds\n// - start: Start timestamp (adjusted by -frequency/2 for alignment)\n// - archived: True if data has been persisted to disk archive\n// - closed: True if buffer is no longer accepting writes\n//\n// Index calculation: index = (timestamp - start) / frequency\n// Actual data timestamp: start + (frequency / 2) + (index * frequency)\ntype buffer struct {\n\tprev *buffer\n\tnext *buffer\n\tdata []schema.Float\n\tfrequency int64\n\tstart int64\n\tarchived bool\n\tclosed bool\n\tlastUsed int64\n}\n\nfunc newBuffer(ts, freq int64) *buffer {\n\tb := bufferPool.Get()\n\tb.frequency = freq\n\tb.start = ts - (freq / 2)\n\tb.prev = nil\n\tb.next = nil\n\tb.archived = false\n\tb.closed = false\n\tb.data = b.data[:0]\n\treturn b\n}\n\n// write appends a timestamped value to the buffer chain.\n//\n// Returns the head buffer (which may be newly created if capacity was reached).\n// Timestamps older than the buffer's start are rejected. If the calculated index\n// exceeds capacity, a new buffer is allocated and linked as the new head.\n//\n// Missing timestamps are automatically filled with NaN values to maintain alignment.\n// Overwrites are allowed if the index is already within the existing data slice.\n//\n// Parameters:\n// - ts: Unix timestamp in seconds\n// - value: Metric value (can be schema.NaN for missing data)\n//\n// Returns:\n// - *buffer: The new head buffer (same as b if no new buffer created)\n// - error: Non-nil if timestamp is before buffer start\nfunc (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {\n\tif ts < b.start {\n\t\treturn nil, errors.New(\"[METRICSTORE]> cannot write value to buffer from past\")\n\t}\n\n\t// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)\n\tidx := int((ts - b.start) / b.frequency)\n\tif idx >= cap(b.data) {\n\t\tnewbuf := newBuffer(ts, b.frequency)\n\t\tnewbuf.prev = b\n\t\tb.next = newbuf\n\t\tb = newbuf\n\t\tidx = 0\n\t}\n\n\t// Overwriting value or writing value from past\n\tif idx < len(b.data) {\n\t\tb.data[idx] = value\n\t\treturn b, nil\n\t}\n\n\t// Fill up unwritten slots with NaN\n\tfor i := len(b.data); i < idx; i++ {\n\t\tb.data = append(b.data, schema.NaN)\n\t}\n\n\tb.data = append(b.data, value)\n\treturn b, nil\n}\n\nfunc (b *buffer) end() int64 {\n\treturn b.firstWrite() + int64(len(b.data))*b.frequency\n}\n\nfunc (b *buffer) firstWrite() int64 {\n\treturn b.start + (b.frequency / 2)\n}\n\n// read retrieves time-series data from the buffer chain for the specified time range.\n//\n// Traverses the buffer chain backwards (via prev links) if 'from' precedes the current\n// buffer's start. Missing data points are represented as NaN. 
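The per-sample index follows the\n// package-level alignment rule:\n//\n//\tidx := int((t - b.start) / b.frequency)\n//\n// 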
Values are accumulated\n// into the provided 'data' slice (using +=, so caller must zero-initialize if needed).\n//\n// The function adjusts the actual time range returned if data is unavailable at the\n// boundaries (returned via adjusted from/to timestamps).\n//\n// Parameters:\n// - from: Start timestamp (Unix seconds)\n// - to: End timestamp (Unix seconds, exclusive)\n// - data: Pre-allocated slice to accumulate results (must be large enough)\n//\n// Returns:\n// - []schema.Float: Slice of data (may be shorter than input 'data' slice)\n// - int64: Actual start timestamp with available data\n// - int64: Actual end timestamp (exclusive)\n// - error: Non-nil on failure\n//\n// Panics if 'data' slice is too small to hold all values in [from, to).\nfunc (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {\n\t// Walk back to the buffer that covers 'from', adjusting if we hit the oldest.\n\tfor from < b.firstWrite() {\n\t\tif b.prev == nil {\n\t\t\tfrom = b.firstWrite()\n\t\t\tbreak\n\t\t}\n\t\tb = b.prev\n\t}\n\n\ti := 0\n\tt := from\n\tfor ; t < to; t += b.frequency {\n\t\tidx := int((t - b.start) / b.frequency)\n\t\tif idx >= cap(b.data) {\n\t\t\tif b.next == nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tb = b.next\n\t\t\t// Recalculate idx in the new buffer; a gap between buffers may exist.\n\t\t\tidx = int((t - b.start) / b.frequency)\n\t\t}\n\n\t\tif idx >= len(b.data) {\n\t\t\tif b.next == nil || to <= b.next.start {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tdata[i] += schema.NaN // NaN + anything = NaN; propagates missing data\n\t\t} else if t < b.start {\n\t\t\tdata[i] += schema.NaN // gap before this buffer's first write\n\t\t} else {\n\t\t\tdata[i] += b.data[idx]\n\t\t}\n\t\ti++\n\t}\n\n\treturn data[:i], from, t, nil\n}\n\n// free removes buffers older than the specified timestamp from the chain.\n//\n// Recursively traverses backwards (via prev) and unlinks buffers whose end time\n// is before the retention threshold. 
Freed buffers are always returned to the pool;\n// those with non-standard capacity get a fresh BufferCap-sized data slice first.\n//\n// Parameters:\n// - t: Retention threshold timestamp (Unix seconds)\n//\n// Returns:\n// - delme: True if the current buffer itself should be deleted by caller\n// - n: Number of buffers freed in this subtree\nfunc (b *buffer) free(t int64) (delme bool, n int) {\n\tif b.prev != nil {\n\t\tdelme, m := b.prev.free(t)\n\t\tn += m\n\t\tif delme {\n\t\t\tb.prev.next = nil\n\t\t\tif cap(b.prev.data) != BufferCap {\n\t\t\t\tb.prev.data = make([]schema.Float, 0, BufferCap)\n\t\t\t}\n\t\t\tb.prev.lastUsed = time.Now().Unix()\n\t\t\tbufferPool.Put(b.prev)\n\t\t\tb.prev = nil\n\t\t}\n\t}\n\n\tend := b.end()\n\tif end < t {\n\t\treturn true, n + 1\n\t}\n\n\treturn false, n\n}\n\n// forceFreeOldest recursively finds the end of the linked list (the oldest buffer)\n// and removes it.\n// Returns:\n//\n//\tdelme: true if 'b' itself is the oldest and should be removed by the caller\n//\tn: the number of buffers freed (will be 1 or 0)\nfunc (b *buffer) forceFreeOldest() (delme bool, n int) {\n\t// If there is a previous buffer, recurse down to find the oldest\n\tif b.prev != nil {\n\t\tdelPrev, freed := b.prev.forceFreeOldest()\n\n\t\t// If the previous buffer signals it should be deleted:\n\t\tif delPrev {\n\t\t\tb.prev.next = nil\n\t\t\tif cap(b.prev.data) != BufferCap {\n\t\t\t\tb.prev.data = make([]schema.Float, 0, BufferCap)\n\t\t\t}\n\t\t\tb.prev.lastUsed = time.Now().Unix()\n\t\t\tbufferPool.Put(b.prev)\n\t\t\tb.prev = nil\n\t\t}\n\t\treturn false, freed\n\t}\n\n\t// If b.prev is nil, THIS buffer is the oldest.\n\t// We return true so the parent (or the Level loop) knows to delete reference to 'b'.\n\treturn true, 1\n}\n\n// iterFromTo invokes callback on every buffer in the chain that overlaps [from, to].\n//\n// Collects overlapping buffers (walking newest → oldest via prev), then invokes the\n// callback in chronological order. Used for checkpoint/archive operations that need to serialize buffers\n// within a specific time window.\n//\n// Parameters:\n// - from: Start timestamp (Unix seconds, inclusive)\n// - to: End timestamp (Unix seconds, inclusive)\n// - callback: Function to invoke on each overlapping buffer\n//\n// Returns:\n// - error: First error returned by callback, or nil if all succeeded\nfunc (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {\n\tif b == nil {\n\t\treturn nil\n\t}\n\n\t// Collect overlapping buffers walking backwards (newest → oldest).\n\tvar matching []*buffer\n\tfor cur := b; cur != nil; cur = cur.prev {\n\t\tif from <= cur.end() && cur.start <= to {\n\t\t\tmatching = append(matching, cur)\n\t\t}\n\t}\n\n\t// Invoke callback in chronological order (oldest → newest).\n\tfor i := len(matching) - 1; i >= 0; i-- {\n\t\tif err := callback(matching[i]); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\treturn nil\n}\n\nfunc (b *buffer) count() int64 {\n\tvar res int64\n\tfor ; b != nil; b = b.prev {\n\t\tres += int64(len(b.data))\n\t}\n\treturn res\n}\n\n// bufferCount returns the number of buffer nodes in the linked list.\nfunc (b *buffer) bufferCount() int64 {\n\tvar n int64\n\tfor ; b != nil; b = b.prev {\n\t\tn++\n\t}\n\treturn n\n}\n","numLines":422,"startLine":1,"totalLines":422}},"sourceToolAssistantUUID":"28653467-4421-47dc-9cd7-2bb906296767"}
{"parentUuid":"844c403d-f522-47e4-ba0f-c7392fafb3b4","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01Pt9K6fzisBSCRx3ds5R5nj","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/config.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"058c570b-090b-46f4-be66-e53c4d83814b","timestamp":"2026-03-29T19:34:38.520Z"}
{"parentUuid":"058c570b-090b-46f4-be66-e53c4d83814b","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01Pt9K6fzisBSCRx3ds5R5nj","toolUseID":"toolu_01Pt9K6fzisBSCRx3ds5R5nj","timestamp":"2026-03-29T19:34:38.597Z","uuid":"cdc40d97-3b33-4bc1-b21b-88b245bff928"}
{"parentUuid":"058c570b-090b-46f4-be66-e53c4d83814b","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01Pt9K6fzisBSCRx3ds5R5nj","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// Package metricstore provides config.go: Configuration structures and metric management.\n 7→//\n 8→// # Configuration Hierarchy\n 9→//\n 10→// The metricstore package uses nested configuration structures:\n 11→//\n 12→//\tMetricStoreConfig (Keys)\n 13→//\t├─ NumWorkers: Parallel checkpoint/archive workers\n 14→//\t├─ RetentionInMemory: How long to keep data in RAM (also used as cleanup interval)\n 15→//\t├─ MemoryCap: Memory limit in bytes (triggers forceFree)\n 16→//\t├─ Checkpoints: Persistence configuration\n 17→//\t│ ├─ FileFormat: \"json\" or \"wal\" (default: \"wal\")\n 18→//\t│ └─ RootDir: Checkpoint storage path\n 19→//\t├─ Cleanup: Long-term storage configuration (interval = RetentionInMemory)\n 20→//\t│ ├─ RootDir: Archive storage path (archive mode only)\n 21→//\t│ └─ Mode: \"delete\" or \"archive\"\n 22→//\t├─ Debug: Development/debugging options\n 23→//\t└─ Subscriptions: NATS topic subscriptions for metric ingestion\n 24→//\n 25→// # Metric Configuration\n 26→//\n 27→// Each metric (e.g., \"cpu_load\", \"mem_used\") has a MetricConfig entry in the global\n 28→// Metrics map, defining:\n 29→//\n 30→// - Frequency: Measurement interval in seconds\n 31→// - Aggregation: How to combine values (sum/avg/none) when transforming scopes\n 32→// - offset: Internal index into Level.metrics slice (assigned during Init)\n 33→//\n 34→// # AggregationStrategy\n 35→//\n 36→// Determines how to combine metric values when aggregating from finer to coarser scopes:\n 37→//\n 38→// - NoAggregation: Do not combine (incompatible scopes)\n 39→// - SumAggregation: Add values (e.g., power consumption: core→socket)\n 40→// - AvgAggregation: Average values (e.g., temperature: core→socket)\n 41→package metricstore\n 42→\n 43→import (\n 44→\t\"fmt\"\n 45→\t\"time\"\n 46→\n 47→\t\"github.com/ClusterCockpit/cc-backend/pkg/archive\"\n 48→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 49→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 50→)\n 51→\n 52→const (\n 53→\tDefaultMaxWorkers = 10\n 54→\tDefaultBufferCapacity = 512\n 55→\tDefaultGCTriggerInterval = 100\n 56→\tDefaultMemoryUsageTrackerInterval = 5 * time.Minute\n 57→)\n 58→\n 59→// Checkpoints configures periodic persistence of in-memory metric data.\n 60→//\n 61→// Fields:\n 62→// - FileFormat: \"json\" (human-readable, periodic) or \"wal\" (binary snapshot + WAL, crash-safe); default is \"wal\"\n 63→// - RootDir: Filesystem path for checkpoint files (created if missing)\n 64→// - MaxWALSize: Maximum size in bytes for a single host's WAL file; 0 = unlimited (default).\n 65→// When exceeded the WAL is force-rotated to prevent unbounded disk growth.\n 66→type Checkpoints struct {\n 67→\tFileFormat string `json:\"file-format\"`\n 68→\tRootDir string `json:\"directory\"`\n 69→\tMaxWALSize int64 `json:\"max-wal-size,omitempty\"`\n 70→}\n 71→\n 72→// Debug provides development and profiling options.\n 73→//\n 
74→// Fields:\n 75→// - DumpToFile: Path to dump checkpoint data for inspection (empty = disabled)\n 76→// - EnableGops: Enable gops agent for live runtime debugging (https://github.com/google/gops)\n 77→type Debug struct {\n 78→\tDumpToFile string `json:\"dump-to-file\"`\n 79→\tEnableGops bool `json:\"gops\"`\n 80→}\n 81→\n 82→// Cleanup configures long-term storage of old metric data.\n 83→//\n 84→// Data older than RetentionInMemory is archived to disk or deleted.\n 85→// The cleanup interval is always RetentionInMemory.\n 86→//\n 87→// Fields:\n 88→// - RootDir: Filesystem path for archived data (used in \"archive\" mode)\n 89→// - Mode: \"delete\" (discard old data) or \"archive\" (write to RootDir)\n 90→type Cleanup struct {\n 91→\tRootDir string `json:\"directory\"`\n 92→\tMode string `json:\"mode\"`\n 93→}\n 94→\n 95→// Subscriptions defines NATS topics to subscribe to for metric ingestion.\n 96→//\n 97→// Each subscription receives metrics via NATS messaging, enabling real-time\n 98→// data collection from compute nodes.\n 99→//\n 100→// Fields:\n 101→// - SubscribeTo: NATS subject/channel name (e.g., \"metrics.compute.*\")\n 102→// - ClusterTag: Default cluster name for metrics without cluster tag (optional)\n 103→type Subscriptions []struct {\n 104→\t// Channel name\n 105→\tSubscribeTo string `json:\"subscribe-to\"`\n 106→\n 107→\t// Allow lines without a cluster tag, use this as default, optional\n 108→\tClusterTag string `json:\"cluster-tag\"`\n 109→}\n 110→\n 111→// MetricStoreConfig defines the main configuration for the metricstore.\n 112→//\n 113→// Loaded from cc-backend's config.json \"metricstore\" section. Controls memory usage,\n 114→// persistence, archiving, and metric ingestion.\n 115→//\n 116→// Fields:\n 117→// - NumWorkers: Parallel workers for checkpoint/archive (0 = auto: min(NumCPU/2+1, 10))\n 118→// - RetentionInMemory: Duration string (e.g., \"48h\") for in-memory data retention\n 119→// - MemoryCap: Max memory in GB for buffer data (0 = unlimited); triggers forceFree when exceeded\n 120→// - Checkpoints: Periodic persistence configuration\n 121→// - Debug: Development/profiling options (nil = disabled)\n 122→// - Cleanup: Long-term storage configuration (nil = disabled)\n 123→// - Subscriptions: NATS topics for metric ingestion (nil = polling only)\n 124→type MetricStoreConfig struct {\n 125→\t// Number of concurrent workers for checkpoint and archive operations.\n 126→\t// If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10)\n 127→\tNumWorkers int `json:\"num-workers\"`\n 128→\tRetentionInMemory string `json:\"retention-in-memory\"`\n 129→\tCheckpointInterval string `json:\"checkpoint-interval,omitempty\"`\n 130→\tMemoryCap int `json:\"memory-cap\"`\n 131→\tCheckpoints Checkpoints `json:\"checkpoints\"`\n 132→\tDebug *Debug `json:\"debug\"`\n 133→\tCleanup *Cleanup `json:\"cleanup\"`\n 134→\tSubscriptions *Subscriptions `json:\"nats-subscriptions\"`\n 135→}\n 136→\n 137→// Keys is the global metricstore configuration instance.\n 138→//\n 139→// Initialized with defaults, then overwritten by cc-backend's config.json.\n 140→// Accessed by Init(), Checkpointing(), and other lifecycle functions.\n 141→var Keys MetricStoreConfig = MetricStoreConfig{\n 142→\tCheckpoints: Checkpoints{\n 143→\t\tFileFormat: \"wal\",\n 144→\t\tRootDir: \"./var/checkpoints\",\n 145→\t},\n 146→\tCleanup: &Cleanup{\n 147→\t\tMode: \"delete\",\n 148→\t},\n 149→}\n 150→\n 151→// AggregationStrategy defines how to combine metric values across hierarchy levels.\n 152→//\n 153→// 
Used when transforming data from finer-grained scopes (e.g., core) to coarser scopes\n 154→// (e.g., socket). This is SPATIAL aggregation, not TEMPORAL (time-based) aggregation.\n 155→//\n 156→// Values:\n 157→// - NoAggregation: Do not aggregate (incompatible scopes or non-aggregatable metrics)\n 158→// - SumAggregation: Add values (e.g., power: sum core power → socket power)\n 159→// - AvgAggregation: Average values (e.g., temperature: average core temps → socket temp)\n 160→type AggregationStrategy int\n 161→\n 162→const (\n 163→\tNoAggregation AggregationStrategy = iota // Do not aggregate\n 164→\tSumAggregation // Sum values (e.g., power, energy)\n 165→\tAvgAggregation // Average values (e.g., temperature, utilization)\n 166→)\n 167→\n 168→// AssignAggregationStrategy parses a string into an AggregationStrategy value.\n 169→//\n 170→// Used when loading metric configurations from JSON/YAML files.\n 171→//\n 172→// Parameters:\n 173→// - str: \"sum\", \"avg\", or \"\" (empty string for NoAggregation)\n 174→//\n 175→// Returns:\n 176→// - AggregationStrategy: Parsed value\n 177→// - error: Non-nil if str is unrecognized\n 178→func AssignAggregationStrategy(str string) (AggregationStrategy, error) {\n 179→\tswitch str {\n 180→\tcase \"\":\n 181→\t\treturn NoAggregation, nil\n 182→\tcase \"sum\":\n 183→\t\treturn SumAggregation, nil\n 184→\tcase \"avg\":\n 185→\t\treturn AvgAggregation, nil\n 186→\tdefault:\n 187→\t\treturn NoAggregation, fmt.Errorf(\"[METRICSTORE]> unknown aggregation strategy: %s\", str)\n 188→\t}\n 189→}\n 190→\n 191→// MetricConfig defines configuration for a single metric type.\n 192→//\n 193→// Stored in the global Metrics map, keyed by metric name (e.g., \"cpu_load\").\n 194→//\n 195→// Fields:\n 196→// - Frequency: Measurement interval in seconds (e.g., 60 for 1-minute granularity)\n 197→// - Aggregation: How to combine values across hierarchy levels (sum/avg/none)\n 198→// - offset: Internal index into Level.metrics slice (assigned during Init)\n 199→type MetricConfig struct {\n 200→\t// Interval in seconds at which measurements are stored\n 201→\tFrequency int64\n 202→\n 203→\t// Can be 'sum', 'avg' or null. 
Describes how to aggregate metrics from the same timestep over the hierarchy.\n 204→\tAggregation AggregationStrategy\n 205→\n 206→\t// Private, used internally...\n 207→\toffset int\n 208→}\n 209→\n 210→func BuildMetricList() map[string]MetricConfig {\n 211→\tvar metrics map[string]MetricConfig = make(map[string]MetricConfig)\n 212→\n 213→\taddMetric := func(name string, metric MetricConfig) error {\n 214→\t\tif metrics == nil {\n 215→\t\t\tmetrics = make(map[string]MetricConfig, 0)\n 216→\t\t}\n 217→\n 218→\t\tif existingMetric, ok := metrics[name]; ok {\n 219→\t\t\tif existingMetric.Frequency != metric.Frequency {\n 220→\t\t\t\tif existingMetric.Frequency < metric.Frequency {\n 221→\t\t\t\t\texistingMetric.Frequency = metric.Frequency\n 222→\t\t\t\t\tmetrics[name] = existingMetric\n 223→\t\t\t\t}\n 224→\t\t\t}\n 225→\t\t} else {\n 226→\t\t\tmetrics[name] = metric\n 227→\t\t}\n 228→\n 229→\t\treturn nil\n 230→\t}\n 231→\n 232→\t// Helper function to add metric configuration\n 233→\taddMetricConfig := func(mc *schema.MetricConfig) {\n 234→\t\tagg, err := AssignAggregationStrategy(mc.Aggregation)\n 235→\t\tif err != nil {\n 236→\t\t\tcclog.Warnf(\"Could not find aggregation strategy for metric config '%s': %s\", mc.Name, err.Error())\n 237→\t\t}\n 238→\n 239→\t\taddMetric(mc.Name, MetricConfig{\n 240→\t\t\tFrequency: int64(mc.Timestep),\n 241→\t\t\tAggregation: agg,\n 242→\t\t})\n 243→\t}\n 244→\tfor _, c := range archive.Clusters {\n 245→\t\tfor _, mc := range c.MetricConfig {\n 246→\t\t\taddMetricConfig(mc)\n 247→\t\t}\n 248→\n 249→\t\tfor _, sc := range c.SubClusters {\n 250→\t\t\tfor _, mc := range sc.MetricConfig {\n 251→\t\t\t\taddMetricConfig(mc)\n 252→\t\t\t}\n 253→\t\t}\n 254→\t}\n 255→\n 256→\treturn metrics\n 257→}\n 258→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"18274ebe-67b8-442b-bdef-cd6d99916c29","timestamp":"2026-03-29T19:34:38.594Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/config.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. 
This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides config.go: Configuration structures and metric management.\n//\n// # Configuration Hierarchy\n//\n// The metricstore package uses nested configuration structures:\n//\n//\tMetricStoreConfig (Keys)\n//\t├─ NumWorkers: Parallel checkpoint/archive workers\n//\t├─ RetentionInMemory: How long to keep data in RAM (also used as cleanup interval)\n//\t├─ MemoryCap: Memory limit in bytes (triggers forceFree)\n//\t├─ Checkpoints: Persistence configuration\n//\t│ ├─ FileFormat: \"json\" or \"wal\" (default: \"wal\")\n//\t│ └─ RootDir: Checkpoint storage path\n//\t├─ Cleanup: Long-term storage configuration (interval = RetentionInMemory)\n//\t│ ├─ RootDir: Archive storage path (archive mode only)\n//\t│ └─ Mode: \"delete\" or \"archive\"\n//\t├─ Debug: Development/debugging options\n//\t└─ Subscriptions: NATS topic subscriptions for metric ingestion\n//\n// # Metric Configuration\n//\n// Each metric (e.g., \"cpu_load\", \"mem_used\") has a MetricConfig entry in the global\n// Metrics map, defining:\n//\n// - Frequency: Measurement interval in seconds\n// - Aggregation: How to combine values (sum/avg/none) when transforming scopes\n// - offset: Internal index into Level.metrics slice (assigned during Init)\n//\n// # AggregationStrategy\n//\n// Determines how to combine metric values when aggregating from finer to coarser scopes:\n//\n// - NoAggregation: Do not combine (incompatible scopes)\n// - SumAggregation: Add values (e.g., power consumption: core→socket)\n// - AvgAggregation: Average values (e.g., temperature: core→socket)\npackage metricstore\n\nimport (\n\t\"fmt\"\n\t\"time\"\n\n\t\"github.com/ClusterCockpit/cc-backend/pkg/archive\"\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\nconst (\n\tDefaultMaxWorkers = 10\n\tDefaultBufferCapacity = 512\n\tDefaultGCTriggerInterval = 100\n\tDefaultMemoryUsageTrackerInterval = 5 * time.Minute\n)\n\n// Checkpoints configures periodic persistence of in-memory metric data.\n//\n// Fields:\n// - FileFormat: \"json\" (human-readable, periodic) or \"wal\" (binary snapshot + WAL, crash-safe); default is \"wal\"\n// - RootDir: Filesystem path for checkpoint files (created if missing)\n// - MaxWALSize: Maximum size in bytes for a single host's WAL file; 0 = unlimited (default).\n// When exceeded the WAL is force-rotated to prevent unbounded disk growth.\ntype Checkpoints struct {\n\tFileFormat string `json:\"file-format\"`\n\tRootDir string `json:\"directory\"`\n\tMaxWALSize int64 `json:\"max-wal-size,omitempty\"`\n}\n\n// Debug provides development and profiling options.\n//\n// Fields:\n// - DumpToFile: Path to dump checkpoint data for inspection (empty = disabled)\n// - EnableGops: Enable gops agent for live runtime debugging (https://github.com/google/gops)\ntype Debug struct {\n\tDumpToFile string `json:\"dump-to-file\"`\n\tEnableGops bool `json:\"gops\"`\n}\n\n// Cleanup configures long-term storage of old metric data.\n//\n// Data older than RetentionInMemory is archived to disk or deleted.\n// The cleanup interval is always RetentionInMemory.\n//\n// Fields:\n// - RootDir: Filesystem path for archived data (used in \"archive\" mode)\n// - Mode: \"delete\" (discard old data) or \"archive\" (write to RootDir)\ntype Cleanup struct {\n\tRootDir string `json:\"directory\"`\n\tMode string `json:\"mode\"`\n}\n\n// Subscriptions 
defines NATS topics to subscribe to for metric ingestion.\n//\n// Each subscription receives metrics via NATS messaging, enabling real-time\n// data collection from compute nodes.\n//\n// Fields:\n// - SubscribeTo: NATS subject/channel name (e.g., \"metrics.compute.*\")\n// - ClusterTag: Default cluster name for metrics without cluster tag (optional)\ntype Subscriptions []struct {\n\t// Channel name\n\tSubscribeTo string `json:\"subscribe-to\"`\n\n\t// Allow lines without a cluster tag, use this as default, optional\n\tClusterTag string `json:\"cluster-tag\"`\n}\n\n// MetricStoreConfig defines the main configuration for the metricstore.\n//\n// Loaded from cc-backend's config.json \"metricstore\" section. Controls memory usage,\n// persistence, archiving, and metric ingestion.\n//\n// Fields:\n// - NumWorkers: Parallel workers for checkpoint/archive (0 = auto: min(NumCPU/2+1, 10))\n// - RetentionInMemory: Duration string (e.g., \"48h\") for in-memory data retention\n// - MemoryCap: Max memory in GB for buffer data (0 = unlimited); triggers forceFree when exceeded\n// - Checkpoints: Periodic persistence configuration\n// - Debug: Development/profiling options (nil = disabled)\n// - Archive: Long-term storage configuration (nil = disabled)\n// - Subscriptions: NATS topics for metric ingestion (nil = polling only)\ntype MetricStoreConfig struct {\n\t// Number of concurrent workers for checkpoint and archive operations.\n\t// If not set or 0, defaults to min(runtime.NumCPU()/2+1, 10)\n\tNumWorkers int `json:\"num-workers\"`\n\tRetentionInMemory string `json:\"retention-in-memory\"`\n\tCheckpointInterval string `json:\"checkpoint-interval,omitempty\"`\n\tMemoryCap int `json:\"memory-cap\"`\n\tCheckpoints Checkpoints `json:\"checkpoints\"`\n\tDebug *Debug `json:\"debug\"`\n\tCleanup *Cleanup `json:\"cleanup\"`\n\tSubscriptions *Subscriptions `json:\"nats-subscriptions\"`\n}\n\n// Keys is the global metricstore configuration instance.\n//\n// Initialized with defaults, then overwritten by cc-backend's config.json.\n// Accessed by Init(), Checkpointing(), and other lifecycle functions.\nvar Keys MetricStoreConfig = MetricStoreConfig{\n\tCheckpoints: Checkpoints{\n\t\tFileFormat: \"wal\",\n\t\tRootDir: \"./var/checkpoints\",\n\t},\n\tCleanup: &Cleanup{\n\t\tMode: \"delete\",\n\t},\n}\n\n// AggregationStrategy defines how to combine metric values across hierarchy levels.\n//\n// Used when transforming data from finer-grained scopes (e.g., core) to coarser scopes\n// (e.g., socket). 
This is SPATIAL aggregation, not TEMPORAL (time-based) aggregation.\n//\n// Values:\n// - NoAggregation: Do not aggregate (incompatible scopes or non-aggregatable metrics)\n// - SumAggregation: Add values (e.g., power: sum core power → socket power)\n// - AvgAggregation: Average values (e.g., temperature: average core temps → socket temp)\ntype AggregationStrategy int\n\nconst (\n\tNoAggregation AggregationStrategy = iota // Do not aggregate\n\tSumAggregation // Sum values (e.g., power, energy)\n\tAvgAggregation // Average values (e.g., temperature, utilization)\n)\n\n// AssignAggregationStrategy parses a string into an AggregationStrategy value.\n//\n// Used when loading metric configurations from JSON/YAML files.\n//\n// Parameters:\n// - str: \"sum\", \"avg\", or \"\" (empty string for NoAggregation)\n//\n// Returns:\n// - AggregationStrategy: Parsed value\n// - error: Non-nil if str is unrecognized\nfunc AssignAggregationStrategy(str string) (AggregationStrategy, error) {\n\tswitch str {\n\tcase \"\":\n\t\treturn NoAggregation, nil\n\tcase \"sum\":\n\t\treturn SumAggregation, nil\n\tcase \"avg\":\n\t\treturn AvgAggregation, nil\n\tdefault:\n\t\treturn NoAggregation, fmt.Errorf(\"[METRICSTORE]> unknown aggregation strategy: %s\", str)\n\t}\n}\n\n// MetricConfig defines configuration for a single metric type.\n//\n// Stored in the global Metrics map, keyed by metric name (e.g., \"cpu_load\").\n//\n// Fields:\n// - Frequency: Measurement interval in seconds (e.g., 60 for 1-minute granularity)\n// - Aggregation: How to combine values across hierarchy levels (sum/avg/none)\n// - offset: Internal index into Level.metrics slice (assigned during Init)\ntype MetricConfig struct {\n\t// Interval in seconds at which measurements are stored\n\tFrequency int64\n\n\t// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.\n\tAggregation AggregationStrategy\n\n\t// Private, used internally...\n\toffset int\n}\n\nfunc BuildMetricList() map[string]MetricConfig {\n\tvar metrics map[string]MetricConfig = make(map[string]MetricConfig)\n\n\taddMetric := func(name string, metric MetricConfig) error {\n\t\tif metrics == nil {\n\t\t\tmetrics = make(map[string]MetricConfig, 0)\n\t\t}\n\n\t\tif existingMetric, ok := metrics[name]; ok {\n\t\t\tif existingMetric.Frequency != metric.Frequency {\n\t\t\t\tif existingMetric.Frequency < metric.Frequency {\n\t\t\t\t\texistingMetric.Frequency = metric.Frequency\n\t\t\t\t\tmetrics[name] = existingMetric\n\t\t\t\t}\n\t\t\t}\n\t\t} else {\n\t\t\tmetrics[name] = metric\n\t\t}\n\n\t\treturn nil\n\t}\n\n\t// Helper function to add metric configuration\n\taddMetricConfig := func(mc *schema.MetricConfig) {\n\t\tagg, err := AssignAggregationStrategy(mc.Aggregation)\n\t\tif err != nil {\n\t\t\tcclog.Warnf(\"Could not find aggregation strategy for metric config '%s': %s\", mc.Name, err.Error())\n\t\t}\n\n\t\taddMetric(mc.Name, MetricConfig{\n\t\t\tFrequency: int64(mc.Timestep),\n\t\t\tAggregation: agg,\n\t\t})\n\t}\n\tfor _, c := range archive.Clusters {\n\t\tfor _, mc := range c.MetricConfig {\n\t\t\taddMetricConfig(mc)\n\t\t}\n\n\t\tfor _, sc := range c.SubClusters {\n\t\t\tfor _, mc := range sc.MetricConfig {\n\t\t\t\taddMetricConfig(mc)\n\t\t\t}\n\t\t}\n\t}\n\n\treturn metrics\n}\n","numLines":258,"startLine":1,"totalLines":258}},"sourceToolAssistantUUID":"058c570b-090b-46f4-be66-e53c4d83814b"}
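The config.go contents above document AssignAggregationStrategy and the frequency-dedup rule in BuildMetricList (when the same metric name appears with different Timestep values across clusters or subclusters, the larger Frequency wins). A minimal sketch of the parsing rule, not part of the session itself; the import path is taken from the package's own imports shown in the file:

```go
// Sketch only: exercises AssignAggregationStrategy as documented above.
package main

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
)

func main() {
	// "" -> NoAggregation (0), "sum" -> SumAggregation (1),
	// "avg" -> AvgAggregation (2); any other string returns an error
	// and falls back to NoAggregation.
	for _, s := range []string{"", "sum", "avg", "median"} {
		agg, err := metricstore.AssignAggregationStrategy(s)
		fmt.Printf("%q -> %d (err: %v)\n", s, agg, err)
	}
}
```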
{"parentUuid":"18274ebe-67b8-442b-bdef-cd6d99916c29","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01VcoPjVxdL68ZsXmW2AYRzQ","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"4c8ba73c-da5c-4447-9352-1c35b5979d77","timestamp":"2026-03-29T19:34:38.938Z"}
{"parentUuid":"4c8ba73c-da5c-4447-9352-1c35b5979d77","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01VcoPjVxdL68ZsXmW2AYRzQ","toolUseID":"toolu_01VcoPjVxdL68ZsXmW2AYRzQ","timestamp":"2026-03-29T19:34:38.991Z","uuid":"292f73c6-0c41-4397-bb6d-ecb34cb821e3"}
{"parentUuid":"4c8ba73c-da5c-4447-9352-1c35b5979d77","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01VcoPjVxdL68ZsXmW2AYRzQ","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→package metricstore\n 7→\n 8→import (\n 9→\t\"context\"\n 10→\t\"errors\"\n 11→\t\"fmt\"\n 12→\t\"os\"\n 13→\t\"path/filepath\"\n 14→\t\"strings\"\n 15→\t\"sync\"\n 16→\t\"sync/atomic\"\n 17→\t\"time\"\n 18→\n 19→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 20→)\n 21→\n 22→// Worker for either Archiving or Deleting files\n 23→\n 24→func CleanUp(wg *sync.WaitGroup, ctx context.Context) {\n 25→\tif Keys.Cleanup.Mode == \"archive\" {\n 26→\t\tcclog.Info(\"[METRICSTORE]> enable archive cleanup to parquet\")\n 27→\t\t// Run as Archiver\n 28→\t\tcleanUpWorker(wg, ctx,\n 29→\t\t\tKeys.RetentionInMemory,\n 30→\t\t\t\"archiving\",\n 31→\t\t\tKeys.Cleanup.RootDir,\n 32→\t\t\tfalse,\n 33→\t\t)\n 34→\t} else {\n 35→\t\t// Run as Deleter\n 36→\t\tcleanUpWorker(wg, ctx,\n 37→\t\t\tKeys.RetentionInMemory,\n 38→\t\t\t\"deleting\",\n 39→\t\t\t\"\",\n 40→\t\t\ttrue,\n 41→\t\t)\n 42→\t}\n 43→}\n 44→\n 45→// cleanUpWorker takes simple values to configure what it does\n 46→func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {\n 47→\twg.Go(func() {\n 48→\t\td, err := time.ParseDuration(interval)\n 49→\t\tif err != nil {\n 50→\t\t\tcclog.Fatalf(\"[METRICSTORE]> error parsing %s interval duration: %v\\n\", mode, err)\n 51→\t\t}\n 52→\t\tif d <= 0 {\n 53→\t\t\treturn\n 54→\t\t}\n 55→\n 56→\t\tticker := time.NewTicker(d)\n 57→\t\tdefer ticker.Stop()\n 58→\n 59→\t\tfor {\n 60→\t\t\tselect {\n 61→\t\t\tcase <-ctx.Done():\n 62→\t\t\t\treturn\n 63→\t\t\tcase <-ticker.C:\n 64→\t\t\t\tt := time.Now().Add(-d)\n 65→\t\t\t\tcclog.Infof(\"[METRICSTORE]> start %s checkpoints (older than %s)...\", mode, t.Format(time.RFC3339))\n 66→\n 67→\t\t\t\tn, err := CleanupCheckpoints(Keys.Checkpoints.RootDir, cleanupDir, t.Unix(), delete)\n 68→\n 69→\t\t\t\tif err != nil {\n 70→\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> %s failed: %s\", mode, err.Error())\n 71→\t\t\t\t} else {\n 72→\t\t\t\t\tif delete {\n 73→\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoints deleted\", n)\n 74→\t\t\t\t\t} else {\n 75→\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoint files archived to parquet\", n)\n 76→\t\t\t\t\t}\n 77→\t\t\t\t}\n 78→\t\t\t}\n 79→\t\t}\n 80→\t})\n 81→}\n 82→\n 83→var ErrNoNewArchiveData error = errors.New(\"all data already archived\")\n 84→\n 85→// CleanupCheckpoints deletes or archives all checkpoint files older than `from`.\n 86→// When archiving, consolidates all hosts per cluster into a single Parquet file.\n 87→func CleanupCheckpoints(checkpointsDir, cleanupDir string, from int64, deleteInstead bool) (int, error) {\n 88→\tif deleteInstead {\n 89→\t\treturn deleteCheckpoints(checkpointsDir, from)\n 90→\t}\n 91→\n 92→\treturn archiveCheckpoints(checkpointsDir, cleanupDir, from)\n 93→}\n 94→\n 95→// deleteCheckpoints removes checkpoint files older than `from` across all clusters/hosts.\n 96→func 
deleteCheckpoints(checkpointsDir string, from int64) (int, error) {\n 97→\tentries1, err := os.ReadDir(checkpointsDir)\n 98→\tif err != nil {\n 99→\t\treturn 0, err\n 100→\t}\n 101→\n 102→\ttype workItem struct {\n 103→\t\tdir string\n 104→\t\tcluster, host string\n 105→\t}\n 106→\n 107→\tvar wg sync.WaitGroup\n 108→\tn, errs := int32(0), int32(0)\n 109→\twork := make(chan workItem, Keys.NumWorkers)\n 110→\n 111→\twg.Add(Keys.NumWorkers)\n 112→\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n 113→\t\tgo func() {\n 114→\t\t\tdefer wg.Done()\n 115→\t\t\tfor item := range work {\n 116→\t\t\t\tentries, err := os.ReadDir(item.dir)\n 117→\t\t\t\tif err != nil {\n 118→\t\t\t\t\tcclog.Errorf(\"error reading %s/%s: %s\", item.cluster, item.host, err.Error())\n 119→\t\t\t\t\tatomic.AddInt32(&errs, 1)\n 120→\t\t\t\t\tcontinue\n 121→\t\t\t\t}\n 122→\n 123→\t\t\t\tfiles, err := findFiles(entries, from, false)\n 124→\t\t\t\tif err != nil {\n 125→\t\t\t\t\tcclog.Errorf(\"error finding files in %s/%s: %s\", item.cluster, item.host, err.Error())\n 126→\t\t\t\t\tatomic.AddInt32(&errs, 1)\n 127→\t\t\t\t\tcontinue\n 128→\t\t\t\t}\n 129→\n 130→\t\t\t\tfor _, checkpoint := range files {\n 131→\t\t\t\t\tif err := os.Remove(filepath.Join(item.dir, checkpoint)); err != nil {\n 132→\t\t\t\t\t\tcclog.Errorf(\"error deleting %s/%s/%s: %s\", item.cluster, item.host, checkpoint, err.Error())\n 133→\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n 134→\t\t\t\t\t} else {\n 135→\t\t\t\t\t\tatomic.AddInt32(&n, 1)\n 136→\t\t\t\t\t}\n 137→\t\t\t\t}\n 138→\t\t\t}\n 139→\t\t}()\n 140→\t}\n 141→\n 142→\tfor _, de1 := range entries1 {\n 143→\t\tentries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))\n 144→\t\tif e != nil {\n 145→\t\t\terr = e\n 146→\t\t\tcontinue\n 147→\t\t}\n 148→\n 149→\t\tfor _, de2 := range entries2 {\n 150→\t\t\twork <- workItem{\n 151→\t\t\t\tdir: filepath.Join(checkpointsDir, de1.Name(), de2.Name()),\n 152→\t\t\t\tcluster: de1.Name(),\n 153→\t\t\t\thost: de2.Name(),\n 154→\t\t\t}\n 155→\t\t}\n 156→\t}\n 157→\n 158→\tclose(work)\n 159→\twg.Wait()\n 160→\n 161→\tif err != nil {\n 162→\t\treturn int(n), err\n 163→\t}\n 164→\tif errs > 0 {\n 165→\t\treturn int(n), fmt.Errorf(\"%d errors happened while deleting (%d successes)\", errs, n)\n 166→\t}\n 167→\treturn int(n), nil\n 168→}\n 169→\n 170→// archiveCheckpoints archives checkpoint files to Parquet format.\n 171→// Produces one Parquet file per cluster: //.parquet\n 172→// Workers load checkpoint files from disk and send CheckpointFile trees on a\n 173→// back-pressured channel. 
The main thread streams each tree directly to Parquet\n 174→// rows without materializing all rows in memory.\n 175→func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, error) {\n 176→\tcclog.Info(\"[METRICSTORE]> start archiving checkpoints to parquet\")\n 177→\tstartTime := time.Now()\n 178→\n 179→\tclusterEntries, err := os.ReadDir(checkpointsDir)\n 180→\tif err != nil {\n 181→\t\treturn 0, err\n 182→\t}\n 183→\n 184→\ttotalFiles := 0\n 185→\tvar clusterErrors []string\n 186→\n 187→\tfor _, clusterEntry := range clusterEntries {\n 188→\t\tif !clusterEntry.IsDir() {\n 189→\t\t\tcontinue\n 190→\t\t}\n 191→\n 192→\t\tcluster := clusterEntry.Name()\n 193→\t\thostEntries, err := os.ReadDir(filepath.Join(checkpointsDir, cluster))\n 194→\t\tif err != nil {\n 195→\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading host entries for cluster %s: %s\", cluster, err.Error())\n 196→\t\t\tclusterErrors = append(clusterErrors, cluster)\n 197→\t\t\tcontinue\n 198→\t\t}\n 199→\n 200→\t\t// Workers load checkpoint files from disk; main thread writes to parquet.\n 201→\t\ttype hostResult struct {\n 202→\t\t\tcheckpoints []*CheckpointFile\n 203→\t\t\thostname string\n 204→\t\t\tfiles []string // checkpoint filenames to delete after successful write\n 205→\t\t\tdir string // checkpoint directory for this host\n 206→\t\t}\n 207→\n 208→\t\t// Small buffer provides back-pressure: at most NumWorkers+2 results in flight.\n 209→\t\tresults := make(chan hostResult, 2)\n 210→\t\twork := make(chan struct {\n 211→\t\t\tdir, host string\n 212→\t\t}, Keys.NumWorkers)\n 213→\n 214→\t\tvar wg sync.WaitGroup\n 215→\t\terrs := int32(0)\n 216→\n 217→\t\twg.Add(Keys.NumWorkers)\n 218→\t\tfor w := 0; w < Keys.NumWorkers; w++ {\n 219→\t\t\tgo func() {\n 220→\t\t\t\tdefer wg.Done()\n 221→\t\t\t\tfor item := range work {\n 222→\t\t\t\t\tcheckpoints, files, err := loadCheckpointFiles(item.dir, from)\n 223→\t\t\t\t\tif err != nil {\n 224→\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading checkpoints for %s/%s: %s\", cluster, item.host, err.Error())\n 225→\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n 226→\t\t\t\t\t\tcontinue\n 227→\t\t\t\t\t}\n 228→\t\t\t\t\tif len(checkpoints) > 0 {\n 229→\t\t\t\t\t\tresults <- hostResult{\n 230→\t\t\t\t\t\t\tcheckpoints: checkpoints,\n 231→\t\t\t\t\t\t\thostname: item.host,\n 232→\t\t\t\t\t\t\tfiles: files,\n 233→\t\t\t\t\t\t\tdir: item.dir,\n 234→\t\t\t\t\t\t}\n 235→\t\t\t\t\t}\n 236→\t\t\t\t}\n 237→\t\t\t}()\n 238→\t\t}\n 239→\n 240→\t\tgo func() {\n 241→\t\t\tfor _, hostEntry := range hostEntries {\n 242→\t\t\t\tif !hostEntry.IsDir() {\n 243→\t\t\t\t\tcontinue\n 244→\t\t\t\t}\n 245→\t\t\t\tdir := filepath.Join(checkpointsDir, cluster, hostEntry.Name())\n 246→\t\t\t\twork <- struct {\n 247→\t\t\t\t\tdir, host string\n 248→\t\t\t\t}{dir: dir, host: hostEntry.Name()}\n 249→\t\t\t}\n 250→\t\t\tclose(work)\n 251→\t\t\twg.Wait()\n 252→\t\t\tclose(results)\n 253→\t\t}()\n 254→\n 255→\t\t// Open streaming writer and write each host's checkpoint files as a row group\n 256→\t\tparquetFile := filepath.Join(cleanupDir, cluster, fmt.Sprintf(\"%d.parquet\", from))\n 257→\t\twriter, err := newParquetArchiveWriter(parquetFile)\n 258→\t\tif err != nil {\n 259→\t\t\t// Drain results channel to unblock workers\n 260→\t\t\tfor range results {\n 261→\t\t\t}\n 262→\t\t\tcclog.Errorf(\"[METRICSTORE]> error creating parquet writer for cluster %s: %s\", cluster, err.Error())\n 263→\t\t\tclusterErrors = append(clusterErrors, cluster)\n 264→\t\t\tcontinue\n 265→\t\t}\n 266→\n 267→\t\ttype deleteItem 
struct {\n 268→\t\t\tdir string\n 269→\t\t\tfiles []string\n 270→\t\t}\n 271→\t\tvar toDelete []deleteItem\n 272→\t\twriteErr := error(nil)\n 273→\n 274→\t\tfor r := range results {\n 275→\t\t\tif writeErr == nil {\n 276→\t\t\t\t// Stream each checkpoint file directly to parquet rows.\n 277→\t\t\t\t// Each checkpoint is processed and discarded before the next.\n 278→\t\t\t\tfor _, cf := range r.checkpoints {\n 279→\t\t\t\t\tif err := writer.WriteCheckpointFile(cf, cluster, r.hostname, \"node\", \"\"); err != nil {\n 280→\t\t\t\t\t\twriteErr = err\n 281→\t\t\t\t\t\tbreak\n 282→\t\t\t\t\t}\n 283→\t\t\t\t}\n 284→\t\t\t\t// Flush once per host to keep row group count within parquet limits.\n 285→\t\t\t\tif writeErr == nil {\n 286→\t\t\t\t\tif err := writer.FlushRowGroup(); err != nil {\n 287→\t\t\t\t\t\twriteErr = err\n 288→\t\t\t\t\t}\n 289→\t\t\t\t}\n 290→\t\t\t}\n 291→\t\t\t// Always track files for deletion (even if write failed, we still drain)\n 292→\t\t\ttoDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})\n 293→\t\t}\n 294→\n 295→\t\tif err := writer.Close(); err != nil && writeErr == nil {\n 296→\t\t\twriteErr = err\n 297→\t\t}\n 298→\n 299→\t\tif errs > 0 {\n 300→\t\t\tcclog.Errorf(\"[METRICSTORE]> %d errors reading checkpoints for cluster %s\", errs, cluster)\n 301→\t\t\tclusterErrors = append(clusterErrors, cluster)\n 302→\t\t\tos.Remove(parquetFile)\n 303→\t\t\tcontinue\n 304→\t\t}\n 305→\n 306→\t\tif writer.count == 0 {\n 307→\t\t\t// No data written — remove empty file\n 308→\t\t\tos.Remove(parquetFile)\n 309→\t\t\tcontinue\n 310→\t\t}\n 311→\n 312→\t\tif writeErr != nil {\n 313→\t\t\tos.Remove(parquetFile)\n 314→\t\t\tcclog.Errorf(\"[METRICSTORE]> error writing parquet archive for cluster %s: %s\", cluster, writeErr.Error())\n 315→\t\t\tclusterErrors = append(clusterErrors, cluster)\n 316→\t\t\tcontinue\n 317→\t\t}\n 318→\n 319→\t\t// Delete archived checkpoint files\n 320→\t\tfor _, item := range toDelete {\n 321→\t\t\tfor _, file := range item.files {\n 322→\t\t\t\tfilename := filepath.Join(item.dir, file)\n 323→\t\t\t\tif err := os.Remove(filename); err != nil {\n 324→\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> could not remove archived checkpoint %s: %v\", filename, err)\n 325→\t\t\t\t} else {\n 326→\t\t\t\t\ttotalFiles++\n 327→\t\t\t\t}\n 328→\t\t\t}\n 329→\t\t}\n 330→\n 331→\t\tcclog.Infof(\"[METRICSTORE]> archived %d rows from %d files for cluster %s to %s\",\n 332→\t\t\twriter.count, totalFiles, cluster, parquetFile)\n 333→\t}\n 334→\n 335→\tcclog.Infof(\"[METRICSTORE]> archiving checkpoints completed in %s (%d files)\", time.Since(startTime).Round(time.Millisecond), totalFiles)\n 336→\n 337→\tif len(clusterErrors) > 0 {\n 338→\t\treturn totalFiles, fmt.Errorf(\"archiving failed for clusters: %s\", strings.Join(clusterErrors, \", \"))\n 339→\t}\n 340→\n 341→\treturn totalFiles, nil\n 342→}\n 343→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"86d992bc-fc66-45fa-a79e-71431de3a202","timestamp":"2026-03-29T19:34:38.988Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. 
This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\npackage metricstore\n\nimport (\n\t\"context\"\n\t\"errors\"\n\t\"fmt\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n)\n\n// Worker for either Archiving or Deleting files\n\nfunc CleanUp(wg *sync.WaitGroup, ctx context.Context) {\n\tif Keys.Cleanup.Mode == \"archive\" {\n\t\tcclog.Info(\"[METRICSTORE]> enable archive cleanup to parquet\")\n\t\t// Run as Archiver\n\t\tcleanUpWorker(wg, ctx,\n\t\t\tKeys.RetentionInMemory,\n\t\t\t\"archiving\",\n\t\t\tKeys.Cleanup.RootDir,\n\t\t\tfalse,\n\t\t)\n\t} else {\n\t\t// Run as Deleter\n\t\tcleanUpWorker(wg, ctx,\n\t\t\tKeys.RetentionInMemory,\n\t\t\t\"deleting\",\n\t\t\t\"\",\n\t\t\ttrue,\n\t\t)\n\t}\n}\n\n// cleanUpWorker takes simple values to configure what it does\nfunc cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {\n\twg.Go(func() {\n\t\td, err := time.ParseDuration(interval)\n\t\tif err != nil {\n\t\t\tcclog.Fatalf(\"[METRICSTORE]> error parsing %s interval duration: %v\\n\", mode, err)\n\t\t}\n\t\tif d <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\tticker := time.NewTicker(d)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tt := time.Now().Add(-d)\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start %s checkpoints (older than %s)...\", mode, t.Format(time.RFC3339))\n\n\t\t\t\tn, err := CleanupCheckpoints(Keys.Checkpoints.RootDir, cleanupDir, t.Unix(), delete)\n\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> %s failed: %s\", mode, err.Error())\n\t\t\t\t} else {\n\t\t\t\t\tif delete {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoints deleted\", n)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoint files archived to parquet\", n)\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\nvar ErrNoNewArchiveData error = errors.New(\"all data already archived\")\n\n// CleanupCheckpoints deletes or archives all checkpoint files older than `from`.\n// When archiving, consolidates all hosts per cluster into a single Parquet file.\nfunc CleanupCheckpoints(checkpointsDir, cleanupDir string, from int64, deleteInstead bool) (int, error) {\n\tif deleteInstead {\n\t\treturn deleteCheckpoints(checkpointsDir, from)\n\t}\n\n\treturn archiveCheckpoints(checkpointsDir, cleanupDir, from)\n}\n\n// deleteCheckpoints removes checkpoint files older than `from` across all clusters/hosts.\nfunc deleteCheckpoints(checkpointsDir string, from int64) (int, error) {\n\tentries1, err := os.ReadDir(checkpointsDir)\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\ttype workItem struct {\n\t\tdir string\n\t\tcluster, host string\n\t}\n\n\tvar wg sync.WaitGroup\n\tn, errs := int32(0), int32(0)\n\twork := make(chan workItem, Keys.NumWorkers)\n\n\twg.Add(Keys.NumWorkers)\n\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tfor item := range work {\n\t\t\t\tentries, err := os.ReadDir(item.dir)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"error reading %s/%s: %s\", item.cluster, item.host, err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tfiles, err := findFiles(entries, from, false)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"error finding files in 
%s/%s: %s\", item.cluster, item.host, err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tfor _, checkpoint := range files {\n\t\t\t\t\tif err := os.Remove(filepath.Join(item.dir, checkpoint)); err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"error deleting %s/%s/%s: %s\", item.cluster, item.host, checkpoint, err.Error())\n\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tatomic.AddInt32(&n, 1)\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}()\n\t}\n\n\tfor _, de1 := range entries1 {\n\t\tentries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))\n\t\tif e != nil {\n\t\t\terr = e\n\t\t\tcontinue\n\t\t}\n\n\t\tfor _, de2 := range entries2 {\n\t\t\twork <- workItem{\n\t\t\t\tdir: filepath.Join(checkpointsDir, de1.Name(), de2.Name()),\n\t\t\t\tcluster: de1.Name(),\n\t\t\t\thost: de2.Name(),\n\t\t\t}\n\t\t}\n\t}\n\n\tclose(work)\n\twg.Wait()\n\n\tif err != nil {\n\t\treturn int(n), err\n\t}\n\tif errs > 0 {\n\t\treturn int(n), fmt.Errorf(\"%d errors happened while deleting (%d successes)\", errs, n)\n\t}\n\treturn int(n), nil\n}\n\n// archiveCheckpoints archives checkpoint files to Parquet format.\n// Produces one Parquet file per cluster: //.parquet\n// Workers load checkpoint files from disk and send CheckpointFile trees on a\n// back-pressured channel. The main thread streams each tree directly to Parquet\n// rows without materializing all rows in memory.\nfunc archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, error) {\n\tcclog.Info(\"[METRICSTORE]> start archiving checkpoints to parquet\")\n\tstartTime := time.Now()\n\n\tclusterEntries, err := os.ReadDir(checkpointsDir)\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\ttotalFiles := 0\n\tvar clusterErrors []string\n\n\tfor _, clusterEntry := range clusterEntries {\n\t\tif !clusterEntry.IsDir() {\n\t\t\tcontinue\n\t\t}\n\n\t\tcluster := clusterEntry.Name()\n\t\thostEntries, err := os.ReadDir(filepath.Join(checkpointsDir, cluster))\n\t\tif err != nil {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading host entries for cluster %s: %s\", cluster, err.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Workers load checkpoint files from disk; main thread writes to parquet.\n\t\ttype hostResult struct {\n\t\t\tcheckpoints []*CheckpointFile\n\t\t\thostname string\n\t\t\tfiles []string // checkpoint filenames to delete after successful write\n\t\t\tdir string // checkpoint directory for this host\n\t\t}\n\n\t\t// Small buffer provides back-pressure: at most NumWorkers+2 results in flight.\n\t\tresults := make(chan hostResult, 2)\n\t\twork := make(chan struct {\n\t\t\tdir, host string\n\t\t}, Keys.NumWorkers)\n\n\t\tvar wg sync.WaitGroup\n\t\terrs := int32(0)\n\n\t\twg.Add(Keys.NumWorkers)\n\t\tfor w := 0; w < Keys.NumWorkers; w++ {\n\t\t\tgo func() {\n\t\t\t\tdefer wg.Done()\n\t\t\t\tfor item := range work {\n\t\t\t\t\tcheckpoints, files, err := loadCheckpointFiles(item.dir, from)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading checkpoints for %s/%s: %s\", cluster, item.host, err.Error())\n\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\t\tcontinue\n\t\t\t\t\t}\n\t\t\t\t\tif len(checkpoints) > 0 {\n\t\t\t\t\t\tresults <- hostResult{\n\t\t\t\t\t\t\tcheckpoints: checkpoints,\n\t\t\t\t\t\t\thostname: item.host,\n\t\t\t\t\t\t\tfiles: files,\n\t\t\t\t\t\t\tdir: item.dir,\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}()\n\t\t}\n\n\t\tgo func() {\n\t\t\tfor _, hostEntry := range hostEntries 
{\n\t\t\t\tif !hostEntry.IsDir() {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tdir := filepath.Join(checkpointsDir, cluster, hostEntry.Name())\n\t\t\t\twork <- struct {\n\t\t\t\t\tdir, host string\n\t\t\t\t}{dir: dir, host: hostEntry.Name()}\n\t\t\t}\n\t\t\tclose(work)\n\t\t\twg.Wait()\n\t\t\tclose(results)\n\t\t}()\n\n\t\t// Open streaming writer and write each host's checkpoint files as a row group\n\t\tparquetFile := filepath.Join(cleanupDir, cluster, fmt.Sprintf(\"%d.parquet\", from))\n\t\twriter, err := newParquetArchiveWriter(parquetFile)\n\t\tif err != nil {\n\t\t\t// Drain results channel to unblock workers\n\t\t\tfor range results {\n\t\t\t}\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error creating parquet writer for cluster %s: %s\", cluster, err.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\ttype deleteItem struct {\n\t\t\tdir string\n\t\t\tfiles []string\n\t\t}\n\t\tvar toDelete []deleteItem\n\t\twriteErr := error(nil)\n\n\t\tfor r := range results {\n\t\t\tif writeErr == nil {\n\t\t\t\t// Stream each checkpoint file directly to parquet rows.\n\t\t\t\t// Each checkpoint is processed and discarded before the next.\n\t\t\t\tfor _, cf := range r.checkpoints {\n\t\t\t\t\tif err := writer.WriteCheckpointFile(cf, cluster, r.hostname, \"node\", \"\"); err != nil {\n\t\t\t\t\t\twriteErr = err\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t// Flush once per host to keep row group count within parquet limits.\n\t\t\t\tif writeErr == nil {\n\t\t\t\t\tif err := writer.FlushRowGroup(); err != nil {\n\t\t\t\t\t\twriteErr = err\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\t// Always track files for deletion (even if write failed, we still drain)\n\t\t\ttoDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})\n\t\t}\n\n\t\tif err := writer.Close(); err != nil && writeErr == nil {\n\t\t\twriteErr = err\n\t\t}\n\n\t\tif errs > 0 {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> %d errors reading checkpoints for cluster %s\", errs, cluster)\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tos.Remove(parquetFile)\n\t\t\tcontinue\n\t\t}\n\n\t\tif writer.count == 0 {\n\t\t\t// No data written — remove empty file\n\t\t\tos.Remove(parquetFile)\n\t\t\tcontinue\n\t\t}\n\n\t\tif writeErr != nil {\n\t\t\tos.Remove(parquetFile)\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error writing parquet archive for cluster %s: %s\", cluster, writeErr.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Delete archived checkpoint files\n\t\tfor _, item := range toDelete {\n\t\t\tfor _, file := range item.files {\n\t\t\t\tfilename := filepath.Join(item.dir, file)\n\t\t\t\tif err := os.Remove(filename); err != nil {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> could not remove archived checkpoint %s: %v\", filename, err)\n\t\t\t\t} else {\n\t\t\t\t\ttotalFiles++\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tcclog.Infof(\"[METRICSTORE]> archived %d rows from %d files for cluster %s to %s\",\n\t\t\twriter.count, totalFiles, cluster, parquetFile)\n\t}\n\n\tcclog.Infof(\"[METRICSTORE]> archiving checkpoints completed in %s (%d files)\", time.Since(startTime).Round(time.Millisecond), totalFiles)\n\n\tif len(clusterErrors) > 0 {\n\t\treturn totalFiles, fmt.Errorf(\"archiving failed for clusters: %s\", strings.Join(clusterErrors, \", \"))\n\t}\n\n\treturn totalFiles, nil\n}\n","numLines":343,"startLine":1,"totalLines":343}},"sourceToolAssistantUUID":"4c8ba73c-da5c-4447-9352-1c35b5979d77"}
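CleanupCheckpoints in archive.go above is normally driven by the CleanUp ticker, but its exported signature makes the two modes easy to show directly. A hedged sketch, assuming Keys.NumWorkers has been set (the worker pool and channel buffers are sized from it, so a direct call with 0 workers would stall) and using the default checkpoint root from config.go; the 48h cutoff is illustrative, and delete mode really removes files:

```go
// Sketch only: in cc-backend this runs via the CleanUp ticker, not directly.
package main

import (
	"fmt"
	"time"

	"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
)

func main() {
	// Channel buffers and the pool are sized from Keys.NumWorkers,
	// so it must be non-zero before calling CleanupCheckpoints directly.
	metricstore.Keys.NumWorkers = 4

	cutoff := time.Now().Add(-48 * time.Hour).Unix()

	// deleteInstead=true removes files under <root>/<cluster>/<host>/
	// older than the cutoff; passing false plus a cleanup dir instead
	// archives them into one <cleanupDir>/<cluster>/<cutoff>.parquet
	// per cluster, as documented on archiveCheckpoints above.
	n, err := metricstore.CleanupCheckpoints("./var/checkpoints", "", cutoff, true)
	if err != nil {
		fmt.Println("cleanup finished with errors:", err)
	}
	fmt.Printf("removed %d checkpoint files\n", n)
}
```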
{"parentUuid":"86d992bc-fc66-45fa-a79e-71431de3a202","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_016dAoutRJWU14LrEWS2VNVe","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/healthcheck.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"8a415338-6277-44ac-badc-bb0c9c7b67b8","timestamp":"2026-03-29T19:34:39.374Z"}
{"parentUuid":"8a415338-6277-44ac-badc-bb0c9c7b67b8","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_016dAoutRJWU14LrEWS2VNVe","toolUseID":"toolu_016dAoutRJWU14LrEWS2VNVe","timestamp":"2026-03-29T19:34:39.434Z","uuid":"55c65980-cb00-4c46-a2e5-efb1b7a637a8"}
{"parentUuid":"8a415338-6277-44ac-badc-bb0c9c7b67b8","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_016dAoutRJWU14LrEWS2VNVe","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→package metricstore\n 7→\n 8→import (\n 9→\t\"encoding/json\"\n 10→\t\"fmt\"\n 11→\t\"time\"\n 12→\n 13→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 14→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 15→)\n 16→\n 17→// HealthCheckResponse represents the result of a health check operation.\n 18→type HealthCheckResponse struct {\n 19→\tStatus schema.MonitoringState\n 20→\tError error\n 21→}\n 22→\n 23→// HealthCheckResult holds the monitoring state and raw JSON health metrics\n 24→// for a single node as determined by HealthCheck.\n 25→type HealthCheckResult struct {\n 26→\tState schema.MonitoringState\n 27→\tHealthMetrics string // JSON: {\"missing\":[...],\"degraded\":[...]}\n 28→}\n 29→\n 30→// MaxMissingDataPoints is the threshold for stale data detection.\n 31→// A buffer is considered healthy if the gap between its last data point\n 32→// and the current time is within MaxMissingDataPoints * frequency.\n 33→const MaxMissingDataPoints int64 = 5\n 34→\n 35→// bufferExists returns true if the buffer is non-nil and contains data.\n 36→func (b *buffer) bufferExists() bool {\n 37→\tif b == nil || b.data == nil || len(b.data) == 0 {\n 38→\t\treturn false\n 39→\t}\n 40→\n 41→\treturn true\n 42→}\n 43→\n 44→// isBufferHealthy returns true if the buffer has recent data within\n 45→// MaxMissingDataPoints * frequency of the current time.\n 46→func (b *buffer) isBufferHealthy() bool {\n 47→\tbufferEnd := b.start + b.frequency*int64(len(b.data))\n 48→\tt := time.Now().Unix()\n 49→\n 50→\treturn t-bufferEnd <= MaxMissingDataPoints*b.frequency\n 51→}\n 52→\n 53→// collectMetricStatus walks the subtree rooted at l and classifies each\n 54→// expected metric into the healthy or degraded map.\n 55→//\n 56→// Classification rules (evaluated per buffer, pessimistic):\n 57→// - A single stale buffer marks the metric as degraded permanently.\n 58→// - A healthy buffer only counts if no stale buffer has been seen.\n 59→// - Metrics absent from the global config or without any buffer remain\n 60→// in neither map and are later reported as missing.\n 61→func (l *Level) collectMetricStatus(m *MemoryStore, expectedMetrics []string, healthy, degraded map[string]bool) {\n 62→\tl.lock.RLock()\n 63→\tdefer l.lock.RUnlock()\n 64→\n 65→\tfor _, metricName := range expectedMetrics {\n 66→\t\tif degraded[metricName] {\n 67→\t\t\tcontinue // already degraded, cannot improve\n 68→\t\t}\n 69→\t\tmc, ok := m.Metrics[metricName]\n 70→\t\tif !ok {\n 71→\t\t\tcontinue // unknown metric, will be reported as missing\n 72→\t\t}\n 73→\t\tb := l.metrics[mc.offset]\n 74→\t\tif b.bufferExists() {\n 75→\t\t\tif !b.isBufferHealthy() {\n 76→\t\t\t\tdegraded[metricName] = true\n 77→\t\t\t\tdelete(healthy, metricName)\n 78→\t\t\t} else if !degraded[metricName] {\n 79→\t\t\t\thealthy[metricName] = true\n 80→\t\t\t}\n 81→\t\t}\n 82→\t}\n 83→\n 84→\tfor _, lvl := range l.children {\n 
85→\t\tlvl.collectMetricStatus(m, expectedMetrics, healthy, degraded)\n 86→\t}\n 87→}\n 88→\n 89→// getHealthyMetrics walks the complete subtree rooted at l and classifies\n 90→// each expected metric by comparing the collected status against the\n 91→// expected list.\n 92→//\n 93→// Returns:\n 94→// - missingList: metrics not found in global config or without any buffer\n 95→// - degradedList: metrics with at least one stale buffer in the subtree\n 96→func (l *Level) getHealthyMetrics(m *MemoryStore, expectedMetrics []string) ([]string, []string) {\n 97→\thealthy := make(map[string]bool, len(expectedMetrics))\n 98→\tdegraded := make(map[string]bool)\n 99→\n 100→\tl.collectMetricStatus(m, expectedMetrics, healthy, degraded)\n 101→\n 102→\tmissingList := make([]string, 0)\n 103→\tdegradedList := make([]string, 0)\n 104→\n 105→\tfor _, metricName := range expectedMetrics {\n 106→\t\tif healthy[metricName] {\n 107→\t\t\tcontinue\n 108→\t\t}\n 109→\n 110→\t\tif degraded[metricName] {\n 111→\t\t\tdegradedList = append(degradedList, metricName)\n 112→\t\t} else {\n 113→\t\t\tmissingList = append(missingList, metricName)\n 114→\t\t}\n 115→\t}\n 116→\n 117→\treturn degradedList, missingList\n 118→}\n 119→\n 120→// GetHealthyMetrics returns missing and degraded metric lists for a node.\n 121→//\n 122→// It walks the metric tree starting from the node identified by selector\n 123→// and classifies each expected metric:\n 124→// - Missing: no buffer anywhere in the subtree, or metric not in global config\n 125→// - Degraded: at least one stale buffer exists in the subtree\n 126→//\n 127→// Metrics present in expectedMetrics but absent from both returned lists\n 128→// are considered fully healthy.\n 129→func (m *MemoryStore) GetHealthyMetrics(selector []string, expectedMetrics []string) ([]string, []string, error) {\n 130→\tlvl := m.root.findLevel(selector)\n 131→\tif lvl == nil {\n 132→\t\treturn nil, nil, fmt.Errorf(\"[METRICSTORE]> GetHealthyMetrics: host not found: %#v\", selector)\n 133→\t}\n 134→\n 135→\tdegradedList, missingList := lvl.getHealthyMetrics(m, expectedMetrics)\n 136→\treturn degradedList, missingList, nil\n 137→}\n 138→\n 139→type HealthCheckReq struct {\n 140→\tCluster string `json:\"cluster\" example:\"fritz\"`\n 141→\tNodes []string `json:\"nodes\"`\n 142→\tMetricNames []string `json:\"metric-names\"`\n 143→}\n 144→\n 145→// HealthCheck evaluates multiple nodes against a set of expected metrics\n 146→// and returns a monitoring state per node.\n 147→//\n 148→// States:\n 149→// - MonitoringStateFull: all expected metrics are healthy\n 150→// - MonitoringStatePartial: some metrics are missing or degraded\n 151→// - MonitoringStateFailed: node not found, or no healthy metrics at all\n 152→func (m *MemoryStore) HealthCheck(cluster string,\n 153→\tnodes []string, expectedMetrics []string,\n 154→) (map[string]HealthCheckResult, error) {\n 155→\tresults := make(map[string]HealthCheckResult, len(nodes))\n 156→\n 157→\tfor _, hostname := range nodes {\n 158→\t\tselector := []string{cluster, hostname}\n 159→\n 160→\t\tdegradedList, missingList, err := m.GetHealthyMetrics(selector, expectedMetrics)\n 161→\t\tif err != nil {\n 162→\t\t\tresults[hostname] = HealthCheckResult{State: schema.MonitoringStateFailed}\n 163→\t\t\tcontinue\n 164→\t\t}\n 165→\n 166→\t\tdegradedCount := len(degradedList)\n 167→\t\tmissingCount := len(missingList)\n 168→\n 169→\t\thealthyCount := len(expectedMetrics) - degradedCount - missingCount\n 170→\n 171→\t\tif degradedCount > 0 {\n 
172→\t\t\tcclog.ComponentDebug(\"metricstore\", \"HealthCheck: node \", hostname, \"degraded metrics:\", degradedList)\n 173→\t\t}\n 174→\t\tif missingCount > 0 {\n 175→\t\t\tcclog.ComponentDebug(\"metricstore\", \"HealthCheck: node \", hostname, \"missing metrics:\", missingList)\n 176→\t\t}\n 177→\n 178→\t\tvar state schema.MonitoringState\n 179→\t\tswitch {\n 180→\t\tcase degradedCount == 0 && missingCount == 0:\n 181→\t\t\tstate = schema.MonitoringStateFull\n 182→\t\tcase healthyCount == 0:\n 183→\t\t\tstate = schema.MonitoringStateFailed\n 184→\t\tdefault:\n 185→\t\t\tstate = schema.MonitoringStatePartial\n 186→\t\t}\n 187→\n 188→\t\thm, _ := json.Marshal(map[string][]string{\n 189→\t\t\t\"missing\": missingList,\n 190→\t\t\t\"degraded\": degradedList,\n 191→\t\t})\n 192→\n 193→\t\tresults[hostname] = HealthCheckResult{\n 194→\t\t\tState: state,\n 195→\t\t\tHealthMetrics: string(hm),\n 196→\t\t}\n 197→\t}\n 198→\n 199→\treturn results, nil\n 200→}\n 201→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"9b5bf86e-c618-4e04-91b7-ab9a3dbe5601","timestamp":"2026-03-29T19:34:39.431Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/healthcheck.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\npackage metricstore\n\nimport (\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\n// HealthCheckResponse represents the result of a health check operation.\ntype HealthCheckResponse struct {\n\tStatus schema.MonitoringState\n\tError error\n}\n\n// HealthCheckResult holds the monitoring state and raw JSON health metrics\n// for a single node as determined by HealthCheck.\ntype HealthCheckResult struct {\n\tState schema.MonitoringState\n\tHealthMetrics string // JSON: {\"missing\":[...],\"degraded\":[...]}\n}\n\n// MaxMissingDataPoints is the threshold for stale data detection.\n// A buffer is considered healthy if the gap between its last data point\n// and the current time is within MaxMissingDataPoints * frequency.\nconst MaxMissingDataPoints int64 = 5\n\n// bufferExists returns true if the buffer is non-nil and contains data.\nfunc (b *buffer) bufferExists() bool {\n\tif b == nil || b.data == nil || len(b.data) == 0 {\n\t\treturn false\n\t}\n\n\treturn true\n}\n\n// isBufferHealthy returns true if the buffer has recent data within\n// MaxMissingDataPoints * frequency of the current time.\nfunc (b *buffer) isBufferHealthy() bool {\n\tbufferEnd := b.start + b.frequency*int64(len(b.data))\n\tt := time.Now().Unix()\n\n\treturn t-bufferEnd <= MaxMissingDataPoints*b.frequency\n}\n\n// collectMetricStatus walks the subtree rooted at l and classifies each\n// expected metric into the healthy or degraded map.\n//\n// Classification rules (evaluated per buffer, pessimistic):\n// - A single stale buffer marks the metric as degraded permanently.\n// - A healthy buffer only counts if no stale buffer has been seen.\n// - Metrics absent from the global config or without any buffer remain\n// in 
neither map and are later reported as missing.\nfunc (l *Level) collectMetricStatus(m *MemoryStore, expectedMetrics []string, healthy, degraded map[string]bool) {\n\tl.lock.RLock()\n\tdefer l.lock.RUnlock()\n\n\tfor _, metricName := range expectedMetrics {\n\t\tif degraded[metricName] {\n\t\t\tcontinue // already degraded, cannot improve\n\t\t}\n\t\tmc, ok := m.Metrics[metricName]\n\t\tif !ok {\n\t\t\tcontinue // unknown metric, will be reported as missing\n\t\t}\n\t\tb := l.metrics[mc.offset]\n\t\tif b.bufferExists() {\n\t\t\tif !b.isBufferHealthy() {\n\t\t\t\tdegraded[metricName] = true\n\t\t\t\tdelete(healthy, metricName)\n\t\t\t} else if !degraded[metricName] {\n\t\t\t\thealthy[metricName] = true\n\t\t\t}\n\t\t}\n\t}\n\n\tfor _, lvl := range l.children {\n\t\tlvl.collectMetricStatus(m, expectedMetrics, healthy, degraded)\n\t}\n}\n\n// getHealthyMetrics walks the complete subtree rooted at l and classifies\n// each expected metric by comparing the collected status against the\n// expected list.\n//\n// Returns:\n// - missingList: metrics not found in global config or without any buffer\n// - degradedList: metrics with at least one stale buffer in the subtree\nfunc (l *Level) getHealthyMetrics(m *MemoryStore, expectedMetrics []string) ([]string, []string) {\n\thealthy := make(map[string]bool, len(expectedMetrics))\n\tdegraded := make(map[string]bool)\n\n\tl.collectMetricStatus(m, expectedMetrics, healthy, degraded)\n\n\tmissingList := make([]string, 0)\n\tdegradedList := make([]string, 0)\n\n\tfor _, metricName := range expectedMetrics {\n\t\tif healthy[metricName] {\n\t\t\tcontinue\n\t\t}\n\n\t\tif degraded[metricName] {\n\t\t\tdegradedList = append(degradedList, metricName)\n\t\t} else {\n\t\t\tmissingList = append(missingList, metricName)\n\t\t}\n\t}\n\n\treturn degradedList, missingList\n}\n\n// GetHealthyMetrics returns missing and degraded metric lists for a node.\n//\n// It walks the metric tree starting from the node identified by selector\n// and classifies each expected metric:\n// - Missing: no buffer anywhere in the subtree, or metric not in global config\n// - Degraded: at least one stale buffer exists in the subtree\n//\n// Metrics present in expectedMetrics but absent from both returned lists\n// are considered fully healthy.\nfunc (m *MemoryStore) GetHealthyMetrics(selector []string, expectedMetrics []string) ([]string, []string, error) {\n\tlvl := m.root.findLevel(selector)\n\tif lvl == nil {\n\t\treturn nil, nil, fmt.Errorf(\"[METRICSTORE]> GetHealthyMetrics: host not found: %#v\", selector)\n\t}\n\n\tdegradedList, missingList := lvl.getHealthyMetrics(m, expectedMetrics)\n\treturn degradedList, missingList, nil\n}\n\ntype HealthCheckReq struct {\n\tCluster string `json:\"cluster\" example:\"fritz\"`\n\tNodes []string `json:\"nodes\"`\n\tMetricNames []string `json:\"metric-names\"`\n}\n\n// HealthCheck evaluates multiple nodes against a set of expected metrics\n// and returns a monitoring state per node.\n//\n// States:\n// - MonitoringStateFull: all expected metrics are healthy\n// - MonitoringStatePartial: some metrics are missing or degraded\n// - MonitoringStateFailed: node not found, or no healthy metrics at all\nfunc (m *MemoryStore) HealthCheck(cluster string,\n\tnodes []string, expectedMetrics []string,\n) (map[string]HealthCheckResult, error) {\n\tresults := make(map[string]HealthCheckResult, len(nodes))\n\n\tfor _, hostname := range nodes {\n\t\tselector := []string{cluster, hostname}\n\n\t\tdegradedList, missingList, err := m.GetHealthyMetrics(selector, 
expectedMetrics)\n\t\tif err != nil {\n\t\t\tresults[hostname] = HealthCheckResult{State: schema.MonitoringStateFailed}\n\t\t\tcontinue\n\t\t}\n\n\t\tdegradedCount := len(degradedList)\n\t\tmissingCount := len(missingList)\n\n\t\thealthyCount := len(expectedMetrics) - degradedCount - missingCount\n\n\t\tif degradedCount > 0 {\n\t\t\tcclog.ComponentDebug(\"metricstore\", \"HealthCheck: node \", hostname, \"degraded metrics:\", degradedList)\n\t\t}\n\t\tif missingCount > 0 {\n\t\t\tcclog.ComponentDebug(\"metricstore\", \"HealthCheck: node \", hostname, \"missing metrics:\", missingList)\n\t\t}\n\n\t\tvar state schema.MonitoringState\n\t\tswitch {\n\t\tcase degradedCount == 0 && missingCount == 0:\n\t\t\tstate = schema.MonitoringStateFull\n\t\tcase healthyCount == 0:\n\t\t\tstate = schema.MonitoringStateFailed\n\t\tdefault:\n\t\t\tstate = schema.MonitoringStatePartial\n\t\t}\n\n\t\thm, _ := json.Marshal(map[string][]string{\n\t\t\t\"missing\": missingList,\n\t\t\t\"degraded\": degradedList,\n\t\t})\n\n\t\tresults[hostname] = HealthCheckResult{\n\t\t\tState: state,\n\t\t\tHealthMetrics: string(hm),\n\t\t}\n\t}\n\n\treturn results, nil\n}\n","numLines":201,"startLine":1,"totalLines":201}},"sourceToolAssistantUUID":"8a415338-6277-44ac-badc-bb0c9c7b67b8"}
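The healthcheck.go contents above classify each expected metric per node; per isBufferHealthy, with MaxMissingDataPoints = 5 and a 60 s frequency, the gap between a buffer's end and now must stay within 300 s for the buffer to count as healthy. A compile-only sketch of the calling side, assuming ms is an already-initialized *metricstore.MemoryStore (store setup lives elsewhere in the package); the hostnames and metric names are made up, while the cluster name "fritz" comes from the HealthCheckReq example tag:

```go
// Sketch only: ms must come from the package's normal initialization,
// which is not part of this file.
package health

import (
	"fmt"

	"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
)

func reportNodeHealth(ms *metricstore.MemoryStore) error {
	results, err := ms.HealthCheck("fritz",
		[]string{"f0101", "f0102"},       // hypothetical node names
		[]string{"cpu_load", "mem_used"}) // expected metrics
	if err != nil {
		return err
	}
	for host, r := range results {
		// State: Full = every expected metric healthy; Partial = some
		// missing or stale; Failed = host unknown or nothing healthy.
		// HealthMetrics carries {"missing":[...],"degraded":[...]} as JSON.
		fmt.Printf("%s: %v %s\n", host, r.State, r.HealthMetrics)
	}
	return nil
}
```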
{"parentUuid":"9b5bf86e-c618-4e04-91b7-ab9a3dbe5601","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_014muvdjQ7F3hLGPycj2HzpM","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/scopequery.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"86c6097e-366e-447c-a40c-a5d123492501","timestamp":"2026-03-29T19:34:39.757Z"}
{"parentUuid":"86c6097e-366e-447c-a40c-a5d123492501","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_014muvdjQ7F3hLGPycj2HzpM","toolUseID":"toolu_014muvdjQ7F3hLGPycj2HzpM","timestamp":"2026-03-29T19:34:39.835Z","uuid":"6f77bf8c-9112-4ed0-97bc-531250436e23"}
{"parentUuid":"86c6097e-366e-447c-a40c-a5d123492501","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_014muvdjQ7F3hLGPycj2HzpM","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// This file contains shared scope transformation logic used by both the internal\n 7→// metric store (pkg/metricstore) and the external cc-metric-store client\n 8→// (internal/metricstoreclient). It extracts the common algorithm for mapping\n 9→// between native metric scopes and requested scopes based on cluster topology.\n 10→package metricstore\n 11→\n 12→import (\n 13→\t\"strconv\"\n 14→\n 15→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 16→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 17→)\n 18→\n 19→// Pre-converted scope strings avoid repeated string(MetricScope) allocations\n 20→// during query construction. Used in ScopeQueryResult.Type field.\n 21→var (\n 22→\tHWThreadString = string(schema.MetricScopeHWThread)\n 23→\tCoreString = string(schema.MetricScopeCore)\n 24→\tMemoryDomainString = string(schema.MetricScopeMemoryDomain)\n 25→\tSocketString = string(schema.MetricScopeSocket)\n 26→\tAcceleratorString = string(schema.MetricScopeAccelerator)\n 27→)\n 28→\n 29→// ScopeQueryResult is a package-independent intermediate type returned by\n 30→// BuildScopeQueries. Each consumer converts it to their own APIQuery type\n 31→// (adding Resolution and any other package-specific fields).\n 32→type ScopeQueryResult struct {\n 33→\tType *string\n 34→\tMetric string\n 35→\tHostname string\n 36→\tTypeIds []string\n 37→\tScope schema.MetricScope\n 38→\tAggregate bool\n 39→}\n 40→\n 41→// BuildScopeQueries generates scope query results for a given scope transformation.\n 42→// It returns a slice of results and a boolean indicating success.\n 43→// An empty slice means an expected exception (skip this combination).\n 44→// ok=false means an unhandled case (caller should return an error).\n 45→func BuildScopeQueries(\n 46→\tnativeScope, requestedScope schema.MetricScope,\n 47→\tmetric, hostname string,\n 48→\ttopology *schema.Topology,\n 49→\thwthreads []int,\n 50→\taccelerators []string,\n 51→) ([]ScopeQueryResult, bool) {\n 52→\tscope := nativeScope.Max(requestedScope)\n 53→\tresults := []ScopeQueryResult{}\n 54→\n 55→\thwthreadsStr := IntToStringSlice(hwthreads)\n 56→\n 57→\t// Accelerator -> Accelerator (Use \"accelerator\" scope if requested scope is lower than node)\n 58→\tif nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {\n 59→\t\tif scope != schema.MetricScopeAccelerator {\n 60→\t\t\t// Expected Exception -> Return Empty Slice\n 61→\t\t\treturn results, true\n 62→\t\t}\n 63→\n 64→\t\tresults = append(results, ScopeQueryResult{\n 65→\t\t\tMetric: metric,\n 66→\t\t\tHostname: hostname,\n 67→\t\t\tAggregate: false,\n 68→\t\t\tType: &AcceleratorString,\n 69→\t\t\tTypeIds: accelerators,\n 70→\t\t\tScope: schema.MetricScopeAccelerator,\n 71→\t\t})\n 72→\t\treturn results, true\n 73→\t}\n 74→\n 75→\t// Accelerator -> Node\n 76→\tif nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode {\n 
77→\t\tif len(accelerators) == 0 {\n 78→\t\t\t// Expected Exception -> Return Empty Slice\n 79→\t\t\treturn results, true\n 80→\t\t}\n 81→\n 82→\t\tresults = append(results, ScopeQueryResult{\n 83→\t\t\tMetric: metric,\n 84→\t\t\tHostname: hostname,\n 85→\t\t\tAggregate: true,\n 86→\t\t\tType: &AcceleratorString,\n 87→\t\t\tTypeIds: accelerators,\n 88→\t\t\tScope: scope,\n 89→\t\t})\n 90→\t\treturn results, true\n 91→\t}\n 92→\n 93→\t// HWThread -> HWThread\n 94→\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {\n 95→\t\tresults = append(results, ScopeQueryResult{\n 96→\t\t\tMetric: metric,\n 97→\t\t\tHostname: hostname,\n 98→\t\t\tAggregate: false,\n 99→\t\t\tType: &HWThreadString,\n 100→\t\t\tTypeIds: hwthreadsStr,\n 101→\t\t\tScope: scope,\n 102→\t\t})\n 103→\t\treturn results, true\n 104→\t}\n 105→\n 106→\t// HWThread -> Core\n 107→\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {\n 108→\t\tcores, _ := topology.GetCoresFromHWThreads(hwthreads)\n 109→\t\tfor _, core := range cores {\n 110→\t\t\tresults = append(results, ScopeQueryResult{\n 111→\t\t\t\tMetric: metric,\n 112→\t\t\t\tHostname: hostname,\n 113→\t\t\t\tAggregate: true,\n 114→\t\t\t\tType: &HWThreadString,\n 115→\t\t\t\tTypeIds: IntToStringSlice(topology.Core[core]),\n 116→\t\t\t\tScope: scope,\n 117→\t\t\t})\n 118→\t\t}\n 119→\t\treturn results, true\n 120→\t}\n 121→\n 122→\t// HWThread -> Socket\n 123→\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {\n 124→\t\tsockets, _ := topology.GetSocketsFromHWThreads(hwthreads)\n 125→\t\tfor _, socket := range sockets {\n 126→\t\t\tresults = append(results, ScopeQueryResult{\n 127→\t\t\t\tMetric: metric,\n 128→\t\t\t\tHostname: hostname,\n 129→\t\t\t\tAggregate: true,\n 130→\t\t\t\tType: &HWThreadString,\n 131→\t\t\t\tTypeIds: IntToStringSlice(topology.Socket[socket]),\n 132→\t\t\t\tScope: scope,\n 133→\t\t\t})\n 134→\t\t}\n 135→\t\treturn results, true\n 136→\t}\n 137→\n 138→\t// HWThread -> Node\n 139→\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {\n 140→\t\tresults = append(results, ScopeQueryResult{\n 141→\t\t\tMetric: metric,\n 142→\t\t\tHostname: hostname,\n 143→\t\t\tAggregate: true,\n 144→\t\t\tType: &HWThreadString,\n 145→\t\t\tTypeIds: hwthreadsStr,\n 146→\t\t\tScope: scope,\n 147→\t\t})\n 148→\t\treturn results, true\n 149→\t}\n 150→\n 151→\t// Core -> Core\n 152→\tif nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {\n 153→\t\tcores, _ := topology.GetCoresFromHWThreads(hwthreads)\n 154→\t\tresults = append(results, ScopeQueryResult{\n 155→\t\t\tMetric: metric,\n 156→\t\t\tHostname: hostname,\n 157→\t\t\tAggregate: false,\n 158→\t\t\tType: &CoreString,\n 159→\t\t\tTypeIds: IntToStringSlice(cores),\n 160→\t\t\tScope: scope,\n 161→\t\t})\n 162→\t\treturn results, true\n 163→\t}\n 164→\n 165→\t// Core -> Socket\n 166→\tif nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {\n 167→\t\tsockets, _ := topology.GetSocketsFromCores(hwthreads)\n 168→\t\tfor _, socket := range sockets {\n 169→\t\t\tresults = append(results, ScopeQueryResult{\n 170→\t\t\t\tMetric: metric,\n 171→\t\t\t\tHostname: hostname,\n 172→\t\t\t\tAggregate: true,\n 173→\t\t\t\tType: &CoreString,\n 174→\t\t\t\tTypeIds: IntToStringSlice(topology.Socket[socket]),\n 175→\t\t\t\tScope: scope,\n 176→\t\t\t})\n 177→\t\t}\n 178→\t\treturn results, true\n 179→\t}\n 180→\n 181→\t// Core -> Node\n 182→\tif nativeScope == 
schema.MetricScopeCore && scope == schema.MetricScopeNode {\n 183→\t\tcores, _ := topology.GetCoresFromHWThreads(hwthreads)\n 184→\t\tresults = append(results, ScopeQueryResult{\n 185→\t\t\tMetric: metric,\n 186→\t\t\tHostname: hostname,\n 187→\t\t\tAggregate: true,\n 188→\t\t\tType: &CoreString,\n 189→\t\t\tTypeIds: IntToStringSlice(cores),\n 190→\t\t\tScope: scope,\n 191→\t\t})\n 192→\t\treturn results, true\n 193→\t}\n 194→\n 195→\t// MemoryDomain -> MemoryDomain\n 196→\tif nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {\n 197→\t\tmemDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)\n 198→\t\tresults = append(results, ScopeQueryResult{\n 199→\t\t\tMetric: metric,\n 200→\t\t\tHostname: hostname,\n 201→\t\t\tAggregate: false,\n 202→\t\t\tType: &MemoryDomainString,\n 203→\t\t\tTypeIds: IntToStringSlice(memDomains),\n 204→\t\t\tScope: scope,\n 205→\t\t})\n 206→\t\treturn results, true\n 207→\t}\n 208→\n 209→\t// MemoryDomain -> Socket\n 210→\tif nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket {\n 211→\t\tmemDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)\n 212→\t\tsocketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains)\n 213→\t\tif err != nil {\n 214→\t\t\tcclog.Errorf(\"Error mapping memory domains to sockets, return unchanged: %v\", err)\n 215→\t\t\t// Rare Error Case -> Still Continue -> Return Empty Slice\n 216→\t\t\treturn results, true\n 217→\t\t}\n 218→\n 219→\t\t// Create a query for each socket\n 220→\t\tfor _, domains := range socketToDomains {\n 221→\t\t\tresults = append(results, ScopeQueryResult{\n 222→\t\t\t\tMetric: metric,\n 223→\t\t\t\tHostname: hostname,\n 224→\t\t\t\tAggregate: true,\n 225→\t\t\t\tType: &MemoryDomainString,\n 226→\t\t\t\tTypeIds: IntToStringSlice(domains),\n 227→\t\t\t\tScope: scope,\n 228→\t\t\t})\n 229→\t\t}\n 230→\t\treturn results, true\n 231→\t}\n 232→\n 233→\t// MemoryDomain -> Node\n 234→\tif nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {\n 235→\t\tmemDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)\n 236→\t\tresults = append(results, ScopeQueryResult{\n 237→\t\t\tMetric: metric,\n 238→\t\t\tHostname: hostname,\n 239→\t\t\tAggregate: true,\n 240→\t\t\tType: &MemoryDomainString,\n 241→\t\t\tTypeIds: IntToStringSlice(memDomains),\n 242→\t\t\tScope: scope,\n 243→\t\t})\n 244→\t\treturn results, true\n 245→\t}\n 246→\n 247→\t// Socket -> Socket\n 248→\tif nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {\n 249→\t\tsockets, _ := topology.GetSocketsFromHWThreads(hwthreads)\n 250→\t\tresults = append(results, ScopeQueryResult{\n 251→\t\t\tMetric: metric,\n 252→\t\t\tHostname: hostname,\n 253→\t\t\tAggregate: false,\n 254→\t\t\tType: &SocketString,\n 255→\t\t\tTypeIds: IntToStringSlice(sockets),\n 256→\t\t\tScope: scope,\n 257→\t\t})\n 258→\t\treturn results, true\n 259→\t}\n 260→\n 261→\t// Socket -> Node\n 262→\tif nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {\n 263→\t\tsockets, _ := topology.GetSocketsFromHWThreads(hwthreads)\n 264→\t\tresults = append(results, ScopeQueryResult{\n 265→\t\t\tMetric: metric,\n 266→\t\t\tHostname: hostname,\n 267→\t\t\tAggregate: true,\n 268→\t\t\tType: &SocketString,\n 269→\t\t\tTypeIds: IntToStringSlice(sockets),\n 270→\t\t\tScope: scope,\n 271→\t\t})\n 272→\t\treturn results, true\n 273→\t}\n 274→\n 275→\t// Node -> Node\n 276→\tif nativeScope == schema.MetricScopeNode && scope == 
schema.MetricScopeNode {\n 277→\t\tresults = append(results, ScopeQueryResult{\n 278→\t\t\tMetric: metric,\n 279→\t\t\tHostname: hostname,\n 280→\t\t\tScope: scope,\n 281→\t\t})\n 282→\t\treturn results, true\n 283→\t}\n 284→\n 285→\t// Unhandled Case\n 286→\treturn nil, false\n 287→}\n 288→\n 289→// IntToStringSlice converts a slice of integers to a slice of strings.\n 290→// Used to convert hardware thread/core/socket IDs from topology (int) to query TypeIds (string).\n 291→// Optimized to reuse a byte buffer for string conversion, reducing allocations.\n 292→func IntToStringSlice(is []int) []string {\n 293→\tif len(is) == 0 {\n 294→\t\treturn nil\n 295→\t}\n 296→\n 297→\tss := make([]string, len(is))\n 298→\tbuf := make([]byte, 0, 16) // Reusable buffer for integer conversion\n 299→\tfor i, x := range is {\n 300→\t\tbuf = strconv.AppendInt(buf[:0], int64(x), 10)\n 301→\t\tss[i] = string(buf)\n 302→\t}\n 303→\treturn ss\n 304→}\n 305→\n 306→// ExtractTypeID returns the type ID at the given index from a query's TypeIds slice.\n 307→// Returns nil if queryType is nil (no type filtering). Logs a warning and returns nil\n 308→// if the index is out of range.\n 309→func ExtractTypeID(queryType *string, typeIds []string, ndx int, metric, hostname string) *string {\n 310→\tif queryType == nil {\n 311→\t\treturn nil\n 312→\t}\n 313→\tif ndx < len(typeIds) {\n 314→\t\tid := typeIds[ndx]\n 315→\t\treturn &id\n 316→\t}\n 317→\tcclog.Warnf(\"TypeIds index out of range: %d with length %d for metric %s on host %s\",\n 318→\t\tndx, len(typeIds), metric, hostname)\n 319→\treturn nil\n 320→}\n 321→\n 322→// IsMetricRemovedForSubCluster checks whether a metric is marked as removed\n 323→// for the given subcluster in its per-subcluster configuration.\n 324→func IsMetricRemovedForSubCluster(mc *schema.MetricConfig, subCluster string) bool {\n 325→\tfor _, scConfig := range mc.SubClusters {\n 326→\t\tif scConfig.Name == subCluster && scConfig.Remove {\n 327→\t\t\treturn true\n 328→\t\t}\n 329→\t}\n 330→\treturn false\n 331→}\n 332→\n 333→// SanitizeStats replaces NaN values in statistics with 0 to enable JSON marshaling.\n 334→// If ANY of avg/min/max is NaN, ALL three are zeroed for consistency.\n 335→func SanitizeStats(avg, min, max *schema.Float) {\n 336→\tif avg.IsNaN() || min.IsNaN() || max.IsNaN() {\n 337→\t\t*avg = schema.Float(0)\n 338→\t\t*min = schema.Float(0)\n 339→\t\t*max = schema.Float(0)\n 340→\t}\n 341→}\n 342→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"0b568db1-90d8-4754-b004-221c0513bc28","timestamp":"2026-03-29T19:34:39.833Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/scopequery.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file contains shared scope transformation logic used by both the internal\n// metric store (pkg/metricstore) and the external cc-metric-store client\n// (internal/metricstoreclient). 
It extracts the common algorithm for mapping\n// between native metric scopes and requested scopes based on cluster topology.\npackage metricstore\n\nimport (\n\t\"strconv\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\n// Pre-converted scope strings avoid repeated string(MetricScope) allocations\n// during query construction. Used in ScopeQueryResult.Type field.\nvar (\n\tHWThreadString = string(schema.MetricScopeHWThread)\n\tCoreString = string(schema.MetricScopeCore)\n\tMemoryDomainString = string(schema.MetricScopeMemoryDomain)\n\tSocketString = string(schema.MetricScopeSocket)\n\tAcceleratorString = string(schema.MetricScopeAccelerator)\n)\n\n// ScopeQueryResult is a package-independent intermediate type returned by\n// BuildScopeQueries. Each consumer converts it to their own APIQuery type\n// (adding Resolution and any other package-specific fields).\ntype ScopeQueryResult struct {\n\tType *string\n\tMetric string\n\tHostname string\n\tTypeIds []string\n\tScope schema.MetricScope\n\tAggregate bool\n}\n\n// BuildScopeQueries generates scope query results for a given scope transformation.\n// It returns a slice of results and a boolean indicating success.\n// An empty slice means an expected exception (skip this combination).\n// ok=false means an unhandled case (caller should return an error).\nfunc BuildScopeQueries(\n\tnativeScope, requestedScope schema.MetricScope,\n\tmetric, hostname string,\n\ttopology *schema.Topology,\n\thwthreads []int,\n\taccelerators []string,\n) ([]ScopeQueryResult, bool) {\n\tscope := nativeScope.Max(requestedScope)\n\tresults := []ScopeQueryResult{}\n\n\thwthreadsStr := IntToStringSlice(hwthreads)\n\n\t// Accelerator -> Accelerator (Use \"accelerator\" scope if requested scope is lower than node)\n\tif nativeScope == schema.MetricScopeAccelerator && scope.LT(schema.MetricScopeNode) {\n\t\tif scope != schema.MetricScopeAccelerator {\n\t\t\t// Expected Exception -> Return Empty Slice\n\t\t\treturn results, true\n\t\t}\n\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: false,\n\t\t\tType: &AcceleratorString,\n\t\t\tTypeIds: accelerators,\n\t\t\tScope: schema.MetricScopeAccelerator,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// Accelerator -> Node\n\tif nativeScope == schema.MetricScopeAccelerator && scope == schema.MetricScopeNode {\n\t\tif len(accelerators) == 0 {\n\t\t\t// Expected Exception -> Return Empty Slice\n\t\t\treturn results, true\n\t\t}\n\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: true,\n\t\t\tType: &AcceleratorString,\n\t\t\tTypeIds: accelerators,\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// HWThread -> HWThread\n\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeHWThread {\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: false,\n\t\t\tType: &HWThreadString,\n\t\t\tTypeIds: hwthreadsStr,\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// HWThread -> Core\n\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeCore {\n\t\tcores, _ := topology.GetCoresFromHWThreads(hwthreads)\n\t\tfor _, core := range cores {\n\t\t\tresults = append(results, ScopeQueryResult{\n\t\t\t\tMetric: metric,\n\t\t\t\tHostname: hostname,\n\t\t\t\tAggregate: true,\n\t\t\t\tType: 
&HWThreadString,\n\t\t\t\tTypeIds: IntToStringSlice(topology.Core[core]),\n\t\t\t\tScope: scope,\n\t\t\t})\n\t\t}\n\t\treturn results, true\n\t}\n\n\t// HWThread -> Socket\n\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeSocket {\n\t\tsockets, _ := topology.GetSocketsFromHWThreads(hwthreads)\n\t\tfor _, socket := range sockets {\n\t\t\tresults = append(results, ScopeQueryResult{\n\t\t\t\tMetric: metric,\n\t\t\t\tHostname: hostname,\n\t\t\t\tAggregate: true,\n\t\t\t\tType: &HWThreadString,\n\t\t\t\tTypeIds: IntToStringSlice(topology.Socket[socket]),\n\t\t\t\tScope: scope,\n\t\t\t})\n\t\t}\n\t\treturn results, true\n\t}\n\n\t// HWThread -> Node\n\tif nativeScope == schema.MetricScopeHWThread && scope == schema.MetricScopeNode {\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: true,\n\t\t\tType: &HWThreadString,\n\t\t\tTypeIds: hwthreadsStr,\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// Core -> Core\n\tif nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeCore {\n\t\tcores, _ := topology.GetCoresFromHWThreads(hwthreads)\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: false,\n\t\t\tType: &CoreString,\n\t\t\tTypeIds: IntToStringSlice(cores),\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// Core -> Socket\n\tif nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeSocket {\n\t\tsockets, _ := topology.GetSocketsFromCores(hwthreads)\n\t\tfor _, socket := range sockets {\n\t\t\tresults = append(results, ScopeQueryResult{\n\t\t\t\tMetric: metric,\n\t\t\t\tHostname: hostname,\n\t\t\t\tAggregate: true,\n\t\t\t\tType: &CoreString,\n\t\t\t\tTypeIds: IntToStringSlice(topology.Socket[socket]),\n\t\t\t\tScope: scope,\n\t\t\t})\n\t\t}\n\t\treturn results, true\n\t}\n\n\t// Core -> Node\n\tif nativeScope == schema.MetricScopeCore && scope == schema.MetricScopeNode {\n\t\tcores, _ := topology.GetCoresFromHWThreads(hwthreads)\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: true,\n\t\t\tType: &CoreString,\n\t\t\tTypeIds: IntToStringSlice(cores),\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// MemoryDomain -> MemoryDomain\n\tif nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeMemoryDomain {\n\t\tmemDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: false,\n\t\t\tType: &MemoryDomainString,\n\t\t\tTypeIds: IntToStringSlice(memDomains),\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// MemoryDomain -> Socket\n\tif nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeSocket {\n\t\tmemDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)\n\t\tsocketToDomains, err := topology.GetMemoryDomainsBySocket(memDomains)\n\t\tif err != nil {\n\t\t\tcclog.Errorf(\"Error mapping memory domains to sockets, return unchanged: %v\", err)\n\t\t\t// Rare Error Case -> Still Continue -> Return Empty Slice\n\t\t\treturn results, true\n\t\t}\n\n\t\t// Create a query for each socket\n\t\tfor _, domains := range socketToDomains {\n\t\t\tresults = append(results, ScopeQueryResult{\n\t\t\t\tMetric: metric,\n\t\t\t\tHostname: hostname,\n\t\t\t\tAggregate: true,\n\t\t\t\tType: &MemoryDomainString,\n\t\t\t\tTypeIds: 
IntToStringSlice(domains),\n\t\t\t\tScope: scope,\n\t\t\t})\n\t\t}\n\t\treturn results, true\n\t}\n\n\t// MemoryDomain -> Node\n\tif nativeScope == schema.MetricScopeMemoryDomain && scope == schema.MetricScopeNode {\n\t\tmemDomains, _ := topology.GetMemoryDomainsFromHWThreads(hwthreads)\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: true,\n\t\t\tType: &MemoryDomainString,\n\t\t\tTypeIds: IntToStringSlice(memDomains),\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// Socket -> Socket\n\tif nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeSocket {\n\t\tsockets, _ := topology.GetSocketsFromHWThreads(hwthreads)\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: false,\n\t\t\tType: &SocketString,\n\t\t\tTypeIds: IntToStringSlice(sockets),\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// Socket -> Node\n\tif nativeScope == schema.MetricScopeSocket && scope == schema.MetricScopeNode {\n\t\tsockets, _ := topology.GetSocketsFromHWThreads(hwthreads)\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tAggregate: true,\n\t\t\tType: &SocketString,\n\t\t\tTypeIds: IntToStringSlice(sockets),\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// Node -> Node\n\tif nativeScope == schema.MetricScopeNode && scope == schema.MetricScopeNode {\n\t\tresults = append(results, ScopeQueryResult{\n\t\t\tMetric: metric,\n\t\t\tHostname: hostname,\n\t\t\tScope: scope,\n\t\t})\n\t\treturn results, true\n\t}\n\n\t// Unhandled Case\n\treturn nil, false\n}\n\n// IntToStringSlice converts a slice of integers to a slice of strings.\n// Used to convert hardware thread/core/socket IDs from topology (int) to query TypeIds (string).\n// Optimized to reuse a byte buffer for string conversion, reducing allocations.\nfunc IntToStringSlice(is []int) []string {\n\tif len(is) == 0 {\n\t\treturn nil\n\t}\n\n\tss := make([]string, len(is))\n\tbuf := make([]byte, 0, 16) // Reusable buffer for integer conversion\n\tfor i, x := range is {\n\t\tbuf = strconv.AppendInt(buf[:0], int64(x), 10)\n\t\tss[i] = string(buf)\n\t}\n\treturn ss\n}\n\n// ExtractTypeID returns the type ID at the given index from a query's TypeIds slice.\n// Returns nil if queryType is nil (no type filtering). 
Logs a warning and returns nil\n// if the index is out of range.\nfunc ExtractTypeID(queryType *string, typeIds []string, ndx int, metric, hostname string) *string {\n\tif queryType == nil {\n\t\treturn nil\n\t}\n\tif ndx < len(typeIds) {\n\t\tid := typeIds[ndx]\n\t\treturn &id\n\t}\n\tcclog.Warnf(\"TypeIds index out of range: %d with length %d for metric %s on host %s\",\n\t\tndx, len(typeIds), metric, hostname)\n\treturn nil\n}\n\n// IsMetricRemovedForSubCluster checks whether a metric is marked as removed\n// for the given subcluster in its per-subcluster configuration.\nfunc IsMetricRemovedForSubCluster(mc *schema.MetricConfig, subCluster string) bool {\n\tfor _, scConfig := range mc.SubClusters {\n\t\tif scConfig.Name == subCluster && scConfig.Remove {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n\n// SanitizeStats replaces NaN values in statistics with 0 to enable JSON marshaling.\n// If ANY of avg/min/max is NaN, ALL three are zeroed for consistency.\nfunc SanitizeStats(avg, min, max *schema.Float) {\n\tif avg.IsNaN() || min.IsNaN() || max.IsNaN() {\n\t\t*avg = schema.Float(0)\n\t\t*min = schema.Float(0)\n\t\t*max = schema.Float(0)\n\t}\n}\n","numLines":342,"startLine":1,"totalLines":342}},"sourceToolAssistantUUID":"86c6097e-366e-447c-a40c-a5d123492501"}
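The buffer-reuse trick in IntToStringSlice is worth isolating: strconv.AppendInt writes digits into a single scratch slice that is re-sliced to length zero on every iteration, so only the final string conversion allocates. A standalone copy of the pattern (the function name here is ours, not the package's):

```go
package main

import (
	"fmt"
	"strconv"
)

// intsToStrings reproduces the allocation pattern of IntToStringSlice:
// one reusable byte buffer for formatting, one string allocation per element.
func intsToStrings(is []int) []string {
	if len(is) == 0 {
		return nil
	}
	ss := make([]string, len(is))
	buf := make([]byte, 0, 16) // scratch buffer, reused across iterations
	for i, x := range is {
		buf = strconv.AppendInt(buf[:0], int64(x), 10)
		ss[i] = string(buf)
	}
	return ss
}

func main() {
	fmt.Println(intsToStrings([]int{0, 7, 42})) // [0 7 42]
}
```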
{"parentUuid":"0b568db1-90d8-4754-b004-221c0513bc28","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01MHB4R1CyUYrURRS9VMHBgb","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/stats.go"},"caller":{"type":"direct"}}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":22492},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"23062778-8765-4095-a360-5c408c680b0c","timestamp":"2026-03-29T19:34:40.172Z"}
{"parentUuid":"23062778-8765-4095-a360-5c408c680b0c","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01MHB4R1CyUYrURRS9VMHBgb","toolUseID":"toolu_01MHB4R1CyUYrURRS9VMHBgb","timestamp":"2026-03-29T19:34:40.256Z","uuid":"430211d5-8b35-4b50-a090-eb0d511f178a"}
{"parentUuid":"23062778-8765-4095-a360-5c408c680b0c","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01MHB4R1CyUYrURRS9VMHBgb","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→package metricstore\n 7→\n 8→import (\n 9→\t\"errors\"\n 10→\t\"math\"\n 11→\n 12→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 13→\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n 14→)\n 15→\n 16→type Stats struct {\n 17→\tSamples int\n 18→\tAvg schema.Float\n 19→\tMin schema.Float\n 20→\tMax schema.Float\n 21→}\n 22→\n 23→func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {\n 24→\tif from < b.start {\n 25→\t\tif b.prev != nil {\n 26→\t\t\treturn b.prev.stats(from, to)\n 27→\t\t}\n 28→\t\tfrom = b.start\n 29→\t}\n 30→\n 31→\t// TODO: Check if b.closed and if so and the full buffer is queried,\n 32→\t// use b.statistics instead of iterating over the buffer.\n 33→\n 34→\tsamples := 0\n 35→\tsum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32\n 36→\n 37→\tvar t int64\n 38→\tfor t = from; t < to; t += b.frequency {\n 39→\t\tidx := int((t - b.start) / b.frequency)\n 40→\t\tif idx >= cap(b.data) {\n 41→\t\t\tb = b.next\n 42→\t\t\tif b == nil {\n 43→\t\t\t\tbreak\n 44→\t\t\t}\n 45→\t\t\tidx = int((t - b.start) / b.frequency)\n 46→\t\t}\n 47→\n 48→\t\tif t < b.start || idx >= len(b.data) {\n 49→\t\t\tcontinue\n 50→\t\t}\n 51→\n 52→\t\txf := float64(b.data[idx])\n 53→\t\tif math.IsNaN(xf) {\n 54→\t\t\tcontinue\n 55→\t\t}\n 56→\n 57→\t\tsamples += 1\n 58→\t\tsum += xf\n 59→\t\tmin = math.Min(min, xf)\n 60→\t\tmax = math.Max(max, xf)\n 61→\t}\n 62→\n 63→\treturn Stats{\n 64→\t\tSamples: samples,\n 65→\t\tAvg: schema.Float(sum) / schema.Float(samples),\n 66→\t\tMin: schema.Float(min),\n 67→\t\tMax: schema.Float(max),\n 68→\t}, from, t, nil\n 69→}\n 70→\n 71→// Returns statistics for the requested metric on the selected node/level.\n 72→// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.\n 73→// If `Stats.Samples` is zero, the statistics should not be considered as valid.\n 74→func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) {\n 75→\tif from > to {\n 76→\t\treturn nil, 0, 0, errors.New(\"invalid time range\")\n 77→\t}\n 78→\n 79→\tminfo, ok := m.Metrics[metric]\n 80→\tif !ok {\n 81→\t\treturn nil, 0, 0, errors.New(\"unknown metric: \" + metric)\n 82→\t}\n 83→\n 84→\tn, samples := 0, 0\n 85→\tavg, min, max := schema.Float(0), math.MaxFloat32, -math.MaxFloat32\n 86→\terr := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {\n 87→\t\tstats, cfrom, cto, err := b.stats(from, to)\n 88→\t\tif err != nil {\n 89→\t\t\treturn err\n 90→\t\t}\n 91→\n 92→\t\tif n == 0 {\n 93→\t\t\tfrom, to = cfrom, cto\n 94→\t\t} else if from != cfrom {\n 95→\t\t\treturn ErrDataDoesNotAlignMissingFront\n 96→\t\t} else if to != cto {\n 97→\t\t\treturn ErrDataDoesNotAlignMissingBack\n 98→\t\t}\n 99→\n 100→\t\tsamples += stats.Samples\n 101→\t\tavg += stats.Avg\n 102→\t\tmin = math.Min(min, float64(stats.Min))\n 103→\t\tmax = math.Max(max, float64(stats.Max))\n 104→\t\tn 
+= 1\n 105→\t\treturn nil\n 106→\t})\n 107→\tif err != nil {\n 108→\t\treturn nil, 0, 0, err\n 109→\t}\n 110→\n 111→\tif n == 0 {\n 112→\t\treturn nil, 0, 0, ErrNoData\n 113→\t}\n 114→\n 115→\tif minfo.Aggregation == AvgAggregation {\n 116→\t\tavg /= schema.Float(n)\n 117→\t} else if n > 1 && minfo.Aggregation != SumAggregation {\n 118→\t\treturn nil, 0, 0, errors.New(\"invalid aggregation\")\n 119→\t}\n 120→\n 121→\treturn &Stats{\n 122→\t\tSamples: samples,\n 123→\t\tAvg: avg,\n 124→\t\tMin: schema.Float(min),\n 125→\t\tMax: schema.Float(max),\n 126→\t}, from, to, nil\n 127→}\n 128→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"3f6af033-82e6-4bce-b410-3d3fa0d629ca","timestamp":"2026-03-29T19:34:40.254Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/stats.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\npackage metricstore\n\nimport (\n\t\"errors\"\n\t\"math\"\n\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n)\n\ntype Stats struct {\n\tSamples int\n\tAvg schema.Float\n\tMin schema.Float\n\tMax schema.Float\n}\n\nfunc (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {\n\tif from < b.start {\n\t\tif b.prev != nil {\n\t\t\treturn b.prev.stats(from, to)\n\t\t}\n\t\tfrom = b.start\n\t}\n\n\t// TODO: Check if b.closed and if so and the full buffer is queried,\n\t// use b.statistics instead of iterating over the buffer.\n\n\tsamples := 0\n\tsum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32\n\n\tvar t int64\n\tfor t = from; t < to; t += b.frequency {\n\t\tidx := int((t - b.start) / b.frequency)\n\t\tif idx >= cap(b.data) {\n\t\t\tb = b.next\n\t\t\tif b == nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tidx = int((t - b.start) / b.frequency)\n\t\t}\n\n\t\tif t < b.start || idx >= len(b.data) {\n\t\t\tcontinue\n\t\t}\n\n\t\txf := float64(b.data[idx])\n\t\tif math.IsNaN(xf) {\n\t\t\tcontinue\n\t\t}\n\n\t\tsamples += 1\n\t\tsum += xf\n\t\tmin = math.Min(min, xf)\n\t\tmax = math.Max(max, xf)\n\t}\n\n\treturn Stats{\n\t\tSamples: samples,\n\t\tAvg: schema.Float(sum) / schema.Float(samples),\n\t\tMin: schema.Float(min),\n\t\tMax: schema.Float(max),\n\t}, from, t, nil\n}\n\n// Returns statistics for the requested metric on the selected node/level.\n// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.\n// If `Stats.Samples` is zero, the statistics should not be considered as valid.\nfunc (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) {\n\tif from > to {\n\t\treturn nil, 0, 0, errors.New(\"invalid time range\")\n\t}\n\n\tminfo, ok := m.Metrics[metric]\n\tif !ok {\n\t\treturn nil, 0, 0, errors.New(\"unknown metric: \" + metric)\n\t}\n\n\tn, samples := 0, 0\n\tavg, min, max := schema.Float(0), math.MaxFloat32, -math.MaxFloat32\n\terr := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {\n\t\tstats, cfrom, cto, err := b.stats(from, to)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\tif n == 0 {\n\t\t\tfrom, to = cfrom, 
cto\n\t\t} else if from != cfrom {\n\t\t\treturn ErrDataDoesNotAlignMissingFront\n\t\t} else if to != cto {\n\t\t\treturn ErrDataDoesNotAlignMissingBack\n\t\t}\n\n\t\tsamples += stats.Samples\n\t\tavg += stats.Avg\n\t\tmin = math.Min(min, float64(stats.Min))\n\t\tmax = math.Max(max, float64(stats.Max))\n\t\tn += 1\n\t\treturn nil\n\t})\n\tif err != nil {\n\t\treturn nil, 0, 0, err\n\t}\n\n\tif n == 0 {\n\t\treturn nil, 0, 0, ErrNoData\n\t}\n\n\tif minfo.Aggregation == AvgAggregation {\n\t\tavg /= schema.Float(n)\n\t} else if n > 1 && minfo.Aggregation != SumAggregation {\n\t\treturn nil, 0, 0, errors.New(\"invalid aggregation\")\n\t}\n\n\treturn &Stats{\n\t\tSamples: samples,\n\t\tAvg: avg,\n\t\tMin: schema.Float(min),\n\t\tMax: schema.Float(max),\n\t}, from, to, nil\n}\n","numLines":128,"startLine":1,"totalLines":128}},"sourceToolAssistantUUID":"23062778-8765-4095-a360-5c408c680b0c"}
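The inner loop of buffer.stats above skips NaN samples and folds the rest into sum/min/max; with zero valid samples the average becomes NaN (0/0), which is why the doc comment warns that Stats.Samples == 0 means the result is invalid. The accumulation, lifted out of the buffer chain as a sketch (fold is a hypothetical name):

```go
package main

import (
	"fmt"
	"math"
)

// fold mirrors the accumulation in buffer.stats: NaN samples are skipped,
// valid ones contribute to sum, min and max. With samples == 0 the average
// is NaN (0/0), so callers must check the sample count before trusting it.
func fold(data []float64) (samples int, avg, min, max float64) {
	sum := 0.0
	min, max = math.MaxFloat32, -math.MaxFloat32
	for _, x := range data {
		if math.IsNaN(x) {
			continue
		}
		samples++
		sum += x
		min = math.Min(min, x)
		max = math.Max(max, x)
	}
	return samples, sum / float64(samples), min, max
}

func main() {
	n, avg, lo, hi := fold([]float64{1, math.NaN(), 3})
	fmt.Println(n, avg, lo, hi) // 2 2 1 3
}
```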
{"parentUuid":"3f6af033-82e6-4bce-b410-3d3fa0d629ca","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01BTzFaAmKApCLCwStTPQxag","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01LEcst8LrZ4DUCBajDSeScD","name":"Read","input":{"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/lineprotocol.go"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":5260,"cache_creation_input_tokens":22492,"cache_read_input_tokens":51780,"output_tokens":493,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":22492,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXvzh1ioFQXKBiazHM9K","type":"assistant","uuid":"1bc6adfc-8b1c-4c7c-9632-94c6746778f8","timestamp":"2026-03-29T19:34:40.576Z"}
{"parentUuid":"1bc6adfc-8b1c-4c7c-9632-94c6746778f8","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Read","command":"callback"},"parentToolUseID":"toolu_01LEcst8LrZ4DUCBajDSeScD","toolUseID":"toolu_01LEcst8LrZ4DUCBajDSeScD","timestamp":"2026-03-29T19:34:40.684Z","uuid":"77b4d7a7-3fa5-45de-bdcd-1c5739dfbd44"}
{"parentUuid":"1bc6adfc-8b1c-4c7c-9632-94c6746778f8","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01LEcst8LrZ4DUCBajDSeScD","type":"tool_result","content":" 1→// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n 2→// All rights reserved. This file is part of cc-backend.\n 3→// Use of this source code is governed by a MIT-style\n 4→// license that can be found in the LICENSE file.\n 5→\n 6→// This file implements ingestion of InfluxDB line-protocol metric data received\n 7→// over NATS. Each line encodes one metric sample with the following structure:\n 8→//\n 9→//\t[,cluster=][,hostname=][,type=][,type-id=][,subtype=][,stype-id=] value= []\n 10→//\n 11→// The measurement name identifies the metric (e.g. \"cpu_load\"). Tags provide\n 12→// routing information (cluster, host) and optional sub-device selectors (type,\n 13→// subtype). Only one field is expected per line: \"value\".\n 14→//\n 15→// After decoding, each sample is:\n 16→// 1. Written to the in-memory store via ms.WriteToLevel.\n 17→// 2. If the checkpoint format is \"wal\", also forwarded to the WAL staging\n 18→// goroutine via the WALMessages channel for durable write-ahead logging.\n 19→package metricstore\n 20→\n 21→import (\n 22→\t\"bytes\"\n 23→\t\"context\"\n 24→\t\"fmt\"\n 25→\t\"sync\"\n 26→\t\"sync/atomic\"\n 27→\t\"time\"\n 28→\n 29→\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n 30→\t\"github.com/ClusterCockpit/cc-lib/v2/nats\"\n 31→\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n 32→\t\"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol\"\n 33→)\n 34→\n 35→// ReceiveNats subscribes to all configured NATS subjects and feeds incoming\n 36→// line-protocol messages into the MemoryStore.\n 37→//\n 38→// When workers > 1 a pool of goroutines drains a shared channel so that\n 39→// multiple messages can be decoded in parallel. With workers == 1 the NATS\n 40→// callback decodes inline (no channel overhead, lower latency).\n 41→//\n 42→// The function blocks until ctx is cancelled and all worker goroutines have\n 43→// finished. 
It returns nil when the NATS client is not configured; callers\n 44→// should treat that as a no-op rather than an error.\n 45→func ReceiveNats(ms *MemoryStore,\n 46→\tworkers int,\n 47→\tctx context.Context,\n 48→) error {\n 49→\tnc := nats.GetClient()\n 50→\n 51→\tif nc == nil {\n 52→\t\tcclog.Warn(\"NATS client not initialized\")\n 53→\t\treturn nil\n 54→\t}\n 55→\n 56→\tvar wg sync.WaitGroup\n 57→\tmsgs := make(chan []byte, max(workers*256, 8192))\n 58→\n 59→\tfor _, sc := range *Keys.Subscriptions {\n 60→\t\tclusterTag := sc.ClusterTag\n 61→\t\tif workers > 1 {\n 62→\t\t\twg.Add(workers)\n 63→\n 64→\t\t\tfor range workers {\n 65→\t\t\t\tgo func() {\n 66→\t\t\t\t\tdefer wg.Done()\n 67→\t\t\t\t\tfor m := range msgs {\n 68→\t\t\t\t\t\tdec := lineprotocol.NewDecoderWithBytes(m)\n 69→\t\t\t\t\t\tif err := DecodeLine(dec, ms, clusterTag); err != nil {\n 70→\t\t\t\t\t\t\tcclog.Errorf(\"error: %s\", err.Error())\n 71→\t\t\t\t\t\t}\n 72→\t\t\t\t\t}\n 73→\t\t\t\t}()\n 74→\t\t\t}\n 75→\n 76→\t\t\tnc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {\n 77→\t\t\t\tselect {\n 78→\t\t\t\tcase msgs <- data:\n 79→\t\t\t\tcase <-ctx.Done():\n 80→\t\t\t\t}\n 81→\t\t\t})\n 82→\t\t} else {\n 83→\t\t\tnc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {\n 84→\t\t\t\tdec := lineprotocol.NewDecoderWithBytes(data)\n 85→\t\t\t\tif err := DecodeLine(dec, ms, clusterTag); err != nil {\n 86→\t\t\t\t\tcclog.Errorf(\"error: %s\", err.Error())\n 87→\t\t\t\t}\n 88→\t\t\t})\n 89→\t\t}\n 90→\t\tcclog.Infof(\"NATS subscription to '%s' established\", sc.SubscribeTo)\n 91→\t}\n 92→\n 93→\tgo func() {\n 94→\t\t<-ctx.Done()\n 95→\t\tclose(msgs)\n 96→\t}()\n 97→\n 98→\twg.Wait()\n 99→\n 100→\treturn nil\n 101→}\n 102→\n 103→// reorder prepends prefix to buf in-place when buf has enough spare capacity,\n 104→// avoiding an allocation. Falls back to a regular append otherwise.\n 105→//\n 106→// It is used to assemble the \"type\" and \"subtype\" selector\n 107→// strings when the type tag arrives before the type-id tag in the line, so the\n 108→// two byte slices need to be concatenated in tag-declaration order regardless\n 109→// of wire order.\n 110→func reorder(buf, prefix []byte) []byte {\n 111→\tn := len(prefix)\n 112→\tm := len(buf)\n 113→\tif cap(buf) < m+n {\n 114→\t\treturn append(prefix[:n:n], buf...)\n 115→\t} else {\n 116→\t\tbuf = buf[:n+m]\n 117→\t\tfor i := m - 1; i >= 0; i-- {\n 118→\t\t\tbuf[i+n] = buf[i]\n 119→\t\t}\n 120→\t\tfor i := range n {\n 121→\t\t\tbuf[i] = prefix[i]\n 122→\t\t}\n 123→\t\treturn buf\n 124→\t}\n 125→}\n 126→\n 127→// walDropped counts WAL messages dropped due to a full channel.\n 128→// Logged periodically; not critical since binary snapshots capture the data.\n 129→var walDropped atomic.Int64\n 130→\n 131→// decodeState holds the per-call scratch buffers used by DecodeLine.\n 132→// Instances are recycled via decodeStatePool to avoid repeated allocations\n 133→// during high-throughput ingestion.\n 134→type decodeState struct {\n 135→\t// metricBuf holds a copy of the current measurement name (line-protocol\n 136→\t// measurement field). Copied because dec.Measurement() returns a slice\n 137→\t// that is invalidated by the next decoder call.\n 138→\tmetricBuf []byte\n 139→\n 140→\t// selector is the sub-device path passed to WriteToLevel and WALMessage\n 141→\t// (e.g. [\"socket0\"] or [\"socket0\", \"memctrl1\"]). 
Reused across lines.\n 142→\tselector []string\n 143→\n 144→\t// typeBuf accumulates the concatenated \"type\"+\"type-id\" tag value for the\n 145→\t// current line. Reset at the start of each line's tag-decode loop.\n 146→\ttypeBuf []byte\n 147→\n 148→\t// subTypeBuf accumulates the concatenated \"subtype\"+\"stype-id\" tag value.\n 149→\t// Reset at the start of each line's tag-decode loop.\n 150→\tsubTypeBuf []byte\n 151→\n 152→\t// prevTypeBytes / prevTypeStr cache the last seen typeBuf content and its\n 153→\t// string conversion. Because consecutive lines in a batch typically address\n 154→\t// the same sub-device, the cache hit rate is very high and avoids\n 155→\t// repeated []byte→string allocations.\n 156→\tprevTypeBytes []byte\n 157→\tprevTypeStr string\n 158→\n 159→\t// prevSubTypeBytes / prevSubTypeStr are the same cache for the subtype.\n 160→\tprevSubTypeBytes []byte\n 161→\tprevSubTypeStr string\n 162→}\n 163→\n 164→// decodeStatePool recycles decodeState values across DecodeLine calls to\n 165→// reduce GC pressure during sustained metric ingestion.\n 166→var decodeStatePool = sync.Pool{\n 167→\tNew: func() any {\n 168→\t\treturn &decodeState{\n 169→\t\t\tmetricBuf: make([]byte, 0, 16),\n 170→\t\t\tselector: make([]string, 0, 4),\n 171→\t\t\ttypeBuf: make([]byte, 0, 16),\n 172→\t\t\tsubTypeBuf: make([]byte, 0, 16),\n 173→\t\t}\n 174→\t},\n 175→}\n 176→\n 177→// DecodeLine reads all lines from dec (InfluxDB line-protocol) and writes each\n 178→// decoded metric sample into ms.\n 179→//\n 180→// clusterDefault is used as the cluster name for lines that do not carry a\n 181→// \"cluster\" tag. Callers typically supply the ClusterTag value from the NATS\n 182→// subscription configuration.\n 183→//\n 184→// Performance notes:\n 185→// - A decodeState is obtained from decodeStatePool to reuse scratch buffers.\n 186→// - The Level pointer (host-level node in the metric tree) is cached across\n 187→// consecutive lines that share the same cluster+host pair to avoid\n 188→// repeated lock acquisitions on the root and cluster levels.\n 189→// - []byte→string conversions for type/subtype selectors are cached via\n 190→// prevType*/prevSubType* fields because batches typically repeat the same\n 191→// sub-device identifiers.\n 192→// - Timestamp parsing tries Second precision first; if that fails it retries\n 193→// Millisecond, Microsecond, and Nanosecond in turn. A missing timestamp\n 194→// falls back to time.Now().\n 195→//\n 196→// When the checkpoint format is \"wal\" each successfully decoded sample is also\n 197→// sent to WALMessages so the WAL staging goroutine can persist it durably\n 198→// before the next binary snapshot.\n 199→func DecodeLine(dec *lineprotocol.Decoder,\n 200→\tms *MemoryStore,\n 201→\tclusterDefault string,\n 202→) error {\n 203→\t// Reduce allocations in loop:\n 204→\tt := time.Now()\n 205→\tmetric := Metric{}\n 206→\tst := decodeStatePool.Get().(*decodeState)\n 207→\tdefer decodeStatePool.Put(st)\n 208→\n 209→\t// Optimize for the case where all lines in a \"batch\" are about the same\n 210→\t// cluster and host. 
By using `WriteToLevel` (level = host), we do not need\n 211→\t// to take the root- and cluster-level lock as often.\n 212→\tvar lvl *Level = nil\n 213→\tprevCluster, prevHost := \"\", \"\"\n 214→\n 215→\tvar ok bool\n 216→\tfor dec.Next() {\n 217→\t\trawmeasurement, err := dec.Measurement()\n 218→\t\tif err != nil {\n 219→\t\t\treturn err\n 220→\t\t}\n 221→\n 222→\t\t// Needs to be copied because another call to dec.* would\n 223→\t\t// invalidate the returned slice.\n 224→\t\tst.metricBuf = append(st.metricBuf[:0], rawmeasurement...)\n 225→\n 226→\t\t// The go compiler optimizes map[string(byteslice)] lookups:\n 227→\t\tmetric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]\n 228→\t\tif !ok {\n 229→\t\t\tcontinue\n 230→\t\t}\n 231→\n 232→\t\tst.typeBuf, st.subTypeBuf = st.typeBuf[:0], st.subTypeBuf[:0]\n 233→\t\tcluster, host := clusterDefault, \"\"\n 234→\t\tfor {\n 235→\t\t\tkey, val, err := dec.NextTag()\n 236→\t\t\tif err != nil {\n 237→\t\t\t\treturn err\n 238→\t\t\t}\n 239→\t\t\tif key == nil {\n 240→\t\t\t\tbreak\n 241→\t\t\t}\n 242→\n 243→\t\t\t// The go compiler optimizes string([]byte{...}) == \"...\":\n 244→\t\t\tswitch string(key) {\n 245→\t\t\tcase \"cluster\":\n 246→\t\t\t\tif string(val) == prevCluster {\n 247→\t\t\t\t\tcluster = prevCluster\n 248→\t\t\t\t} else {\n 249→\t\t\t\t\tcluster = string(val)\n 250→\t\t\t\t\tlvl = nil\n 251→\t\t\t\t}\n 252→\t\t\tcase \"hostname\", \"host\":\n 253→\t\t\t\tif string(val) == prevHost {\n 254→\t\t\t\t\thost = prevHost\n 255→\t\t\t\t} else {\n 256→\t\t\t\t\thost = string(val)\n 257→\t\t\t\t\tlvl = nil\n 258→\t\t\t\t}\n 259→\t\t\tcase \"type\":\n 260→\t\t\t\tif string(val) == \"node\" {\n 261→\t\t\t\t\tbreak\n 262→\t\t\t\t}\n 263→\n 264→\t\t\t\t// We cannot be sure that the \"type\" tag comes before the \"type-id\" tag:\n 265→\t\t\t\tif len(st.typeBuf) == 0 {\n 266→\t\t\t\t\tst.typeBuf = append(st.typeBuf, val...)\n 267→\t\t\t\t} else {\n 268→\t\t\t\t\tst.typeBuf = reorder(st.typeBuf, val)\n 269→\t\t\t\t}\n 270→\t\t\tcase \"type-id\":\n 271→\t\t\t\tst.typeBuf = append(st.typeBuf, val...)\n 272→\t\t\tcase \"subtype\":\n 273→\t\t\t\t// We cannot be sure that the \"subtype\" tag comes before the \"stype-id\" tag:\n 274→\t\t\t\tif len(st.subTypeBuf) == 0 {\n 275→\t\t\t\t\tst.subTypeBuf = append(st.subTypeBuf, val...)\n 276→\t\t\t\t} else {\n 277→\t\t\t\t\tst.subTypeBuf = reorder(st.subTypeBuf, val)\n 278→\t\t\t\t}\n 279→\t\t\tcase \"stype-id\":\n 280→\t\t\t\tst.subTypeBuf = append(st.subTypeBuf, val...)\n 281→\t\t\tdefault:\n 282→\t\t\t}\n 283→\t\t}\n 284→\n 285→\t\t// If the cluster or host changed, the lvl was set to nil\n 286→\t\tif lvl == nil {\n 287→\t\t\tst.selector = st.selector[:2]\n 288→\t\t\tst.selector[0], st.selector[1] = cluster, host\n 289→\t\t\tlvl = ms.GetLevel(st.selector)\n 290→\t\t\tprevCluster, prevHost = cluster, host\n 291→\t\t}\n 292→\n 293→\t\t// subtypes: cache []byte→string conversions; messages in a batch typically\n 294→\t\t// share the same type/subtype so the hit rate is very high.\n 295→\t\tst.selector = st.selector[:0]\n 296→\t\tif len(st.typeBuf) > 0 {\n 297→\t\t\tif !bytes.Equal(st.typeBuf, st.prevTypeBytes) {\n 298→\t\t\t\tst.prevTypeBytes = append(st.prevTypeBytes[:0], st.typeBuf...)\n 299→\t\t\t\tst.prevTypeStr = string(st.typeBuf)\n 300→\t\t\t}\n 301→\t\t\tst.selector = append(st.selector, st.prevTypeStr)\n 302→\t\t\tif len(st.subTypeBuf) > 0 {\n 303→\t\t\t\tif !bytes.Equal(st.subTypeBuf, st.prevSubTypeBytes) {\n 304→\t\t\t\t\tst.prevSubTypeBytes = append(st.prevSubTypeBytes[:0], 
st.subTypeBuf...)\n 305→\t\t\t\t\tst.prevSubTypeStr = string(st.subTypeBuf)\n 306→\t\t\t\t}\n 307→\t\t\t\tst.selector = append(st.selector, st.prevSubTypeStr)\n 308→\t\t\t}\n 309→\t\t}\n 310→\n 311→\t\tfor {\n 312→\t\t\tkey, val, err := dec.NextField()\n 313→\t\t\tif err != nil {\n 314→\t\t\t\treturn err\n 315→\t\t\t}\n 316→\n 317→\t\t\tif key == nil {\n 318→\t\t\t\tbreak\n 319→\t\t\t}\n 320→\n 321→\t\t\tif string(key) != \"value\" {\n 322→\t\t\t\treturn fmt.Errorf(\"host %s: unknown field: '%s' (value: %#v)\", host, string(key), val)\n 323→\t\t\t}\n 324→\n 325→\t\t\tif val.Kind() == lineprotocol.Float {\n 326→\t\t\t\tmetric.Value = schema.Float(val.FloatV())\n 327→\t\t\t} else if val.Kind() == lineprotocol.Int {\n 328→\t\t\t\tmetric.Value = schema.Float(val.IntV())\n 329→\t\t\t} else if val.Kind() == lineprotocol.Uint {\n 330→\t\t\t\tmetric.Value = schema.Float(val.UintV())\n 331→\t\t\t} else {\n 332→\t\t\t\treturn fmt.Errorf(\"host %s: unsupported value type in message: %s\", host, val.Kind().String())\n 333→\t\t\t}\n 334→\t\t}\n 335→\n 336→\t\tif t, err = dec.Time(lineprotocol.Second, t); err != nil {\n 337→\t\t\tt = time.Now()\n 338→\t\t\tif t, err = dec.Time(lineprotocol.Millisecond, t); err != nil {\n 339→\t\t\t\tt = time.Now()\n 340→\t\t\t\tif t, err = dec.Time(lineprotocol.Microsecond, t); err != nil {\n 341→\t\t\t\t\tt = time.Now()\n 342→\t\t\t\t\tif t, err = dec.Time(lineprotocol.Nanosecond, t); err != nil {\n 343→\t\t\t\t\t\treturn fmt.Errorf(\"host %s: timestamp : %#v with error : %#v\", host, t, err.Error())\n 344→\t\t\t\t\t}\n 345→\t\t\t\t}\n 346→\t\t\t}\n 347→\t\t}\n 348→\n 349→\t\tif err != nil {\n 350→\t\t\treturn fmt.Errorf(\"host %s: timestamp : %#v with error : %#v\", host, t, err.Error())\n 351→\t\t}\n 352→\n 353→\t\ttime := t.Unix()\n 354→\n 355→\t\tif Keys.Checkpoints.FileFormat == \"wal\" {\n 356→\t\t\tmsg := &WALMessage{\n 357→\t\t\t\tMetricName: string(st.metricBuf),\n 358→\t\t\t\tCluster: cluster,\n 359→\t\t\t\tNode: host,\n 360→\t\t\t\tSelector: append([]string{}, st.selector...),\n 361→\t\t\t\tValue: metric.Value,\n 362→\t\t\t\tTimestamp: time,\n 363→\t\t\t}\n 364→\t\t\tif !SendWALMessage(msg) {\n 365→\t\t\t\t// WAL shard channel full — metric is written to memory store but not WAL.\n 366→\t\t\t\t// Next binary snapshot will capture it.\n 367→\t\t\t\tif dropped := walDropped.Add(1); dropped%10000 == 1 {\n 368→\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL shard channel full, dropped %d messages (data safe in memory, next snapshot will capture)\", dropped)\n 369→\t\t\t\t}\n 370→\t\t\t}\n 371→\t\t}\n 372→\n 373→\t\tif err := ms.WriteToLevel(lvl, st.selector, time, []Metric{metric}); err != nil {\n 374→\t\t\tcclog.Warnf(\"write error for host %s metric %s at ts %d: %s\", host, string(st.metricBuf), time, err.Error())\n 375→\t\t\tcontinue\n 376→\t\t}\n 377→\t}\n 378→\treturn nil\n 379→}\n 380→\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n"}]},"uuid":"b98b4b7d-c14b-40f3-8615-568be15f64b0","timestamp":"2026-03-29T19:34:40.681Z","toolUseResult":{"type":"text","file":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/lineprotocol.go","content":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. 
This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file implements ingestion of InfluxDB line-protocol metric data received\n// over NATS. Each line encodes one metric sample with the following structure:\n//\n//\t<measurement>[,cluster=<cluster>][,hostname=<hostname>][,type=<type>][,type-id=<id>][,subtype=<subtype>][,stype-id=<id>] value=<value> [<timestamp>]\n//\n// The measurement name identifies the metric (e.g. \"cpu_load\"). Tags provide\n// routing information (cluster, host) and optional sub-device selectors (type,\n// subtype). Only one field is expected per line: \"value\".\n//\n// After decoding, each sample is:\n// 1. Written to the in-memory store via ms.WriteToLevel.\n// 2. If the checkpoint format is \"wal\", also forwarded to the WAL staging\n// goroutine via the WALMessages channel for durable write-ahead logging.\npackage metricstore\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"fmt\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/nats\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n\t\"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol\"\n)\n\n// ReceiveNats subscribes to all configured NATS subjects and feeds incoming\n// line-protocol messages into the MemoryStore.\n//\n// When workers > 1 a pool of goroutines drains a shared channel so that\n// multiple messages can be decoded in parallel. With workers == 1 the NATS\n// callback decodes inline (no channel overhead, lower latency).\n//\n// The function blocks until ctx is cancelled and all worker goroutines have\n// finished. It returns nil when the NATS client is not configured; callers\n// should treat that as a no-op rather than an error.\nfunc ReceiveNats(ms *MemoryStore,\n\tworkers int,\n\tctx context.Context,\n) error {\n\tnc := nats.GetClient()\n\n\tif nc == nil {\n\t\tcclog.Warn(\"NATS client not initialized\")\n\t\treturn nil\n\t}\n\n\tvar wg sync.WaitGroup\n\tmsgs := make(chan []byte, max(workers*256, 8192))\n\n\tfor _, sc := range *Keys.Subscriptions {\n\t\tclusterTag := sc.ClusterTag\n\t\tif workers > 1 {\n\t\t\twg.Add(workers)\n\n\t\t\tfor range workers {\n\t\t\t\tgo func() {\n\t\t\t\t\tdefer wg.Done()\n\t\t\t\t\tfor m := range msgs {\n\t\t\t\t\t\tdec := lineprotocol.NewDecoderWithBytes(m)\n\t\t\t\t\t\tif err := DecodeLine(dec, ms, clusterTag); err != nil {\n\t\t\t\t\t\t\tcclog.Errorf(\"error: %s\", err.Error())\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}()\n\t\t\t}\n\n\t\t\tnc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {\n\t\t\t\tselect {\n\t\t\t\tcase msgs <- data:\n\t\t\t\tcase <-ctx.Done():\n\t\t\t\t}\n\t\t\t})\n\t\t} else {\n\t\t\tnc.Subscribe(sc.SubscribeTo, func(subject string, data []byte) {\n\t\t\t\tdec := lineprotocol.NewDecoderWithBytes(data)\n\t\t\t\tif err := DecodeLine(dec, ms, clusterTag); err != nil {\n\t\t\t\t\tcclog.Errorf(\"error: %s\", err.Error())\n\t\t\t\t}\n\t\t\t})\n\t\t}\n\t\tcclog.Infof(\"NATS subscription to '%s' established\", sc.SubscribeTo)\n\t}\n\n\tgo func() {\n\t\t<-ctx.Done()\n\t\tclose(msgs)\n\t}()\n\n\twg.Wait()\n\n\treturn nil\n}\n\n// reorder prepends prefix to buf in-place when buf has enough spare capacity,\n// avoiding an allocation. 
Falls back to a regular append otherwise.\n//\n// It is used to assemble the \"type\" and \"subtype\" selector\n// strings when the type tag arrives before the type-id tag in the line, so the\n// two byte slices need to be concatenated in tag-declaration order regardless\n// of wire order.\nfunc reorder(buf, prefix []byte) []byte {\n\tn := len(prefix)\n\tm := len(buf)\n\tif cap(buf) < m+n {\n\t\treturn append(prefix[:n:n], buf...)\n\t} else {\n\t\tbuf = buf[:n+m]\n\t\tfor i := m - 1; i >= 0; i-- {\n\t\t\tbuf[i+n] = buf[i]\n\t\t}\n\t\tfor i := range n {\n\t\t\tbuf[i] = prefix[i]\n\t\t}\n\t\treturn buf\n\t}\n}\n\n// walDropped counts WAL messages dropped due to a full channel.\n// Logged periodically; not critical since binary snapshots capture the data.\nvar walDropped atomic.Int64\n\n// decodeState holds the per-call scratch buffers used by DecodeLine.\n// Instances are recycled via decodeStatePool to avoid repeated allocations\n// during high-throughput ingestion.\ntype decodeState struct {\n\t// metricBuf holds a copy of the current measurement name (line-protocol\n\t// measurement field). Copied because dec.Measurement() returns a slice\n\t// that is invalidated by the next decoder call.\n\tmetricBuf []byte\n\n\t// selector is the sub-device path passed to WriteToLevel and WALMessage\n\t// (e.g. [\"socket0\"] or [\"socket0\", \"memctrl1\"]). Reused across lines.\n\tselector []string\n\n\t// typeBuf accumulates the concatenated \"type\"+\"type-id\" tag value for the\n\t// current line. Reset at the start of each line's tag-decode loop.\n\ttypeBuf []byte\n\n\t// subTypeBuf accumulates the concatenated \"subtype\"+\"stype-id\" tag value.\n\t// Reset at the start of each line's tag-decode loop.\n\tsubTypeBuf []byte\n\n\t// prevTypeBytes / prevTypeStr cache the last seen typeBuf content and its\n\t// string conversion. Because consecutive lines in a batch typically address\n\t// the same sub-device, the cache hit rate is very high and avoids\n\t// repeated []byte→string allocations.\n\tprevTypeBytes []byte\n\tprevTypeStr string\n\n\t// prevSubTypeBytes / prevSubTypeStr are the same cache for the subtype.\n\tprevSubTypeBytes []byte\n\tprevSubTypeStr string\n}\n\n// decodeStatePool recycles decodeState values across DecodeLine calls to\n// reduce GC pressure during sustained metric ingestion.\nvar decodeStatePool = sync.Pool{\n\tNew: func() any {\n\t\treturn &decodeState{\n\t\t\tmetricBuf: make([]byte, 0, 16),\n\t\t\tselector: make([]string, 0, 4),\n\t\t\ttypeBuf: make([]byte, 0, 16),\n\t\t\tsubTypeBuf: make([]byte, 0, 16),\n\t\t}\n\t},\n}\n\n// DecodeLine reads all lines from dec (InfluxDB line-protocol) and writes each\n// decoded metric sample into ms.\n//\n// clusterDefault is used as the cluster name for lines that do not carry a\n// \"cluster\" tag. Callers typically supply the ClusterTag value from the NATS\n// subscription configuration.\n//\n// Performance notes:\n// - A decodeState is obtained from decodeStatePool to reuse scratch buffers.\n// - The Level pointer (host-level node in the metric tree) is cached across\n// consecutive lines that share the same cluster+host pair to avoid\n// repeated lock acquisitions on the root and cluster levels.\n// - []byte→string conversions for type/subtype selectors are cached via\n// prevType*/prevSubType* fields because batches typically repeat the same\n// sub-device identifiers.\n// - Timestamp parsing tries Second precision first; if that fails it retries\n// Millisecond, Microsecond, and Nanosecond in turn. 
A missing timestamp\n// falls back to time.Now().\n//\n// When the checkpoint format is \"wal\" each successfully decoded sample is also\n// sent to WALMessages so the WAL staging goroutine can persist it durably\n// before the next binary snapshot.\nfunc DecodeLine(dec *lineprotocol.Decoder,\n\tms *MemoryStore,\n\tclusterDefault string,\n) error {\n\t// Reduce allocations in loop:\n\tt := time.Now()\n\tmetric := Metric{}\n\tst := decodeStatePool.Get().(*decodeState)\n\tdefer decodeStatePool.Put(st)\n\n\t// Optimize for the case where all lines in a \"batch\" are about the same\n\t// cluster and host. By using `WriteToLevel` (level = host), we do not need\n\t// to take the root- and cluster-level lock as often.\n\tvar lvl *Level = nil\n\tprevCluster, prevHost := \"\", \"\"\n\n\tvar ok bool\n\tfor dec.Next() {\n\t\trawmeasurement, err := dec.Measurement()\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\t// Needs to be copied because another call to dec.* would\n\t\t// invalidate the returned slice.\n\t\tst.metricBuf = append(st.metricBuf[:0], rawmeasurement...)\n\n\t\t// The go compiler optimizes map[string(byteslice)] lookups:\n\t\tmetric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]\n\t\tif !ok {\n\t\t\tcontinue\n\t\t}\n\n\t\tst.typeBuf, st.subTypeBuf = st.typeBuf[:0], st.subTypeBuf[:0]\n\t\tcluster, host := clusterDefault, \"\"\n\t\tfor {\n\t\t\tkey, val, err := dec.NextTag()\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t\tif key == nil {\n\t\t\t\tbreak\n\t\t\t}\n\n\t\t\t// The go compiler optimizes string([]byte{...}) == \"...\":\n\t\t\tswitch string(key) {\n\t\t\tcase \"cluster\":\n\t\t\t\tif string(val) == prevCluster {\n\t\t\t\t\tcluster = prevCluster\n\t\t\t\t} else {\n\t\t\t\t\tcluster = string(val)\n\t\t\t\t\tlvl = nil\n\t\t\t\t}\n\t\t\tcase \"hostname\", \"host\":\n\t\t\t\tif string(val) == prevHost {\n\t\t\t\t\thost = prevHost\n\t\t\t\t} else {\n\t\t\t\t\thost = string(val)\n\t\t\t\t\tlvl = nil\n\t\t\t\t}\n\t\t\tcase \"type\":\n\t\t\t\tif string(val) == \"node\" {\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// We cannot be sure that the \"type\" tag comes before the \"type-id\" tag:\n\t\t\t\tif len(st.typeBuf) == 0 {\n\t\t\t\t\tst.typeBuf = append(st.typeBuf, val...)\n\t\t\t\t} else {\n\t\t\t\t\tst.typeBuf = reorder(st.typeBuf, val)\n\t\t\t\t}\n\t\t\tcase \"type-id\":\n\t\t\t\tst.typeBuf = append(st.typeBuf, val...)\n\t\t\tcase \"subtype\":\n\t\t\t\t// We cannot be sure that the \"subtype\" tag comes before the \"stype-id\" tag:\n\t\t\t\tif len(st.subTypeBuf) == 0 {\n\t\t\t\t\tst.subTypeBuf = append(st.subTypeBuf, val...)\n\t\t\t\t} else {\n\t\t\t\t\tst.subTypeBuf = reorder(st.subTypeBuf, val)\n\t\t\t\t}\n\t\t\tcase \"stype-id\":\n\t\t\t\tst.subTypeBuf = append(st.subTypeBuf, val...)\n\t\t\tdefault:\n\t\t\t}\n\t\t}\n\n\t\t// If the cluster or host changed, the lvl was set to nil\n\t\tif lvl == nil {\n\t\t\tst.selector = st.selector[:2]\n\t\t\tst.selector[0], st.selector[1] = cluster, host\n\t\t\tlvl = ms.GetLevel(st.selector)\n\t\t\tprevCluster, prevHost = cluster, host\n\t\t}\n\n\t\t// subtypes: cache []byte→string conversions; messages in a batch typically\n\t\t// share the same type/subtype so the hit rate is very high.\n\t\tst.selector = st.selector[:0]\n\t\tif len(st.typeBuf) > 0 {\n\t\t\tif !bytes.Equal(st.typeBuf, st.prevTypeBytes) {\n\t\t\t\tst.prevTypeBytes = append(st.prevTypeBytes[:0], st.typeBuf...)\n\t\t\t\tst.prevTypeStr = string(st.typeBuf)\n\t\t\t}\n\t\t\tst.selector = append(st.selector, st.prevTypeStr)\n\t\t\tif len(st.subTypeBuf) > 0 
{\n\t\t\t\tif !bytes.Equal(st.subTypeBuf, st.prevSubTypeBytes) {\n\t\t\t\t\tst.prevSubTypeBytes = append(st.prevSubTypeBytes[:0], st.subTypeBuf...)\n\t\t\t\t\tst.prevSubTypeStr = string(st.subTypeBuf)\n\t\t\t\t}\n\t\t\t\tst.selector = append(st.selector, st.prevSubTypeStr)\n\t\t\t}\n\t\t}\n\n\t\tfor {\n\t\t\tkey, val, err := dec.NextField()\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\n\t\t\tif key == nil {\n\t\t\t\tbreak\n\t\t\t}\n\n\t\t\tif string(key) != \"value\" {\n\t\t\t\treturn fmt.Errorf(\"host %s: unknown field: '%s' (value: %#v)\", host, string(key), val)\n\t\t\t}\n\n\t\t\tif val.Kind() == lineprotocol.Float {\n\t\t\t\tmetric.Value = schema.Float(val.FloatV())\n\t\t\t} else if val.Kind() == lineprotocol.Int {\n\t\t\t\tmetric.Value = schema.Float(val.IntV())\n\t\t\t} else if val.Kind() == lineprotocol.Uint {\n\t\t\t\tmetric.Value = schema.Float(val.UintV())\n\t\t\t} else {\n\t\t\t\treturn fmt.Errorf(\"host %s: unsupported value type in message: %s\", host, val.Kind().String())\n\t\t\t}\n\t\t}\n\n\t\tif t, err = dec.Time(lineprotocol.Second, t); err != nil {\n\t\t\tt = time.Now()\n\t\t\tif t, err = dec.Time(lineprotocol.Millisecond, t); err != nil {\n\t\t\t\tt = time.Now()\n\t\t\t\tif t, err = dec.Time(lineprotocol.Microsecond, t); err != nil {\n\t\t\t\t\tt = time.Now()\n\t\t\t\t\tif t, err = dec.Time(lineprotocol.Nanosecond, t); err != nil {\n\t\t\t\t\t\treturn fmt.Errorf(\"host %s: timestamp : %#v with error : %#v\", host, t, err.Error())\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tif err != nil {\n\t\t\treturn fmt.Errorf(\"host %s: timestamp : %#v with error : %#v\", host, t, err.Error())\n\t\t}\n\n\t\ttime := t.Unix()\n\n\t\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t\tmsg := &WALMessage{\n\t\t\t\tMetricName: string(st.metricBuf),\n\t\t\t\tCluster: cluster,\n\t\t\t\tNode: host,\n\t\t\t\tSelector: append([]string{}, st.selector...),\n\t\t\t\tValue: metric.Value,\n\t\t\t\tTimestamp: time,\n\t\t\t}\n\t\t\tif !SendWALMessage(msg) {\n\t\t\t\t// WAL shard channel full — metric is written to memory store but not WAL.\n\t\t\t\t// Next binary snapshot will capture it.\n\t\t\t\tif dropped := walDropped.Add(1); dropped%10000 == 1 {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL shard channel full, dropped %d messages (data safe in memory, next snapshot will capture)\", dropped)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tif err := ms.WriteToLevel(lvl, st.selector, time, []Metric{metric}); err != nil {\n\t\t\tcclog.Warnf(\"write error for host %s metric %s at ts %d: %s\", host, string(st.metricBuf), time, err.Error())\n\t\t\tcontinue\n\t\t}\n\t}\n\treturn nil\n}\n","numLines":380,"startLine":1,"totalLines":380}},"sourceToolAssistantUUID":"1bc6adfc-8b1c-4c7c-9632-94c6746778f8"}
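For orientation, here is a minimal, self-contained sketch of the input DecodeLine consumes, using only decoder calls that appear in lineprotocol.go above (NewDecoderWithBytes, Next, Measurement, NextTag, NextField, Time). The sample line, tag values, and printing are illustrative, not part of cc-backend.

```go
package main

import (
	"fmt"
	"time"

	"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
)

func main() {
	// One sample in the format documented at the top of lineprotocol.go.
	line := []byte("cpu_load,cluster=emmy,hostname=node001,type=socket,type-id=0 value=1.5 1700000000\n")
	dec := lineprotocol.NewDecoderWithBytes(line)

	for dec.Next() {
		name, err := dec.Measurement()
		if err != nil {
			panic(err)
		}
		fmt.Printf("metric: %s\n", name)

		// Tags arrive in wire order; DecodeLine tolerates type/type-id
		// arriving in either order via its reorder helper.
		for {
			key, val, err := dec.NextTag()
			if err != nil || key == nil {
				break
			}
			fmt.Printf("  tag %s=%s\n", key, val)
		}

		for {
			key, val, err := dec.NextField()
			if err != nil || key == nil {
				break
			}
			if val.Kind() == lineprotocol.Float {
				fmt.Printf("  field %s=%v\n", key, val.FloatV())
			}
		}

		ts, err := dec.Time(lineprotocol.Second, time.Now())
		if err != nil {
			panic(err)
		}
		fmt.Printf("  ts: %d\n", ts.Unix())
	}
}
```

Running this against the sample line yields the metric name, the cluster/hostname routing tags, the socket0 sub-device selector parts, the float value, and the second-precision timestamp, which is exactly the tuple DecodeLine turns into a WriteToLevel call.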
{"parentUuid":"b98b4b7d-c14b-40f3-8615-568be15f64b0","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_011BgSkFxKe8tnuAhEi2JNoe","type":"message","role":"assistant","content":[{"type":"thinking","thinking":"Now I have a comprehensive understanding of the metricstore package. Let me analyze what doc comments need to be updated to reflect the current implementation.\n\nAfter reviewing all files, the documentation is actually quite comprehensive and up-to-date. Let me identify specific issues:\n\n1. **metricstore.go package doc**: Mentions \"JSON or Avro format\" for checkpointing - Avro is gone, current formats are JSON and WAL. Also mentions \"NATS integration for metric ingestion\" which is correct.\n\n2. **metricstore.go `Init` function doc**: Says \"Restores checkpoints within the retention window\" - correct. Also says \"Optionally subscribes to NATS for real-time metric ingestion\" - correct. \n\nThe rawConfig parameter doc is fine, but the `metrics map[string]MetricConfig` parameter is missing from the documentation. The `Checkpointing` doc in checkpoint.go claims checkpoints are written every 12 hours as hardcoded, but the code actually reads from `Keys.CheckpointInterval` first, so that needs fixing. Archive.go is missing a file-level doc comment and the `CleanUp` function lacks documentation, though `archiveCheckpoints` correctly describes producing one Parquet file per cluster. The `MemoryStore.Stats` function in stats.go has a comment that I should verify.\n\nI still need to check debug.go, configSchema.go, and parquetArchive.go. Looking at the main issues: the package doc in metricstore.go incorrectly mentions \"JSON or Avro format\" when it should say \"JSON or WAL format\", and the `Checkpointing` doc needs to clarify that the 12-hour interval is configurable through `Keys.CheckpointInterval` rather than hardcoded.\n\nThe `Init` function documentation is also incomplete—it documents the `rawConfig` and `wg` parameters but omits the `metrics map[string]MetricConfig` parameter entirely. Additionally, I notice the `walDropped` variable in lineprotocol.go has a comment but the periodic reset in checkpoint.go doesn't reference it properly.\n\nLooking at the remaining issues: archive.go is missing both a package-level doc comment and a doc comment for the `CleanUp` function, and stats.go has an inline comment for `MemoryStore.Stats` instead of a proper doc comment format.\n\nNow I can map out the specific fixes needed. The package documentation in metricstore.go incorrectly mentions \"JSON or Avro format\" when it should say \"JSON or WAL format.\" The `Checkpointing` function doc overstates that checkpoints are hardcoded to 12 hours when they're actually configurable with that as the default. And the `Init` doc needs the missing `metrics` parameter documented. Looking at the `walDropped` counter, it's being reset after checkpoint and WAL rotation in `checkpoint.go`, which is properly documented in `lineprotocol.go`. 
Now I'm checking `RotateWALFilesAfterShutdown` in `walCheckpoint.go` and noticing its doc comment is incomplete—it describes sending rotation requests but cuts off mid-sentence, so I need to verify what it actually does and update that accordingly.\n\nI've identified several documentation issues to fix: the package-level doc in `metricstore.go` incorrectly mentions Avro format instead of WAL format, the `Checkpointing` doc makes an inaccurate claim about hardcoded values, the `Init` function is missing the `metrics` parameter in its docs, `RotateWALFilesAfterShutdown` has a misleading comment that doesn't match its actual behavior, `archive.go` is missing file and function documentation, and `stats.go` has inconsistent comment formatting before the `MemoryStore.Stats` function.\n\nLooking more closely at the `RotateWALFilesAfterShutdown` function, the doc comment describing it as sending rotation requests and blocking is actually incorrect—the function directly removes files rather than sending requests.","signature":"Ess3ClsIDBgCKkBudCGsP2bIPjPuHC/oN/0nhMgnxX9w6zZs1gJmRWZxmd4VMZfWtihIK2ay/9D9+XHpxo0uHB98O9ABkZzrGj1fMhFjbGF1ZGUtc29ubmV0LTQtNjgAEgwEUsT22BbvTImfvVYaDDxrSKfq89iXxSBK2CIwz+TUuruyIUlN0t1j6aV3CcoA1IoDFqJqqTvM1lzT15bg7JCZ47bBk8bb9UJ8UXcGKp02/12rbaH0TA0qo8cFyOux+CPtOHPFhZM+qX1z+msIst2Qoe+p9oC7Zkup2uP331E1h9dJc3me6WdU5qW/cMsbSz6aPhLFf1YCswlo0D+/C1SPtubLCX2cSJw1k+RxBpRZlEh42ylTO/znO2lS5uUhIS02m8aIp8bDBzO7mLmjW4lWRMTyrLkSwcOfjpQO8QJjpyNaMpdwsn18hpmssa7Q+EA1GMLfP428EVudLPQ458wTsvg9OrIeGcpOsYgx39RscwNxdNF7LE/LFft+VkVF6VtDEhANJ2ptqiVV3MHEcjXka1XnYDEqzXfzSfGJBe1Gm8nJaPJudGQh3XobVeBwXU5/tyqoDXZcK5WR5zKmW6p//MaDvsnC5GMvd3veqSdRdxMT8/frsejDChJtdadz40iKBteH7OHT7WnNFTMXuUID1T008LGsqIEPjw2fw0q1gV5dq4EBoBJorfF8rGOrFPCHZWsG+itbJ9bKCOCypff7LEUqpSDBG5uRuiml6U6+jGpHCqHD0pPdjJgtBQFBKz+zWG9NMZc+4IOUbr5PPytSPXCTnJUL7CkV8dOvdI6QZfQNXscxBghjNBYB8u15D7SS7tf5QViWc5He4aBXNzcmKywhe96SH2nkhIPNxuIh7mP0xY7aDlpKjEOu7xHIXxahyRPONQnFePdA6OClV/PsccgRgpJhZBvunCOudaedPUkohok+PLmh13DNGbKgV7/zg/wILDoPcb4fjKgWbll/c0TAWsaHt8fMtU9zHiNGKkMr9sQ3drMakxzn+tFtM7BHP9u60HArJRhk1pqhQ3PwNI10jRgm/O9yByZ0EfTQCq7EvGRjogbSbV+RTPnPby23mCgbtDQoBB9MjtEDsbKOdLF4hPW4STcZ/IJVTCqrS2NoDUifMHTQszYU8w/sMu97FMmuw3jr5oqK3+NqFpq/QuShHvSFZB+OYmhGUxbFfTpb9MQix3Sj17vp9kiZIvfQZ9JDr7+P2sXxI0ULeGpvuCCOKI3lg6zbWUgh0f/iCng8pkomArlE94qB+oxwWwUSa1pvtTuGnL+NRVkb+7/kQAAM9cMFVlcYiRs1yVYfd47K0ieuA12KOPYGhi1jc5S2RiCoRWPnjrvbfH5mCkfP8f5mtuRO/XWLH081tkWzEBjjH2xfV/dN7BvsRhr30g4Gn44GlpXyU0YpzE/SlQSbiL6Pc6/EUfwgfNEtoyNhta3F3gukDoae7aAixBAjmDCAuobxyJnh4qcvuMXB8BRVQN0+A3JIflgcnUye8T9u4L9N3sgjF51XOF7WlgGSStxfMIWJH89oparJ9zbZG43zOLmYe2a7elytQWM1gF9e/M4b60Eka2+MZZ7KQ91odgdrDgmKJrqELdnLqY6vM7l9+tiIdvOgGUQ8AwNAuz5OTZl9B/cJavFC8XEaD/1tFO5gWGqNZXm46MnKKxOU5lshZS82yUEz5xoYtX7pAptg12cvZ/A1Stpr0DtHhiqHERnMXGiv7iaAFuwKLg2B3Hweuk5f3HKZEELW7peBSwCh11foZeePjNVRd01NTYJHoakoZBCHpnJB91nR6B2qcSUh5mQvl9a+V2MeF0lctxJvro7IxpH3l4UyYQNRGSWfYil4CPV9AIjXcN3bHhiCCL29NGgv5tfDVHcGQhzZjSwMzNeXL4WR5pGP5CDj18d6UEtKyzQ9jFj9P0wRbHnHk70DycURdivA1Xti9jiVgaOtw74wpqffNFVdbyGLU5LTYHNaSUQ5pvts96heKVOiKj/uQTGEGsd4UAfg+KeCu1rYejuBjOP9f+veVcFkPqvYE6YJMzfZKYTXFvfcx/Vch3V9oeuqzqd6yGjjPXGxoz1KW8IxglPPs7AzbIbPJYa2iSPFOZvyfRG/g4m/HSlshccic+TTkFAKU68/jzmPi5qePzgbbKp8NWj44CEUMDf6HdsIuQ/6WhLz7AQinFS98mAJyjUKidpujreePFdtpbeQea/kwS/4uCfuRBiMSV47U8pp3FBTnH+1HhslZIV3szMtxiLbNF2zrDTSIr0ijK+PRDFbiTz5Nb4MvkOm+YTqS0BrLBtLGFGp0T42hu7zaFOVEEydNZoH+HMxS+YJZ1MhtMMJwh8P4XM/J1FlvPEdpiwcQ+PznwsfmzeXBoMPn2U/4LNovK3+xfYVQnMZN++JXxB9Gsiew8kQFv7x+Iq+nnoSw6t7aQDjvymZoF4D6m/Z2kv0srEppxYnPqziNOVWTDOzI2TD0h5V8kftCnKsczvsd4RzFp0KaI7hkR+sTTzZ4
ZMXGTULLNjLY7owXAE+IL1DYbJDLlXp2cWBh6VSM6hUko26t29FZP8t0SW8EtYxqyi+m4bj9oVbb7yQYF/thK6Vg8nD/ZcIU6Xo1hR2Hi36JDCfDWw+x2HbdIRl/s+ON3F+nZ59ywmB5gmtdMLGxyWJZ2Jj7tGy6bVGNbNIm3t1NfJsiqYtnL8xzSo6/8QapSb+9YEBHxODPeMA/GZwIjH80X9qbcNauTWPR6LqNKp8P0gdaPb7NJQVjg/D6QwWJut3j/8lu0OsekGb7Qqa/qYQuMsWgAOLtSVtoQ9ivQfPXZ95S4oTZ1Jdf23/ZzPWnaPNydiYef10DQmJGXoE0VvsOSLUlouI/bhMNyJKo3LKDHOaOIzW+pHkk/Y8zvbo2h3o8WpFmm1yjT6YJsFmnZsq2z7mmrOJ8xyI4PSUaxkAaAYarvyT61G6rg5Yo8zIZE1wG4jQ8yW0YDtEHCnBMpWF9zAjEtH+CLod667nqjFSJ4xnx9l4mjqA5ANbwFQMcDyfPCOofRF6Q3Uu4ui6RhcTuCfmAGAL+nTcMYch5qD6/6QlhpffATcChD6fHX3UVaOq3BvD8Jo/hhYC55sZK1jvHBPkTMCP+/xMZnl9tAhS5AwLImheGmFPVbKOBfkcHsmbJ01/v4z2r3kLiiiZPHGdbkJ+i8SeipkOPWnQOT9s8+7rg9aF9Bradj6Hvpc+G0pyYA59usI5AStV9BMrfUWMYAZBwzm9d36bWg8D1XOsLnID0W7CAu+n6AmuSsQ5VBILiUAOidXO3keplK7rvwha0WiJNsL0fRzRWTU7AG/Qv55C61tjn2Sx5DMG5kps9FSLR1Sf4JWhohjLOVsZrIWQW0z5LQzvBqARHEmS7JAH88Ir7Pywh12EohJuKA/eK/VXUcKYK6j0vXaM4Sq7+oF+j7XiLgN2KRQabAKFeQIBtn6aASjhaIxALyx75C60ghIsZya+wZAQi0YdalFigqnZAsGzoLqWYnxqbGvwX2DY4Cf7pXLC1pidqOfPP8U4v4cJuy9/cf/bBOHBIaok08Ev9g737CBOrjULA5jIlzlYuwKgqCyoUJrxF9z/3qEY7xIxVKj7Vi3TFbbVsC060NitjO7xvd9sdN6SNQHhtIuw/Wq/Qatvc4/1Pbzsrj/X4xpqLaDe+VsCQI+PUatmjBS7RNl6XZ3mp40++S2joQxZPdmEaPshgDCZZyTp2JMO5vCfsjSV2zosreIFojmn6NZX/vxoW/fhP4VZRnhCbhTvCxCp5rCzSPIBasrESYYmQ77IxfrmpFpCvvfynVWuEnG1Iz554OPQKxosFv2efGfO8Oa98hISfXkESuVEWfsz2GNIHudRsgcwGZgzTuBn+zPJjncj6Eo570qvoh7I64Q1F5JTDJt2A6ojyO4CoALzu2anLNSxz/okAbWXgaj74ZWtuzbzEwCMLdU7dVN85hDoEspaP5HVMLpyL8+Wh3eYeDvOgHhcbdjdVvdbn2kleQ1boaC0/Jc25PbHRyNxE5zR0hYo9B5yWlMIBrbTXTtxoEXUabECaoQIFUvBPGzAJPUuz3JFn7HySIkhy/g10XSUguIaVJLTcP/ckn5GXeOgazfa3kFUEuz3B07avyBAx1czVHLWQO+WdT8ODKqKycs6fGzJ3nj4rlBEyagWElq/K0B9YNKFSHvX2s7Q4o7YXvsMucHd3Td8rOzGHz3CB5S3HGiXVy8WRXJrwGA3t4WEQWqi8Am4oncdg/87PixaG8qYUseSKxpuucbu+zZLNuv5z3q0+yf/t1DJAbhkXCZWAFJJwMK5BBzM4jYR9faSbGrVQNt2Y2R1jeWqY2aPR4K2iYejCpei2tj9oNKANmlttxSwCL3lx7JbFenT1RInFzLdg1DJ95MgOwx1Ismgk952j3a96yt1jjx+6cz7vfxAda/aN9u/JNeLJ0iadZWYWloFFVeIv/VIrb+hWc2JKLuIbs/rkwXuTuLHu0ciEXY3lD93/2EvC8RBqLbcL1jPiBZcIPdzWgh5CozlAetpY83lqJ+eE5jUGQgdL4JQ6EQr7NbsiuN7UpzfvGMk2AcviSy7uyIJgsJaTyQz4W0yCC2ZzFYMiB8jQiRfIIpWN9V+FVgCrVedGvzOiixuk+xMifQCEg9YKVxZ/m0IMtbrFbT8fXzmvD5c2E0c6iR7r22pMbW1Nz5za5ucOFEBlzbii0Vcei5XXgiGQkTb0ud7Ku8OghxsahG+Z0EF2IoDqA1GbdLDmUBps1TVRshSCCeHPLdf6b/3c9x2I8Xq9vX3W0ug0v0dQAY/79HuCDasKMWm+KE2mrHxu5tzMN40/PUvt1QV1yZffvqJDdWRqQDI+0wokjB4pvVZKvTQk6oawYYHLZdiuIEPXqX+ewC+IqXa86/4YSkt8Wy1tJCDTu+/6C9XMGx90YDJrDgsgstG2Qhr6ZIVGpPfMy/gF0v0KdFc0aFbWKc+2JV6XgFcp9Z0j5eRcQnoEzpMEKLz9fHzC9KPJLDfkvrIbjCH7eeZWn5W1yv7wU/Z1YMMlvCKoxWYVmrh2VBzR1+K6JwhUKLW7kZigYZGPwc5OPgnqiut+/fgzVhklvUbfFQZDP4EPjkzkO5oyhOCqqx9LoX/7xjchBdJj+i3rXLk8WUn4U+szhQG1qXmeBOQyHa2SODvvrYARKbAB6Xotz6B038X3DM4WsiZId4s+Ca+hs/F8Z20KZS7yn33qqI0hYu3afd4uh0G6xD22kupigFzwOgyldnlQU5WiEb3NxszKnvv6WDMLPt+u/GhuGIhlFZq9WB7VscuE6xRrnIQojPG90xTKIIBl+NcU2QfWMXcs5QJNsrnyUwLTyOXzG2x7IOkYbRqymh1FI9fgZ81mFNOkSh16N0s/YtbBhqgwjAyP9OBi4JfIhs8gRu6fLXEjrbAd/LilRUa+5SIZbsGEss/RvQngrhtt+0loErUu/Mi4WDHkAVLknpPPHJkWZ3QCKk5eF6q6vb+hPJcq3eyed5bmpBZgPexXX5uLBmP/3NMA7GN6MujFXHfTbCERc15zx3E5cU1WTxYzkHN+0KflNYAr1/mOP9meeSb+KlpgO9eC5AsJ8eGgvMLYu8J6PYMGgiRrlZ3bJBkkDfiX2DKLuJQyNIwrZWQvSac2w4QXlhYioqCYf/a7BMp34oszl4+pQv0tMx/YUgcOsbgTLGukX/Z5K2sl9K37p/ZJQMeO6d9CtY2INd8h4jGhMIRRWC2HJGRrMvPFKiUflM2fMHHxVwZr3NwBUVQ7LPV3cZwVH8hrv4q/fr0S+ck8RMRIRn4okt1Gp45KeEfqbI2i+/DqU7rrQLKhIMMH0MJLoIIJgu1H/etd9Pmk9YFHksEQkWyA0Oc0Ec3M4eAqZWi5swidiHObQzmjddOI8uHXt2wpSN+IdVI44IqHuuygUxcFB/8NOOHP7+EdQWD6hW7oR/Kxyj+Ib3A2AI2YIuP49eoaTHyA4EJnJ/N/iABJ80zDObXCIDyPIGJub7w1P65fR2cR56hLqXbDn7n+6c6VfPzOqyty/1yMs+KIoK0P1lNP1cr1KxleJ4t
DH/uGGfgghOYYah91luTpmy4Aas7M3wgEUWq3AIx7C5xXNe36G9bz8T8+CdrCZ3TPioLmTiYOq7A2HB2tW1gOz6MAWWN07Y+UP5k7sjRFOoaZWXM+LX4SEx1yoxA3jYDikLsuGS4+J0nBavAyK+Y0PXlm6acky6cX/HbVxvVEmwHMPpV2ywCZBU2ilH8d823vj9LzQl+yJdMLsPJm9bVgqsHXg1RKpE1SP/mzU7n/i8pZDpAV2ik44ygWSvU5IvDuKp/UNBMWlqGV1nK5HOlkm/gRc3daj8Kz9vDLTWRGFkrekfyoiQdHSo1z7Zw47FEc4Ab9dMhR0x94kUNrkLGj6xfAtbWi2DFLzQM5KKj4J+gu0vlUsPx9CKkU3cKvuhoTIP7Tr5Htxs5dfsaInUe9MH9ir2rdm5pQfm4Sqit15D6VBTqU7vO9lyqYIgZrCzzfeat6hXa3fTvFnQWZG8taG2xEP5elM+9+XWFQtRIOADAEau5VGnzmysH03r0L2sGyLH64bftrXixb2tODy1J7C8LMS35UOCA9vMGJh6dlgroBXOsPRHPyWgLlaJZOVqYQCLqKZccNQpWOA7mCUk7Z5eBcoh8mhoB2JEP3d3Eb46aZn4gBLk+PGQKk9tVH8stj5a/GufD/7Rcwgbukw2IhQ9RQSYvJ5t5LwwC7dNLPC19z91ej2QgFiZ97bUHrutC5101kc3EprQQAd00Dj/KvbHG2UQSulTgqXKbJEZu/aYY0nWJImEX7ditTriU9uxiXpQrAVP+SVY/ZGFfzmhvOBqy/A3dVpHZrf8wIAAxYQBnrY8pCCd5wob50v9F3/9pBeUO2wHoznirad7bQZ4+n5yxXhoWttlJwE0RcPzmFqX3FnKMCQnw9YBXMAFNYjPY8sHysKX0BPR5ir3ulJr1F7HptdT6HcK/zps73trZKpm/yfwLHpEu4hvhWKPqiWgeLVHOYNYeaeLyoovkHvSe/1X7/SwplzyDf8AWPDSxQhRVF7VOB22EPi7fsePXpSLgGa/SD5xqDAqLIsUXEjK4fRVU9wWqqBrdmqU5vcyOQxBt1tFba5gz2FH9WEF2tKSb6KT/ry55U/Anbbq705WluYTn2c+oRzXaF+jpir8gOPa3bEEpCj4hlX0GbkMbddN5H+L0wLy/diHvc0MQrQD9FEyf8munOSKjtfOaEayBVDwu+gCNHjC+RQwloMmRQdg/LRxukE5EsRT96nx7CRACUHPjg98n/82wv1+yh7KAqn2GipDnF6FYoodE6F1v76F45aMUhRIWH4CNG/n36+JGkOh6AuOeqQ/18TR0RkT3vv0ZMAQcVbwBM6OPVSrHIreeCwdxXwohdDDLBoGGebmbasX1ri7L/9bM/O4+64ZILcV0+eQmxQWVYlvT/QRNArKVRATs2n4/bGAv6exmXfc/OCoRNyKGDfvhkakUanjhR7/QgL9PNYhkiSl9+mV43181CiPJ1HCykb1iNPdKyHEAnHQkbmUxW/3hkzGfjUwOqFzjNMeRN/DefRgBgbtkx4fhrvFbDp1ij3z1ljB0Nc7GZvT3kbaH42z1JuiVrGbwojSlwhB/wGqBtEOd/HXRuo5ExVhBMgDUn6bORGnUmDcch4WWrbZFVB4JB5nEdnQoZyTVnFR5IcemkmotAL0vPmOzzt7Uxrl9D57IxkhLQREyiVEXFkaNv3xKDkdEpEwiF3GlnHErXSXSeOZgTljimI4dgS2oIZi+ReGZkuEJrpQmuReCWfvnmIUOg2hdpuYzgjHZLHc7x8srjZuhgM0i5GLSUoZuCzWx+pUzZwAgFQkYjERYC1XPrcb17LyLlEN8Ely5ytW2YII06cz6CGQTrZuF3/wuZz+oQgcUfytjUAlll2NS31Fv63jyuxi2pWHbvBEVfRIa40hyHgwysCR+HKguc02o0ZfawLOlclQMX/L7YafXJ80C2LlITL6r45sabXPq/jYdMeJ7FjnaIdq6ZTkKIRwtcGTPt4RwK6W2z2Va8uscWeC1xFfy4djnZhBJ5xBMSOa3w5VAvcsxdzsvQZ5HCpRAOMoY0V8bK65FdzrsUTAonAxmW+FW4IjRwgVivp1YnH28Z9y35Rfy4Jk4w0wkiw9jrWfe70Q2uJzcHwXNh7S1bZvYfrSCGM/FS/s/KKsduf51k84qApmErU7ZF/pV3EDSRWZbYEpQHGgHuvlonZXvueNnRngHf2uTSpVOrsRZEcVjbkB5P9dwU5lu+GGv5bnsAPxHMV/Y/Ne6UaLVFiWrW8HPVijtiuUe1e1gLoRLx3YVaxLtWXq9YIXwqLlOU/xM6m3V2qrTljJv6ZZtqHUyn0E5yoj1d7r6+5Ku4iRZUg11c/NXXUQFtbC2bpq+RMosYrXDiePSH0hgTNYDjPOzHpagvhtBSEuyO36VSdSsAP+LmHGZBuit1/PSWg80xzrBd+SYmJvFJ08KG5DxWblsc2EjY9SBbuMZbMHBeIaBaYIe1jtW5T3n/nT87IvmMwr0Cw+E+s9Nx//mmLlsRe9nZK8/4TEPw4EOnO+/yTz75mZUpBhfdXOK88skap4wchR7VDi3mly2lGDVUpPBO1ZaRjHVbp3051IU65NTq8KOmfYk5nq8H41pXY9rKx7eUlxQnB53pn+AEtnOha8D3WICzI7y6VX1ktM7nguk/Xvo8MXJXWrEgWd3BLGYSYcCvNPyxv0Kg646oYsURQ0j/hFVzDWHL9wtg2YeBHgjzFuxnDD/0NWlcTAUcpaSSLrCRsz8ocjdWZuIbwtNHLi5LY+Oa2ru7Y8pfzKXrK6z7rd+X478vUwLJLsnI6d2g+aN7lmmdBKT3MLcTMx6YMOhoGR1lA93SeOQgdJEhvtzDB6oNAncyKyrZfo7HKo1KJ8SKU0fkGF+hH90DG3Nr7x4AOSNl2d1FTvjbskLUcFajb357At6iW3LBf5XQ5D63giRZA+euHsxxKvDD4NeTw5MUEC8SiNI4Nfxqtfv8Dt4uFNuYp/3HqHAXy/gveXdaYEVJjuUqBQG31dB/1AGFgNzssbWAA1ZlG0c0UBg0I1KFSr0uVVJ0JQj21R2gBRSG9BnuFuz94G2r9jQ4qVog3KkznYfd1GIAVbKxBNQ00EvHk+P17gotIhW1gl2tpvSqTLPuk4/WGljC+XP9qR86RdUbrNE/cv2aUJhUdw6r7olghMQq8EWll5GGxptqcZWY+ziod/MvmS7iGrm6ZI0UAY0KhdoNIElaZ8jJcsM3QOc3bTAMXu7f2onWzCVhXYtAgXtx1iXZnjFEXIKDWBMZY83x8Pn1ElJTG4IqahFU00DSXybxxYNyOZR+SWfdzYds+XMpDSw1G6qX2RU0rGrVzMTmQkzD6ymbzigGnlwZDA0co0MTIORMfc6S/AOvPbuEVlUMNTRLaWRxVsTmfEEVvnW20YYAQ=="}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":33852,"cache_read_input_tokens":742
72,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":33852},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXw1FrfaetD2ki76f4zs","type":"assistant","uuid":"e8884a6a-1272-468a-baf6-4f96cec3ae49","timestamp":"2026-03-29T19:35:16.740Z"}
{"parentUuid":"e8884a6a-1272-468a-baf6-4f96cec3ae49","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_011BgSkFxKe8tnuAhEi2JNoe","type":"message","role":"assistant","content":[{"type":"text","text":"Now I have a complete picture of the package. Let me identify the actual inaccuracies and missing documentation:\n\n1. **metricstore.go package doc**: mentions \"Avro format\" which no longer exists (replaced by WAL)\n2. **checkpoint.go `Checkpointing`**: says \"hardcoded\" 12 hours but it's configurable via `Keys.CheckpointInterval`\n3. **metricstore.go `Init`**: missing the `metrics` parameter in the Parameters section\n4. **walCheckpoint.go `RotateWALFilesAfterShutdown`**: copy-pasted doc from `RotateWALFiles` — wrong, this function directly removes files post-shutdown\n5. **archive.go**: no file-level doc comment and `CleanUp` has no doc comment"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":33852,"cache_read_input_tokens":74272,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":33852},"output_tokens":8,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXw1FrfaetD2ki76f4zs","type":"assistant","uuid":"b48cb196-8152-48f9-9b18-70dc305fd93e","timestamp":"2026-03-29T19:35:18.773Z"}
{"parentUuid":"b48cb196-8152-48f9-9b18-70dc305fd93e","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_011BgSkFxKe8tnuAhEi2JNoe","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_0198aNTY9sga2ePA2SRQsX2b","name":"Edit","input":{"replace_all":false,"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go","old_string":"// Package metricstore provides an efficient in-memory time-series metric storage system\n// with support for hierarchical data organization, checkpointing, and archiving.\n//\n// The package organizes metrics in a tree structure (cluster → host → component) and\n// provides concurrent read/write access to metric data with configurable aggregation strategies.\n// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,\n// and enforcing retention policies.\n//\n// Key features:\n// - In-memory metric storage with configurable retention\n// - Hierarchical data organization (selectors)\n// - Concurrent checkpoint/archive workers\n// - Support for sum and average aggregation\n// - NATS integration for metric ingestion","new_string":"// Package metricstore provides an efficient in-memory time-series metric storage system\n// with support for hierarchical data organization, checkpointing, and archiving.\n//\n// The package organizes metrics in a tree structure (cluster → host → component) and\n// provides concurrent read/write access to metric data with configurable aggregation strategies.\n// Background goroutines handle periodic checkpointing (JSON or WAL/binary format), archiving old data,\n// and enforcing retention policies.\n//\n// Key features:\n// - In-memory metric storage with configurable retention\n// - Hierarchical data organization (selectors)\n// - Concurrent checkpoint/archive workers\n// - Support for sum and average aggregation\n// - NATS integration for metric ingestion via InfluxDB line protocol"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":33852,"cache_read_input_tokens":74272,"output_tokens":2610,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":33852,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw1FrfaetD2ki76f4zs","type":"assistant","uuid":"504f5612-18ba-42a4-a3d2-787e39cd5aa3","timestamp":"2026-03-29T19:35:27.949Z"}
{"type":"file-history-snapshot","messageId":"504f5612-18ba-42a4-a3d2-787e39cd5aa3","snapshot":{"messageId":"5850443c-83f3-4981-96ba-d5e60679f257","trackedFileBackups":{"pkg/metricstore/metricstore.go":{"backupFileName":"94484c782e7b75a8@v1","version":1,"backupTime":"2026-03-29T19:35:39.217Z"}},"timestamp":"2026-03-29T19:34:20.233Z"},"isSnapshotUpdate":true}
{"parentUuid":"504f5612-18ba-42a4-a3d2-787e39cd5aa3","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Edit","command":"callback"},"parentToolUseID":"toolu_0198aNTY9sga2ePA2SRQsX2b","toolUseID":"toolu_0198aNTY9sga2ePA2SRQsX2b","timestamp":"2026-03-29T19:35:39.242Z","uuid":"7b748180-1b6b-4a6c-bcc2-de5df13b5262"}
{"parentUuid":"504f5612-18ba-42a4-a3d2-787e39cd5aa3","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_0198aNTY9sga2ePA2SRQsX2b","type":"tool_result","content":"The file /home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go has been updated successfully."}]},"uuid":"efe49036-707f-45fa-8c09-8e615214bef7","timestamp":"2026-03-29T19:35:39.238Z","toolUseResult":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go","oldString":"// Package metricstore provides an efficient in-memory time-series metric storage system\n// with support for hierarchical data organization, checkpointing, and archiving.\n//\n// The package organizes metrics in a tree structure (cluster → host → component) and\n// provides concurrent read/write access to metric data with configurable aggregation strategies.\n// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,\n// and enforcing retention policies.\n//\n// Key features:\n// - In-memory metric storage with configurable retention\n// - Hierarchical data organization (selectors)\n// - Concurrent checkpoint/archive workers\n// - Support for sum and average aggregation\n// - NATS integration for metric ingestion","newString":"// Package metricstore provides an efficient in-memory time-series metric storage system\n// with support for hierarchical data organization, checkpointing, and archiving.\n//\n// The package organizes metrics in a tree structure (cluster → host → component) and\n// provides concurrent read/write access to metric data with configurable aggregation strategies.\n// Background goroutines handle periodic checkpointing (JSON or WAL/binary format), archiving old data,\n// and enforcing retention policies.\n//\n// Key features:\n// - In-memory metric storage with configurable retention\n// - Hierarchical data organization (selectors)\n// - Concurrent checkpoint/archive workers\n// - Support for sum and average aggregation\n// - NATS integration for metric ingestion via InfluxDB line protocol","originalFile":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. 
This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides an efficient in-memory time-series metric storage system\n// with support for hierarchical data organization, checkpointing, and archiving.\n//\n// The package organizes metrics in a tree structure (cluster → host → component) and\n// provides concurrent read/write access to metric data with configurable aggregation strategies.\n// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,\n// and enforcing retention policies.\n//\n// Key features:\n// - In-memory metric storage with configurable retention\n// - Hierarchical data organization (selectors)\n// - Concurrent checkpoint/archive workers\n// - Support for sum and average aggregation\n// - NATS integration for metric ingestion\npackage metricstore\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"encoding/json\"\n\t\"errors\"\n\t\"fmt\"\n\t\"runtime\"\n\t\"runtime/debug\"\n\t\"slices\"\n\t\"sync\"\n\t\"time\"\n\n\t\"github.com/ClusterCockpit/cc-backend/internal/config\"\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/resampler\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n)\n\n// GlobalState holds the global state for the metric store with thread-safe access.\ntype GlobalState struct {\n\tmu sync.RWMutex\n\tlastRetentionTime int64\n\tselectorsExcluded bool\n}\n\nvar (\n\tsingleton sync.Once\n\tmsInstance *MemoryStore\n\t// shutdownFunc stores the context cancellation function created in Init\n\t// and is called during Shutdown to cancel all background goroutines\n\tshutdownFunc context.CancelFunc\n\tshutdownFuncMu sync.Mutex // Protects shutdownFunc from concurrent access\n\t// Create a global instance\n\tstate = &GlobalState{}\n)\n\n// NodeProvider provides information about nodes currently in use by running jobs.\n//\n// This interface allows metricstore to query job information without directly\n// depending on the repository package, breaking the import cycle.\n//\n// Implementations should return nodes that are actively processing jobs started\n// before the given timestamp. These nodes will be excluded from retention-based\n// garbage collection to prevent data loss for jobs that are still running or\n// recently completed.\ntype NodeProvider interface {\n\t// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames\n\t// that are currently in use by jobs that started before the given timestamp.\n\t//\n\t// Parameters:\n\t// - ts: Unix timestamp threshold - returns nodes with jobs started before this time\n\t//\n\t// Returns:\n\t// - Map of cluster names to lists of node hostnames that should be excluded from garbage collection\n\t// - Error if the query fails\n\tGetUsedNodes(ts int64) (map[string][]string, error)\n}\n\n// Metric represents a single metric data point to be written to the store.\ntype Metric struct {\n\tName string\n\tValue schema.Float\n\t// MetricConfig contains frequency and aggregation settings for this metric.\n\t// If Frequency is 0, configuration will be looked up from MemoryStore.Metrics during Write().\n\tMetricConfig MetricConfig\n}\n\n// MemoryStore is the main in-memory time-series metric storage implementation.\n//\n// It organizes metrics in a hierarchical tree structure where each level represents\n// a component of the system hierarchy (e.g., cluster → host → CPU). 
Each level can\n// store multiple metrics as time-series buffers.\n//\n// The store is initialized as a singleton via InitMetrics() and accessed via GetMemoryStore().\n// All public methods are safe for concurrent use.\ntype MemoryStore struct {\n\tMetrics map[string]MetricConfig\n\troot Level\n\tnodeProvider NodeProvider\n}\n\n// Init initializes the metric store from configuration and starts background workers.\n//\n// This function must be called exactly once before any other metricstore operations.\n// It performs the following initialization steps:\n// 1. Validates and decodes the metric store configuration\n// 2. Configures worker pool size (defaults to NumCPU/2+1, max 10)\n// 3. Loads metric configurations from all registered clusters\n// 4. Restores checkpoints within the retention window\n// 5. Starts background workers for retention, checkpointing, archiving, and monitoring\n// 6. Optionally subscribes to NATS for real-time metric ingestion\n//\n// Parameters:\n// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig)\n// - wg: WaitGroup that will be incremented for each background goroutine started\n//\n// The function will call cclog.Fatal on critical errors during initialization.\n// Use Shutdown() to cleanly stop all background workers started by Init().\n//\n// Note: Signal handling must be implemented by the caller. Call Shutdown() when\n// receiving termination signals to ensure checkpoint data is persisted.\nfunc Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.WaitGroup) {\n\tstartupTime := time.Now()\n\n\tif rawConfig != nil {\n\t\tconfig.Validate(configSchema, rawConfig)\n\t\tdec := json.NewDecoder(bytes.NewReader(rawConfig))\n\t\tdec.DisallowUnknownFields()\n\t\tif err := dec.Decode(&Keys); err != nil {\n\t\t\tcclog.Abortf(\"[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\\nError: %s\\n\", rawConfig, err.Error())\n\t\t}\n\t}\n\n\t// Set NumWorkers from config or use default\n\tif Keys.NumWorkers <= 0 {\n\t\tKeys.NumWorkers = min(runtime.NumCPU()/2+1, DefaultMaxWorkers)\n\t}\n\tcclog.Debugf(\"[METRICSTORE]> Using %d workers for checkpoint/archive operations\\n\", Keys.NumWorkers)\n\n\t// Pass the config.MetricStoreKeys\n\tInitMetrics(metrics)\n\n\tms := GetMemoryStore()\n\n\td, err := time.ParseDuration(Keys.RetentionInMemory)\n\tif err != nil {\n\t\tcclog.Fatal(err)\n\t}\n\n\trestoreFrom := startupTime.Add(-d)\n\tcclog.Infof(\"[METRICSTORE]> Loading checkpoints newer than %s\\n\", restoreFrom.Format(time.RFC3339))\n\tfiles, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())\n\tloadedData := ms.SizeInBytes() / 1024 / 1024 // In MB\n\tif err != nil {\n\t\tcclog.Fatalf(\"[METRICSTORE]> Loading checkpoints failed: %s\\n\", err.Error())\n\t} else {\n\t\tcclog.Infof(\"[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\\n\", files, loadedData, time.Since(startupTime).Seconds())\n\t}\n\n\t// Try to use less memory by forcing a GC run here and then\n\t// lowering the target percentage. 
The default of 100 means\n\t// that only once the ratio of new allocations exceeds the\n\t// previously active heap, a GC is triggered.\n\t// Forcing a GC here will set the \"previously active heap\"\n\t// to a minimum.\n\t// runtime.GC()\n\n\tctx, shutdown := context.WithCancel(context.Background())\n\n\tRetention(wg, ctx)\n\tCheckpointing(wg, ctx)\n\tCleanUp(wg, ctx)\n\tWALStaging(wg, ctx)\n\tMemoryUsageTracker(wg, ctx)\n\n\t// Note: Signal handling has been removed from this function.\n\t// The caller is responsible for handling shutdown signals and calling\n\t// the shutdown() function when appropriate.\n\t// Store the shutdown function for later use by Shutdown()\n\tshutdownFuncMu.Lock()\n\tshutdownFunc = shutdown\n\tshutdownFuncMu.Unlock()\n\n\tif Keys.Subscriptions != nil {\n\t\twg.Go(func() {\n\t\t\tif err := ReceiveNats(ms, Keys.NumWorkers, ctx); err != nil {\n\t\t\t\tcclog.Fatal(err)\n\t\t\t}\n\t\t})\n\t}\n}\n\n// InitMetrics initializes the singleton MemoryStore instance with the given metric configurations.\n//\n// This function must be called before GetMemoryStore() and can only be called once due to\n// the singleton pattern. It assigns each metric an internal offset for efficient buffer indexing.\n//\n// Parameters:\n// - metrics: Map of metric names to their configurations (frequency and aggregation strategy)\n//\n// Panics if any metric has Frequency == 0, which indicates an invalid configuration.\n//\n// After this call, the global msInstance is ready for use via GetMemoryStore().\nfunc InitMetrics(metrics map[string]MetricConfig) {\n\tsingleton.Do(func() {\n\t\toffset := 0\n\t\tfor key, cfg := range metrics {\n\t\t\tif cfg.Frequency == 0 {\n\t\t\t\tpanic(\"[METRICSTORE]> invalid frequency\")\n\t\t\t}\n\n\t\t\tmetrics[key] = MetricConfig{\n\t\t\t\tFrequency: cfg.Frequency,\n\t\t\t\tAggregation: cfg.Aggregation,\n\t\t\t\toffset: offset,\n\t\t\t}\n\t\t\toffset += 1\n\t\t}\n\n\t\tmsInstance = &MemoryStore{\n\t\t\troot: Level{\n\t\t\t\tmetrics: make([]*buffer, len(metrics)),\n\t\t\t\tchildren: make(map[string]*Level),\n\t\t\t},\n\t\t\tMetrics: metrics,\n\t\t}\n\t})\n}\n\n// GetMemoryStore returns the singleton MemoryStore instance.\n//\n// Returns the initialized MemoryStore singleton. Calls cclog.Fatal if InitMetrics() was not called first.\n//\n// This function is safe for concurrent use after initialization.\nfunc GetMemoryStore() *MemoryStore {\n\tif msInstance == nil {\n\t\tcclog.Warnf(\"[METRICSTORE]> MemoryStore not initialized!\")\n\t}\n\n\treturn msInstance\n}\n\nfunc (ms *MemoryStore) GetMetricFrequency(metricName string) (int64, error) {\n\tif metric, ok := ms.Metrics[metricName]; ok {\n\t\treturn metric.Frequency, nil\n\t}\n\treturn 0, fmt.Errorf(\"[METRICSTORE]> metric %s not found\", metricName)\n}\n\n// SetNodeProvider sets the NodeProvider implementation for the MemoryStore.\n// This must be called during initialization to provide job state information\n// for selective buffer retention during Free operations.\n// If not set, the Free function will fall back to freeing all buffers.\nfunc (ms *MemoryStore) SetNodeProvider(provider NodeProvider) {\n\tms.nodeProvider = provider\n}\n\n// Shutdown performs a graceful shutdown of the metric store.\n//\n// This function cancels all background goroutines started by Init() and writes\n// a final checkpoint to disk before returning. It should be called when the\n// application receives a termination signal.\n//\n// The function will:\n// 1. Cancel the context to stop all background workers\n// 2. 
Close the WAL messages channel if using WAL format\n// 3. Write a final checkpoint to preserve in-memory data\n// 4. Log any errors encountered during shutdown\n//\n// Note: This function blocks until the final checkpoint is written.\nfunc Shutdown() {\n\ttotalStart := time.Now()\n\n\tshutdownFuncMu.Lock()\n\tdefer shutdownFuncMu.Unlock()\n\tif shutdownFunc != nil {\n\t\tshutdownFunc()\n\t}\n\tcclog.Infof(\"[METRICSTORE]> Background workers cancelled (%v)\", time.Since(totalStart))\n\n\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t// Signal producers to stop sending before closing channels,\n\t\t// preventing send-on-closed-channel panics from in-flight NATS workers.\n\t\twalShuttingDown.Store(true)\n\t\t// Brief grace period for in-flight DecodeLine calls to complete.\n\t\ttime.Sleep(100 * time.Millisecond)\n\n\t\tfor _, ch := range walShardChs {\n\t\t\tclose(ch)\n\t\t}\n\t\tdrainStart := time.Now()\n\t\tWaitForWALStagingDrain()\n\t\tcclog.Infof(\"[METRICSTORE]> WAL staging goroutines exited (%v)\", time.Since(drainStart))\n\t}\n\n\tcclog.Infof(\"[METRICSTORE]> Writing checkpoint to '%s'...\", Keys.Checkpoints.RootDir)\n\tcheckpointStart := time.Now()\n\tvar files int\n\tvar err error\n\n\tms := GetMemoryStore()\n\n\tlastCheckpointMu.Lock()\n\tfrom := lastCheckpoint\n\tlastCheckpointMu.Unlock()\n\n\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t// WAL files are deleted per-host inside ToCheckpointWAL workers.\n\t\tfiles, _, err = ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n\t} else {\n\t\tfiles, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n\t}\n\n\tif err != nil {\n\t\tcclog.Errorf(\"[METRICSTORE]> Writing checkpoint failed: %s\", err.Error())\n\t}\n\tcclog.Infof(\"[METRICSTORE]> Done! (%d files written in %v, total shutdown: %v)\", files, time.Since(checkpointStart), time.Since(totalStart))\n}\n\n// Retention starts a background goroutine that periodically frees old metric data.\n//\n// This worker runs at half the retention interval and calls Free() to remove buffers\n// older than the configured retention time. 
It respects the NodeProvider to preserve\n// data for nodes with active jobs.\n//\n// Parameters:\n// - wg: WaitGroup to signal completion when context is cancelled\n// - ctx: Context for cancellation signal\n//\n// The goroutine exits when ctx is cancelled.\nfunc Retention(wg *sync.WaitGroup, ctx context.Context) {\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\t\td, err := time.ParseDuration(Keys.RetentionInMemory)\n\t\tif err != nil {\n\t\t\tcclog.Fatal(err)\n\t\t}\n\t\tif d <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\ttickInterval := d / 2\n\t\tif tickInterval <= 0 {\n\t\t\treturn\n\t\t}\n\t\tticker := time.NewTicker(tickInterval)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tstate.mu.Lock()\n\n\t\t\t\tt := time.Now().Add(-d)\n\n\t\t\t\tstate.lastRetentionTime = t.Unix()\n\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start freeing buffers (older than %s)...\\n\", t.Format(time.RFC3339))\n\n\t\t\t\tfreed, err := Free(ms, t)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> freeing up buffers failed: %s\\n\", err.Error())\n\t\t\t\t} else {\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d buffers freed\\n\", freed)\n\t\t\t\t}\n\n\t\t\t\tstate.mu.Unlock()\n\n\t\t\t\t// Clean up the buffer pool\n\t\t\t\tbufferPool.Clean(state.lastRetentionTime)\n\t\t\t}\n\t\t}\n\t})\n}\n\n// MemoryUsageTracker starts a background goroutine that monitors memory usage.\n//\n// This worker checks actual process memory usage (via runtime.MemStats) periodically\n// and force-frees buffers if memory exceeds the configured cap. It uses FreeOSMemory()\n// to return memory to the OS after freeing buffers, avoiding aggressive GC that causes\n// performance issues.\n//\n// The tracker logs both actual memory usage (heap allocated) and metric data size for\n// visibility into memory overhead from Go runtime structures and allocations.\n//\n// Parameters:\n// - wg: WaitGroup to signal completion when context is cancelled\n// - ctx: Context for cancellation signal\n//\n// The goroutine exits when ctx is cancelled.\nfunc MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\t\tnormalInterval := DefaultMemoryUsageTrackerInterval\n\t\tfastInterval := 30 * time.Second\n\n\t\tif normalInterval <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\tticker := time.NewTicker(normalInterval)\n\t\tdefer ticker.Stop()\n\t\tcurrentInterval := normalInterval\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tvar mem runtime.MemStats\n\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\tactualMemoryGB := float64(mem.Alloc) / 1e9\n\t\t\t\tmetricDataGB := ms.SizeInGB()\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory usage: %.2f GB actual (%.2f GB metric data)\", actualMemoryGB, metricDataGB)\n\n\t\t\t\tfreedExcluded := 0\n\t\t\t\tfreedEmergency := 0\n\t\t\t\tvar err error\n\n\t\t\t\tstate.mu.RLock()\n\t\t\t\tlastRetention := state.lastRetentionTime\n\t\t\t\tselectorsExcluded := state.selectorsExcluded\n\t\t\t\tstate.mu.RUnlock()\n\n\t\t\t\tif lastRetention != 0 && selectorsExcluded {\n\t\t\t\t\tfreedExcluded, err = ms.Free(nil, lastRetention)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while force-freeing the excluded buffers: %s\", err)\n\t\t\t\t\t}\n\n\t\t\t\t\tif freedExcluded > 0 {\n\t\t\t\t\t\tdebug.FreeOSMemory()\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d excluded buffers force-freed\", 
freedExcluded)\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\tif actualMemoryGB > float64(Keys.MemoryCap) {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing\", actualMemoryGB, Keys.MemoryCap)\n\n\t\t\t\t\t// Use progressive time-based Free with increasing threshold\n\t\t\t\t\t// instead of ForceFree loop — fewer tree traversals, more effective\n\t\t\t\t\td, parseErr := time.ParseDuration(Keys.RetentionInMemory)\n\t\t\t\t\tif parseErr != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> cannot parse retention duration: %s\", parseErr)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tthresholds := []float64{0.75, 0.5, 0.25}\n\t\t\t\t\t\tfor _, fraction := range thresholds {\n\t\t\t\t\t\t\tthreshold := time.Now().Add(-time.Duration(float64(d) * fraction))\n\t\t\t\t\t\t\tfreed, freeErr := ms.Free(nil, threshold.Unix())\n\t\t\t\t\t\t\tif freeErr != nil {\n\t\t\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while freeing buffers at %.0f%% retention: %s\", fraction*100, freeErr)\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tfreedEmergency += freed\n\n\t\t\t\t\t\t\tbufferPool.Clear()\n\t\t\t\t\t\t\truntime.GC()\n\t\t\t\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\t\t\t\tif actualMemoryGB < float64(Keys.MemoryCap) {\n\t\t\t\t\t\t\t\tbreak\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tbufferPool.Clear()\n\t\t\t\t\tdebug.FreeOSMemory()\n\n\t\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\t\tif actualMemoryGB >= float64(Keys.MemoryCap) {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> after emergency frees (%d buffers), memory usage %.2f GB still at/above cap %d GB\", freedEmergency, actualMemoryGB, Keys.MemoryCap)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> emergency freeing complete: %d buffers freed, memory now %.2f GB\", freedEmergency, actualMemoryGB)\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// Adaptive ticker: check more frequently when memory is high\n\t\t\t\tmemoryRatio := actualMemoryGB / float64(Keys.MemoryCap)\n\t\t\t\tif memoryRatio > 0.8 && currentInterval != fastInterval {\n\t\t\t\t\tticker.Reset(fastInterval)\n\t\t\t\t\tcurrentInterval = fastInterval\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to fast check interval (30s)\", memoryRatio*100)\n\t\t\t\t} else if memoryRatio <= 0.8 && currentInterval != normalInterval {\n\t\t\t\t\tticker.Reset(normalInterval)\n\t\t\t\t\tcurrentInterval = normalInterval\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to normal check interval\", memoryRatio*100)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\n// Free removes metric data older than the given time while preserving data for active nodes.\n//\n// This function implements intelligent retention by consulting the NodeProvider (if configured)\n// to determine which nodes are currently in use by running jobs. 
Data for these nodes is\n// preserved even if older than the retention time.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - t: Time threshold - buffers with data older than this will be freed\n//\n// Returns:\n// - Number of buffers freed\n// - Error if NodeProvider query fails\n//\n// Behavior:\n// - If no NodeProvider is set: frees all buffers older than t\n// - If NodeProvider returns empty map: frees all buffers older than t\n// - Otherwise: preserves buffers for nodes returned by GetUsedNodes(), frees others\nfunc Free(ms *MemoryStore, t time.Time) (int, error) {\n\t// If no NodeProvider is configured, free all buffers older than t\n\tif ms.nodeProvider == nil {\n\t\treturn ms.Free(nil, t.Unix())\n\t}\n\n\texcludeSelectors, err := ms.nodeProvider.GetUsedNodes(t.Unix())\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\tswitch lenMap := len(excludeSelectors); lenMap {\n\n\t// If the length of the map returned by GetUsedNodes() is 0,\n\t// then use default Free method with nil selector\n\tcase 0:\n\t\tstate.selectorsExcluded = false\n\t\treturn ms.Free(nil, t.Unix())\n\n\t// Else formulate selectors, exclude those from the map\n\t// and free the rest of the selectors\n\tdefault:\n\t\tstate.selectorsExcluded = true\n\t\tselectors := GetSelectors(ms, excludeSelectors)\n\t\treturn FreeSelected(ms, selectors, t)\n\t}\n}\n\n// FreeSelected frees buffers for specific selectors while preserving others.\n//\n// This function is used when we want to retain some specific nodes beyond the retention time.\n// It iterates through the provided selectors and frees their associated buffers.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - selectors: List of selector paths to free (e.g., [[\"cluster1\", \"node1\"], [\"cluster2\", \"node2\"]])\n// - t: Time threshold for freeing buffers\n//\n// Returns the total number of buffers freed and any error encountered.\nfunc FreeSelected(ms *MemoryStore, selectors [][]string, t time.Time) (int, error) {\n\tfreed := 0\n\n\tfor _, selector := range selectors {\n\n\t\tfreedBuffers, err := ms.Free(selector, t.Unix())\n\t\tif err != nil {\n\t\t\tcclog.Errorf(\"error while freeing selected buffers: %#v\", err)\n\t\t}\n\t\tfreed += freedBuffers\n\n\t}\n\n\treturn freed, nil\n}\n\n// GetSelectors returns all selectors at depth 2 (cluster/node level) that are NOT in the exclusion map.\n//\n// This function generates a list of selectors whose buffers should be freed by excluding\n// selectors that correspond to nodes currently in use by running jobs.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - excludeSelectors: Map of cluster names to node hostnames that should NOT be freed\n//\n// Returns a list of selectors ([]string paths) that can be safely freed.\n//\n// Example:\n//\n//\tIf the tree has paths [\"emmy\", \"node001\"] and [\"emmy\", \"node002\"],\n//\tand excludeSelectors contains {\"emmy\": [\"node001\"]},\n//\tthen only [[\"emmy\", \"node002\"]] is returned.\nfunc GetSelectors(ms *MemoryStore, excludeSelectors map[string][]string) [][]string {\n\tallSelectors := ms.GetPaths(2)\n\n\tfilteredSelectors := make([][]string, 0, len(allSelectors))\n\n\tfor _, path := range allSelectors {\n\t\tif len(path) < 2 {\n\t\t\tcontinue\n\t\t}\n\n\t\tkey := path[0] // The \"Key\" (Level 1)\n\t\tvalue := path[1] // The \"Value\" (Level 2)\n\n\t\texclude := false\n\n\t\t// Check if the key exists in our exclusion map\n\t\tif excludedValues, exists := excludeSelectors[key]; exists {\n\t\t\t// The key exists, now check if the specific value is in 
the exclusion list\n\t\t\tif slices.Contains(excludedValues, value) {\n\t\t\t\texclude = true\n\t\t\t}\n\t\t}\n\n\t\tif !exclude {\n\t\t\tfilteredSelectors = append(filteredSelectors, path)\n\t\t}\n\t}\n\n\treturn filteredSelectors\n}\n\n// GetPaths returns a list of lists (paths) to the specified depth.\nfunc (ms *MemoryStore) GetPaths(targetDepth int) [][]string {\n\tvar results [][]string\n\n\t// Start recursion. Initial path is empty.\n\t// We treat Root as depth 0.\n\tms.root.collectPaths(0, targetDepth, []string{}, &results)\n\n\treturn results\n}\n\n// Write all values in `metrics` to the level specified by `selector` for time `ts`.\n// Look at `findLevelOrCreate` for how selectors work.\nfunc (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {\n\tvar ok bool\n\tfor i, metric := range metrics {\n\t\tif metric.MetricConfig.Frequency == 0 {\n\t\t\tmetric.MetricConfig, ok = m.Metrics[metric.Name]\n\t\t\tif !ok {\n\t\t\t\tcclog.Debugf(\"[METRICSTORE]> Unknown metric '%s' in Write() - skipping\", metric.Name)\n\t\t\t\tmetric.MetricConfig.Frequency = 0\n\t\t\t}\n\t\t\tmetrics[i] = metric\n\t\t}\n\t}\n\n\treturn m.WriteToLevel(&m.root, selector, ts, metrics)\n}\n\nfunc (m *MemoryStore) GetLevel(selector []string) *Level {\n\treturn m.root.findLevelOrCreate(selector, len(m.Metrics))\n}\n\n// WriteToLevel assumes that `minfo` in `metrics` is filled in\nfunc (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {\n\tl = l.findLevelOrCreate(selector, len(m.Metrics))\n\tl.lock.Lock()\n\tdefer l.lock.Unlock()\n\n\tfor _, metric := range metrics {\n\t\tif metric.MetricConfig.Frequency == 0 {\n\t\t\tcontinue\n\t\t}\n\n\t\tb := l.metrics[metric.MetricConfig.offset]\n\t\tif b == nil {\n\t\t\t// First write to this metric and level\n\t\t\tb = newBuffer(ts, metric.MetricConfig.Frequency)\n\t\t\tl.metrics[metric.MetricConfig.offset] = b\n\t\t}\n\n\t\tnb, err := b.write(ts, metric.Value)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\t// Last write created a new buffer...\n\t\tif b != nb {\n\t\t\tl.metrics[metric.MetricConfig.offset] = nb\n\t\t}\n\t}\n\treturn nil\n}\n\n// Read returns all values for metric `metric` from `from` to `to` for the selected level(s).\n// If the level does not hold the metric itself, the data will be aggregated recursively from the children.\n// The second and third return value are the actual from/to for the data. 
Those can be different from\n// the range asked for if no data was available.\nfunc (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {\n\tif from > to {\n\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid time range\")\n\t}\n\n\tminfo, ok := m.Metrics[metric]\n\tif !ok {\n\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> unknown metric: \" + metric)\n\t}\n\n\tn, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)\n\n\terr := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {\n\t\tcdata, cfrom, cto, err := b.read(from, to, data)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\tif n == 0 {\n\t\t\tfrom, to = cfrom, cto\n\t\t} else if from != cfrom || to != cto || len(data) != len(cdata) {\n\t\t\tmissingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)\n\t\t\tif missingfront != 0 {\n\t\t\t\treturn ErrDataDoesNotAlignMissingFront\n\t\t\t}\n\n\t\t\tnewlen := len(cdata) - missingback\n\t\t\tif newlen < 1 {\n\t\t\t\treturn ErrDataDoesNotAlignMissingBack\n\t\t\t}\n\t\t\tcdata = cdata[0:newlen]\n\t\t\tif len(cdata) != len(data) {\n\t\t\t\treturn ErrDataDoesNotAlignDataLenMismatch\n\t\t\t}\n\n\t\t\tfrom, to = cfrom, cto\n\t\t}\n\n\t\tdata = cdata\n\t\tn += 1\n\t\treturn nil\n\t})\n\n\tif err != nil {\n\t\treturn nil, 0, 0, 0, err\n\t} else if n == 0 {\n\t\treturn nil, 0, 0, 0, ErrNoHostOrMetric\n\t} else if n > 1 {\n\t\tif minfo.Aggregation == AvgAggregation {\n\t\t\tnormalize := 1. / schema.Float(n)\n\t\t\tfor i := 0; i < len(data); i++ {\n\t\t\t\tdata[i] *= normalize\n\t\t\t}\n\t\t} else if minfo.Aggregation != SumAggregation {\n\t\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid aggregation\")\n\t\t}\n\t}\n\n\tdata, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)\n\tif err != nil {\n\t\treturn nil, 0, 0, 0, err\n\t}\n\n\treturn data, from, to, resolution, nil\n}\n\n// Free releases all buffers for the selected level and all its children that\n// contain only values older than `t`.\nfunc (m *MemoryStore) Free(selector []string, t int64) (int, error) {\n\treturn m.GetLevel(selector).free(t)\n}\n\n// ForceFree unconditionally removes the oldest buffer from each metric chain.\nfunc (m *MemoryStore) ForceFree() (int, error) {\n\treturn m.GetLevel(nil).forceFree()\n}\n\nfunc (m *MemoryStore) FreeAll() error {\n\tm.root.lock.Lock()\n\tdefer m.root.lock.Unlock()\n\n\tfor k := range m.root.children {\n\t\tdelete(m.root.children, k)\n\t}\n\n\treturn nil\n}\n\nfunc (m *MemoryStore) SizeInBytes() int64 {\n\treturn m.root.sizeInBytes()\n}\n\nfunc (m *MemoryStore) SizeInGB() float64 {\n\treturn float64(m.root.sizeInBytes()) / 1e9\n}\n\n// ListChildren , given a selector, returns a list of all children of the level\n// selected.\nfunc (m *MemoryStore) ListChildren(selector []string) []string {\n\tlvl := &m.root\n\tfor lvl != nil && len(selector) != 0 {\n\t\tlvl.lock.RLock()\n\t\tnext := lvl.children[selector[0]]\n\t\tlvl.lock.RUnlock()\n\t\tlvl = next\n\t\tselector = selector[1:]\n\t}\n\n\tif lvl == nil {\n\t\treturn nil\n\t}\n\n\tlvl.lock.RLock()\n\tdefer lvl.lock.RUnlock()\n\n\tchildren := make([]string, 0, len(lvl.children))\n\tfor child := range lvl.children {\n\t\tchildren = append(children, child)\n\t}\n\n\treturn children\n}\n","structuredPatch":[{"oldStart":8,"oldLines":7,"newStart":8,"newLines":7,"lines":[" //"," // The package organizes metrics in a tree structure (cluster → host → 
component) and"," // provides concurrent read/write access to metric data with configurable aggregation strategies.","-// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,","+// Background goroutines handle periodic checkpointing (JSON or WAL/binary format), archiving old data,"," // and enforcing retention policies."," //"," // Key features:"]},{"oldStart":16,"oldLines":7,"newStart":16,"newLines":7,"lines":[" // - Hierarchical data organization (selectors)"," // - Concurrent checkpoint/archive workers"," // - Support for sum and average aggregation","-// - NATS integration for metric ingestion","+// - NATS integration for metric ingestion via InfluxDB line protocol"," package metricstore"," "," import ("]}],"userModified":false,"replaceAll":false},"sourceToolAssistantUUID":"504f5612-18ba-42a4-a3d2-787e39cd5aa3"}
{"parentUuid":"efe49036-707f-45fa-8c09-8e615214bef7","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01PdZYqLuFEA8tUTCkaB7Jvx","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_017EdmVnG5de4xuZSzfbN4HV","name":"Edit","input":{"replace_all":false,"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go","old_string":"// Parameters:\n// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig)\n// - wg: WaitGroup that will be incremented for each background goroutine started","new_string":"// Parameters:\n// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig); may be nil to use defaults\n// - metrics: Map of metric names to their configurations (frequency and aggregation strategy)\n// - wg: WaitGroup that will be incremented for each background goroutine started"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":2673,"cache_read_input_tokens":108124,"output_tokens":235,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":2673,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw5b1hZtvCNBicFiMTN","type":"assistant","uuid":"6370b39e-8df7-4f63-9d57-72f16766f0a7","timestamp":"2026-03-29T19:35:44.909Z"}
{"parentUuid":"6370b39e-8df7-4f63-9d57-72f16766f0a7","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Edit","command":"callback"},"parentToolUseID":"toolu_017EdmVnG5de4xuZSzfbN4HV","toolUseID":"toolu_017EdmVnG5de4xuZSzfbN4HV","timestamp":"2026-03-29T19:35:44.980Z","uuid":"5585c17d-19c7-4738-80c3-4c902785e374"}
{"parentUuid":"6370b39e-8df7-4f63-9d57-72f16766f0a7","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_017EdmVnG5de4xuZSzfbN4HV","type":"tool_result","content":"The file /home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go has been updated successfully."}]},"uuid":"9de9703c-4090-475f-b6d6-be01a9ee2eb5","timestamp":"2026-03-29T19:35:44.978Z","toolUseResult":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/metricstore.go","oldString":"// Parameters:\n// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig)\n// - wg: WaitGroup that will be incremented for each background goroutine started","newString":"// Parameters:\n// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig); may be nil to use defaults\n// - metrics: Map of metric names to their configurations (frequency and aggregation strategy)\n// - wg: WaitGroup that will be incremented for each background goroutine started","originalFile":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides an efficient in-memory time-series metric storage system\n// with support for hierarchical data organization, checkpointing, and archiving.\n//\n// The package organizes metrics in a tree structure (cluster → host → component) and\n// provides concurrent read/write access to metric data with configurable aggregation strategies.\n// Background goroutines handle periodic checkpointing (JSON or WAL/binary format), archiving old data,\n// and enforcing retention policies.\n//\n// Key features:\n// - In-memory metric storage with configurable retention\n// - Hierarchical data organization (selectors)\n// - Concurrent checkpoint/archive workers\n// - Support for sum and average aggregation\n// - NATS integration for metric ingestion via InfluxDB line protocol\npackage metricstore\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"encoding/json\"\n\t\"errors\"\n\t\"fmt\"\n\t\"runtime\"\n\t\"runtime/debug\"\n\t\"slices\"\n\t\"sync\"\n\t\"time\"\n\n\t\"github.com/ClusterCockpit/cc-backend/internal/config\"\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/resampler\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/util\"\n)\n\n// GlobalState holds the global state for the metric store with thread-safe access.\ntype GlobalState struct {\n\tmu sync.RWMutex\n\tlastRetentionTime int64\n\tselectorsExcluded bool\n}\n\nvar (\n\tsingleton sync.Once\n\tmsInstance *MemoryStore\n\t// shutdownFunc stores the context cancellation function created in Init\n\t// and is called during Shutdown to cancel all background goroutines\n\tshutdownFunc context.CancelFunc\n\tshutdownFuncMu sync.Mutex // Protects shutdownFunc from concurrent access\n\t// Create a global instance\n\tstate = &GlobalState{}\n)\n\n// NodeProvider provides information about nodes currently in use by running jobs.\n//\n// This interface allows metricstore to query job information without directly\n// depending on the repository package, breaking the import cycle.\n//\n// Implementations should return nodes that 
are actively processing jobs started\n// before the given timestamp. These nodes will be excluded from retention-based\n// garbage collection to prevent data loss for jobs that are still running or\n// recently completed.\ntype NodeProvider interface {\n\t// GetUsedNodes returns a map of cluster names to sorted lists of unique hostnames\n\t// that are currently in use by jobs that started before the given timestamp.\n\t//\n\t// Parameters:\n\t// - ts: Unix timestamp threshold - returns nodes with jobs started before this time\n\t//\n\t// Returns:\n\t// - Map of cluster names to lists of node hostnames that should be excluded from garbage collection\n\t// - Error if the query fails\n\tGetUsedNodes(ts int64) (map[string][]string, error)\n}\n\n// Metric represents a single metric data point to be written to the store.\ntype Metric struct {\n\tName string\n\tValue schema.Float\n\t// MetricConfig contains frequency and aggregation settings for this metric.\n\t// If Frequency is 0, configuration will be looked up from MemoryStore.Metrics during Write().\n\tMetricConfig MetricConfig\n}\n\n// MemoryStore is the main in-memory time-series metric storage implementation.\n//\n// It organizes metrics in a hierarchical tree structure where each level represents\n// a component of the system hierarchy (e.g., cluster → host → CPU). Each level can\n// store multiple metrics as time-series buffers.\n//\n// The store is initialized as a singleton via InitMetrics() and accessed via GetMemoryStore().\n// All public methods are safe for concurrent use.\ntype MemoryStore struct {\n\tMetrics map[string]MetricConfig\n\troot Level\n\tnodeProvider NodeProvider\n}\n\n// Init initializes the metric store from configuration and starts background workers.\n//\n// This function must be called exactly once before any other metricstore operations.\n// It performs the following initialization steps:\n// 1. Validates and decodes the metric store configuration\n// 2. Configures worker pool size (defaults to NumCPU/2+1, max 10)\n// 3. Loads metric configurations from all registered clusters\n// 4. Restores checkpoints within the retention window\n// 5. Starts background workers for retention, checkpointing, archiving, and monitoring\n// 6. Optionally subscribes to NATS for real-time metric ingestion\n//\n// Parameters:\n// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig)\n// - wg: WaitGroup that will be incremented for each background goroutine started\n//\n// The function will call cclog.Fatal on critical errors during initialization.\n// Use Shutdown() to cleanly stop all background workers started by Init().\n//\n// Note: Signal handling must be implemented by the caller. 
Call Shutdown() when\n// receiving termination signals to ensure checkpoint data is persisted.\nfunc Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.WaitGroup) {\n\tstartupTime := time.Now()\n\n\tif rawConfig != nil {\n\t\tconfig.Validate(configSchema, rawConfig)\n\t\tdec := json.NewDecoder(bytes.NewReader(rawConfig))\n\t\tdec.DisallowUnknownFields()\n\t\tif err := dec.Decode(&Keys); err != nil {\n\t\t\tcclog.Abortf(\"[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\\nError: %s\\n\", rawConfig, err.Error())\n\t\t}\n\t}\n\n\t// Set NumWorkers from config or use default\n\tif Keys.NumWorkers <= 0 {\n\t\tKeys.NumWorkers = min(runtime.NumCPU()/2+1, DefaultMaxWorkers)\n\t}\n\tcclog.Debugf(\"[METRICSTORE]> Using %d workers for checkpoint/archive operations\\n\", Keys.NumWorkers)\n\n\t// Pass the config.MetricStoreKeys\n\tInitMetrics(metrics)\n\n\tms := GetMemoryStore()\n\n\td, err := time.ParseDuration(Keys.RetentionInMemory)\n\tif err != nil {\n\t\tcclog.Fatal(err)\n\t}\n\n\trestoreFrom := startupTime.Add(-d)\n\tcclog.Infof(\"[METRICSTORE]> Loading checkpoints newer than %s\\n\", restoreFrom.Format(time.RFC3339))\n\tfiles, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())\n\tloadedData := ms.SizeInBytes() / 1024 / 1024 // In MB\n\tif err != nil {\n\t\tcclog.Fatalf(\"[METRICSTORE]> Loading checkpoints failed: %s\\n\", err.Error())\n\t} else {\n\t\tcclog.Infof(\"[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\\n\", files, loadedData, time.Since(startupTime).Seconds())\n\t}\n\n\t// Try to use less memory by forcing a GC run here and then\n\t// lowering the target percentage. The default of 100 means\n\t// that only once the ratio of new allocations exceeds the\n\t// previously active heap, a GC is triggered.\n\t// Forcing a GC here will set the \"previously active heap\"\n\t// to a minimum.\n\t// runtime.GC()\n\n\tctx, shutdown := context.WithCancel(context.Background())\n\n\tRetention(wg, ctx)\n\tCheckpointing(wg, ctx)\n\tCleanUp(wg, ctx)\n\tWALStaging(wg, ctx)\n\tMemoryUsageTracker(wg, ctx)\n\n\t// Note: Signal handling has been removed from this function.\n\t// The caller is responsible for handling shutdown signals and calling\n\t// the shutdown() function when appropriate.\n\t// Store the shutdown function for later use by Shutdown()\n\tshutdownFuncMu.Lock()\n\tshutdownFunc = shutdown\n\tshutdownFuncMu.Unlock()\n\n\tif Keys.Subscriptions != nil {\n\t\twg.Go(func() {\n\t\t\tif err := ReceiveNats(ms, Keys.NumWorkers, ctx); err != nil {\n\t\t\t\tcclog.Fatal(err)\n\t\t\t}\n\t\t})\n\t}\n}\n\n// InitMetrics initializes the singleton MemoryStore instance with the given metric configurations.\n//\n// This function must be called before GetMemoryStore() and can only be called once due to\n// the singleton pattern. 
It assigns each metric an internal offset for efficient buffer indexing.\n//\n// Parameters:\n// - metrics: Map of metric names to their configurations (frequency and aggregation strategy)\n//\n// Panics if any metric has Frequency == 0, which indicates an invalid configuration.\n//\n// After this call, the global msInstance is ready for use via GetMemoryStore().\nfunc InitMetrics(metrics map[string]MetricConfig) {\n\tsingleton.Do(func() {\n\t\toffset := 0\n\t\tfor key, cfg := range metrics {\n\t\t\tif cfg.Frequency == 0 {\n\t\t\t\tpanic(\"[METRICSTORE]> invalid frequency\")\n\t\t\t}\n\n\t\t\tmetrics[key] = MetricConfig{\n\t\t\t\tFrequency: cfg.Frequency,\n\t\t\t\tAggregation: cfg.Aggregation,\n\t\t\t\toffset: offset,\n\t\t\t}\n\t\t\toffset += 1\n\t\t}\n\n\t\tmsInstance = &MemoryStore{\n\t\t\troot: Level{\n\t\t\t\tmetrics: make([]*buffer, len(metrics)),\n\t\t\t\tchildren: make(map[string]*Level),\n\t\t\t},\n\t\t\tMetrics: metrics,\n\t\t}\n\t})\n}\n\n// GetMemoryStore returns the singleton MemoryStore instance.\n//\n// Returns the initialized MemoryStore singleton. Logs a warning and returns nil if InitMetrics() was not called first.\n//\n// This function is safe for concurrent use after initialization.\nfunc GetMemoryStore() *MemoryStore {\n\tif msInstance == nil {\n\t\tcclog.Warnf(\"[METRICSTORE]> MemoryStore not initialized!\")\n\t}\n\n\treturn msInstance\n}\n\nfunc (ms *MemoryStore) GetMetricFrequency(metricName string) (int64, error) {\n\tif metric, ok := ms.Metrics[metricName]; ok {\n\t\treturn metric.Frequency, nil\n\t}\n\treturn 0, fmt.Errorf(\"[METRICSTORE]> metric %s not found\", metricName)\n}\n\n// SetNodeProvider sets the NodeProvider implementation for the MemoryStore.\n// This must be called during initialization to provide job state information\n// for selective buffer retention during Free operations.\n// If not set, the Free function will fall back to freeing all buffers.\nfunc (ms *MemoryStore) SetNodeProvider(provider NodeProvider) {\n\tms.nodeProvider = provider\n}\n\n// Shutdown performs a graceful shutdown of the metric store.\n//\n// This function cancels all background goroutines started by Init() and writes\n// a final checkpoint to disk before returning. It should be called when the\n// application receives a termination signal.\n//\n// The function will:\n// 1. Cancel the context to stop all background workers\n// 2. Close the WAL messages channel if using WAL format\n// 3. Write a final checkpoint to preserve in-memory data\n// 4. 
Log any errors encountered during shutdown\n//\n// Note: This function blocks until the final checkpoint is written.\nfunc Shutdown() {\n\ttotalStart := time.Now()\n\n\tshutdownFuncMu.Lock()\n\tdefer shutdownFuncMu.Unlock()\n\tif shutdownFunc != nil {\n\t\tshutdownFunc()\n\t}\n\tcclog.Infof(\"[METRICSTORE]> Background workers cancelled (%v)\", time.Since(totalStart))\n\n\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t// Signal producers to stop sending before closing channels,\n\t\t// preventing send-on-closed-channel panics from in-flight NATS workers.\n\t\twalShuttingDown.Store(true)\n\t\t// Brief grace period for in-flight DecodeLine calls to complete.\n\t\ttime.Sleep(100 * time.Millisecond)\n\n\t\tfor _, ch := range walShardChs {\n\t\t\tclose(ch)\n\t\t}\n\t\tdrainStart := time.Now()\n\t\tWaitForWALStagingDrain()\n\t\tcclog.Infof(\"[METRICSTORE]> WAL staging goroutines exited (%v)\", time.Since(drainStart))\n\t}\n\n\tcclog.Infof(\"[METRICSTORE]> Writing checkpoint to '%s'...\", Keys.Checkpoints.RootDir)\n\tcheckpointStart := time.Now()\n\tvar files int\n\tvar err error\n\n\tms := GetMemoryStore()\n\n\tlastCheckpointMu.Lock()\n\tfrom := lastCheckpoint\n\tlastCheckpointMu.Unlock()\n\n\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t// WAL files are deleted per-host inside ToCheckpointWAL workers.\n\t\tfiles, _, err = ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n\t} else {\n\t\tfiles, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())\n\t}\n\n\tif err != nil {\n\t\tcclog.Errorf(\"[METRICSTORE]> Writing checkpoint failed: %s\", err.Error())\n\t}\n\tcclog.Infof(\"[METRICSTORE]> Done! (%d files written in %v, total shutdown: %v)\", files, time.Since(checkpointStart), time.Since(totalStart))\n}\n\n// Retention starts a background goroutine that periodically frees old metric data.\n//\n// This worker runs at half the retention interval and calls Free() to remove buffers\n// older than the configured retention time. 
It respects the NodeProvider to preserve\n// data for nodes with active jobs.\n//\n// Parameters:\n// - wg: WaitGroup to signal completion when context is cancelled\n// - ctx: Context for cancellation signal\n//\n// The goroutine exits when ctx is cancelled.\nfunc Retention(wg *sync.WaitGroup, ctx context.Context) {\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\t\td, err := time.ParseDuration(Keys.RetentionInMemory)\n\t\tif err != nil {\n\t\t\tcclog.Fatal(err)\n\t\t}\n\t\tif d <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\ttickInterval := d / 2\n\t\tif tickInterval <= 0 {\n\t\t\treturn\n\t\t}\n\t\tticker := time.NewTicker(tickInterval)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tstate.mu.Lock()\n\n\t\t\t\tt := time.Now().Add(-d)\n\n\t\t\t\tstate.lastRetentionTime = t.Unix()\n\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start freeing buffers (older than %s)...\\n\", t.Format(time.RFC3339))\n\n\t\t\t\tfreed, err := Free(ms, t)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> freeing up buffers failed: %s\\n\", err.Error())\n\t\t\t\t} else {\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d buffers freed\\n\", freed)\n\t\t\t\t}\n\n\t\t\t\tstate.mu.Unlock()\n\n\t\t\t\t// Clean up the buffer pool\n\t\t\t\tbufferPool.Clean(state.lastRetentionTime)\n\t\t\t}\n\t\t}\n\t})\n}\n\n// MemoryUsageTracker starts a background goroutine that monitors memory usage.\n//\n// This worker checks actual process memory usage (via runtime.MemStats) periodically\n// and force-frees buffers if memory exceeds the configured cap. It uses FreeOSMemory()\n// to return memory to the OS after freeing buffers, avoiding aggressive GC that causes\n// performance issues.\n//\n// The tracker logs both actual memory usage (heap allocated) and metric data size for\n// visibility into memory overhead from Go runtime structures and allocations.\n//\n// Parameters:\n// - wg: WaitGroup to signal completion when context is cancelled\n// - ctx: Context for cancellation signal\n//\n// The goroutine exits when ctx is cancelled.\nfunc MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\t\tnormalInterval := DefaultMemoryUsageTrackerInterval\n\t\tfastInterval := 30 * time.Second\n\n\t\tif normalInterval <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\tticker := time.NewTicker(normalInterval)\n\t\tdefer ticker.Stop()\n\t\tcurrentInterval := normalInterval\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tvar mem runtime.MemStats\n\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\tactualMemoryGB := float64(mem.Alloc) / 1e9\n\t\t\t\tmetricDataGB := ms.SizeInGB()\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory usage: %.2f GB actual (%.2f GB metric data)\", actualMemoryGB, metricDataGB)\n\n\t\t\t\tfreedExcluded := 0\n\t\t\t\tfreedEmergency := 0\n\t\t\t\tvar err error\n\n\t\t\t\tstate.mu.RLock()\n\t\t\t\tlastRetention := state.lastRetentionTime\n\t\t\t\tselectorsExcluded := state.selectorsExcluded\n\t\t\t\tstate.mu.RUnlock()\n\n\t\t\t\tif lastRetention != 0 && selectorsExcluded {\n\t\t\t\t\tfreedExcluded, err = ms.Free(nil, lastRetention)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while force-freeing the excluded buffers: %s\", err)\n\t\t\t\t\t}\n\n\t\t\t\t\tif freedExcluded > 0 {\n\t\t\t\t\t\tdebug.FreeOSMemory()\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d excluded buffers force-freed\", 
freedExcluded)\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\tif actualMemoryGB > float64(Keys.MemoryCap) {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing\", actualMemoryGB, Keys.MemoryCap)\n\n\t\t\t\t\t// Use progressive time-based Free with increasing threshold\n\t\t\t\t\t// instead of ForceFree loop — fewer tree traversals, more effective\n\t\t\t\t\td, parseErr := time.ParseDuration(Keys.RetentionInMemory)\n\t\t\t\t\tif parseErr != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> cannot parse retention duration: %s\", parseErr)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tthresholds := []float64{0.75, 0.5, 0.25}\n\t\t\t\t\t\tfor _, fraction := range thresholds {\n\t\t\t\t\t\t\tthreshold := time.Now().Add(-time.Duration(float64(d) * fraction))\n\t\t\t\t\t\t\tfreed, freeErr := ms.Free(nil, threshold.Unix())\n\t\t\t\t\t\t\tif freeErr != nil {\n\t\t\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while freeing buffers at %.0f%% retention: %s\", fraction*100, freeErr)\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tfreedEmergency += freed\n\n\t\t\t\t\t\t\tbufferPool.Clear()\n\t\t\t\t\t\t\truntime.GC()\n\t\t\t\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\t\t\t\tif actualMemoryGB < float64(Keys.MemoryCap) {\n\t\t\t\t\t\t\t\tbreak\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tbufferPool.Clear()\n\t\t\t\t\tdebug.FreeOSMemory()\n\n\t\t\t\t\truntime.ReadMemStats(&mem)\n\t\t\t\t\tactualMemoryGB = float64(mem.Alloc) / 1e9\n\n\t\t\t\t\tif actualMemoryGB >= float64(Keys.MemoryCap) {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> after emergency frees (%d buffers), memory usage %.2f GB still at/above cap %d GB\", freedEmergency, actualMemoryGB, Keys.MemoryCap)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> emergency freeing complete: %d buffers freed, memory now %.2f GB\", freedEmergency, actualMemoryGB)\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// Adaptive ticker: check more frequently when memory is high\n\t\t\t\tmemoryRatio := actualMemoryGB / float64(Keys.MemoryCap)\n\t\t\t\tif memoryRatio > 0.8 && currentInterval != fastInterval {\n\t\t\t\t\tticker.Reset(fastInterval)\n\t\t\t\t\tcurrentInterval = fastInterval\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to fast check interval (30s)\", memoryRatio*100)\n\t\t\t\t} else if memoryRatio <= 0.8 && currentInterval != normalInterval {\n\t\t\t\t\tticker.Reset(normalInterval)\n\t\t\t\t\tcurrentInterval = normalInterval\n\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> memory at %.0f%% of cap, switching to normal check interval\", memoryRatio*100)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\n// Free removes metric data older than the given time while preserving data for active nodes.\n//\n// This function implements intelligent retention by consulting the NodeProvider (if configured)\n// to determine which nodes are currently in use by running jobs. 
Data for these nodes is\n// preserved even if older than the retention time.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - t: Time threshold - buffers with data older than this will be freed\n//\n// Returns:\n// - Number of buffers freed\n// - Error if NodeProvider query fails\n//\n// Behavior:\n// - If no NodeProvider is set: frees all buffers older than t\n// - If NodeProvider returns empty map: frees all buffers older than t\n// - Otherwise: preserves buffers for nodes returned by GetUsedNodes(), frees others\nfunc Free(ms *MemoryStore, t time.Time) (int, error) {\n\t// If no NodeProvider is configured, free all buffers older than t\n\tif ms.nodeProvider == nil {\n\t\treturn ms.Free(nil, t.Unix())\n\t}\n\n\texcludeSelectors, err := ms.nodeProvider.GetUsedNodes(t.Unix())\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\tswitch lenMap := len(excludeSelectors); lenMap {\n\n\t// If the length of the map returned by GetUsedNodes() is 0,\n\t// then use default Free method with nil selector\n\tcase 0:\n\t\tstate.selectorsExcluded = false\n\t\treturn ms.Free(nil, t.Unix())\n\n\t// Else formulate selectors, exclude those from the map\n\t// and free the rest of the selectors\n\tdefault:\n\t\tstate.selectorsExcluded = true\n\t\tselectors := GetSelectors(ms, excludeSelectors)\n\t\treturn FreeSelected(ms, selectors, t)\n\t}\n}\n\n// FreeSelected frees buffers for specific selectors while preserving others.\n//\n// This function is used when we want to retain some specific nodes beyond the retention time.\n// It iterates through the provided selectors and frees their associated buffers.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - selectors: List of selector paths to free (e.g., [[\"cluster1\", \"node1\"], [\"cluster2\", \"node2\"]])\n// - t: Time threshold for freeing buffers\n//\n// Returns the total number of buffers freed and any error encountered.\nfunc FreeSelected(ms *MemoryStore, selectors [][]string, t time.Time) (int, error) {\n\tfreed := 0\n\n\tfor _, selector := range selectors {\n\n\t\tfreedBuffers, err := ms.Free(selector, t.Unix())\n\t\tif err != nil {\n\t\t\tcclog.Errorf(\"error while freeing selected buffers: %#v\", err)\n\t\t}\n\t\tfreed += freedBuffers\n\n\t}\n\n\treturn freed, nil\n}\n\n// GetSelectors returns all selectors at depth 2 (cluster/node level) that are NOT in the exclusion map.\n//\n// This function generates a list of selectors whose buffers should be freed by excluding\n// selectors that correspond to nodes currently in use by running jobs.\n//\n// Parameters:\n// - ms: The MemoryStore instance\n// - excludeSelectors: Map of cluster names to node hostnames that should NOT be freed\n//\n// Returns a list of selectors ([]string paths) that can be safely freed.\n//\n// Example:\n//\n//\tIf the tree has paths [\"emmy\", \"node001\"] and [\"emmy\", \"node002\"],\n//\tand excludeSelectors contains {\"emmy\": [\"node001\"]},\n//\tthen only [[\"emmy\", \"node002\"]] is returned.\nfunc GetSelectors(ms *MemoryStore, excludeSelectors map[string][]string) [][]string {\n\tallSelectors := ms.GetPaths(2)\n\n\tfilteredSelectors := make([][]string, 0, len(allSelectors))\n\n\tfor _, path := range allSelectors {\n\t\tif len(path) < 2 {\n\t\t\tcontinue\n\t\t}\n\n\t\tkey := path[0] // The \"Key\" (Level 1)\n\t\tvalue := path[1] // The \"Value\" (Level 2)\n\n\t\texclude := false\n\n\t\t// Check if the key exists in our exclusion map\n\t\tif excludedValues, exists := excludeSelectors[key]; exists {\n\t\t\t// The key exists, now check if the specific value is in 
the exclusion list\n\t\t\tif slices.Contains(excludedValues, value) {\n\t\t\t\texclude = true\n\t\t\t}\n\t\t}\n\n\t\tif !exclude {\n\t\t\tfilteredSelectors = append(filteredSelectors, path)\n\t\t}\n\t}\n\n\treturn filteredSelectors\n}\n\n// GetPaths returns a list of lists (paths) to the specified depth.\nfunc (ms *MemoryStore) GetPaths(targetDepth int) [][]string {\n\tvar results [][]string\n\n\t// Start recursion. Initial path is empty.\n\t// We treat Root as depth 0.\n\tms.root.collectPaths(0, targetDepth, []string{}, &results)\n\n\treturn results\n}\n\n// Write all values in `metrics` to the level specified by `selector` for time `ts`.\n// Look at `findLevelOrCreate` for how selectors work.\nfunc (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {\n\tvar ok bool\n\tfor i, metric := range metrics {\n\t\tif metric.MetricConfig.Frequency == 0 {\n\t\t\tmetric.MetricConfig, ok = m.Metrics[metric.Name]\n\t\t\tif !ok {\n\t\t\t\tcclog.Debugf(\"[METRICSTORE]> Unknown metric '%s' in Write() - skipping\", metric.Name)\n\t\t\t\tmetric.MetricConfig.Frequency = 0\n\t\t\t}\n\t\t\tmetrics[i] = metric\n\t\t}\n\t}\n\n\treturn m.WriteToLevel(&m.root, selector, ts, metrics)\n}\n\nfunc (m *MemoryStore) GetLevel(selector []string) *Level {\n\treturn m.root.findLevelOrCreate(selector, len(m.Metrics))\n}\n\n// WriteToLevel assumes that `minfo` in `metrics` is filled in\nfunc (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {\n\tl = l.findLevelOrCreate(selector, len(m.Metrics))\n\tl.lock.Lock()\n\tdefer l.lock.Unlock()\n\n\tfor _, metric := range metrics {\n\t\tif metric.MetricConfig.Frequency == 0 {\n\t\t\tcontinue\n\t\t}\n\n\t\tb := l.metrics[metric.MetricConfig.offset]\n\t\tif b == nil {\n\t\t\t// First write to this metric and level\n\t\t\tb = newBuffer(ts, metric.MetricConfig.Frequency)\n\t\t\tl.metrics[metric.MetricConfig.offset] = b\n\t\t}\n\n\t\tnb, err := b.write(ts, metric.Value)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\t// Last write created a new buffer...\n\t\tif b != nb {\n\t\t\tl.metrics[metric.MetricConfig.offset] = nb\n\t\t}\n\t}\n\treturn nil\n}\n\n// Read returns all values for metric `metric` from `from` to `to` for the selected level(s).\n// If the level does not hold the metric itself, the data will be aggregated recursively from the children.\n// The second and third return value are the actual from/to for the data. 
Those can be different from\n// the range asked for if no data was available.\nfunc (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {\n\tif from > to {\n\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid time range\")\n\t}\n\n\tminfo, ok := m.Metrics[metric]\n\tif !ok {\n\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> unknown metric: \" + metric)\n\t}\n\n\tn, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)\n\n\terr := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {\n\t\tcdata, cfrom, cto, err := b.read(from, to, data)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\tif n == 0 {\n\t\t\tfrom, to = cfrom, cto\n\t\t} else if from != cfrom || to != cto || len(data) != len(cdata) {\n\t\t\tmissingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)\n\t\t\tif missingfront != 0 {\n\t\t\t\treturn ErrDataDoesNotAlignMissingFront\n\t\t\t}\n\n\t\t\tnewlen := len(cdata) - missingback\n\t\t\tif newlen < 1 {\n\t\t\t\treturn ErrDataDoesNotAlignMissingBack\n\t\t\t}\n\t\t\tcdata = cdata[0:newlen]\n\t\t\tif len(cdata) != len(data) {\n\t\t\t\treturn ErrDataDoesNotAlignDataLenMismatch\n\t\t\t}\n\n\t\t\tfrom, to = cfrom, cto\n\t\t}\n\n\t\tdata = cdata\n\t\tn += 1\n\t\treturn nil\n\t})\n\n\tif err != nil {\n\t\treturn nil, 0, 0, 0, err\n\t} else if n == 0 {\n\t\treturn nil, 0, 0, 0, ErrNoHostOrMetric\n\t} else if n > 1 {\n\t\tif minfo.Aggregation == AvgAggregation {\n\t\t\tnormalize := 1. / schema.Float(n)\n\t\t\tfor i := 0; i < len(data); i++ {\n\t\t\t\tdata[i] *= normalize\n\t\t\t}\n\t\t} else if minfo.Aggregation != SumAggregation {\n\t\t\treturn nil, 0, 0, 0, errors.New(\"[METRICSTORE]> invalid aggregation\")\n\t\t}\n\t}\n\n\tdata, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)\n\tif err != nil {\n\t\treturn nil, 0, 0, 0, err\n\t}\n\n\treturn data, from, to, resolution, nil\n}\n\n// Free releases all buffers for the selected level and all its children that\n// contain only values older than `t`.\nfunc (m *MemoryStore) Free(selector []string, t int64) (int, error) {\n\treturn m.GetLevel(selector).free(t)\n}\n\n// ForceFree unconditionally removes the oldest buffer from each metric chain.\nfunc (m *MemoryStore) ForceFree() (int, error) {\n\treturn m.GetLevel(nil).forceFree()\n}\n\nfunc (m *MemoryStore) FreeAll() error {\n\tm.root.lock.Lock()\n\tdefer m.root.lock.Unlock()\n\n\tfor k := range m.root.children {\n\t\tdelete(m.root.children, k)\n\t}\n\n\treturn nil\n}\n\nfunc (m *MemoryStore) SizeInBytes() int64 {\n\treturn m.root.sizeInBytes()\n}\n\nfunc (m *MemoryStore) SizeInGB() float64 {\n\treturn float64(m.root.sizeInBytes()) / 1e9\n}\n\n// ListChildren , given a selector, returns a list of all children of the level\n// selected.\nfunc (m *MemoryStore) ListChildren(selector []string) []string {\n\tlvl := &m.root\n\tfor lvl != nil && len(selector) != 0 {\n\t\tlvl.lock.RLock()\n\t\tnext := lvl.children[selector[0]]\n\t\tlvl.lock.RUnlock()\n\t\tlvl = next\n\t\tselector = selector[1:]\n\t}\n\n\tif lvl == nil {\n\t\treturn nil\n\t}\n\n\tlvl.lock.RLock()\n\tdefer lvl.lock.RUnlock()\n\n\tchildren := make([]string, 0, len(lvl.children))\n\tfor child := range lvl.children {\n\t\tchildren = append(children, child)\n\t}\n\n\treturn children\n}\n","structuredPatch":[{"oldStart":113,"oldLines":7,"newStart":113,"newLines":8,"lines":[" // 6. 
Optionally subscribes to NATS for real-time metric ingestion"," //"," // Parameters:","-// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig)","+// - rawConfig: JSON configuration for the metric store (see MetricStoreConfig); may be nil to use defaults","+// - metrics: Map of metric names to their configurations (frequency and aggregation strategy)"," // - wg: WaitGroup that will be incremented for each background goroutine started"," //"," // The function will call cclog.Fatal on critical errors during initialization."]}],"userModified":false,"replaceAll":false},"sourceToolAssistantUUID":"6370b39e-8df7-4f63-9d57-72f16766f0a7"}
{"parentUuid":"9de9703c-4090-475f-b6d6-be01a9ee2eb5","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_017KGWqFwBpLEiNVNeL6wzzF","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01BDu4bDDrbsDBB7EbrKX3UU","name":"Edit","input":{"replace_all":false,"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/checkpoint.go","old_string":"// Checkpointing starts a background worker that periodically saves metric data to disk.\n//\n// Checkpoints are written every 12 hours (hardcoded).\n//\n// Format behaviour:\n// - \"json\": Periodic checkpointing every checkpointInterval\n// - \"wal\": Periodic binary snapshots + WAL rotation every checkpointInterval","new_string":"// Checkpointing starts a background worker that periodically saves metric data to disk.\n//\n// The checkpoint interval is read from Keys.CheckpointInterval (default: 12 hours).\n//\n// Format behaviour:\n// - \"json\": Writes a JSON snapshot file per host every interval\n// - \"wal\": Writes a binary snapshot file per host every interval, then rotates\n// the current.wal files for all successfully checkpointed hosts"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":301,"cache_read_input_tokens":110797,"output_tokens":323,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":301,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw5zseqiCEcLdZv8Lk1","type":"assistant","uuid":"1893e431-17cc-49c7-b537-350d07be971a","timestamp":"2026-03-29T19:35:52.424Z"}
{"type":"file-history-snapshot","messageId":"1893e431-17cc-49c7-b537-350d07be971a","snapshot":{"messageId":"5850443c-83f3-4981-96ba-d5e60679f257","trackedFileBackups":{"pkg/metricstore/metricstore.go":{"backupFileName":"94484c782e7b75a8@v1","version":1,"backupTime":"2026-03-29T19:35:39.217Z"},"pkg/metricstore/checkpoint.go":{"backupFileName":"1948c8214a69da10@v1","version":1,"backupTime":"2026-03-29T19:35:52.527Z"}},"timestamp":"2026-03-29T19:34:20.233Z"},"isSnapshotUpdate":true}
{"parentUuid":"1893e431-17cc-49c7-b537-350d07be971a","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Edit","command":"callback"},"parentToolUseID":"toolu_01BDu4bDDrbsDBB7EbrKX3UU","toolUseID":"toolu_01BDu4bDDrbsDBB7EbrKX3UU","timestamp":"2026-03-29T19:35:52.536Z","uuid":"48f86bcc-47a1-4284-9f53-194a3a42bfa2"}
{"parentUuid":"1893e431-17cc-49c7-b537-350d07be971a","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01BDu4bDDrbsDBB7EbrKX3UU","type":"tool_result","content":"The file /home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/checkpoint.go has been updated successfully."}]},"uuid":"9d39bf6e-7ef8-4b73-8ef7-cbfbeab8e208","timestamp":"2026-03-29T19:35:52.535Z","toolUseResult":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/checkpoint.go","oldString":"// Checkpointing starts a background worker that periodically saves metric data to disk.\n//\n// Checkpoints are written every 12 hours (hardcoded).\n//\n// Format behaviour:\n// - \"json\": Periodic checkpointing every checkpointInterval\n// - \"wal\": Periodic binary snapshots + WAL rotation every checkpointInterval","newString":"// Checkpointing starts a background worker that periodically saves metric data to disk.\n//\n// The checkpoint interval is read from Keys.CheckpointInterval (default: 12 hours).\n//\n// Format behaviour:\n// - \"json\": Writes a JSON snapshot file per host every interval\n// - \"wal\": Writes a binary snapshot file per host every interval, then rotates\n// the current.wal files for all successfully checkpointed hosts","originalFile":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file implements checkpoint persistence for the in-memory metric store.\n//\n// Checkpoints enable graceful restarts by periodically saving in-memory metric\n// data to disk in JSON or binary format. The checkpoint system:\n//\n// Key Features:\n// - Periodic background checkpointing via the Checkpointing() worker\n// - Two format families: JSON (human-readable) and WAL+binary (compact, crash-safe)\n// - Parallel checkpoint creation and loading using worker pools\n// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{json|bin}\n// - WAL file: checkpoint_dir/cluster/host/current.wal (append-only, per-entry)\n// - Only saves unarchived data (archived data is already persisted elsewhere)\n// - GC optimization during loading to prevent excessive heap growth\n//\n// Checkpoint Workflow:\n// 1. Init() loads checkpoints within retention window at startup\n// 2. Checkpointing() worker periodically saves new data\n// 3. 
Shutdown() writes final checkpoint before exit\n//\n// File Organization:\n//\n//\tcheckpoints/\n//\t cluster1/\n//\t host001/\n//\t 1234567890.json (JSON format: full subtree snapshot)\n//\t 1234567890.bin (binary format: full subtree snapshot)\n//\t current.wal (WAL format: append-only per-entry log)\n//\t host002/\n//\t ...\npackage metricstore\n\nimport (\n\t\"bufio\"\n\t\"context\"\n\t\"encoding/json\"\n\t\"errors\"\n\t\"fmt\"\n\t\"io/fs\"\n\t\"os\"\n\t\"path\"\n\t\"path/filepath\"\n\t\"sort\"\n\t\"strconv\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\nconst (\n\tCheckpointFilePerms = 0o644 // File permissions for checkpoint files\n\tCheckpointDirPerms = 0o755 // Directory permissions for checkpoint directories\n\tGCTriggerInterval = DefaultGCTriggerInterval // Interval for triggering GC during checkpoint loading\n)\n\n// CheckpointMetrics represents metric data in a checkpoint file.\n// Whenever the structure changes, update MarshalJSON as well!\ntype CheckpointMetrics struct {\n\tData []schema.Float `json:\"data\"`\n\tFrequency int64 `json:\"frequency\"`\n\tStart int64 `json:\"start\"`\n}\n\n// CheckpointFile represents the hierarchical structure of a checkpoint file.\n// It mirrors the Level tree structure from the MemoryStore.\ntype CheckpointFile struct {\n\tMetrics map[string]*CheckpointMetrics `json:\"metrics\"`\n\tChildren map[string]*CheckpointFile `json:\"children\"`\n\tFrom int64 `json:\"from\"`\n\tTo int64 `json:\"to\"`\n}\n\n// lastCheckpoint tracks the timestamp of the last checkpoint creation.\nvar (\n\tlastCheckpoint time.Time\n\tlastCheckpointMu sync.Mutex\n)\n\n// Checkpointing starts a background worker that periodically saves metric data to disk.\n//\n// Checkpoints are written every 12 hours (hardcoded).\n//\n// Format behaviour:\n// - \"json\": Periodic checkpointing every checkpointInterval\n// - \"wal\": Periodic binary snapshots + WAL rotation every checkpointInterval\nfunc Checkpointing(wg *sync.WaitGroup, ctx context.Context) {\n\tlastCheckpointMu.Lock()\n\tlastCheckpoint = time.Now()\n\tlastCheckpointMu.Unlock()\n\n\tms := GetMemoryStore()\n\n\twg.Go(func() {\n\n\t\td := 12 * time.Hour // default checkpoint interval\n\t\tif Keys.CheckpointInterval != \"\" {\n\t\t\tparsed, err := time.ParseDuration(Keys.CheckpointInterval)\n\t\t\tif err != nil {\n\t\t\t\tcclog.Errorf(\"[METRICSTORE]> invalid checkpoint-interval %q: %s, using default 12h\", Keys.CheckpointInterval, err)\n\t\t\t} else {\n\t\t\t\td = parsed\n\t\t\t}\n\t\t}\n\n\t\tticker := time.NewTicker(d)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tlastCheckpointMu.Lock()\n\t\t\t\tfrom := lastCheckpoint\n\t\t\t\tlastCheckpointMu.Unlock()\n\n\t\t\t\tnow := time.Now()\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start checkpointing (starting at %s)...\", from.Format(time.RFC3339))\n\n\t\t\t\tif Keys.Checkpoints.FileFormat == \"wal\" {\n\t\t\t\t\t// Pause WAL writes: the binary snapshot captures all in-memory\n\t\t\t\t\t// data, so WAL records written during checkpoint are redundant\n\t\t\t\t\t// and would be deleted during rotation anyway.\n\t\t\t\t\twalCheckpointActive.Store(true)\n\t\t\t\t\tn, hostDirs, err := ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> binary checkpointing failed: %s\", 
err.Error())\n\t\t\t\t\t}\n\t\t\t\t\tif n > 0 {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d binary snapshot files created\", n)\n\t\t\t\t\t\tlastCheckpointMu.Lock()\n\t\t\t\t\t\tlastCheckpoint = now\n\t\t\t\t\t\tlastCheckpointMu.Unlock()\n\t\t\t\t\t\t// Rotate WAL files for successfully checkpointed hosts.\n\t\t\t\t\t\tRotateWALFiles(hostDirs)\n\t\t\t\t\t}\n\t\t\t\t\twalCheckpointActive.Store(false)\n\t\t\t\t\twalDropped.Store(0)\n\t\t\t\t} else {\n\t\t\t\t\tn, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> checkpointing failed: %s\", err.Error())\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoint files created\", n)\n\t\t\t\t\t\tlastCheckpointMu.Lock()\n\t\t\t\t\t\tlastCheckpoint = now\n\t\t\t\t\t\tlastCheckpointMu.Unlock()\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\n// MarshalJSON provides optimized JSON encoding for CheckpointMetrics.\n//\n// Since schema.Float has custom MarshalJSON, serializing []Float has significant overhead.\n// This method manually constructs JSON to avoid allocations and interface conversions.\nfunc (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {\n\tbuf := make([]byte, 0, 128+len(cm.Data)*8)\n\tbuf = append(buf, `{\"frequency\":`...)\n\tbuf = strconv.AppendInt(buf, cm.Frequency, 10)\n\tbuf = append(buf, `,\"start\":`...)\n\tbuf = strconv.AppendInt(buf, cm.Start, 10)\n\tbuf = append(buf, `,\"data\":[`...)\n\tfor i, x := range cm.Data {\n\t\tif i != 0 {\n\t\t\tbuf = append(buf, ',')\n\t\t}\n\t\tif x.IsNaN() {\n\t\t\tbuf = append(buf, `null`...)\n\t\t} else {\n\t\t\tbuf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)\n\t\t}\n\t}\n\tbuf = append(buf, `]}`...)\n\treturn buf, nil\n}\n\n// ToCheckpoint writes metric data to checkpoint files in parallel (JSON format).\n//\n// Metrics at root and cluster levels are skipped. One file per host is created.\n// Uses worker pool (Keys.NumWorkers) for parallel processing. 
Only locks one host\n// at a time, allowing concurrent writes/reads to other hosts.\n//\n// Returns the number of checkpoint files created and any errors encountered.\nfunc (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {\n\t// Pre-calculate capacity by counting cluster/host pairs\n\tm.root.lock.RLock()\n\ttotalHosts := 0\n\tfor _, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\ttotalHosts += len(l1.children)\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\tlevels := make([]*Level, 0, totalHosts)\n\tselectors := make([][]string, 0, totalHosts)\n\n\tm.root.lock.RLock()\n\tfor sel1, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\tfor sel2, l2 := range l1.children {\n\t\t\tlevels = append(levels, l2)\n\t\t\tselectors = append(selectors, []string{sel1, sel2})\n\t\t}\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\ttype workItem struct {\n\t\tlevel *Level\n\t\tdir string\n\t\tselector []string\n\t}\n\n\tn, errs := int32(0), int32(0)\n\n\tvar wg sync.WaitGroup\n\twg.Add(Keys.NumWorkers)\n\twork := make(chan workItem, Keys.NumWorkers*2)\n\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\n\t\t\tfor workItem := range work {\n\t\t\t\tif err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {\n\t\t\t\t\tif err == ErrNoNewArchiveData {\n\t\t\t\t\t\tcontinue\n\t\t\t\t\t}\n\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while checkpointing %#v: %s\", workItem.selector, err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t} else {\n\t\t\t\t\tatomic.AddInt32(&n, 1)\n\t\t\t\t}\n\t\t\t}\n\t\t}()\n\t}\n\n\tfor i := 0; i < len(levels); i++ {\n\t\tdir := path.Join(dir, path.Join(selectors[i]...))\n\t\twork <- workItem{\n\t\t\tlevel: levels[i],\n\t\t\tdir: dir,\n\t\t\tselector: selectors[i],\n\t\t}\n\t}\n\n\tclose(work)\n\twg.Wait()\n\n\tif errs > 0 {\n\t\treturn int(n), fmt.Errorf(\"[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)\", errs, n)\n\t}\n\treturn int(n), nil\n}\n\n// toCheckpointFile recursively converts a Level tree to CheckpointFile structure.\n// Skips metrics that are already archived. 
Returns nil if no unarchived data exists.\nfunc (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {\n\tl.lock.RLock()\n\tdefer l.lock.RUnlock()\n\n\tretval := &CheckpointFile{\n\t\tFrom: from,\n\t\tTo: to,\n\t\tMetrics: make(map[string]*CheckpointMetrics),\n\t\tChildren: make(map[string]*CheckpointFile),\n\t}\n\n\tfor metric, minfo := range m.Metrics {\n\t\tb := l.metrics[minfo.offset]\n\t\tif b == nil {\n\t\t\tcontinue\n\t\t}\n\n\t\tallArchived := true\n\t\tb.iterFromTo(from, to, func(b *buffer) error {\n\t\t\tif !b.archived {\n\t\t\t\tallArchived = false\n\t\t\t\treturn fmt.Errorf(\"stop\") // Early termination signal\n\t\t\t}\n\t\t\treturn nil\n\t\t})\n\n\t\tif allArchived {\n\t\t\tcontinue\n\t\t}\n\n\t\tdata := make([]schema.Float, (to-from)/b.frequency+1)\n\t\tdata, start, end, err := b.read(from, to, data)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\tfor i := int((end - start) / b.frequency); i < len(data); i++ {\n\t\t\tdata[i] = schema.NaN\n\t\t}\n\n\t\tretval.Metrics[metric] = &CheckpointMetrics{\n\t\t\tFrequency: b.frequency,\n\t\t\tStart: start,\n\t\t\tData: data,\n\t\t}\n\t}\n\n\tfor name, child := range l.children {\n\t\tval, err := child.toCheckpointFile(from, to, m)\n\t\tif err != nil {\n\t\t\treturn nil, err\n\t\t}\n\n\t\tif val != nil {\n\t\t\tretval.Children[name] = val\n\t\t}\n\t}\n\n\tif len(retval.Children) == 0 && len(retval.Metrics) == 0 {\n\t\treturn nil, nil\n\t}\n\n\treturn retval, nil\n}\n\n// toCheckpoint writes a Level's data to a JSON checkpoint file.\n// Creates directory if needed. Returns ErrNoNewArchiveData if nothing to save.\nfunc (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {\n\tcf, err := l.toCheckpointFile(from, to, m)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\tif cf == nil {\n\t\treturn ErrNoNewArchiveData\n\t}\n\n\tfilepath := path.Join(dir, fmt.Sprintf(\"%d.json\", from))\n\tf, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n\tif err != nil && os.IsNotExist(err) {\n\t\terr = os.MkdirAll(dir, CheckpointDirPerms)\n\t\tif err == nil {\n\t\t\tf, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n\t\t}\n\t}\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer f.Close()\n\n\tbw := bufio.NewWriter(f)\n\tif err = json.NewEncoder(bw).Encode(cf); err != nil {\n\t\treturn err\n\t}\n\n\treturn bw.Flush()\n}\n\n// enqueueCheckpointHosts traverses checkpoint directory and enqueues cluster/host pairs.\n// Returns error if directory structure is invalid.\nfunc enqueueCheckpointHosts(dir string, work chan<- [2]string) error {\n\tclustersDir, err := os.ReadDir(dir)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\tfor _, clusterDir := range clustersDir {\n\t\tif !clusterDir.IsDir() {\n\t\t\treturn errors.New(\"[METRICSTORE]> expected only directories at first level of checkpoints/ directory\")\n\t\t}\n\n\t\thostsDir, err := os.ReadDir(filepath.Join(dir, clusterDir.Name()))\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\tfor _, hostDir := range hostsDir {\n\t\t\tif !hostDir.IsDir() {\n\t\t\t\treturn errors.New(\"[METRICSTORE]> expected only directories at second level of checkpoints/ directory\")\n\t\t\t}\n\n\t\t\twork <- [2]string{clusterDir.Name(), hostDir.Name()}\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// FromCheckpoint loads checkpoint files from disk into memory in parallel.\n//\n// Uses worker pool to load cluster/host combinations. 
Returns number of files\n// loaded and any errors.\nfunc (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {\n\tvar wg sync.WaitGroup\n\twork := make(chan [2]string, Keys.NumWorkers*4)\n\tn, errs := int32(0), int32(0)\n\n\twg.Add(Keys.NumWorkers)\n\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tfor host := range work {\n\t\t\t\tlvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))\n\t\t\t\tnn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error while loading checkpoints for %s/%s: %s\", host[0], host[1], err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t}\n\t\t\t\tatomic.AddInt32(&n, int32(nn))\n\t\t\t}\n\t\t}()\n\t}\n\n\terr := enqueueCheckpointHosts(dir, work)\n\tclose(work)\n\twg.Wait()\n\n\tif err != nil {\n\t\treturn int(n), err\n\t}\n\n\tif errs > 0 {\n\t\treturn int(n), fmt.Errorf(\"[METRICSTORE]> %d errors happened while creating checkpoints (%d successes)\", errs, n)\n\t}\n\treturn int(n), nil\n}\n\n// FromCheckpointFiles is the main entry point for loading checkpoints at startup.\n//\n// Creates checkpoint directory if it doesn't exist. This function must be called\n// before any writes or reads, and can only be called once.\nfunc (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {\n\tif _, err := os.Stat(dir); os.IsNotExist(err) {\n\t\terr := os.MkdirAll(dir, CheckpointDirPerms)\n\t\tif err != nil {\n\t\t\tcclog.Fatalf(\"[METRICSTORE]> Error creating directory: %#v\\n\", err)\n\t\t}\n\t\tcclog.Debugf(\"[METRICSTORE]> %#v Directory created successfully\", dir)\n\t}\n\n\treturn m.FromCheckpoint(dir, from)\n}\n\nfunc (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {\n\tbr := bufio.NewReader(f)\n\tcf := &CheckpointFile{}\n\tif err := json.NewDecoder(br).Decode(cf); err != nil {\n\t\treturn err\n\t}\n\n\tif cf.To != 0 && cf.To < from {\n\t\treturn nil\n\t}\n\n\tif err := l.loadFile(cf, m); err != nil {\n\t\treturn err\n\t}\n\n\treturn nil\n}\n\nfunc (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {\n\tfor name, metric := range cf.Metrics {\n\t\tn := len(metric.Data)\n\t\tb := &buffer{\n\t\t\tfrequency: metric.Frequency,\n\t\t\tstart: metric.Start,\n\t\t\tdata: metric.Data[0:n:n],\n\t\t\tprev: nil,\n\t\t\tnext: nil,\n\t\t\tarchived: true,\n\t\t}\n\n\t\tminfo, ok := m.Metrics[name]\n\t\tif !ok {\n\t\t\tcontinue\n\t\t}\n\n\t\tprev := l.metrics[minfo.offset]\n\t\tif prev == nil {\n\t\t\tl.metrics[minfo.offset] = b\n\t\t} else {\n\t\t\tif prev.start > b.start {\n\t\t\t\treturn fmt.Errorf(\"[METRICSTORE]> buffer start time %d is before previous buffer start %d\", b.start, prev.start)\n\t\t\t}\n\n\t\t\tb.prev = prev\n\t\t\tprev.next = b\n\t\t}\n\t\tl.metrics[minfo.offset] = b\n\t}\n\n\tif len(cf.Children) > 0 && l.children == nil {\n\t\tl.children = make(map[string]*Level)\n\t}\n\n\tfor sel, childCf := range cf.Children {\n\t\tchild, ok := l.children[sel]\n\t\tif !ok {\n\t\t\tchild = &Level{\n\t\t\t\tmetrics: make([]*buffer, len(m.Metrics)),\n\t\t\t\tchildren: nil,\n\t\t\t}\n\t\t\tl.children[sel] = child\n\t\t}\n\n\t\tif err := child.loadFile(childCf, m); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// fromCheckpoint loads all checkpoint files (JSON, binary snapshot, WAL) for a\n// single host directory. 
Snapshot files are loaded first (sorted by timestamp),\n// then current.wal is replayed on top.\nfunc (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, error) {\n\tdirentries, err := os.ReadDir(dir)\n\tif err != nil {\n\t\tif os.IsNotExist(err) {\n\t\t\treturn 0, nil\n\t\t}\n\t\treturn 0, err\n\t}\n\n\tallFiles := make([]fs.DirEntry, 0)\n\tvar walEntry fs.DirEntry\n\tfilesLoaded := 0\n\n\tfor _, e := range direntries {\n\t\tif e.IsDir() {\n\t\t\t// Legacy: skip subdirectories (only used by old Avro format).\n\t\t\t// These are ignored; their data is not loaded.\n\t\t\tcclog.Debugf(\"[METRICSTORE]> skipping subdirectory %s in checkpoint dir %s\", e.Name(), dir)\n\t\t\tcontinue\n\t\t}\n\n\t\tname := e.Name()\n\t\tif strings.HasSuffix(name, \".json\") || strings.HasSuffix(name, \".bin\") {\n\t\t\tallFiles = append(allFiles, e)\n\t\t} else if name == \"current.wal\" {\n\t\t\twalEntry = e\n\t\t}\n\t\t// Silently ignore other files (e.g., .tmp, .bin.tmp from interrupted writes).\n\t}\n\n\tfiles, err := findFiles(allFiles, from, true)\n\tif err != nil {\n\t\treturn filesLoaded, err\n\t}\n\n\tloaders := map[string]func(*MemoryStore, *os.File, int64) error{\n\t\t\".json\": l.loadJSONFile,\n\t\t\".bin\": l.loadBinaryFile,\n\t}\n\n\tfor _, filename := range files {\n\t\text := filepath.Ext(filename)\n\t\tloader := loaders[ext]\n\t\tif loader == nil {\n\t\t\tcclog.Warnf(\"[METRICSTORE]> unknown extension for checkpoint file %s\", filename)\n\t\t\tcontinue\n\t\t}\n\n\t\terr := func() error {\n\t\t\tf, err := os.Open(path.Join(dir, filename))\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t\tdefer f.Close()\n\t\t\treturn loader(m, f, from)\n\t\t}()\n\t\tif err != nil {\n\t\t\treturn filesLoaded, err\n\t\t}\n\t\tfilesLoaded++\n\t}\n\n\t// Replay WAL after all snapshot files so it fills in data since the last snapshot.\n\tif walEntry != nil {\n\t\terr := func() error {\n\t\t\tf, err := os.Open(path.Join(dir, walEntry.Name()))\n\t\t\tif err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t\tdefer f.Close()\n\t\t\treturn l.loadWALFile(m, f, from)\n\t\t}()\n\t\tif err != nil {\n\t\t\t// WAL errors are non-fatal: the snapshot already loaded the bulk of data.\n\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL replay error for %s: %v (data since last snapshot may be missing)\", dir, err)\n\t\t} else {\n\t\t\tfilesLoaded++\n\t\t}\n\t}\n\n\treturn filesLoaded, nil\n}\n\n// parseTimestampFromFilename extracts a Unix timestamp from a checkpoint filename.\n// Supports \".json\" and \".bin\" files whose base name is the Unix timestamp\n// (e.g. \"1234567890.json\").\nfunc parseTimestampFromFilename(name string) (int64, error) {\n\tswitch {\n\tcase strings.HasSuffix(name, \".json\"):\n\t\treturn strconv.ParseInt(name[:len(name)-5], 10, 64)\n\tcase strings.HasSuffix(name, \".bin\"):\n\t\treturn strconv.ParseInt(name[:len(name)-4], 10, 64)\n\tdefault:\n\t\treturn 0, fmt.Errorf(\"unknown checkpoint extension for file %q\", name)\n\t}\n}\n\n// findFiles returns filenames from direntries whose timestamps satisfy the filter.\n// If findMoreRecentFiles is true, returns files with timestamps >= t (plus the\n// last file before t if t falls between two files).\nfunc findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {\n\tnums := map[string]int64{}\n\tfor _, e := range direntries {\n\t\tname := e.Name()\n\t\tif !strings.HasSuffix(name, \".json\") && !strings.HasSuffix(name, \".bin\") {\n\t\t\tcontinue\n\t\t}\n\n\t\tts, err := parseTimestampFromFilename(name)\n\t\tif err != nil {\n\t\t\treturn nil, 
err\n\t\t}\n\t\tnums[name] = ts\n\t}\n\n\tsort.Slice(direntries, func(i, j int) bool {\n\t\ta, b := direntries[i], direntries[j]\n\t\treturn nums[a.Name()] < nums[b.Name()]\n\t})\n\n\tif len(nums) == 0 {\n\t\treturn nil, nil\n\t}\n\n\tfilenames := make([]string, 0)\n\n\tfor i, e := range direntries {\n\t\tts1 := nums[e.Name()]\n\n\t\tif findMoreRecentFiles && t <= ts1 {\n\t\t\tfilenames = append(filenames, e.Name())\n\t\t} else if !findMoreRecentFiles && ts1 <= t && ts1 != 0 {\n\t\t\tfilenames = append(filenames, e.Name())\n\t\t}\n\n\t\tif i == len(direntries)-1 {\n\t\t\tcontinue\n\t\t}\n\n\t\tenext := direntries[i+1]\n\t\tts2 := nums[enext.Name()]\n\n\t\tif findMoreRecentFiles {\n\t\t\tif ts1 < t && t < ts2 {\n\t\t\t\tfilenames = append(filenames, e.Name())\n\t\t\t}\n\t\t}\n\t}\n\n\treturn filenames, nil\n}\n","structuredPatch":[{"oldStart":86,"oldLines":11,"newStart":86,"newLines":12,"lines":[" "," // Checkpointing starts a background worker that periodically saves metric data to disk."," //","-// Checkpoints are written every 12 hours (hardcoded).","+// The checkpoint interval is read from Keys.CheckpointInterval (default: 12 hours)."," //"," // Format behaviour:","-// - \"json\": Periodic checkpointing every checkpointInterval","-// - \"wal\": Periodic binary snapshots + WAL rotation every checkpointInterval","+// - \"json\": Writes a JSON snapshot file per host every interval","+// - \"wal\": Writes a binary snapshot file per host every interval, then rotates","+// the current.wal files for all successfully checkpointed hosts"," func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {"," lastCheckpointMu.Lock()"," lastCheckpoint = time.Now()"]}],"userModified":false,"replaceAll":false},"sourceToolAssistantUUID":"1893e431-17cc-49c7-b537-350d07be971a"}
{"parentUuid":"9d39bf6e-7ef8-4b73-8ef7-cbfbeab8e208","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01ThARvBKbwZMvqsExuC3Z4t","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_016bXo11NzDGKvztjAXxcfBr","name":"Edit","input":{"replace_all":false,"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/walCheckpoint.go","old_string":"// RotateWALFiles sends rotation requests for the given host directories\n// and blocks until all rotations complete.\nfunc RotateWALFilesAfterShutdown(hostDirs []string) {","new_string":"// RotateWALFilesAfterShutdown directly removes current.wal files for the given\n// host directories. Used after shutdown, when WALStaging goroutines have already\n// exited and the channel-based RotateWALFiles is no longer safe to call.\nfunc RotateWALFilesAfterShutdown(hostDirs []string) {"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":368,"cache_read_input_tokens":111098,"output_tokens":274,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":368,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw6ZJYL81svDRMgBkTq","type":"assistant","uuid":"b96fb57f-55ab-4fed-8973-b1e835518afd","timestamp":"2026-03-29T19:36:03.578Z"}
{"type":"file-history-snapshot","messageId":"b96fb57f-55ab-4fed-8973-b1e835518afd","snapshot":{"messageId":"5850443c-83f3-4981-96ba-d5e60679f257","trackedFileBackups":{"pkg/metricstore/metricstore.go":{"backupFileName":"94484c782e7b75a8@v1","version":1,"backupTime":"2026-03-29T19:35:39.217Z"},"pkg/metricstore/checkpoint.go":{"backupFileName":"1948c8214a69da10@v1","version":1,"backupTime":"2026-03-29T19:35:52.527Z"},"pkg/metricstore/walCheckpoint.go":{"backupFileName":"bfe2cc03c42797a1@v1","version":1,"backupTime":"2026-03-29T19:36:03.683Z"}},"timestamp":"2026-03-29T19:34:20.233Z"},"isSnapshotUpdate":true}
{"parentUuid":"b96fb57f-55ab-4fed-8973-b1e835518afd","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Edit","command":"callback"},"parentToolUseID":"toolu_016bXo11NzDGKvztjAXxcfBr","toolUseID":"toolu_016bXo11NzDGKvztjAXxcfBr","timestamp":"2026-03-29T19:36:03.697Z","uuid":"76c8b12f-489e-4458-bc10-5b48ec207696"}
{"parentUuid":"b96fb57f-55ab-4fed-8973-b1e835518afd","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_016bXo11NzDGKvztjAXxcfBr","type":"tool_result","content":"The file /home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/walCheckpoint.go has been updated successfully."}]},"uuid":"6a0f0479-dfeb-4d96-ad6c-509bf73f8cb8","timestamp":"2026-03-29T19:36:03.695Z","toolUseResult":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/walCheckpoint.go","oldString":"// RotateWALFiles sends rotation requests for the given host directories\n// and blocks until all rotations complete.\nfunc RotateWALFilesAfterShutdown(hostDirs []string) {","newString":"// RotateWALFilesAfterShutdown directly removes current.wal files for the given\n// host directories. Used after shutdown, when WALStaging goroutines have already\n// exited and the channel-based RotateWALFiles is no longer safe to call.\nfunc RotateWALFilesAfterShutdown(hostDirs []string) {","originalFile":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// Package metricstore provides walCheckpoint.go: WAL-based checkpoint implementation.\n//\n// This replaces the Avro shadow tree with an append-only Write-Ahead Log (WAL)\n// per host, eliminating the extra memory overhead of the AvroStore and providing\n// truly continuous (per-write) crash safety.\n//\n// # Architecture\n//\n//\tMetric write (DecodeLine)\n//\t │\n//\t ├─► WriteToLevel() → main MemoryStore (unchanged)\n//\t │\n//\t └─► WALMessages channel\n//\t │\n//\t ▼\n//\t WALStaging goroutine\n//\t │\n//\t ▼\n//\t checkpoints/cluster/host/current.wal (append-only, binary)\n//\n//\tPeriodic checkpoint (Checkpointing goroutine):\n//\t 1. Write .bin snapshot (column-oriented, from main tree)\n//\t 2. Signal WALStaging to truncate current.wal per host\n//\n//\tOn restart (FromCheckpoint):\n//\t 1. Load most recent .bin snapshot\n//\t 2. 
Replay current.wal (overwrite-safe: buffer.write handles duplicate timestamps)\n//\n// # WAL Record Format\n//\n//\t[4B magic 0xCC1DA7A1][4B payload_len][payload][4B CRC32]\n//\n//\tpayload:\n//\t [8B timestamp int64]\n//\t [2B metric_name_len uint16][N metric name bytes]\n//\t [1B selector_count uint8]\n//\t per selector: [1B selector_len uint8][M selector bytes]\n//\t [4B value float32 bits]\n//\n// # Binary Snapshot Format\n//\n//\t[4B magic 0xCC5B0001][8B from int64][8B to int64]\n//\tLevel tree (recursive):\n//\t [4B num_metrics uint32]\n//\t per metric:\n//\t [2B name_len uint16][N name bytes]\n//\t [8B frequency int64][8B start int64]\n//\t [4B num_values uint32][num_values × 4B float32]\n//\t [4B num_children uint32]\n//\t per child: [2B name_len uint16][N name bytes] + Level (recursive)\npackage metricstore\n\nimport (\n\t\"bufio\"\n\t\"context\"\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"hash/crc32\"\n\t\"hash/fnv\"\n\t\"io\"\n\t\"math\"\n\t\"os\"\n\t\"path\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n\t\"github.com/ClusterCockpit/cc-lib/v2/schema\"\n)\n\n// Magic numbers for binary formats.\nconst (\n\twalFileMagic = uint32(0xCC1DA701) // WAL file header magic\n\twalRecordMagic = uint32(0xCC1DA7A1) // WAL record magic\n\tsnapFileMagic = uint32(0xCC5B0001) // Binary snapshot magic\n)\n\n// walShardChs holds per-shard channels for WAL messages.\n// Initialized by WALStaging; nil when WAL is not active.\nvar walShardChs []chan *WALMessage\n\n// walShardRotateChs holds per-shard channels for WAL rotation requests.\nvar walShardRotateChs []chan walRotateReq\n\n// walNumShards stores the number of shards (set during WALStaging init).\nvar walNumShards int\n\n// walStagingWg tracks WALStaging goroutine exits for shutdown synchronization.\nvar walStagingWg sync.WaitGroup\n\n// walShuttingDown is set before closing shard channels to prevent\n// SendWALMessage from sending on a closed channel (which panics in Go).\nvar walShuttingDown atomic.Bool\n\n// walCheckpointActive is set during binary checkpoint writes.\n// While active, SendWALMessage skips sending (returns true) because the\n// snapshot captures all in-memory data, making WAL writes redundant.\nvar walCheckpointActive atomic.Bool\n\n// WALMessage represents a single metric write to be appended to the WAL.\n// Cluster and Node are NOT stored in the WAL record (inferred from file path).\ntype WALMessage struct {\n\tMetricName string\n\tCluster string\n\tNode string\n\tSelector []string\n\tValue schema.Float\n\tTimestamp int64\n}\n\n// walRotateReq requests WAL file rotation for a specific host directory.\n// The done channel is closed by the WAL goroutine when rotation is complete.\ntype walRotateReq struct {\n\thostDir string\n\tdone chan struct{}\n}\n\n// walFileState holds an open WAL file handle and buffered writer for one host directory.\ntype walFileState struct {\n\tf *os.File\n\tw *bufio.Writer\n\tdirty bool\n\tsize int64 // approximate bytes written (tracked from open + writes)\n}\n\n// walFlushInterval controls how often dirty WAL files are flushed to disk.\n// Decoupling flushes from message processing lets the consumer run at memory\n// speed, amortizing syscall overhead across many writes.\nconst walFlushInterval = 1 * time.Second\n\n// walShardIndex computes which shard a message belongs to based on cluster+node.\n// Uses FNV-1a hash for fast, well-distributed mapping.\nfunc walShardIndex(cluster, node string) int {\n\th := 
fnv.New32a()\n\th.Write([]byte(cluster))\n\th.Write([]byte{0})\n\th.Write([]byte(node))\n\treturn int(h.Sum32()) % walNumShards\n}\n\n// SendWALMessage routes a WAL message to the appropriate shard channel.\n// Returns false if the channel is full or shutdown is in progress.\nfunc SendWALMessage(msg *WALMessage) bool {\n\tif walShardChs == nil || walShuttingDown.Load() {\n\t\treturn false\n\t}\n\tif walCheckpointActive.Load() {\n\t\treturn true // Data safe in memory; snapshot will capture it\n\t}\n\tshard := walShardIndex(msg.Cluster, msg.Node)\n\tselect {\n\tcase walShardChs[shard] <- msg:\n\t\treturn true\n\tdefault:\n\t\treturn false\n\t}\n}\n\n// WALStaging starts N sharded background goroutines that receive WALMessage items\n// and append binary WAL records to per-host current.wal files.\n// Each shard owns a subset of hosts (determined by hash of cluster+node),\n// parallelizing serialization and I/O while keeping per-host writes serial.\nfunc WALStaging(wg *sync.WaitGroup, ctx context.Context) {\n\tif Keys.Checkpoints.FileFormat == \"json\" {\n\t\treturn\n\t}\n\n\twalNumShards = max(Keys.NumWorkers, 1)\n\tchBufSize := max(65536/walNumShards, 1024)\n\n\twalShardChs = make([]chan *WALMessage, walNumShards)\n\twalShardRotateChs = make([]chan walRotateReq, walNumShards)\n\n\tfor i := range walNumShards {\n\t\twalShardChs[i] = make(chan *WALMessage, chBufSize)\n\t\twalShardRotateChs[i] = make(chan walRotateReq, 64)\n\t}\n\n\tfor i := range walNumShards {\n\t\tmsgCh := walShardChs[i]\n\t\trotateCh := walShardRotateChs[i]\n\n\t\twalStagingWg.Add(1)\n\t\twg.Go(func() {\n\t\t\tdefer walStagingWg.Done()\n\t\t\thostFiles := make(map[string]*walFileState)\n\n\t\t\tdefer func() {\n\t\t\t\tfor _, ws := range hostFiles {\n\t\t\t\t\tif ws.f != nil {\n\t\t\t\t\t\tws.w.Flush()\n\t\t\t\t\t\tws.f.Close()\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}()\n\n\t\t\tgetOrOpenWAL := func(hostDir string) *walFileState {\n\t\t\t\tws, ok := hostFiles[hostDir]\n\t\t\t\tif ok {\n\t\t\t\t\treturn ws\n\t\t\t\t}\n\n\t\t\t\tif err := os.MkdirAll(hostDir, CheckpointDirPerms); err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: mkdir %s: %v\", hostDir, err)\n\t\t\t\t\treturn nil\n\t\t\t\t}\n\n\t\t\t\twalPath := path.Join(hostDir, \"current.wal\")\n\t\t\t\tf, err := os.OpenFile(walPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, CheckpointFilePerms)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: open %s: %v\", walPath, err)\n\t\t\t\t\treturn nil\n\t\t\t\t}\n\n\t\t\t\tw := bufio.NewWriter(f)\n\n\t\t\t\t// Write file header magic if file is new (empty).\n\t\t\t\tinfo, err := f.Stat()\n\t\t\t\tvar fileSize int64\n\t\t\t\tif err == nil {\n\t\t\t\t\tfileSize = info.Size()\n\t\t\t\t}\n\t\t\t\tif err == nil && fileSize == 0 {\n\t\t\t\t\tvar hdr [4]byte\n\t\t\t\t\tbinary.LittleEndian.PutUint32(hdr[:], walFileMagic)\n\t\t\t\t\tif _, err := w.Write(hdr[:]); err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: write header %s: %v\", walPath, err)\n\t\t\t\t\t\tf.Close()\n\t\t\t\t\t\treturn nil\n\t\t\t\t\t}\n\t\t\t\t\tfileSize = 4\n\t\t\t\t}\n\n\t\t\t\tws = &walFileState{f: f, w: w, size: fileSize}\n\t\t\t\thostFiles[hostDir] = ws\n\t\t\t\treturn ws\n\t\t\t}\n\n\t\t\tprocessMsg := func(msg *WALMessage) {\n\t\t\t\thostDir := path.Join(Keys.Checkpoints.RootDir, msg.Cluster, msg.Node)\n\t\t\t\tws := getOrOpenWAL(hostDir)\n\t\t\t\tif ws == nil {\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\t// Enforce max WAL size: force-rotate before writing if limit is exceeded.\n\t\t\t\t// The in-memory store still holds the data; only 
crash-recovery coverage is lost.\n\t\t\t\tif maxSize := Keys.Checkpoints.MaxWALSize; maxSize > 0 && ws.size >= maxSize {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL: force-rotating %s (size %d >= limit %d)\",\n\t\t\t\t\t\thostDir, ws.size, maxSize)\n\t\t\t\t\tws.w.Flush()\n\t\t\t\t\tws.f.Close()\n\t\t\t\t\twalPath := path.Join(hostDir, \"current.wal\")\n\t\t\t\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n\t\t\t\t\t}\n\t\t\t\t\tdelete(hostFiles, hostDir)\n\t\t\t\t\tws = getOrOpenWAL(hostDir)\n\t\t\t\t\tif ws == nil {\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tn, err := writeWALRecordDirect(ws.w, msg)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: write record: %v\", err)\n\t\t\t\t}\n\t\t\t\tws.size += int64(n)\n\t\t\t\tws.dirty = true\n\t\t\t}\n\n\t\t\tprocessRotate := func(req walRotateReq) {\n\t\t\t\tws, ok := hostFiles[req.hostDir]\n\t\t\t\tif ok && ws.f != nil {\n\t\t\t\t\tws.w.Flush()\n\t\t\t\t\tws.f.Close()\n\t\t\t\t\twalPath := path.Join(req.hostDir, \"current.wal\")\n\t\t\t\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n\t\t\t\t\t}\n\t\t\t\t\tdelete(hostFiles, req.hostDir)\n\t\t\t\t}\n\t\t\t\tclose(req.done)\n\t\t\t}\n\n\t\t\tflushDirty := func() {\n\t\t\t\tfor _, ws := range hostFiles {\n\t\t\t\t\tif ws.dirty {\n\t\t\t\t\t\tws.w.Flush()\n\t\t\t\t\t\tws.dirty = false\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tticker := time.NewTicker(walFlushInterval)\n\t\t\tdefer ticker.Stop()\n\n\t\t\t// drainBatch processes up to 4096 pending messages without blocking.\n\t\t\t// Returns false if the channel was closed.\n\t\t\tdrainBatch := func() bool {\n\t\t\t\tfor range 4096 {\n\t\t\t\t\tselect {\n\t\t\t\t\tcase msg, ok := <-msgCh:\n\t\t\t\t\t\tif !ok {\n\t\t\t\t\t\t\tflushDirty()\n\t\t\t\t\t\t\treturn false\n\t\t\t\t\t\t}\n\t\t\t\t\t\tprocessMsg(msg)\n\t\t\t\t\tcase req := <-rotateCh:\n\t\t\t\t\t\tprocessRotate(req)\n\t\t\t\t\tdefault:\n\t\t\t\t\t\treturn true\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\treturn true\n\t\t\t}\n\n\t\t\tfor {\n\t\t\t\tselect {\n\t\t\t\tcase <-ctx.Done():\n\t\t\t\t\t// On shutdown, skip draining buffered messages — a full binary\n\t\t\t\t\t// checkpoint will be written from in-memory state, making\n\t\t\t\t\t// buffered WAL records redundant.\n\t\t\t\t\tflushDirty()\n\t\t\t\t\treturn\n\t\t\t\tcase msg, ok := <-msgCh:\n\t\t\t\t\tif !ok {\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t\tprocessMsg(msg)\n\t\t\t\t\tif !drainBatch() {\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t\t// No flush here — timer handles periodic flushing.\n\t\t\t\tcase <-ticker.C:\n\t\t\t\t\tflushDirty()\n\t\t\t\tcase req := <-rotateCh:\n\t\t\t\t\tprocessRotate(req)\n\t\t\t\t}\n\t\t\t}\n\t\t})\n\t}\n}\n\n// WaitForWALStagingDrain blocks until all WALStaging goroutines have exited.\n// Must be called after closing walShardChs to ensure all file handles are\n// flushed and closed before checkpoint writes begin.\nfunc WaitForWALStagingDrain() {\n\twalStagingWg.Wait()\n}\n\n// RotateWALFiles sends rotation requests for the given host directories\n// and blocks until all rotations complete. 
Each request is routed to the\n// shard that owns the host directory.\n//\n// If shutdown is in progress (WAL staging goroutines may have exited),\n// rotation is skipped to avoid deadlocking on abandoned channels.\nfunc RotateWALFiles(hostDirs []string) {\n\tif walShardRotateChs == nil || walShuttingDown.Load() {\n\t\treturn\n\t}\n\tdeadline := time.After(2 * time.Minute)\n\tdones := make([]chan struct{}, 0, len(hostDirs))\n\tfor _, dir := range hostDirs {\n\t\tdone := make(chan struct{})\n\t\tshard := walShardIndexFromDir(dir)\n\t\tselect {\n\t\tcase walShardRotateChs[shard] <- walRotateReq{hostDir: dir, done: done}:\n\t\t\tdones = append(dones, done)\n\t\tcase <-deadline:\n\t\t\tcclog.Warnf(\"[METRICSTORE]> WAL rotation send timed out, %d of %d hosts remaining\",\n\t\t\t\tlen(hostDirs)-len(dones), len(hostDirs))\n\t\t\tgoto waitDones\n\t\t}\n\t}\nwaitDones:\n\tfor _, done := range dones {\n\t\tselect {\n\t\tcase <-done:\n\t\tcase <-time.After(30 * time.Second):\n\t\t\tcclog.Warn(\"[METRICSTORE]> WAL rotation completion timed out, continuing\")\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// walShardIndexFromDir extracts cluster and node from a host directory path\n// and returns the shard index. The path format is: rootDir/cluster/node\nfunc walShardIndexFromDir(hostDir string) int {\n\t// hostDir = .../cluster/node — extract last two path components\n\tnode := path.Base(hostDir)\n\tcluster := path.Base(path.Dir(hostDir))\n\treturn walShardIndex(cluster, node)\n}\n\n// RotateWALFiles sends rotation requests for the given host directories\n// and blocks until all rotations complete.\nfunc RotateWALFilesAfterShutdown(hostDirs []string) {\n\tfor _, dir := range hostDirs {\n\t\twalPath := path.Join(dir, \"current.wal\")\n\t\tif err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> WAL: remove %s: %v\", walPath, err)\n\t\t}\n\t}\n}\n\n// writeWALRecordDirect encodes a WAL record into a contiguous buffer first,\n// then writes it to the bufio.Writer in a single call. This prevents partial\n// records in the write buffer if a write error occurs mid-record (e.g. 
disk full).\n// Returns the number of bytes written and any error.\nfunc writeWALRecordDirect(w *bufio.Writer, msg *WALMessage) (int, error) {\n\t// Compute payload and total record size.\n\tpayloadSize := 8 + 2 + len(msg.MetricName) + 1 + 4\n\tfor _, s := range msg.Selector {\n\t\tpayloadSize += 1 + len(s)\n\t}\n\t// Total: 8 (header) + payload + 4 (CRC).\n\ttotalSize := 8 + payloadSize + 4\n\n\t// Use stack buffer for typical small records, heap-allocate only for large ones.\n\tvar stackBuf [256]byte\n\tvar buf []byte\n\tif totalSize <= len(stackBuf) {\n\t\tbuf = stackBuf[:totalSize]\n\t} else {\n\t\tbuf = make([]byte, totalSize)\n\t}\n\n\t// Header: magic + payload length.\n\tbinary.LittleEndian.PutUint32(buf[0:4], walRecordMagic)\n\tbinary.LittleEndian.PutUint32(buf[4:8], uint32(payloadSize))\n\n\t// Payload starts at offset 8.\n\tp := 8\n\n\t// Timestamp (8 bytes).\n\tbinary.LittleEndian.PutUint64(buf[p:p+8], uint64(msg.Timestamp))\n\tp += 8\n\n\t// Metric name length (2 bytes) + metric name.\n\tbinary.LittleEndian.PutUint16(buf[p:p+2], uint16(len(msg.MetricName)))\n\tp += 2\n\tp += copy(buf[p:], msg.MetricName)\n\n\t// Selector count (1 byte).\n\tbuf[p] = byte(len(msg.Selector))\n\tp++\n\n\t// Selectors (1-byte length + bytes each).\n\tfor _, sel := range msg.Selector {\n\t\tbuf[p] = byte(len(sel))\n\t\tp++\n\t\tp += copy(buf[p:], sel)\n\t}\n\n\t// Value (4 bytes, float32 bits).\n\tbinary.LittleEndian.PutUint32(buf[p:p+4], math.Float32bits(float32(msg.Value)))\n\tp += 4\n\n\t// CRC32 over payload (bytes 8..8+payloadSize).\n\tcrc := crc32.ChecksumIEEE(buf[8 : 8+payloadSize])\n\tbinary.LittleEndian.PutUint32(buf[p:p+4], crc)\n\n\t// Single atomic write to the buffered writer.\n\tn, err := w.Write(buf)\n\treturn n, err\n}\n\n// readWALRecord reads one WAL record from the reader.\n// Returns (nil, nil) on clean EOF. 
Returns error on data corruption.\n// A CRC mismatch indicates a truncated trailing record (expected on crash).\nfunc readWALRecord(r io.Reader) (*WALMessage, error) {\n\tvar magic uint32\n\tif err := binary.Read(r, binary.LittleEndian, &magic); err != nil {\n\t\tif err == io.EOF {\n\t\t\treturn nil, nil // Clean EOF\n\t\t}\n\t\treturn nil, fmt.Errorf(\"read record magic: %w\", err)\n\t}\n\n\tif magic != walRecordMagic {\n\t\treturn nil, fmt.Errorf(\"invalid record magic 0x%08X (expected 0x%08X)\", magic, walRecordMagic)\n\t}\n\n\tvar payloadLen uint32\n\tif err := binary.Read(r, binary.LittleEndian, &payloadLen); err != nil {\n\t\treturn nil, fmt.Errorf(\"read payload length: %w\", err)\n\t}\n\n\tif payloadLen > 1<<20 { // 1 MB sanity limit\n\t\treturn nil, fmt.Errorf(\"record payload too large: %d bytes\", payloadLen)\n\t}\n\n\tpayload := make([]byte, payloadLen)\n\tif _, err := io.ReadFull(r, payload); err != nil {\n\t\treturn nil, fmt.Errorf(\"read payload: %w\", err)\n\t}\n\n\tvar storedCRC uint32\n\tif err := binary.Read(r, binary.LittleEndian, &storedCRC); err != nil {\n\t\treturn nil, fmt.Errorf(\"read CRC: %w\", err)\n\t}\n\n\tif crc32.ChecksumIEEE(payload) != storedCRC {\n\t\treturn nil, fmt.Errorf(\"CRC mismatch (truncated write or corruption)\")\n\t}\n\n\treturn parseWALPayload(payload)\n}\n\n// parseWALPayload decodes a binary payload into a WALMessage.\nfunc parseWALPayload(payload []byte) (*WALMessage, error) {\n\tif len(payload) < 8+2+1+4 {\n\t\treturn nil, fmt.Errorf(\"payload too short: %d bytes\", len(payload))\n\t}\n\n\toffset := 0\n\n\t// Timestamp (8 bytes)\n\tts := int64(binary.LittleEndian.Uint64(payload[offset : offset+8]))\n\toffset += 8\n\n\t// Metric name (2-byte length + bytes)\n\tif offset+2 > len(payload) {\n\t\treturn nil, fmt.Errorf(\"metric name length overflows payload\")\n\t}\n\tmLen := int(binary.LittleEndian.Uint16(payload[offset : offset+2]))\n\toffset += 2\n\n\tif offset+mLen > len(payload) {\n\t\treturn nil, fmt.Errorf(\"metric name overflows payload\")\n\t}\n\tmetricName := string(payload[offset : offset+mLen])\n\toffset += mLen\n\n\t// Selector count (1 byte)\n\tif offset >= len(payload) {\n\t\treturn nil, fmt.Errorf(\"selector count overflows payload\")\n\t}\n\tselCount := int(payload[offset])\n\toffset++\n\n\tselectors := make([]string, selCount)\n\tfor i := range selCount {\n\t\tif offset >= len(payload) {\n\t\t\treturn nil, fmt.Errorf(\"selector[%d] length overflows payload\", i)\n\t\t}\n\t\tsLen := int(payload[offset])\n\t\toffset++\n\n\t\tif offset+sLen > len(payload) {\n\t\t\treturn nil, fmt.Errorf(\"selector[%d] data overflows payload\", i)\n\t\t}\n\t\tselectors[i] = string(payload[offset : offset+sLen])\n\t\toffset += sLen\n\t}\n\n\t// Value (4 bytes, float32 bits)\n\tif offset+4 > len(payload) {\n\t\treturn nil, fmt.Errorf(\"value overflows payload\")\n\t}\n\tbits := binary.LittleEndian.Uint32(payload[offset : offset+4])\n\tvalue := schema.Float(math.Float32frombits(bits))\n\n\treturn &WALMessage{\n\t\tMetricName: metricName,\n\t\tTimestamp: ts,\n\t\tSelector: selectors,\n\t\tValue: value,\n\t}, nil\n}\n\n// loadWALFile reads a WAL file and replays all valid records into the Level tree.\n// l is the host-level node. Corrupt or partial trailing records are silently skipped\n// (expected on crash). 
Records older than 'from' are skipped.\nfunc (l *Level) loadWALFile(m *MemoryStore, f *os.File, from int64) error {\n\tbr := bufio.NewReader(f)\n\n\t// Verify file header magic.\n\tvar fileMagic uint32\n\tif err := binary.Read(br, binary.LittleEndian, &fileMagic); err != nil {\n\t\tif err == io.EOF {\n\t\t\treturn nil // Empty file, no data\n\t\t}\n\t\treturn fmt.Errorf(\"[METRICSTORE]> WAL: read file header: %w\", err)\n\t}\n\n\tif fileMagic != walFileMagic {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> WAL: invalid file magic 0x%08X (expected 0x%08X)\", fileMagic, walFileMagic)\n\t}\n\n\t// Cache level lookups to avoid repeated tree traversal.\n\tlvlCache := make(map[string]*Level)\n\n\tfor {\n\t\tmsg, err := readWALRecord(br)\n\t\tif err != nil {\n\t\t\t// Truncated trailing record is expected after a crash; stop replaying.\n\t\t\tcclog.Debugf(\"[METRICSTORE]> WAL: stopping replay at corrupted/partial record: %v\", err)\n\t\t\tbreak\n\t\t}\n\t\tif msg == nil {\n\t\t\tbreak // Clean EOF\n\t\t}\n\n\t\tif msg.Timestamp < from {\n\t\t\tcontinue // Older than retention window\n\t\t}\n\n\t\tminfo, ok := m.Metrics[msg.MetricName]\n\t\tif !ok {\n\t\t\tcontinue // Unknown metric (config may have changed)\n\t\t}\n\n\t\t// Cache key is the null-separated selector path.\n\t\tcacheKey := joinSelector(msg.Selector)\n\t\tlvl, ok := lvlCache[cacheKey]\n\t\tif !ok {\n\t\t\tlvl = l.findLevelOrCreate(msg.Selector, len(m.Metrics))\n\t\t\tlvlCache[cacheKey] = lvl\n\t\t}\n\n\t\t// Write directly to the buffer, same as WriteToLevel but without the\n\t\t// global level lookup (we already have the right level).\n\t\tlvl.lock.Lock()\n\t\tb := lvl.metrics[minfo.offset]\n\t\tif b == nil {\n\t\t\tb = newBuffer(msg.Timestamp, minfo.Frequency)\n\t\t\tlvl.metrics[minfo.offset] = b\n\t\t}\n\t\tnb, writeErr := b.write(msg.Timestamp, msg.Value)\n\t\tif writeErr == nil && b != nb {\n\t\t\tlvl.metrics[minfo.offset] = nb\n\t\t}\n\t\t// Ignore write errors for timestamps before buffer start (can happen when\n\t\t// replaying WAL entries that predate a loaded snapshot's start time).\n\t\tlvl.lock.Unlock()\n\t}\n\n\treturn nil\n}\n\n// joinSelector builds a cache key from a selector slice using null bytes as separators.\nfunc joinSelector(sel []string) string {\n\tif len(sel) == 0 {\n\t\treturn \"\"\n\t}\n\tvar result strings.Builder\n\tresult.WriteString(sel[0])\n\tfor i := 1; i < len(sel); i++ {\n\t\tresult.WriteString(\"\\x00\" + sel[i])\n\t}\n\treturn result.String()\n}\n\n// ToCheckpointWAL writes binary snapshot files for all hosts in parallel.\n// Returns the number of files written, the list of host directories that were\n// successfully checkpointed (for WAL rotation), and any errors.\nfunc (m *MemoryStore) ToCheckpointWAL(dir string, from, to int64) (int, []string, error) {\n\t// Collect all cluster/host pairs.\n\tm.root.lock.RLock()\n\ttotalHosts := 0\n\tfor _, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\ttotalHosts += len(l1.children)\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\tlevels := make([]*Level, 0, totalHosts)\n\tselectors := make([][]string, 0, totalHosts)\n\n\tm.root.lock.RLock()\n\tfor sel1, l1 := range m.root.children {\n\t\tl1.lock.RLock()\n\t\tfor sel2, l2 := range l1.children {\n\t\t\tlevels = append(levels, l2)\n\t\t\tselectors = append(selectors, []string{sel1, sel2})\n\t\t}\n\t\tl1.lock.RUnlock()\n\t}\n\tm.root.lock.RUnlock()\n\n\ttype workItem struct {\n\t\tlevel *Level\n\t\thostDir string\n\t\tselector []string\n\t}\n\n\ttotalWork := len(levels)\n\tcclog.Infof(\"[METRICSTORE]> 
Starting binary checkpoint for %d hosts with %d workers\", totalWork, Keys.NumWorkers)\n\n\tn, errs, completed := int32(0), int32(0), int32(0)\n\tvar successDirs []string\n\tvar successMu sync.Mutex\n\n\tvar wg sync.WaitGroup\n\twg.Add(Keys.NumWorkers)\n\twork := make(chan workItem, Keys.NumWorkers*2)\n\n\t// Progress logging goroutine.\n\tstopProgress := make(chan struct{})\n\tgo func() {\n\t\tticker := time.NewTicker(10 * time.Second)\n\t\tdefer ticker.Stop()\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ticker.C:\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> Checkpoint progress: %d/%d hosts (%d written, %d errors)\",\n\t\t\t\t\tatomic.LoadInt32(&completed), totalWork, atomic.LoadInt32(&n), atomic.LoadInt32(&errs))\n\t\t\tcase <-stopProgress:\n\t\t\t\treturn\n\t\t\t}\n\t\t}\n\t}()\n\n\tfor range Keys.NumWorkers {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tfor wi := range work {\n\t\t\t\terr := wi.level.toCheckpointBinary(wi.hostDir, from, to, m)\n\t\t\t\tif err != nil {\n\t\t\t\t\tif err == ErrNoNewArchiveData {\n\t\t\t\t\t\tatomic.AddInt32(&completed, 1)\n\t\t\t\t\t\tcontinue\n\t\t\t\t\t}\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> binary checkpoint error for %s: %v\", wi.hostDir, err)\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t} else {\n\t\t\t\t\tatomic.AddInt32(&n, 1)\n\t\t\t\t\tsuccessMu.Lock()\n\t\t\t\t\tsuccessDirs = append(successDirs, wi.hostDir)\n\t\t\t\t\tsuccessMu.Unlock()\n\t\t\t\t}\n\t\t\t\tatomic.AddInt32(&completed, 1)\n\t\t\t}\n\t\t}()\n\t}\n\n\tfor i := range levels {\n\t\thostDir := path.Join(dir, path.Join(selectors[i]...))\n\t\twork <- workItem{\n\t\t\tlevel: levels[i],\n\t\t\thostDir: hostDir,\n\t\t\tselector: selectors[i],\n\t\t}\n\t}\n\tclose(work)\n\twg.Wait()\n\tclose(stopProgress)\n\n\tif errs > 0 {\n\t\treturn int(n), successDirs, fmt.Errorf(\"[METRICSTORE]> %d errors during binary checkpoint (%d successes)\", errs, n)\n\t}\n\treturn int(n), successDirs, nil\n}\n\n// toCheckpointBinary writes a binary snapshot file for a single host-level node.\n// Uses atomic rename (write to .tmp then rename) to avoid partial reads on crash.\nfunc (l *Level) toCheckpointBinary(dir string, from, to int64, m *MemoryStore) error {\n\tcf, err := l.toCheckpointFile(from, to, m)\n\tif err != nil {\n\t\treturn err\n\t}\n\tif cf == nil {\n\t\treturn ErrNoNewArchiveData\n\t}\n\n\tif err := os.MkdirAll(dir, CheckpointDirPerms); err != nil {\n\t\treturn fmt.Errorf(\"mkdir %s: %w\", dir, err)\n\t}\n\n\t// Write to a temp file first, then rename (atomic on POSIX).\n\ttmpPath := path.Join(dir, fmt.Sprintf(\"%d.bin.tmp\", from))\n\tfinalPath := path.Join(dir, fmt.Sprintf(\"%d.bin\", from))\n\n\tf, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"open binary snapshot %s: %w\", tmpPath, err)\n\t}\n\n\tbw := bufio.NewWriter(f)\n\tif err := writeBinarySnapshotFile(bw, cf); err != nil {\n\t\tf.Close()\n\t\tos.Remove(tmpPath)\n\t\treturn fmt.Errorf(\"write binary snapshot: %w\", err)\n\t}\n\tif err := bw.Flush(); err != nil {\n\t\tf.Close()\n\t\tos.Remove(tmpPath)\n\t\treturn err\n\t}\n\tf.Close()\n\n\treturn os.Rename(tmpPath, finalPath)\n}\n\n// writeBinarySnapshotFile writes the binary snapshot file header and level tree.\nfunc writeBinarySnapshotFile(w io.Writer, cf *CheckpointFile) error {\n\tif err := binary.Write(w, binary.LittleEndian, snapFileMagic); err != nil {\n\t\treturn err\n\t}\n\tif err := binary.Write(w, binary.LittleEndian, cf.From); err != nil {\n\t\treturn err\n\t}\n\tif err := binary.Write(w, 
binary.LittleEndian, cf.To); err != nil {\n\t\treturn err\n\t}\n\treturn writeBinaryLevel(w, cf)\n}\n\n// writeBinaryLevel recursively writes a CheckpointFile level in binary format.\nfunc writeBinaryLevel(w io.Writer, cf *CheckpointFile) error {\n\tif err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Metrics))); err != nil {\n\t\treturn err\n\t}\n\n\tfor name, metric := range cf.Metrics {\n\t\tif err := writeString16(w, name); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := binary.Write(w, binary.LittleEndian, metric.Frequency); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := binary.Write(w, binary.LittleEndian, metric.Start); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := binary.Write(w, binary.LittleEndian, uint32(len(metric.Data))); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tfor _, v := range metric.Data {\n\t\t\tif err := binary.Write(w, binary.LittleEndian, math.Float32bits(float32(v))); err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t}\n\t}\n\n\tif err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Children))); err != nil {\n\t\treturn err\n\t}\n\n\tfor name, child := range cf.Children {\n\t\tif err := writeString16(w, name); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tif err := writeBinaryLevel(w, child); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// writeString16 writes a 2-byte length-prefixed string to w.\nfunc writeString16(w io.Writer, s string) error {\n\tif err := binary.Write(w, binary.LittleEndian, uint16(len(s))); err != nil {\n\t\treturn err\n\t}\n\t_, err := io.WriteString(w, s)\n\treturn err\n}\n\n// loadBinaryFile reads a binary snapshot file and loads data into the Level tree.\n// The retention check (from) is applied to the file's 'to' timestamp.\nfunc (l *Level) loadBinaryFile(m *MemoryStore, f *os.File, from int64) error {\n\tbr := bufio.NewReader(f)\n\n\tvar magic uint32\n\tif err := binary.Read(br, binary.LittleEndian, &magic); err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read magic: %w\", err)\n\t}\n\tif magic != snapFileMagic {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: invalid magic 0x%08X (expected 0x%08X)\", magic, snapFileMagic)\n\t}\n\n\tvar fileFrom, fileTo int64\n\tif err := binary.Read(br, binary.LittleEndian, &fileFrom); err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read from: %w\", err)\n\t}\n\tif err := binary.Read(br, binary.LittleEndian, &fileTo); err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read to: %w\", err)\n\t}\n\n\tif fileTo != 0 && fileTo < from {\n\t\treturn nil // File is older than retention window, skip it\n\t}\n\n\tcf, err := readBinaryLevel(br)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"[METRICSTORE]> binary snapshot: read level tree: %w\", err)\n\t}\n\tcf.From = fileFrom\n\tcf.To = fileTo\n\n\treturn l.loadFile(cf, m)\n}\n\n// readBinaryLevel recursively reads a level from the binary snapshot format.\nfunc readBinaryLevel(r io.Reader) (*CheckpointFile, error) {\n\tcf := &CheckpointFile{\n\t\tMetrics: make(map[string]*CheckpointMetrics),\n\t\tChildren: make(map[string]*CheckpointFile),\n\t}\n\n\tvar numMetrics uint32\n\tif err := binary.Read(r, binary.LittleEndian, &numMetrics); err != nil {\n\t\treturn nil, fmt.Errorf(\"read num_metrics: %w\", err)\n\t}\n\n\tfor range numMetrics {\n\t\tname, err := readString16(r)\n\t\tif err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read metric name: %w\", err)\n\t\t}\n\n\t\tvar freq, start int64\n\t\tif err := binary.Read(r, binary.LittleEndian, &freq); err != 
nil {\n\t\t\treturn nil, fmt.Errorf(\"read frequency for %s: %w\", name, err)\n\t\t}\n\t\tif err := binary.Read(r, binary.LittleEndian, &start); err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read start for %s: %w\", name, err)\n\t\t}\n\n\t\tvar numValues uint32\n\t\tif err := binary.Read(r, binary.LittleEndian, &numValues); err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read num_values for %s: %w\", name, err)\n\t\t}\n\n\t\tdata := make([]schema.Float, numValues)\n\t\tfor i := range numValues {\n\t\t\tvar bits uint32\n\t\t\tif err := binary.Read(r, binary.LittleEndian, &bits); err != nil {\n\t\t\t\treturn nil, fmt.Errorf(\"read value[%d] for %s: %w\", i, name, err)\n\t\t\t}\n\t\t\tdata[i] = schema.Float(math.Float32frombits(bits))\n\t\t}\n\n\t\tcf.Metrics[name] = &CheckpointMetrics{\n\t\t\tFrequency: freq,\n\t\t\tStart: start,\n\t\t\tData: data,\n\t\t}\n\t}\n\n\tvar numChildren uint32\n\tif err := binary.Read(r, binary.LittleEndian, &numChildren); err != nil {\n\t\treturn nil, fmt.Errorf(\"read num_children: %w\", err)\n\t}\n\n\tfor range numChildren {\n\t\tchildName, err := readString16(r)\n\t\tif err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read child name: %w\", err)\n\t\t}\n\n\t\tchild, err := readBinaryLevel(r)\n\t\tif err != nil {\n\t\t\treturn nil, fmt.Errorf(\"read child %s: %w\", childName, err)\n\t\t}\n\t\tcf.Children[childName] = child\n\t}\n\n\treturn cf, nil\n}\n\n// readString16 reads a 2-byte length-prefixed string from r.\nfunc readString16(r io.Reader) (string, error) {\n\tvar sLen uint16\n\tif err := binary.Read(r, binary.LittleEndian, &sLen); err != nil {\n\t\treturn \"\", err\n\t}\n\tbuf := make([]byte, sLen)\n\tif _, err := io.ReadFull(r, buf); err != nil {\n\t\treturn \"\", err\n\t}\n\treturn string(buf), nil\n}\n","structuredPatch":[{"oldStart":400,"oldLines":8,"newStart":400,"newLines":9,"lines":[" return walShardIndex(cluster, node)"," }"," ","-// RotateWALFiles sends rotation requests for the given host directories","-// and blocks until all rotations complete.","+// RotateWALFilesAfterShutdown directly removes current.wal files for the given","+// host directories. Used after shutdown, when WALStaging goroutines have already","+// exited and the channel-based RotateWALFiles is no longer safe to call."," func RotateWALFilesAfterShutdown(hostDirs []string) {"," for _, dir := range hostDirs {"," walPath := path.Join(dir, \"current.wal\")"]}],"userModified":false,"replaceAll":false},"sourceToolAssistantUUID":"b96fb57f-55ab-4fed-8973-b1e835518afd"}
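The WAL record layout documented in the file above ([4B magic][4B payload_len][payload][4B CRC32], with the payload holding timestamp, metric name, selectors, and a float32 value) can be exercised with a small encoder sketch. The magic constant is taken from the log; the metric name, selector, and value are made-up examples:

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"hash/crc32"
	"math"
)

const recMagic = 0xCC1DA7A1 // record magic from the format description above

// encodeRec builds one WAL record following the documented layout:
// [4B magic][4B payload_len][payload][4B CRC32(payload)].
func encodeRec(ts int64, metric string, sel []string, val float32) []byte {
	var p bytes.Buffer
	binary.Write(&p, binary.LittleEndian, uint64(ts))
	binary.Write(&p, binary.LittleEndian, uint16(len(metric)))
	p.WriteString(metric)
	p.WriteByte(byte(len(sel)))
	for _, s := range sel {
		p.WriteByte(byte(len(s)))
		p.WriteString(s)
	}
	binary.Write(&p, binary.LittleEndian, math.Float32bits(val))

	var out bytes.Buffer
	binary.Write(&out, binary.LittleEndian, uint32(recMagic))
	binary.Write(&out, binary.LittleEndian, uint32(p.Len()))
	out.Write(p.Bytes())
	binary.Write(&out, binary.LittleEndian, crc32.ChecksumIEEE(p.Bytes()))
	return out.Bytes()
}

func main() {
	rec := encodeRec(1700000000, "flops_any", []string{"cpu0"}, 42.5)
	payload := rec[8 : len(rec)-4]
	stored := binary.LittleEndian.Uint32(rec[len(rec)-4:])
	fmt.Println("crc ok:", crc32.ChecksumIEEE(payload) == stored) // true
}
```

A decoder reads the 8-byte header first, then the payload, then compares the stored CRC, which is the same order readWALRecord in the recorded file follows; a CRC mismatch on the last record is how a torn write is detected after a crash.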
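The shard routing and non-blocking send described for SendWALMessage above combine an FNV-1a hash with a select/default so the ingest path never blocks on a full or closed channel. A simplified model under those assumptions (channel payloads and buffer sizes here are illustrative, not the real *WALMessage channels):

```go
package main

import (
	"fmt"
	"hash/fnv"
	"sync/atomic"
)

var (
	shards       []chan string
	shuttingDown atomic.Bool // set before channels close; guards the send
)

// shardIndex hashes cluster+node so all writes for one host stay serial
// within one shard. The 0 byte keeps "ab"+"c" distinct from "a"+"bc".
func shardIndex(cluster, node string, n int) int {
	h := fnv.New32a()
	h.Write([]byte(cluster))
	h.Write([]byte{0})
	h.Write([]byte(node))
	return int(h.Sum32()) % n
}

func send(cluster, node, msg string) bool {
	if shards == nil || shuttingDown.Load() {
		return false // never send on a possibly-closed channel
	}
	select {
	case shards[shardIndex(cluster, node, len(shards))] <- msg:
		return true
	default:
		return false // shard buffer full: drop rather than block ingest
	}
}

func main() {
	shards = []chan string{make(chan string, 1), make(chan string, 1)}
	fmt.Println(send("fritz", "node001", "m1")) // true
	fmt.Println(send("fritz", "node001", "m2")) // false: same shard, buffer full
}
```

Dropping on a full buffer is acceptable here because, as the recorded code notes, the in-memory store still holds the data; only crash-recovery coverage for that sample is lost.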
{"parentUuid":"6a0f0479-dfeb-4d96-ad6c-509bf73f8cb8","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01XovgGHQgVScrxFConwzYe1","type":"message","role":"assistant","content":[{"type":"text","text":"Now add the missing doc comment and file-level comment to `archive.go`:"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":321,"cache_read_input_tokens":111466,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":321},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXw7NsGBAa9pDFNdwpzt","type":"assistant","uuid":"f8b2e587-c00f-486d-86ac-5c42b0253e87","timestamp":"2026-03-29T19:36:08.030Z"}
{"parentUuid":"f8b2e587-c00f-486d-86ac-5c42b0253e87","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01XovgGHQgVScrxFConwzYe1","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_015m84n5LhrkvcUSGAv8cgyV","name":"Edit","input":{"replace_all":false,"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go","old_string":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\npackage metricstore","new_string":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file implements the cleanup (archiving or deletion) of old checkpoint files.\n//\n// The CleanUp worker runs on a timer equal to RetentionInMemory. In \"archive\" mode\n// it converts checkpoint files older than the retention window into per-cluster\n// Parquet files and then deletes the originals. In \"delete\" mode it simply removes\n// old checkpoint files.\npackage metricstore"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":321,"cache_read_input_tokens":111466,"output_tokens":374,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":321,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw7NsGBAa9pDFNdwpzt","type":"assistant","uuid":"15a5084e-a60b-4a56-a635-b0a82f874588","timestamp":"2026-03-29T19:36:11.891Z"}
{"type":"file-history-snapshot","messageId":"15a5084e-a60b-4a56-a635-b0a82f874588","snapshot":{"messageId":"5850443c-83f3-4981-96ba-d5e60679f257","trackedFileBackups":{"pkg/metricstore/metricstore.go":{"backupFileName":"94484c782e7b75a8@v1","version":1,"backupTime":"2026-03-29T19:35:39.217Z"},"pkg/metricstore/checkpoint.go":{"backupFileName":"1948c8214a69da10@v1","version":1,"backupTime":"2026-03-29T19:35:52.527Z"},"pkg/metricstore/walCheckpoint.go":{"backupFileName":"bfe2cc03c42797a1@v1","version":1,"backupTime":"2026-03-29T19:36:03.683Z"},"pkg/metricstore/archive.go":{"backupFileName":"05122ac86e9f7be9@v1","version":1,"backupTime":"2026-03-29T19:36:11.939Z"}},"timestamp":"2026-03-29T19:34:20.233Z"},"isSnapshotUpdate":true}
{"parentUuid":"15a5084e-a60b-4a56-a635-b0a82f874588","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Edit","command":"callback"},"parentToolUseID":"toolu_015m84n5LhrkvcUSGAv8cgyV","toolUseID":"toolu_015m84n5LhrkvcUSGAv8cgyV","timestamp":"2026-03-29T19:36:11.949Z","uuid":"02889dd9-cf47-4db0-9c9d-e15c9907a8eb"}
{"parentUuid":"15a5084e-a60b-4a56-a635-b0a82f874588","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_015m84n5LhrkvcUSGAv8cgyV","type":"tool_result","content":"The file /home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go has been updated successfully."}]},"uuid":"9119169a-33c4-4572-884b-ea94408a2602","timestamp":"2026-03-29T19:36:11.947Z","toolUseResult":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go","oldString":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\npackage metricstore","newString":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file implements the cleanup (archiving or deletion) of old checkpoint files.\n//\n// The CleanUp worker runs on a timer equal to RetentionInMemory. In \"archive\" mode\n// it converts checkpoint files older than the retention window into per-cluster\n// Parquet files and then deletes the originals. In \"delete\" mode it simply removes\n// old checkpoint files.\npackage metricstore","originalFile":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\npackage metricstore\n\nimport (\n\t\"context\"\n\t\"errors\"\n\t\"fmt\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n)\n\n// Worker for either Archiving or Deleting files\n\nfunc CleanUp(wg *sync.WaitGroup, ctx context.Context) {\n\tif Keys.Cleanup.Mode == \"archive\" {\n\t\tcclog.Info(\"[METRICSTORE]> enable archive cleanup to parquet\")\n\t\t// Run as Archiver\n\t\tcleanUpWorker(wg, ctx,\n\t\t\tKeys.RetentionInMemory,\n\t\t\t\"archiving\",\n\t\t\tKeys.Cleanup.RootDir,\n\t\t\tfalse,\n\t\t)\n\t} else {\n\t\t// Run as Deleter\n\t\tcleanUpWorker(wg, ctx,\n\t\t\tKeys.RetentionInMemory,\n\t\t\t\"deleting\",\n\t\t\t\"\",\n\t\t\ttrue,\n\t\t)\n\t}\n}\n\n// cleanUpWorker takes simple values to configure what it does\nfunc cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {\n\twg.Go(func() {\n\t\td, err := time.ParseDuration(interval)\n\t\tif err != nil {\n\t\t\tcclog.Fatalf(\"[METRICSTORE]> error parsing %s interval duration: %v\\n\", mode, err)\n\t\t}\n\t\tif d <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\tticker := time.NewTicker(d)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tt := time.Now().Add(-d)\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start %s checkpoints (older than %s)...\", mode, t.Format(time.RFC3339))\n\n\t\t\t\tn, err := CleanupCheckpoints(Keys.Checkpoints.RootDir, cleanupDir, t.Unix(), delete)\n\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> %s failed: %s\", mode, err.Error())\n\t\t\t\t} else {\n\t\t\t\t\tif delete 
{\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoints deleted\", n)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoint files archived to parquet\", n)\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\nvar ErrNoNewArchiveData error = errors.New(\"all data already archived\")\n\n// CleanupCheckpoints deletes or archives all checkpoint files older than `from`.\n// When archiving, consolidates all hosts per cluster into a single Parquet file.\nfunc CleanupCheckpoints(checkpointsDir, cleanupDir string, from int64, deleteInstead bool) (int, error) {\n\tif deleteInstead {\n\t\treturn deleteCheckpoints(checkpointsDir, from)\n\t}\n\n\treturn archiveCheckpoints(checkpointsDir, cleanupDir, from)\n}\n\n// deleteCheckpoints removes checkpoint files older than `from` across all clusters/hosts.\nfunc deleteCheckpoints(checkpointsDir string, from int64) (int, error) {\n\tentries1, err := os.ReadDir(checkpointsDir)\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\ttype workItem struct {\n\t\tdir string\n\t\tcluster, host string\n\t}\n\n\tvar wg sync.WaitGroup\n\tn, errs := int32(0), int32(0)\n\twork := make(chan workItem, Keys.NumWorkers)\n\n\twg.Add(Keys.NumWorkers)\n\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tfor item := range work {\n\t\t\t\tentries, err := os.ReadDir(item.dir)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"error reading %s/%s: %s\", item.cluster, item.host, err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tfiles, err := findFiles(entries, from, false)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"error finding files in %s/%s: %s\", item.cluster, item.host, err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tfor _, checkpoint := range files {\n\t\t\t\t\tif err := os.Remove(filepath.Join(item.dir, checkpoint)); err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"error deleting %s/%s/%s: %s\", item.cluster, item.host, checkpoint, err.Error())\n\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tatomic.AddInt32(&n, 1)\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}()\n\t}\n\n\tfor _, de1 := range entries1 {\n\t\tentries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))\n\t\tif e != nil {\n\t\t\terr = e\n\t\t\tcontinue\n\t\t}\n\n\t\tfor _, de2 := range entries2 {\n\t\t\twork <- workItem{\n\t\t\t\tdir: filepath.Join(checkpointsDir, de1.Name(), de2.Name()),\n\t\t\t\tcluster: de1.Name(),\n\t\t\t\thost: de2.Name(),\n\t\t\t}\n\t\t}\n\t}\n\n\tclose(work)\n\twg.Wait()\n\n\tif err != nil {\n\t\treturn int(n), err\n\t}\n\tif errs > 0 {\n\t\treturn int(n), fmt.Errorf(\"%d errors happened while deleting (%d successes)\", errs, n)\n\t}\n\treturn int(n), nil\n}\n\n// archiveCheckpoints archives checkpoint files to Parquet format.\n// Produces one Parquet file per cluster: <cleanupDir>/<cluster>/<from>.parquet\n// Workers load checkpoint files from disk and send CheckpointFile trees on a\n// back-pressured channel. 
The main thread streams each tree directly to Parquet\n// rows without materializing all rows in memory.\nfunc archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, error) {\n\tcclog.Info(\"[METRICSTORE]> start archiving checkpoints to parquet\")\n\tstartTime := time.Now()\n\n\tclusterEntries, err := os.ReadDir(checkpointsDir)\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\ttotalFiles := 0\n\tvar clusterErrors []string\n\n\tfor _, clusterEntry := range clusterEntries {\n\t\tif !clusterEntry.IsDir() {\n\t\t\tcontinue\n\t\t}\n\n\t\tcluster := clusterEntry.Name()\n\t\thostEntries, err := os.ReadDir(filepath.Join(checkpointsDir, cluster))\n\t\tif err != nil {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading host entries for cluster %s: %s\", cluster, err.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Workers load checkpoint files from disk; main thread writes to parquet.\n\t\ttype hostResult struct {\n\t\t\tcheckpoints []*CheckpointFile\n\t\t\thostname string\n\t\t\tfiles []string // checkpoint filenames to delete after successful write\n\t\t\tdir string // checkpoint directory for this host\n\t\t}\n\n\t\t// Small buffer provides back-pressure: at most NumWorkers+2 results in flight.\n\t\tresults := make(chan hostResult, 2)\n\t\twork := make(chan struct {\n\t\t\tdir, host string\n\t\t}, Keys.NumWorkers)\n\n\t\tvar wg sync.WaitGroup\n\t\terrs := int32(0)\n\n\t\twg.Add(Keys.NumWorkers)\n\t\tfor w := 0; w < Keys.NumWorkers; w++ {\n\t\t\tgo func() {\n\t\t\t\tdefer wg.Done()\n\t\t\t\tfor item := range work {\n\t\t\t\t\tcheckpoints, files, err := loadCheckpointFiles(item.dir, from)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading checkpoints for %s/%s: %s\", cluster, item.host, err.Error())\n\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\t\tcontinue\n\t\t\t\t\t}\n\t\t\t\t\tif len(checkpoints) > 0 {\n\t\t\t\t\t\tresults <- hostResult{\n\t\t\t\t\t\t\tcheckpoints: checkpoints,\n\t\t\t\t\t\t\thostname: item.host,\n\t\t\t\t\t\t\tfiles: files,\n\t\t\t\t\t\t\tdir: item.dir,\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}()\n\t\t}\n\n\t\tgo func() {\n\t\t\tfor _, hostEntry := range hostEntries {\n\t\t\t\tif !hostEntry.IsDir() {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tdir := filepath.Join(checkpointsDir, cluster, hostEntry.Name())\n\t\t\t\twork <- struct {\n\t\t\t\t\tdir, host string\n\t\t\t\t}{dir: dir, host: hostEntry.Name()}\n\t\t\t}\n\t\t\tclose(work)\n\t\t\twg.Wait()\n\t\t\tclose(results)\n\t\t}()\n\n\t\t// Open streaming writer and write each host's checkpoint files as a row group\n\t\tparquetFile := filepath.Join(cleanupDir, cluster, fmt.Sprintf(\"%d.parquet\", from))\n\t\twriter, err := newParquetArchiveWriter(parquetFile)\n\t\tif err != nil {\n\t\t\t// Drain results channel to unblock workers\n\t\t\tfor range results {\n\t\t\t}\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error creating parquet writer for cluster %s: %s\", cluster, err.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\ttype deleteItem struct {\n\t\t\tdir string\n\t\t\tfiles []string\n\t\t}\n\t\tvar toDelete []deleteItem\n\t\twriteErr := error(nil)\n\n\t\tfor r := range results {\n\t\t\tif writeErr == nil {\n\t\t\t\t// Stream each checkpoint file directly to parquet rows.\n\t\t\t\t// Each checkpoint is processed and discarded before the next.\n\t\t\t\tfor _, cf := range r.checkpoints {\n\t\t\t\t\tif err := writer.WriteCheckpointFile(cf, cluster, r.hostname, \"node\", \"\"); err != nil 
{\n\t\t\t\t\t\twriteErr = err\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t// Flush once per host to keep row group count within parquet limits.\n\t\t\t\tif writeErr == nil {\n\t\t\t\t\tif err := writer.FlushRowGroup(); err != nil {\n\t\t\t\t\t\twriteErr = err\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\t// Always track files for deletion (even if write failed, we still drain)\n\t\t\ttoDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})\n\t\t}\n\n\t\tif err := writer.Close(); err != nil && writeErr == nil {\n\t\t\twriteErr = err\n\t\t}\n\n\t\tif errs > 0 {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> %d errors reading checkpoints for cluster %s\", errs, cluster)\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tos.Remove(parquetFile)\n\t\t\tcontinue\n\t\t}\n\n\t\tif writer.count == 0 {\n\t\t\t// No data written — remove empty file\n\t\t\tos.Remove(parquetFile)\n\t\t\tcontinue\n\t\t}\n\n\t\tif writeErr != nil {\n\t\t\tos.Remove(parquetFile)\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error writing parquet archive for cluster %s: %s\", cluster, writeErr.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Delete archived checkpoint files\n\t\tfor _, item := range toDelete {\n\t\t\tfor _, file := range item.files {\n\t\t\t\tfilename := filepath.Join(item.dir, file)\n\t\t\t\tif err := os.Remove(filename); err != nil {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> could not remove archived checkpoint %s: %v\", filename, err)\n\t\t\t\t} else {\n\t\t\t\t\ttotalFiles++\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tcclog.Infof(\"[METRICSTORE]> archived %d rows from %d files for cluster %s to %s\",\n\t\t\twriter.count, totalFiles, cluster, parquetFile)\n\t}\n\n\tcclog.Infof(\"[METRICSTORE]> archiving checkpoints completed in %s (%d files)\", time.Since(startTime).Round(time.Millisecond), totalFiles)\n\n\tif len(clusterErrors) > 0 {\n\t\treturn totalFiles, fmt.Errorf(\"archiving failed for clusters: %s\", strings.Join(clusterErrors, \", \"))\n\t}\n\n\treturn totalFiles, nil\n}\n","structuredPatch":[{"oldStart":3,"oldLines":6,"newStart":3,"newLines":12,"lines":[" // Use of this source code is governed by a MIT-style"," // license that can be found in the LICENSE file."," ","+// This file implements the cleanup (archiving or deletion) of old checkpoint files.","+//","+// The CleanUp worker runs on a timer equal to RetentionInMemory. In \"archive\" mode","+// it converts checkpoint files older than the retention window into per-cluster","+// Parquet files and then deletes the originals. In \"delete\" mode it simply removes","+// old checkpoint files."," package metricstore"," "," import ("]}],"userModified":false,"replaceAll":false},"sourceToolAssistantUUID":"15a5084e-a60b-4a56-a635-b0a82f874588"}
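The archiveCheckpoints code recorded above pairs a loader worker pool with a deliberately small results channel so that Parquet rows are streamed out instead of accumulated. Here is a self-contained model of that back-pressure pattern, assuming hypothetical host/result names in place of the real CheckpointFile plumbing:

```go
package main

import (
	"fmt"
	"sync"
)

// N workers load data in parallel; a small results buffer bounds how many
// loaded results can be in flight while a single consumer streams them out.
func main() {
	const numWorkers = 4
	work := make(chan int, numWorkers)
	results := make(chan string, 2) // small buffer = back-pressure

	var wg sync.WaitGroup
	wg.Add(numWorkers)
	for w := 0; w < numWorkers; w++ {
		go func() {
			defer wg.Done()
			for host := range work {
				// Stand-in for loading a host's checkpoint files from disk.
				results <- fmt.Sprintf("checkpoints for host%03d", host)
			}
		}()
	}

	go func() {
		for h := 0; h < 10; h++ {
			work <- h
		}
		close(work)
		wg.Wait()
		close(results) // closed only after every worker has exited
	}()

	// Single consumer: in the real code this is the Parquet writer thread.
	for r := range results {
		fmt.Println("write:", r)
	}
}
```

Closing the results channel from a goroutine that first waits on the WaitGroup is what lets the consumer use a plain range loop; the same shape appears in the recorded code, where the consumer must also keep draining after a write error so the workers never block forever.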
{"parentUuid":"9119169a-33c4-4572-884b-ea94408a2602","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01483uTo8thbtBP423J9af22","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_014pRXyQka3fDz72idkFyVHH","name":"Edit","input":{"replace_all":false,"file_path":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go","old_string":"// Worker for either Archiving or Deleting files\n\nfunc CleanUp(wg *sync.WaitGroup, ctx context.Context) {","new_string":"// CleanUp starts a background worker that periodically removes or archives\n// checkpoint files older than the configured retention window.\n//\n// In \"archive\" mode, old checkpoint files are converted to Parquet and stored\n// under Keys.Cleanup.RootDir. In \"delete\" mode they are simply removed.\n// The cleanup interval equals Keys.RetentionInMemory.\nfunc CleanUp(wg *sync.WaitGroup, ctx context.Context) {"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":419,"cache_read_input_tokens":111787,"output_tokens":271,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":419,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw7z8Jv2KbcTKctCD27","type":"assistant","uuid":"8c98e191-96ae-43f4-b39d-cb58db7b996c","timestamp":"2026-03-29T19:36:19.913Z"}
{"parentUuid":"8c98e191-96ae-43f4-b39d-cb58db7b996c","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"PostToolUse","hookName":"PostToolUse:Edit","command":"callback"},"parentToolUseID":"toolu_014pRXyQka3fDz72idkFyVHH","toolUseID":"toolu_014pRXyQka3fDz72idkFyVHH","timestamp":"2026-03-29T19:36:20.115Z","uuid":"6e64f9d2-94e9-4179-b1c8-03804c544344"}
{"parentUuid":"8c98e191-96ae-43f4-b39d-cb58db7b996c","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_014pRXyQka3fDz72idkFyVHH","type":"tool_result","content":"The file /home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go has been updated successfully."}]},"uuid":"cf117f3e-34ed-48f9-96f4-53213e2d777c","timestamp":"2026-03-29T19:36:20.111Z","toolUseResult":{"filePath":"/home/jan/prg/ClusterCockpit/cc-backend/pkg/metricstore/archive.go","oldString":"// Worker for either Archiving or Deleting files\n\nfunc CleanUp(wg *sync.WaitGroup, ctx context.Context) {","newString":"// CleanUp starts a background worker that periodically removes or archives\n// checkpoint files older than the configured retention window.\n//\n// In \"archive\" mode, old checkpoint files are converted to Parquet and stored\n// under Keys.Cleanup.RootDir. In \"delete\" mode they are simply removed.\n// The cleanup interval equals Keys.RetentionInMemory.\nfunc CleanUp(wg *sync.WaitGroup, ctx context.Context) {","originalFile":"// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.\n// All rights reserved. This file is part of cc-backend.\n// Use of this source code is governed by a MIT-style\n// license that can be found in the LICENSE file.\n\n// This file implements the cleanup (archiving or deletion) of old checkpoint files.\n//\n// The CleanUp worker runs on a timer equal to RetentionInMemory. In \"archive\" mode\n// it converts checkpoint files older than the retention window into per-cluster\n// Parquet files and then deletes the originals. 
In \"delete\" mode it simply removes\n// old checkpoint files.\npackage metricstore\n\nimport (\n\t\"context\"\n\t\"errors\"\n\t\"fmt\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\tcclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\"\n)\n\n// Worker for either Archiving or Deleting files\n\nfunc CleanUp(wg *sync.WaitGroup, ctx context.Context) {\n\tif Keys.Cleanup.Mode == \"archive\" {\n\t\tcclog.Info(\"[METRICSTORE]> enable archive cleanup to parquet\")\n\t\t// Run as Archiver\n\t\tcleanUpWorker(wg, ctx,\n\t\t\tKeys.RetentionInMemory,\n\t\t\t\"archiving\",\n\t\t\tKeys.Cleanup.RootDir,\n\t\t\tfalse,\n\t\t)\n\t} else {\n\t\t// Run as Deleter\n\t\tcleanUpWorker(wg, ctx,\n\t\t\tKeys.RetentionInMemory,\n\t\t\t\"deleting\",\n\t\t\t\"\",\n\t\t\ttrue,\n\t\t)\n\t}\n}\n\n// cleanUpWorker takes simple values to configure what it does\nfunc cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {\n\twg.Go(func() {\n\t\td, err := time.ParseDuration(interval)\n\t\tif err != nil {\n\t\t\tcclog.Fatalf(\"[METRICSTORE]> error parsing %s interval duration: %v\\n\", mode, err)\n\t\t}\n\t\tif d <= 0 {\n\t\t\treturn\n\t\t}\n\n\t\tticker := time.NewTicker(d)\n\t\tdefer ticker.Stop()\n\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase <-ticker.C:\n\t\t\t\tt := time.Now().Add(-d)\n\t\t\t\tcclog.Infof(\"[METRICSTORE]> start %s checkpoints (older than %s)...\", mode, t.Format(time.RFC3339))\n\n\t\t\t\tn, err := CleanupCheckpoints(Keys.Checkpoints.RootDir, cleanupDir, t.Unix(), delete)\n\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> %s failed: %s\", mode, err.Error())\n\t\t\t\t} else {\n\t\t\t\t\tif delete {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoints deleted\", n)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tcclog.Infof(\"[METRICSTORE]> done: %d checkpoint files archived to parquet\", n)\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t})\n}\n\nvar ErrNoNewArchiveData error = errors.New(\"all data already archived\")\n\n// CleanupCheckpoints deletes or archives all checkpoint files older than `from`.\n// When archiving, consolidates all hosts per cluster into a single Parquet file.\nfunc CleanupCheckpoints(checkpointsDir, cleanupDir string, from int64, deleteInstead bool) (int, error) {\n\tif deleteInstead {\n\t\treturn deleteCheckpoints(checkpointsDir, from)\n\t}\n\n\treturn archiveCheckpoints(checkpointsDir, cleanupDir, from)\n}\n\n// deleteCheckpoints removes checkpoint files older than `from` across all clusters/hosts.\nfunc deleteCheckpoints(checkpointsDir string, from int64) (int, error) {\n\tentries1, err := os.ReadDir(checkpointsDir)\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\ttype workItem struct {\n\t\tdir string\n\t\tcluster, host string\n\t}\n\n\tvar wg sync.WaitGroup\n\tn, errs := int32(0), int32(0)\n\twork := make(chan workItem, Keys.NumWorkers)\n\n\twg.Add(Keys.NumWorkers)\n\tfor worker := 0; worker < Keys.NumWorkers; worker++ {\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tfor item := range work {\n\t\t\t\tentries, err := os.ReadDir(item.dir)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"error reading %s/%s: %s\", item.cluster, item.host, err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tfiles, err := findFiles(entries, from, false)\n\t\t\t\tif err != nil {\n\t\t\t\t\tcclog.Errorf(\"error finding files in %s/%s: %s\", item.cluster, item.host, 
err.Error())\n\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\tfor _, checkpoint := range files {\n\t\t\t\t\tif err := os.Remove(filepath.Join(item.dir, checkpoint)); err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"error deleting %s/%s/%s: %s\", item.cluster, item.host, checkpoint, err.Error())\n\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\t} else {\n\t\t\t\t\t\tatomic.AddInt32(&n, 1)\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}()\n\t}\n\n\tfor _, de1 := range entries1 {\n\t\tentries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))\n\t\tif e != nil {\n\t\t\terr = e\n\t\t\tcontinue\n\t\t}\n\n\t\tfor _, de2 := range entries2 {\n\t\t\twork <- workItem{\n\t\t\t\tdir: filepath.Join(checkpointsDir, de1.Name(), de2.Name()),\n\t\t\t\tcluster: de1.Name(),\n\t\t\t\thost: de2.Name(),\n\t\t\t}\n\t\t}\n\t}\n\n\tclose(work)\n\twg.Wait()\n\n\tif err != nil {\n\t\treturn int(n), err\n\t}\n\tif errs > 0 {\n\t\treturn int(n), fmt.Errorf(\"%d errors happened while deleting (%d successes)\", errs, n)\n\t}\n\treturn int(n), nil\n}\n\n// archiveCheckpoints archives checkpoint files to Parquet format.\n// Produces one Parquet file per cluster: //.parquet\n// Workers load checkpoint files from disk and send CheckpointFile trees on a\n// back-pressured channel. The main thread streams each tree directly to Parquet\n// rows without materializing all rows in memory.\nfunc archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, error) {\n\tcclog.Info(\"[METRICSTORE]> start archiving checkpoints to parquet\")\n\tstartTime := time.Now()\n\n\tclusterEntries, err := os.ReadDir(checkpointsDir)\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\ttotalFiles := 0\n\tvar clusterErrors []string\n\n\tfor _, clusterEntry := range clusterEntries {\n\t\tif !clusterEntry.IsDir() {\n\t\t\tcontinue\n\t\t}\n\n\t\tcluster := clusterEntry.Name()\n\t\thostEntries, err := os.ReadDir(filepath.Join(checkpointsDir, cluster))\n\t\tif err != nil {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading host entries for cluster %s: %s\", cluster, err.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Workers load checkpoint files from disk; main thread writes to parquet.\n\t\ttype hostResult struct {\n\t\t\tcheckpoints []*CheckpointFile\n\t\t\thostname string\n\t\t\tfiles []string // checkpoint filenames to delete after successful write\n\t\t\tdir string // checkpoint directory for this host\n\t\t}\n\n\t\t// Small buffer provides back-pressure: at most NumWorkers+2 results in flight.\n\t\tresults := make(chan hostResult, 2)\n\t\twork := make(chan struct {\n\t\t\tdir, host string\n\t\t}, Keys.NumWorkers)\n\n\t\tvar wg sync.WaitGroup\n\t\terrs := int32(0)\n\n\t\twg.Add(Keys.NumWorkers)\n\t\tfor w := 0; w < Keys.NumWorkers; w++ {\n\t\t\tgo func() {\n\t\t\t\tdefer wg.Done()\n\t\t\t\tfor item := range work {\n\t\t\t\t\tcheckpoints, files, err := loadCheckpointFiles(item.dir, from)\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tcclog.Errorf(\"[METRICSTORE]> error reading checkpoints for %s/%s: %s\", cluster, item.host, err.Error())\n\t\t\t\t\t\tatomic.AddInt32(&errs, 1)\n\t\t\t\t\t\tcontinue\n\t\t\t\t\t}\n\t\t\t\t\tif len(checkpoints) > 0 {\n\t\t\t\t\t\tresults <- hostResult{\n\t\t\t\t\t\t\tcheckpoints: checkpoints,\n\t\t\t\t\t\t\thostname: item.host,\n\t\t\t\t\t\t\tfiles: files,\n\t\t\t\t\t\t\tdir: item.dir,\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}()\n\t\t}\n\n\t\tgo func() {\n\t\t\tfor _, hostEntry := range hostEntries {\n\t\t\t\tif !hostEntry.IsDir() 
{\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tdir := filepath.Join(checkpointsDir, cluster, hostEntry.Name())\n\t\t\t\twork <- struct {\n\t\t\t\t\tdir, host string\n\t\t\t\t}{dir: dir, host: hostEntry.Name()}\n\t\t\t}\n\t\t\tclose(work)\n\t\t\twg.Wait()\n\t\t\tclose(results)\n\t\t}()\n\n\t\t// Open streaming writer and write each host's checkpoint files as a row group\n\t\tparquetFile := filepath.Join(cleanupDir, cluster, fmt.Sprintf(\"%d.parquet\", from))\n\t\twriter, err := newParquetArchiveWriter(parquetFile)\n\t\tif err != nil {\n\t\t\t// Drain results channel to unblock workers\n\t\t\tfor range results {\n\t\t\t}\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error creating parquet writer for cluster %s: %s\", cluster, err.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\ttype deleteItem struct {\n\t\t\tdir string\n\t\t\tfiles []string\n\t\t}\n\t\tvar toDelete []deleteItem\n\t\twriteErr := error(nil)\n\n\t\tfor r := range results {\n\t\t\tif writeErr == nil {\n\t\t\t\t// Stream each checkpoint file directly to parquet rows.\n\t\t\t\t// Each checkpoint is processed and discarded before the next.\n\t\t\t\tfor _, cf := range r.checkpoints {\n\t\t\t\t\tif err := writer.WriteCheckpointFile(cf, cluster, r.hostname, \"node\", \"\"); err != nil {\n\t\t\t\t\t\twriteErr = err\n\t\t\t\t\t\tbreak\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t\t// Flush once per host to keep row group count within parquet limits.\n\t\t\t\tif writeErr == nil {\n\t\t\t\t\tif err := writer.FlushRowGroup(); err != nil {\n\t\t\t\t\t\twriteErr = err\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t\t// Always track files for deletion (even if write failed, we still drain)\n\t\t\ttoDelete = append(toDelete, deleteItem{dir: r.dir, files: r.files})\n\t\t}\n\n\t\tif err := writer.Close(); err != nil && writeErr == nil {\n\t\t\twriteErr = err\n\t\t}\n\n\t\tif errs > 0 {\n\t\t\tcclog.Errorf(\"[METRICSTORE]> %d errors reading checkpoints for cluster %s\", errs, cluster)\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tos.Remove(parquetFile)\n\t\t\tcontinue\n\t\t}\n\n\t\tif writer.count == 0 {\n\t\t\t// No data written — remove empty file\n\t\t\tos.Remove(parquetFile)\n\t\t\tcontinue\n\t\t}\n\n\t\tif writeErr != nil {\n\t\t\tos.Remove(parquetFile)\n\t\t\tcclog.Errorf(\"[METRICSTORE]> error writing parquet archive for cluster %s: %s\", cluster, writeErr.Error())\n\t\t\tclusterErrors = append(clusterErrors, cluster)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Delete archived checkpoint files\n\t\tfor _, item := range toDelete {\n\t\t\tfor _, file := range item.files {\n\t\t\t\tfilename := filepath.Join(item.dir, file)\n\t\t\t\tif err := os.Remove(filename); err != nil {\n\t\t\t\t\tcclog.Warnf(\"[METRICSTORE]> could not remove archived checkpoint %s: %v\", filename, err)\n\t\t\t\t} else {\n\t\t\t\t\ttotalFiles++\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tcclog.Infof(\"[METRICSTORE]> archived %d rows from %d files for cluster %s to %s\",\n\t\t\twriter.count, totalFiles, cluster, parquetFile)\n\t}\n\n\tcclog.Infof(\"[METRICSTORE]> archiving checkpoints completed in %s (%d files)\", time.Since(startTime).Round(time.Millisecond), totalFiles)\n\n\tif len(clusterErrors) > 0 {\n\t\treturn totalFiles, fmt.Errorf(\"archiving failed for clusters: %s\", strings.Join(clusterErrors, \", \"))\n\t}\n\n\treturn totalFiles, nil\n}\n","structuredPatch":[{"oldStart":25,"oldLines":8,"newStart":25,"newLines":12,"lines":[" cclog \"github.com/ClusterCockpit/cc-lib/v2/ccLogger\""," )"," ","-// Worker for either Archiving or Deleting files","-","+// CleanUp 
starts a background worker that periodically removes or archives","+// checkpoint files older than the configured retention window.","+//","+// In \"archive\" mode, old checkpoint files are converted to Parquet and stored","+// under Keys.Cleanup.RootDir. In \"delete\" mode they are simply removed.","+// The cleanup interval equals Keys.RetentionInMemory."," func CleanUp(wg *sync.WaitGroup, ctx context.Context) {"," if Keys.Cleanup.Mode == \"archive\" {"," cclog.Info(\"[METRICSTORE]> enable archive cleanup to parquet\")"]}],"userModified":false,"replaceAll":false},"sourceToolAssistantUUID":"8c98e191-96ae-43f4-b39d-cb58db7b996c"}
{"parentUuid":"cf117f3e-34ed-48f9-96f4-53213e2d777c","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01X3BaczQAbyuu713hGcqGtp","type":"message","role":"assistant","content":[{"type":"text","text":"Let me verify the changes look correct:"}],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":316,"cache_read_input_tokens":112206,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":316},"output_tokens":1,"service_tier":"standard","inference_geo":"not_available"}},"requestId":"req_011CZXw8bUa5r8JWisnctYYq","type":"assistant","uuid":"61dca6e1-2322-4a68-903c-05d17e57319a","timestamp":"2026-03-29T19:36:24.409Z"}
{"parentUuid":"61dca6e1-2322-4a68-903c-05d17e57319a","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01X3BaczQAbyuu713hGcqGtp","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01RDbbYWjVEv7GrvNvUt15cx","name":"Bash","input":{"command":"go build ./pkg/metricstore/... 2>&1"},"caller":{"type":"direct"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":316,"cache_read_input_tokens":112206,"output_tokens":93,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":316,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw8bUa5r8JWisnctYYq","type":"assistant","uuid":"4d11fcfd-f63e-45a9-9a87-40b614d3e9cc","timestamp":"2026-03-29T19:36:24.950Z"}
{"parentUuid":"4d11fcfd-f63e-45a9-9a87-40b614d3e9cc","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"user","message":{"role":"user","content":[{"tool_use_id":"toolu_01RDbbYWjVEv7GrvNvUt15cx","type":"tool_result","content":"","is_error":false}]},"uuid":"26cf332c-8a6f-4b19-a49e-fbc3b8a25f40","timestamp":"2026-03-29T19:36:26.731Z","toolUseResult":{"stdout":"","stderr":"","interrupted":false,"isImage":false,"noOutputExpected":false},"sourceToolAssistantUUID":"4d11fcfd-f63e-45a9-9a87-40b614d3e9cc"}
{"parentUuid":"26cf332c-8a6f-4b19-a49e-fbc3b8a25f40","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","message":{"model":"claude-sonnet-4-6","id":"msg_01WDS5CyVpmzrGHtEywNfcL8","type":"message","role":"assistant","content":[{"type":"text","text":"All changes compile cleanly. Here's a summary of what was updated:\n\n| File | Change |\n|------|--------|\n| `metricstore.go` (package doc) | \"Avro format\" → \"WAL/binary format\"; clarified NATS uses InfluxDB line protocol |\n| `metricstore.go` (`Init` doc) | Added missing `metrics` parameter documentation |\n| `checkpoint.go` (`Checkpointing` doc) | Removed incorrect \"hardcoded\" claim; explained it reads `Keys.CheckpointInterval` with 12h default; updated format descriptions to match actual behavior |\n| `walCheckpoint.go` (`RotateWALFilesAfterShutdown`) | Replaced copy-pasted `RotateWALFiles` doc with accurate description (direct file removal, post-shutdown only) |\n| `archive.go` | Added file-level doc comment explaining cleanup purpose; added `CleanUp` function doc comment |"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":1,"cache_creation_input_tokens":194,"cache_read_input_tokens":112522,"output_tokens":228,"server_tool_use":{"web_search_requests":0,"web_fetch_requests":0},"service_tier":"standard","cache_creation":{"ephemeral_1h_input_tokens":194,"ephemeral_5m_input_tokens":0},"inference_geo":"","iterations":[],"speed":"standard"}},"requestId":"req_011CZXw95BSkzM4f9MsLP2qu","type":"assistant","uuid":"80d07f0b-53bb-4f61-9887-c0cb9b459918","timestamp":"2026-03-29T19:36:35.225Z"}
{"parentUuid":"80d07f0b-53bb-4f61-9887-c0cb9b459918","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"progress","data":{"type":"hook_progress","hookEvent":"Stop","hookName":"Stop","command":"entire hooks claude-code stop"},"parentToolUseID":"3a8de66c-a393-4004-86f0-e48972f2be69","toolUseID":"3a8de66c-a393-4004-86f0-e48972f2be69","timestamp":"2026-03-29T19:36:35.323Z","uuid":"a0e70b90-73ac-46c6-97a9-d82b10373ee8"}
{"parentUuid":"a0e70b90-73ac-46c6-97a9-d82b10373ee8","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"system","subtype":"stop_hook_summary","hookCount":1,"hookInfos":[{"command":"entire hooks claude-code stop","durationMs":1549}],"hookErrors":[],"preventedContinuation":false,"stopReason":"","hasOutput":false,"level":"suggestion","timestamp":"2026-03-29T19:36:36.873Z","uuid":"aa820c16-6020-4afc-852e-1ef417358deb","toolUseID":"3a8de66c-a393-4004-86f0-e48972f2be69"}
{"parentUuid":"aa820c16-6020-4afc-852e-1ef417358deb","isSidechain":false,"userType":"external","cwd":"/home/jan/prg/ClusterCockpit/cc-backend","sessionId":"c7fa539b-6143-451a-98b1-5eeb226e623d","version":"2.1.68","gitBranch":"hotfix","slug":"lively-enchanting-hickey","type":"system","subtype":"turn_duration","durationMs":125504,"timestamp":"2026-03-29T19:36:36.891Z","uuid":"75b1cbc7-6148-421d-9192-0343df8131cb","isMeta":false}