mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-20 23:07:29 +01:00
31
CLAUDE.md
31
CLAUDE.md
@@ -229,6 +229,7 @@ The backend supports a NATS-based API as an alternative to the REST API for job
|
|||||||
### Setup
|
### Setup
|
||||||
|
|
||||||
1. Configure NATS client connection in `config.json`:
|
1. Configure NATS client connection in `config.json`:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"nats": {
|
"nats": {
|
||||||
@@ -240,6 +241,7 @@ The backend supports a NATS-based API as an alternative to the REST API for job
|
|||||||
```
|
```
|
||||||
|
|
||||||
2. Configure API subjects in `config.json` under `main`:
|
2. Configure API subjects in `config.json` under `main`:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"main": {
|
"main": {
|
||||||
@@ -252,6 +254,7 @@ The backend supports a NATS-based API as an alternative to the REST API for job
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
- `subject-job-event` (required): NATS subject for job start/stop events
|
- `subject-job-event` (required): NATS subject for job start/stop events
|
||||||
- `subject-node-state` (required): NATS subject for node state updates
|
- `subject-node-state` (required): NATS subject for node state updates
|
||||||
- `job-concurrency` (optional, default: 8): Number of concurrent worker goroutines for job events
|
- `job-concurrency` (optional, default: 8): Number of concurrent worker goroutines for job events
|
||||||
@@ -264,19 +267,23 @@ Messages use **InfluxDB line protocol** format with the following structure:
|
|||||||
#### Job Events
|
#### Job Events
|
||||||
|
|
||||||
**Start Job:**
|
**Start Job:**
|
||||||
|
|
||||||
```
|
```
|
||||||
job,function=start_job event="{\"jobId\":123,\"user\":\"alice\",\"cluster\":\"test\", ...}" 1234567890000000000
|
job,function=start_job event="{\"jobId\":123,\"user\":\"alice\",\"cluster\":\"test\", ...}" 1234567890000000000
|
||||||
```
|
```
|
||||||
|
|
||||||
**Stop Job:**
|
**Stop Job:**
|
||||||
|
|
||||||
```
|
```
|
||||||
job,function=stop_job event="{\"jobId\":123,\"cluster\":\"test\",\"startTime\":1234567890,\"stopTime\":1234571490,\"jobState\":\"completed\"}" 1234571490000000000
|
job,function=stop_job event="{\"jobId\":123,\"cluster\":\"test\",\"startTime\":1234567890,\"stopTime\":1234571490,\"jobState\":\"completed\"}" 1234571490000000000
|
||||||
```
|
```
|
||||||
|
|
||||||
**Tags:**
|
**Tags:**
|
||||||
|
|
||||||
- `function`: Either `start_job` or `stop_job`
|
- `function`: Either `start_job` or `stop_job`
|
||||||
|
|
||||||
**Fields:**
|
**Fields:**
|
||||||
|
|
||||||
- `event`: JSON payload containing job data (see REST API documentation for schema)
|
- `event`: JSON payload containing job data (see REST API documentation for schema)
|
||||||
|
|
||||||
#### Node State Updates
|
#### Node State Updates
|
||||||
@@ -307,9 +314,31 @@ job,function=stop_job event="{\"jobId\":123,\"cluster\":\"test\",\"startTime\":1
|
|||||||
- Messages are logged; no responses are sent back to publishers
|
- Messages are logged; no responses are sent back to publishers
|
||||||
- If NATS client is unavailable, API subscriptions are skipped (logged as warning)
|
- If NATS client is unavailable, API subscriptions are skipped (logged as warning)
|
||||||
|
|
||||||
|
## Development Guidelines
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
|
||||||
|
This application processes large volumes of HPC monitoring data (metrics, job
|
||||||
|
records, archives) at scale. All code changes must prioritize maximum throughput
|
||||||
|
and minimal latency. Avoid unnecessary allocations, prefer streaming over
|
||||||
|
buffering, and be mindful of lock contention. When in doubt, benchmark.
|
||||||
|
|
||||||
|
### Change Impact Analysis
|
||||||
|
|
||||||
|
For any significant change, you MUST:
|
||||||
|
|
||||||
|
1. **Check all call paths**: Trace every caller of modified functions to ensure
|
||||||
|
correctness is preserved throughout the call chain.
|
||||||
|
2. **Evaluate side effects**: Identify and verify all side effects — database
|
||||||
|
writes, cache invalidations, channel sends, goroutine lifecycle changes, file
|
||||||
|
I/O, and external API calls.
|
||||||
|
3. **Consider concurrency implications**: This codebase uses goroutines and
|
||||||
|
channels extensively. Verify that changes do not introduce races, deadlocks,
|
||||||
|
or contention bottlenecks.
|
||||||
|
|
||||||
## Dependencies
|
## Dependencies
|
||||||
|
|
||||||
- Go 1.24.0+ (check go.mod for exact version)
|
- Go 1.25.0+ (check go.mod for exact version)
|
||||||
- Node.js (for frontend builds)
|
- Node.js (for frontend builds)
|
||||||
- SQLite 3 (only supported database)
|
- SQLite 3 (only supported database)
|
||||||
- Optional: NATS server for NATS API integration
|
- Optional: NATS server for NATS API integration
|
||||||
|
|||||||
@@ -402,12 +402,21 @@ func (api *NatsAPI) processNodestateEvent(msg lp.CCMessage) {
|
|||||||
repo := repository.GetNodeRepository()
|
repo := repository.GetNodeRepository()
|
||||||
requestReceived := time.Now().Unix()
|
requestReceived := time.Now().Unix()
|
||||||
|
|
||||||
// Build nodeList per subcluster for health check
|
// Pre-compute node states; only include non-down nodes in health check
|
||||||
|
nodeStates := make(map[string]schema.SchedulerState, len(req.Nodes))
|
||||||
|
for _, node := range req.Nodes {
|
||||||
|
nodeStates[node.Hostname] = determineState(node.States)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build nodeList per subcluster for health check, skipping down nodes
|
||||||
m := make(map[string][]string)
|
m := make(map[string][]string)
|
||||||
metricNames := make(map[string][]string)
|
metricNames := make(map[string][]string)
|
||||||
healthResults := make(map[string]metricstore.HealthCheckResult)
|
healthResults := make(map[string]metricstore.HealthCheckResult)
|
||||||
|
|
||||||
for _, node := range req.Nodes {
|
for _, node := range req.Nodes {
|
||||||
|
if nodeStates[node.Hostname] == schema.NodeStateDown {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
|
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
|
||||||
m[sc] = append(m[sc], node.Hostname)
|
m[sc] = append(m[sc], node.Hostname)
|
||||||
}
|
}
|
||||||
@@ -436,12 +445,17 @@ func (api *NatsAPI) processNodestateEvent(msg lp.CCMessage) {
|
|||||||
|
|
||||||
updates := make([]repository.NodeStateUpdate, 0, len(req.Nodes))
|
updates := make([]repository.NodeStateUpdate, 0, len(req.Nodes))
|
||||||
for _, node := range req.Nodes {
|
for _, node := range req.Nodes {
|
||||||
state := determineState(node.States)
|
state := nodeStates[node.Hostname]
|
||||||
healthState := schema.MonitoringStateFailed
|
var healthState schema.MonitoringState
|
||||||
var healthMetrics string
|
var healthMetrics string
|
||||||
if result, ok := healthResults[node.Hostname]; ok {
|
if state == schema.NodeStateDown {
|
||||||
healthState = result.State
|
healthState = schema.MonitoringStateFull
|
||||||
healthMetrics = result.HealthMetrics
|
} else {
|
||||||
|
healthState = schema.MonitoringStateFailed
|
||||||
|
if result, ok := healthResults[node.Hostname]; ok {
|
||||||
|
healthState = result.State
|
||||||
|
healthMetrics = result.HealthMetrics
|
||||||
|
}
|
||||||
}
|
}
|
||||||
nodeState := schema.NodeStateDB{
|
nodeState := schema.NodeStateDB{
|
||||||
TimeStamp: requestReceived,
|
TimeStamp: requestReceived,
|
||||||
|
|||||||
@@ -34,21 +34,28 @@ func metricListToNames(metricList map[string]*schema.Metric) []string {
|
|||||||
return names
|
return names
|
||||||
}
|
}
|
||||||
|
|
||||||
// this routine assumes that only one of them exists per node
|
// determineState resolves multiple states to a single state using priority order:
|
||||||
|
// allocated > reserved > idle > down > mixed.
|
||||||
|
// Exception: if both idle and down are present, down is returned.
|
||||||
func determineState(states []string) schema.SchedulerState {
|
func determineState(states []string) schema.SchedulerState {
|
||||||
for _, state := range states {
|
stateSet := make(map[string]bool, len(states))
|
||||||
switch strings.ToLower(state) {
|
for _, s := range states {
|
||||||
case "allocated":
|
stateSet[strings.ToLower(s)] = true
|
||||||
return schema.NodeStateAllocated
|
}
|
||||||
case "reserved":
|
|
||||||
return schema.NodeStateReserved
|
switch {
|
||||||
case "idle":
|
case stateSet["allocated"]:
|
||||||
return schema.NodeStateIdle
|
return schema.NodeStateAllocated
|
||||||
case "down":
|
case stateSet["reserved"]:
|
||||||
return schema.NodeStateDown
|
return schema.NodeStateReserved
|
||||||
case "mixed":
|
case stateSet["idle"] && stateSet["down"]:
|
||||||
return schema.NodeStateMixed
|
return schema.NodeStateDown
|
||||||
}
|
case stateSet["idle"]:
|
||||||
|
return schema.NodeStateIdle
|
||||||
|
case stateSet["down"]:
|
||||||
|
return schema.NodeStateDown
|
||||||
|
case stateSet["mixed"]:
|
||||||
|
return schema.NodeStateMixed
|
||||||
}
|
}
|
||||||
|
|
||||||
return schema.NodeStateUnknown
|
return schema.NodeStateUnknown
|
||||||
@@ -79,14 +86,23 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
|||||||
requestReceived := time.Now().Unix()
|
requestReceived := time.Now().Unix()
|
||||||
repo := repository.GetNodeRepository()
|
repo := repository.GetNodeRepository()
|
||||||
|
|
||||||
|
// Step 1: Pre-compute node states; only include non-down nodes in health check
|
||||||
|
nodeStates := make(map[string]schema.SchedulerState, len(req.Nodes))
|
||||||
|
for _, node := range req.Nodes {
|
||||||
|
nodeStates[node.Hostname] = determineState(node.States)
|
||||||
|
}
|
||||||
|
|
||||||
m := make(map[string][]string)
|
m := make(map[string][]string)
|
||||||
metricNames := make(map[string][]string)
|
metricNames := make(map[string][]string)
|
||||||
healthResults := make(map[string]metricstore.HealthCheckResult)
|
healthResults := make(map[string]metricstore.HealthCheckResult)
|
||||||
|
|
||||||
startMs := time.Now()
|
startMs := time.Now()
|
||||||
|
|
||||||
// Step 1: Build nodeList and metricList per subcluster
|
// Step 2: Build nodeList and metricList per subcluster, skipping down nodes
|
||||||
for _, node := range req.Nodes {
|
for _, node := range req.Nodes {
|
||||||
|
if nodeStates[node.Hostname] == schema.NodeStateDown {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
|
if sc, err := archive.GetSubClusterByNode(req.Cluster, node.Hostname); err == nil {
|
||||||
m[sc] = append(m[sc], node.Hostname)
|
m[sc] = append(m[sc], node.Hostname)
|
||||||
}
|
}
|
||||||
@@ -99,7 +115,7 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 2: Determine which metric store to query and perform health check
|
// Step 3: Determine which metric store to query and perform health check
|
||||||
healthRepo, err := metricdispatch.GetHealthCheckRepo(req.Cluster)
|
healthRepo, err := metricdispatch.GetHealthCheckRepo(req.Cluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Warnf("updateNodeStates: no metric store for cluster %s, skipping health check: %v", req.Cluster, err)
|
cclog.Warnf("updateNodeStates: no metric store for cluster %s, skipping health check: %v", req.Cluster, err)
|
||||||
@@ -118,12 +134,17 @@ func (api *RestAPI) updateNodeStates(rw http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
updates := make([]repository.NodeStateUpdate, 0, len(req.Nodes))
|
updates := make([]repository.NodeStateUpdate, 0, len(req.Nodes))
|
||||||
for _, node := range req.Nodes {
|
for _, node := range req.Nodes {
|
||||||
state := determineState(node.States)
|
state := nodeStates[node.Hostname]
|
||||||
healthState := schema.MonitoringStateFailed
|
var healthState schema.MonitoringState
|
||||||
var healthMetrics string
|
var healthMetrics string
|
||||||
if result, ok := healthResults[node.Hostname]; ok {
|
if state == schema.NodeStateDown {
|
||||||
healthState = result.State
|
healthState = schema.MonitoringStateFull
|
||||||
healthMetrics = result.HealthMetrics
|
} else {
|
||||||
|
healthState = schema.MonitoringStateFailed
|
||||||
|
if result, ok := healthResults[node.Hostname]; ok {
|
||||||
|
healthState = result.State
|
||||||
|
healthMetrics = result.HealthMetrics
|
||||||
|
}
|
||||||
}
|
}
|
||||||
nodeState := schema.NodeStateDB{
|
nodeState := schema.NodeStateDB{
|
||||||
TimeStamp: requestReceived,
|
TimeStamp: requestReceived,
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ func (r *JobRepository) buildStatsQuery(
|
|||||||
|
|
||||||
columns = append(columns, "COUNT(*) as totalJobs")
|
columns = append(columns, "COUNT(*) as totalJobs")
|
||||||
|
|
||||||
if need("totalUsers") && col != "job.hpc_user" {
|
if need("totalUsers") {
|
||||||
columns = append(columns, "COUNT(DISTINCT job.hpc_user) AS totalUsers")
|
columns = append(columns, "COUNT(DISTINCT job.hpc_user) AS totalUsers")
|
||||||
} else {
|
} else {
|
||||||
columns = append(columns, "0 AS totalUsers")
|
columns = append(columns, "0 AS totalUsers")
|
||||||
@@ -360,7 +360,7 @@ func (r *JobRepository) JobsStats(
|
|||||||
|
|
||||||
var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours, runningJobs, shortJobs sql.NullInt64
|
var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours, runningJobs, shortJobs sql.NullInt64
|
||||||
if err := row.Scan(&jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours, &runningJobs, &shortJobs); err != nil {
|
if err := row.Scan(&jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours, &runningJobs, &shortJobs); err != nil {
|
||||||
cclog.Warn("Error while scanning rows")
|
cclog.Warnf("Error scanning job statistics row: %v", err)
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -168,8 +168,9 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
|||||||
|
|
||||||
// archiveCheckpoints archives checkpoint files to Parquet format.
|
// archiveCheckpoints archives checkpoint files to Parquet format.
|
||||||
// Produces one Parquet file per cluster: <cleanupDir>/<cluster>/<timestamp>.parquet
|
// Produces one Parquet file per cluster: <cleanupDir>/<cluster>/<timestamp>.parquet
|
||||||
// Each host's rows are written as a separate row group to avoid accumulating
|
// Workers load checkpoint files from disk and send CheckpointFile trees on a
|
||||||
// all data in memory at once.
|
// back-pressured channel. The main thread streams each tree directly to Parquet
|
||||||
|
// rows without materializing all rows in memory.
|
||||||
func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, error) {
|
func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, error) {
|
||||||
cclog.Info("[METRICSTORE]> start archiving checkpoints to parquet")
|
cclog.Info("[METRICSTORE]> start archiving checkpoints to parquet")
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
@@ -192,14 +193,16 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
|||||||
return totalFiles, err
|
return totalFiles, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stream per-host rows to parquet writer via worker pool
|
// Workers load checkpoint files from disk; main thread writes to parquet.
|
||||||
type hostResult struct {
|
type hostResult struct {
|
||||||
rows []ParquetMetricRow
|
checkpoints []*CheckpointFile
|
||||||
files []string // checkpoint filenames to delete after successful write
|
hostname string
|
||||||
dir string // checkpoint directory for this host
|
files []string // checkpoint filenames to delete after successful write
|
||||||
|
dir string // checkpoint directory for this host
|
||||||
}
|
}
|
||||||
|
|
||||||
results := make(chan hostResult, len(hostEntries))
|
// Small buffer provides back-pressure: at most NumWorkers+2 results in flight.
|
||||||
|
results := make(chan hostResult, 2)
|
||||||
work := make(chan struct {
|
work := make(chan struct {
|
||||||
dir, host string
|
dir, host string
|
||||||
}, Keys.NumWorkers)
|
}, Keys.NumWorkers)
|
||||||
@@ -212,14 +215,19 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
|||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
for item := range work {
|
for item := range work {
|
||||||
rows, files, err := archiveCheckpointsToParquet(item.dir, cluster, item.host, from)
|
checkpoints, files, err := loadCheckpointFiles(item.dir, from)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("[METRICSTORE]> error reading checkpoints for %s/%s: %s", cluster, item.host, err.Error())
|
cclog.Errorf("[METRICSTORE]> error reading checkpoints for %s/%s: %s", cluster, item.host, err.Error())
|
||||||
atomic.AddInt32(&errs, 1)
|
atomic.AddInt32(&errs, 1)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if len(rows) > 0 {
|
if len(checkpoints) > 0 {
|
||||||
results <- hostResult{rows: rows, files: files, dir: item.dir}
|
results <- hostResult{
|
||||||
|
checkpoints: checkpoints,
|
||||||
|
hostname: item.host,
|
||||||
|
files: files,
|
||||||
|
dir: item.dir,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
@@ -240,7 +248,7 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
|||||||
close(results)
|
close(results)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Open streaming writer and write each host's rows as a row group
|
// Open streaming writer and write each host's checkpoint files as a row group
|
||||||
parquetFile := filepath.Join(cleanupDir, cluster, fmt.Sprintf("%d.parquet", from))
|
parquetFile := filepath.Join(cleanupDir, cluster, fmt.Sprintf("%d.parquet", from))
|
||||||
writer, err := newParquetArchiveWriter(parquetFile)
|
writer, err := newParquetArchiveWriter(parquetFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -259,9 +267,13 @@ func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, err
|
|||||||
|
|
||||||
for r := range results {
|
for r := range results {
|
||||||
if writeErr == nil {
|
if writeErr == nil {
|
||||||
sortParquetRows(r.rows)
|
// Stream each checkpoint file directly to parquet rows.
|
||||||
if err := writer.WriteHostRows(r.rows); err != nil {
|
// Each checkpoint is processed and discarded before the next.
|
||||||
writeErr = err
|
for _, cf := range r.checkpoints {
|
||||||
|
if err := writer.WriteCheckpointFile(cf, cluster, r.hostname, "node", ""); err != nil {
|
||||||
|
writeErr = err
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Always track files for deletion (even if write failed, we still drain)
|
// Always track files for deletion (even if write failed, we still drain)
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sort"
|
"sort"
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
|
||||||
pq "github.com/parquet-go/parquet-go"
|
pq "github.com/parquet-go/parquet-go"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -32,37 +31,6 @@ type ParquetMetricRow struct {
|
|||||||
Value float32 `parquet:"value"`
|
Value float32 `parquet:"value"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// flattenCheckpointFile recursively converts a CheckpointFile tree into Parquet rows.
|
|
||||||
// The scope path is built from the hierarchy: host level is "node", then child names
|
|
||||||
// map to scope/scope_id (e.g., "socket0" → scope="socket", scope_id="0").
|
|
||||||
func flattenCheckpointFile(cf *CheckpointFile, cluster, hostname, scope, scopeID string, rows []ParquetMetricRow) []ParquetMetricRow {
|
|
||||||
for metricName, cm := range cf.Metrics {
|
|
||||||
ts := cm.Start
|
|
||||||
for _, v := range cm.Data {
|
|
||||||
if !v.IsNaN() {
|
|
||||||
rows = append(rows, ParquetMetricRow{
|
|
||||||
Cluster: cluster,
|
|
||||||
Hostname: hostname,
|
|
||||||
Metric: metricName,
|
|
||||||
Scope: scope,
|
|
||||||
ScopeID: scopeID,
|
|
||||||
Timestamp: ts,
|
|
||||||
Frequency: cm.Frequency,
|
|
||||||
Value: float32(v),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ts += cm.Frequency
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for childName, childCf := range cf.Children {
|
|
||||||
childScope, childScopeID := parseScopeFromName(childName)
|
|
||||||
rows = flattenCheckpointFile(childCf, cluster, hostname, childScope, childScopeID, rows)
|
|
||||||
}
|
|
||||||
|
|
||||||
return rows
|
|
||||||
}
|
|
||||||
|
|
||||||
// parseScopeFromName infers scope and scope_id from a child level name.
|
// parseScopeFromName infers scope and scope_id from a child level name.
|
||||||
// Examples: "socket0" → ("socket", "0"), "core12" → ("core", "12"),
|
// Examples: "socket0" → ("socket", "0"), "core12" → ("core", "12"),
|
||||||
// "a0" (accelerator) → ("accelerator", "0").
|
// "a0" (accelerator) → ("accelerator", "0").
|
||||||
@@ -93,15 +61,17 @@ func parseScopeFromName(name string) (string, string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// parquetArchiveWriter supports incremental writes to a Parquet file.
|
// parquetArchiveWriter supports incremental writes to a Parquet file.
|
||||||
// Each call to WriteHostRows writes one row group (typically one host's data),
|
// Uses streaming writes to avoid accumulating all rows in memory.
|
||||||
// avoiding accumulation of all rows in memory.
|
|
||||||
type parquetArchiveWriter struct {
|
type parquetArchiveWriter struct {
|
||||||
writer *pq.GenericWriter[ParquetMetricRow]
|
writer *pq.GenericWriter[ParquetMetricRow]
|
||||||
bw *bufio.Writer
|
bw *bufio.Writer
|
||||||
f *os.File
|
f *os.File
|
||||||
|
batch []ParquetMetricRow // reusable batch buffer
|
||||||
count int
|
count int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const parquetBatchSize = 1024
|
||||||
|
|
||||||
// newParquetArchiveWriter creates a streaming Parquet writer with Zstd compression.
|
// newParquetArchiveWriter creates a streaming Parquet writer with Zstd compression.
|
||||||
func newParquetArchiveWriter(filename string) (*parquetArchiveWriter, error) {
|
func newParquetArchiveWriter(filename string) (*parquetArchiveWriter, error) {
|
||||||
if err := os.MkdirAll(filepath.Dir(filename), CheckpointDirPerms); err != nil {
|
if err := os.MkdirAll(filepath.Dir(filename), CheckpointDirPerms); err != nil {
|
||||||
@@ -119,31 +89,85 @@ func newParquetArchiveWriter(filename string) (*parquetArchiveWriter, error) {
|
|||||||
pq.Compression(&pq.Zstd),
|
pq.Compression(&pq.Zstd),
|
||||||
)
|
)
|
||||||
|
|
||||||
return &parquetArchiveWriter{writer: writer, bw: bw, f: f}, nil
|
return &parquetArchiveWriter{
|
||||||
|
writer: writer,
|
||||||
|
bw: bw,
|
||||||
|
f: f,
|
||||||
|
batch: make([]ParquetMetricRow, 0, parquetBatchSize),
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// WriteHostRows sorts rows by (metric, timestamp) in-place, writes them,
|
// WriteCheckpointFile streams a CheckpointFile tree directly to Parquet rows,
|
||||||
// and flushes to create a separate row group.
|
// writing metrics in sorted order without materializing all rows in memory.
|
||||||
func (w *parquetArchiveWriter) WriteHostRows(rows []ParquetMetricRow) error {
|
// Produces one row group per call (one checkpoint file of a single host).
|
||||||
sort.Slice(rows, func(i, j int) bool {
|
func (w *parquetArchiveWriter) WriteCheckpointFile(cf *CheckpointFile, cluster, hostname, scope, scopeID string) error {
|
||||||
if rows[i].Metric != rows[j].Metric {
|
w.writeLevel(cf, cluster, hostname, scope, scopeID)
|
||||||
return rows[i].Metric < rows[j].Metric
|
|
||||||
}
|
|
||||||
return rows[i].Timestamp < rows[j].Timestamp
|
|
||||||
})
|
|
||||||
|
|
||||||
if _, err := w.writer.Write(rows); err != nil {
|
// Flush remaining batch
|
||||||
return fmt.Errorf("writing parquet rows: %w", err)
|
if len(w.batch) > 0 {
|
||||||
|
if _, err := w.writer.Write(w.batch); err != nil {
|
||||||
|
return fmt.Errorf("writing parquet rows: %w", err)
|
||||||
|
}
|
||||||
|
w.count += len(w.batch)
|
||||||
|
w.batch = w.batch[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := w.writer.Flush(); err != nil {
|
if err := w.writer.Flush(); err != nil {
|
||||||
return fmt.Errorf("flushing parquet row group: %w", err)
|
return fmt.Errorf("flushing parquet row group: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
w.count += len(rows)
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// writeLevel recursively writes metrics from a CheckpointFile level. It returns no error: the result of intermediate full-batch writes is not checked here.
|
||||||
|
// Metric names and child names are sorted for deterministic, compression-friendly output.
|
||||||
|
func (w *parquetArchiveWriter) writeLevel(cf *CheckpointFile, cluster, hostname, scope, scopeID string) {
|
||||||
|
// Sort metric names for deterministic order
|
||||||
|
metricNames := make([]string, 0, len(cf.Metrics))
|
||||||
|
for name := range cf.Metrics {
|
||||||
|
metricNames = append(metricNames, name)
|
||||||
|
}
|
||||||
|
sort.Strings(metricNames)
|
||||||
|
|
||||||
|
for _, metricName := range metricNames {
|
||||||
|
cm := cf.Metrics[metricName]
|
||||||
|
ts := cm.Start
|
||||||
|
for _, v := range cm.Data {
|
||||||
|
if !v.IsNaN() {
|
||||||
|
w.batch = append(w.batch, ParquetMetricRow{
|
||||||
|
Cluster: cluster,
|
||||||
|
Hostname: hostname,
|
||||||
|
Metric: metricName,
|
||||||
|
Scope: scope,
|
||||||
|
ScopeID: scopeID,
|
||||||
|
Timestamp: ts,
|
||||||
|
Frequency: cm.Frequency,
|
||||||
|
Value: float32(v),
|
||||||
|
})
|
||||||
|
|
||||||
|
if len(w.batch) >= parquetBatchSize {
|
||||||
|
w.writer.Write(w.batch)
|
||||||
|
w.count += len(w.batch)
|
||||||
|
w.batch = w.batch[:0]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ts += cm.Frequency
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort child names for deterministic order
|
||||||
|
childNames := make([]string, 0, len(cf.Children))
|
||||||
|
for name := range cf.Children {
|
||||||
|
childNames = append(childNames, name)
|
||||||
|
}
|
||||||
|
sort.Strings(childNames)
|
||||||
|
|
||||||
|
for _, childName := range childNames {
|
||||||
|
childScope, childScopeID := parseScopeFromName(childName)
|
||||||
|
w.writeLevel(cf.Children[childName], cluster, hostname, childScope, childScopeID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Close finalises the Parquet file (footer, buffered I/O, file handle).
|
// Close finalises the Parquet file (footer, buffered I/O, file handle).
|
||||||
func (w *parquetArchiveWriter) Close() error {
|
func (w *parquetArchiveWriter) Close() error {
|
||||||
if err := w.writer.Close(); err != nil {
|
if err := w.writer.Close(); err != nil {
|
||||||
@@ -159,16 +183,6 @@ func (w *parquetArchiveWriter) Close() error {
|
|||||||
return w.f.Close()
|
return w.f.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
// sortParquetRows sorts rows by (metric, timestamp) in-place.
|
|
||||||
func sortParquetRows(rows []ParquetMetricRow) {
|
|
||||||
sort.Slice(rows, func(i, j int) bool {
|
|
||||||
if rows[i].Metric != rows[j].Metric {
|
|
||||||
return rows[i].Metric < rows[j].Metric
|
|
||||||
}
|
|
||||||
return rows[i].Timestamp < rows[j].Timestamp
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// loadCheckpointFileFromDisk reads a JSON or binary checkpoint file and returns
|
// loadCheckpointFileFromDisk reads a JSON or binary checkpoint file and returns
|
||||||
// a CheckpointFile. Used by the Parquet archiver to read checkpoint data
|
// a CheckpointFile. Used by the Parquet archiver to read checkpoint data
|
||||||
// before converting it to Parquet format.
|
// before converting it to Parquet format.
|
||||||
@@ -218,22 +232,10 @@ func loadCheckpointFileFromDisk(filename string) (*CheckpointFile, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// estimateRowCount estimates the number of Parquet rows a CheckpointFile will produce.
|
// loadCheckpointFiles reads checkpoint files for a host directory and returns
|
||||||
// Used for pre-allocating the rows slice to avoid repeated append doubling.
|
// the loaded CheckpointFiles and their filenames. Processes one file at a time
|
||||||
func estimateRowCount(cf *CheckpointFile) int {
|
// while reading, but returns them all together, so a host's checkpoints are in memory at once until written.
|
||||||
n := 0
|
func loadCheckpointFiles(dir string, from int64) ([]*CheckpointFile, []string, error) {
|
||||||
for _, cm := range cf.Metrics {
|
|
||||||
n += len(cm.Data)
|
|
||||||
}
|
|
||||||
for _, child := range cf.Children {
|
|
||||||
n += estimateRowCount(child)
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
// archiveCheckpointsToParquet reads checkpoint files for a host directory,
|
|
||||||
// converts them to Parquet rows. Returns the rows and filenames that were processed.
|
|
||||||
func archiveCheckpointsToParquet(dir, cluster, host string, from int64) ([]ParquetMetricRow, []string, error) {
|
|
||||||
entries, err := os.ReadDir(dir)
|
entries, err := os.ReadDir(dir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
@@ -248,36 +250,18 @@ func archiveCheckpointsToParquet(dir, cluster, host string, from int64) ([]Parqu
|
|||||||
return nil, nil, nil
|
return nil, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// First pass: load checkpoints and estimate total rows for pre-allocation.
|
var checkpoints []*CheckpointFile
|
||||||
type loaded struct {
|
var processedFiles []string
|
||||||
cf *CheckpointFile
|
|
||||||
filename string
|
|
||||||
}
|
|
||||||
var checkpoints []loaded
|
|
||||||
totalEstimate := 0
|
|
||||||
|
|
||||||
for _, checkpoint := range files {
|
for _, checkpoint := range files {
|
||||||
filename := filepath.Join(dir, checkpoint)
|
filename := filepath.Join(dir, checkpoint)
|
||||||
cf, err := loadCheckpointFileFromDisk(filename)
|
cf, err := loadCheckpointFileFromDisk(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Warnf("[METRICSTORE]> skipping unreadable checkpoint %s: %v", filename, err)
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
totalEstimate += estimateRowCount(cf)
|
checkpoints = append(checkpoints, cf)
|
||||||
checkpoints = append(checkpoints, loaded{cf: cf, filename: checkpoint})
|
processedFiles = append(processedFiles, checkpoint)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(checkpoints) == 0 {
|
return checkpoints, processedFiles, nil
|
||||||
return nil, nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
rows := make([]ParquetMetricRow, 0, totalEstimate)
|
|
||||||
processedFiles := make([]string, 0, len(checkpoints))
|
|
||||||
|
|
||||||
for _, cp := range checkpoints {
|
|
||||||
rows = flattenCheckpointFile(cp.cf, cluster, host, "node", "", rows)
|
|
||||||
processedFiles = append(processedFiles, cp.filename)
|
|
||||||
}
|
|
||||||
|
|
||||||
return rows, processedFiles, nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ func TestParseScopeFromName(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFlattenCheckpointFile(t *testing.T) {
|
func TestWriteCheckpointFile(t *testing.T) {
|
||||||
cf := &CheckpointFile{
|
cf := &CheckpointFile{
|
||||||
From: 1000,
|
From: 1000,
|
||||||
To: 1060,
|
To: 1060,
|
||||||
@@ -69,17 +69,55 @@ func TestFlattenCheckpointFile(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
rows := flattenCheckpointFile(cf, "fritz", "node001", "node", "", nil)
|
tmpDir := t.TempDir()
|
||||||
|
parquetFile := filepath.Join(tmpDir, "test.parquet")
|
||||||
|
writer, err := newParquetArchiveWriter(parquetFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := writer.WriteCheckpointFile(cf, "fritz", "node001", "node", ""); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := writer.Close(); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
// cpu_load: 2 non-NaN values at node scope
|
// cpu_load: 2 non-NaN values at node scope
|
||||||
// mem_bw: 2 non-NaN values at socket0 scope
|
// mem_bw: 2 non-NaN values at socket0 scope
|
||||||
if len(rows) != 4 {
|
if writer.count != 4 {
|
||||||
t.Fatalf("expected 4 rows, got %d", len(rows))
|
t.Fatalf("expected 4 rows written, got %d", writer.count)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read back and verify
|
||||||
|
f, err := os.Open(parquetFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
stat, _ := f.Stat()
|
||||||
|
pf, err := pq.OpenFile(f, stat.Size())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
reader := pq.NewGenericReader[ParquetMetricRow](pf)
|
||||||
|
readRows := make([]ParquetMetricRow, 100)
|
||||||
|
n, err := reader.Read(readRows)
|
||||||
|
if err != nil && n == 0 {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
readRows = readRows[:n]
|
||||||
|
reader.Close()
|
||||||
|
|
||||||
|
if n != 4 {
|
||||||
|
t.Fatalf("expected 4 rows, got %d", n)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify a node-scope row
|
// Verify a node-scope row
|
||||||
found := false
|
found := false
|
||||||
for _, r := range rows {
|
for _, r := range readRows {
|
||||||
if r.Metric == "cpu_load" && r.Timestamp == 1000 {
|
if r.Metric == "cpu_load" && r.Timestamp == 1000 {
|
||||||
found = true
|
found = true
|
||||||
if r.Cluster != "fritz" || r.Hostname != "node001" || r.Scope != "node" || r.Value != 0.5 {
|
if r.Cluster != "fritz" || r.Hostname != "node001" || r.Scope != "node" || r.Value != 0.5 {
|
||||||
@@ -93,7 +131,7 @@ func TestFlattenCheckpointFile(t *testing.T) {
|
|||||||
|
|
||||||
// Verify a socket-scope row
|
// Verify a socket-scope row
|
||||||
found = false
|
found = false
|
||||||
for _, r := range rows {
|
for _, r := range readRows {
|
||||||
if r.Metric == "mem_bw" && r.Scope == "socket" && r.ScopeID == "0" {
|
if r.Metric == "mem_bw" && r.Scope == "socket" && r.ScopeID == "0" {
|
||||||
found = true
|
found = true
|
||||||
}
|
}
|
||||||
@@ -153,7 +191,7 @@ func TestParquetArchiveRoundtrip(t *testing.T) {
|
|||||||
|
|
||||||
// Archive to Parquet
|
// Archive to Parquet
|
||||||
archiveDir := filepath.Join(tmpDir, "archive")
|
archiveDir := filepath.Join(tmpDir, "archive")
|
||||||
rows, files, err := archiveCheckpointsToParquet(cpDir, "testcluster", "node001", 2000)
|
checkpoints, files, err := loadCheckpointFiles(cpDir, 2000)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
@@ -166,9 +204,10 @@ func TestParquetArchiveRoundtrip(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
sortParquetRows(rows)
|
for _, cp := range checkpoints {
|
||||||
if err := writer.WriteHostRows(rows); err != nil {
|
if err := writer.WriteCheckpointFile(cp, "testcluster", "node001", "node", ""); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if err := writer.Close(); err != nil {
|
if err := writer.Close(); err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
|
|||||||
@@ -161,7 +161,7 @@
|
|||||||
|
|
||||||
<hr/>
|
<hr/>
|
||||||
|
|
||||||
<!-- Node Health Pis, later Charts -->
|
<!-- Node State and Metric Health Pis -->
|
||||||
{#if $statusQuery?.fetching}
|
{#if $statusQuery?.fetching}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
@@ -222,7 +222,7 @@
|
|||||||
<div bind:clientWidth={pieWidth}>
|
<div bind:clientWidth={pieWidth}>
|
||||||
{#key refinedHealthData}
|
{#key refinedHealthData}
|
||||||
<h4 class="text-center">
|
<h4 class="text-center">
|
||||||
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Node Health
|
Current {cluster.charAt(0).toUpperCase() + cluster.slice(1)} Metric Health
|
||||||
</h4>
|
</h4>
|
||||||
<Pie
|
<Pie
|
||||||
canvasId="hpcpie-health"
|
canvasId="hpcpie-health"
|
||||||
|
|||||||
Reference in New Issue
Block a user