cc-backend/internal/metricstore/buffer.go
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// This file (buffer.go) provides the time-series data buffer implementation for package metricstore.
//
// # Buffer Architecture
//
// Each metric at each hierarchical level (cluster/host/cpu/etc.) uses a linked-list
// chain of fixed-size buffers to store time-series data. This design:
//
// - Avoids reallocation/copying when growing (new links added instead)
// - Enables efficient pooling (buffers returned to sync.Pool)
// - Supports traversal back in time (via prev pointers)
// - Maintains temporal ordering (newer data in later buffers)
//
// # Buffer Chain Example
//
//	[oldest buffer] <-- prev --- [older] <-- prev --- [newest buffer (head)]
//	 start=1000                   start=1512           start=2024
//	 data=[v0...v511]             data=[v0...v511]     data=[v0...v42]
//
// (The start values assume a 1-second frequency: each full buffer holds
// BufferCap = 512 samples and therefore spans 512 seconds.)
//
// When the head buffer reaches capacity (BufferCap = 512), a new buffer becomes
// the new head and the old head is linked via prev.
//
// # Pooling Strategy
//
// sync.Pool reduces GC pressure for the common case (BufferCap-sized allocations).
// Non-standard capacity buffers are not pooled (e.g., from checkpoint deserialization).
//
// # Time Alignment
//
// Timestamps are aligned to measurement frequency intervals:
//
// index = (timestamp - buffer.start) / buffer.frequency
// actualTime = buffer.start + (frequency / 2) + (index * frequency)
//
// Missing data points are represented as NaN values. read() returns NaN for
// gaps in the stored data; it does not interpolate between samples.
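//
// Worked example (illustrative numbers; the 60-second frequency here is an
// assumption for the sketch, not a configured default):
//
//	newBuffer(1000, 60)  =>  start = 1000 - 60/2 = 970
//	b.write(1065, v)     =>  index = (1065 - 970) / 60 = 1
//	                         actualTime = 970 + 30 + 1*60 = 1060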
package metricstore

import (
	"errors"
	"sync"

	"github.com/ClusterCockpit/cc-lib/v2/schema"
)
// BufferCap is the default buffer capacity.
// buffer.data will only ever grow up to its capacity and a new link
// in the buffer chain will be created if needed so that no copying
// of data or reallocation needs to happen on writes.
const BufferCap int = DefaultBufferCapacity
var bufferPool sync.Pool = sync.Pool{
New: func() any {
return &buffer{
data: make([]schema.Float, 0, BufferCap),
}
},
}
var (
// ErrNoData indicates no time-series data exists for the requested metric/level.
ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level")
// ErrDataDoesNotAlign indicates that aggregated data from child scopes
// does not align with the parent scope's expected timestamps/intervals.
ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
)
// buffer stores time-series data for a single metric at a specific hierarchical level.
//
// Buffers form doubly-linked chains ordered by time. When capacity is reached,
// a new buffer becomes the head and the old head is linked via prev/next.
//
// Fields:
// - prev: Link to older buffer in the chain (nil if this is oldest)
// - next: Link to newer buffer in the chain (nil if this is newest/head)
// - data: Time-series values (schema.Float supports NaN for missing data)
// - frequency: Measurement interval in seconds
// - start: Start timestamp (adjusted by -frequency/2 for alignment)
// - archived: True if data has been persisted to disk archive
// - closed: True if buffer is no longer accepting writes
//
// Index calculation: index = (timestamp - start) / frequency
// Actual data timestamp: start + (frequency / 2) + (index * frequency)
type buffer struct {
prev *buffer
next *buffer
data []schema.Float
frequency int64
start int64
archived bool
closed bool
}
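// newBuffer takes a buffer from the pool and (re)initializes it for a chain
// starting at timestamp ts with measurement frequency freq (both in seconds).
// The start is shifted back by freq/2 so that timestamps within +/-freq/2 of a
// sample slot map to the same index; the data slice is truncated to length 0
// while keeping its pooled capacity.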
func newBuffer(ts, freq int64) *buffer {
b := bufferPool.Get().(*buffer)
b.frequency = freq
b.start = ts - (freq / 2)
b.prev = nil
b.next = nil
b.archived = false
b.closed = false
b.data = b.data[:0]
return b
}
// write appends a timestamped value to the buffer chain.
//
// Returns the head buffer (which may be newly created if capacity was reached).
// Timestamps older than the buffer's start are rejected. If the calculated index
// exceeds capacity, a new buffer is allocated and linked as the new head.
//
// Missing timestamps are automatically filled with NaN values to maintain alignment.
// Overwrites are allowed if the index is already within the existing data slice.
//
// Parameters:
// - ts: Unix timestamp in seconds
// - value: Metric value (can be schema.NaN for missing data)
//
// Returns:
// - *buffer: The new head buffer (same as b if no new buffer created)
// - error: Non-nil if timestamp is before buffer start
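//
// Minimal caller sketch (variable names are illustrative; the real call sites
// live elsewhere in the metric store and may differ):
//
//	head := newBuffer(ts, freq)
//	head, err := head.write(ts, schema.Float(42.0))
//	if err != nil {
//		// ts was before head.start
//	}
//	// Keep using the returned pointer: it may be a freshly linked head buffer.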
func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
if ts < b.start {
return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
}
// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
idx := int((ts - b.start) / b.frequency)
if idx >= cap(b.data) {
newbuf := newBuffer(ts, b.frequency)
newbuf.prev = b
b.next = newbuf
b = newbuf
idx = 0
}
// Overwriting value or writing value from past
if idx < len(b.data) {
b.data[idx] = value
return b, nil
}
// Fill up unwritten slots with NaN
for i := len(b.data); i < idx; i++ {
b.data = append(b.data, schema.NaN)
}
b.data = append(b.data, value)
return b, nil
}
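// end returns the exclusive end timestamp of this buffer: the timestamp of the
// first sample slot plus len(data) * frequency.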
func (b *buffer) end() int64 {
return b.firstWrite() + int64(len(b.data))*b.frequency
}
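// firstWrite returns the timestamp of the first sample slot in this buffer
// (start is stored shifted back by frequency/2; this undoes that shift).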
func (b *buffer) firstWrite() int64 {
return b.start + (b.frequency / 2)
}
// read retrieves time-series data from the buffer chain for the specified time range.
//
// Traverses the buffer chain backwards (via prev links) if 'from' precedes the current
// buffer's start. Missing data points are represented as NaN. Values are accumulated
// into the provided 'data' slice (using +=, so caller must zero-initialize if needed).
//
// The function adjusts the actual time range returned if data is unavailable at the
// boundaries (returned via adjusted from/to timestamps).
//
// Parameters:
// - from: Start timestamp (Unix seconds)
// - to: End timestamp (Unix seconds, exclusive)
// - data: Pre-allocated slice to accumulate results (must be large enough)
//
// Returns:
// - []schema.Float: Slice of data (may be shorter than input 'data' slice)
// - int64: Actual start timestamp with available data
// - int64: Actual end timestamp (exclusive)
// - error: Non-nil on failure
//
// Panics if 'data' slice is too small to hold all values in [from, to).
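//
// Minimal caller sketch (the slice sizing and variable names are illustrative
// assumptions, not taken from the store implementation):
//
//	n := int((to-from)/b.frequency) + 1
//	buf := make([]schema.Float, n) // zeroed, because read() accumulates with +=
//	res, realFrom, realTo, err := b.read(from, to, buf)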
func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
if from < b.firstWrite() {
if b.prev != nil {
return b.prev.read(from, to, data)
}
from = b.firstWrite()
}
i := 0
t := from
for ; t < to; t += b.frequency {
idx := int((t - b.start) / b.frequency)
if idx >= cap(b.data) {
if b.next == nil {
break
}
b = b.next
idx = 0
}
if idx >= len(b.data) {
if b.next == nil || to <= b.next.start {
break
}
data[i] += schema.NaN
} else if t < b.start {
data[i] += schema.NaN
} else {
data[i] += b.data[idx]
}
i++
}
return data[:i], from, t, nil
}
// free removes buffers older than the specified timestamp from the chain.
//
// Recursively traverses backwards (via prev) and unlinks buffers whose end time
// is before the retention threshold. Freed buffers are returned to the pool if
// they have the standard capacity (BufferCap).
//
// Parameters:
// - t: Retention threshold timestamp (Unix seconds)
//
// Returns:
// - delme: True if the current buffer itself should be deleted by caller
// - n: Number of buffers freed in this subtree
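//
// Minimal caller sketch (hypothetical retention pass; how the owning level
// drops its reference to the head is an assumption here):
//
//	delme, freed := head.free(retentionTime)
//	if delme {
//		head = nil // the whole chain ends before retentionTime
//	}
//	_ = freed // buffers unlinked (returned to the pool if standard capacity)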
func (b *buffer) free(t int64) (delme bool, n int) {
if b.prev != nil {
delme, m := b.prev.free(t)
n += m
if delme {
b.prev.next = nil
if cap(b.prev.data) == BufferCap {
bufferPool.Put(b.prev)
}
b.prev = nil
}
}
end := b.end()
if end < t {
return true, n + 1
}
return false, n
}
// forceFreeOldest recursively walks to the end of the chain (the oldest buffer)
// and removes it.
//
// Returns:
// - delme: True if 'b' itself is the oldest and should be removed by the caller
// - n: Number of buffers freed (will be 1 or 0)
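//
// Minimal caller sketch (hypothetical memory-pressure path; the surrounding
// bookkeeping in the owning level is an assumption here):
//
//	delme, _ := head.forceFreeOldest()
//	if delme {
//		head = nil // head itself was the oldest (and only) buffer in the chain
//	}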
func (b *buffer) forceFreeOldest() (delme bool, n int) {
// If there is a previous buffer, recurse down to find the oldest
if b.prev != nil {
delPrev, freed := b.prev.forceFreeOldest()
// If the previous buffer signals it should be deleted:
if delPrev {
// Unlink references
b.prev.next = nil
// Return to pool if capacity matches
if cap(b.prev.data) == BufferCap {
bufferPool.Put(b.prev)
}
// Remove the link from the current buffer
b.prev = nil
}
return false, freed
}
	// If b.prev is nil, THIS buffer is the oldest.
	// We return true so the parent (or the Level loop) knows to delete its reference to 'b'.
return true, 1
}
// iterFromTo invokes callback on every buffer in the chain that overlaps [from, to].
//
// Traverses backwards (via prev) first, then processes current buffer if it overlaps
// the time range. Used for checkpoint/archive operations that need to serialize buffers
// within a specific time window.
//
// Parameters:
// - from: Start timestamp (Unix seconds, inclusive)
// - to: End timestamp (Unix seconds, inclusive)
// - callback: Function to invoke on each overlapping buffer
//
// Returns:
// - error: First error returned by callback, or nil if all succeeded
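//
// Minimal caller sketch (hypothetical checkpoint writer; the callback body is
// illustrative only):
//
//	err := head.iterFromTo(from, to, func(buf *buffer) error {
//		// e.g. serialize buf.start, buf.frequency and buf.data
//		return nil
//	})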
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
if b == nil {
return nil
}
if err := b.prev.iterFromTo(from, to, callback); err != nil {
return err
}
if from <= b.end() && b.start <= to {
return callback(b)
}
return nil
}
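// count returns the total number of stored sample slots (including NaN fill
// values) in this buffer and all older buffers reachable via prev.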
func (b *buffer) count() int64 {
res := int64(len(b.data))
if b.prev != nil {
res += b.prev.count()
}
return res
}