Add documentation

This commit is contained in:
2026-01-16 08:27:46 +01:00
parent 93dcfee8c5
commit 9a97d0e8eb
6 changed files with 782 additions and 111 deletions

View File

@@ -3,6 +3,41 @@
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// Package metricstore provides buffer.go: Time-series data buffer implementation.
//
// # Buffer Architecture
//
// Each metric at each hierarchical level (cluster/host/cpu/etc.) uses a linked-list
// chain of fixed-size buffers to store time-series data. This design:
//
// - Avoids reallocation/copying when growing (new links added instead)
// - Enables efficient pooling (buffers returned to sync.Pool)
// - Supports traversal back in time (via prev pointers)
// - Maintains temporal ordering (newer data in later buffers)
//
// # Buffer Chain Example
//
// [oldest buffer] <- prev -- [older] <- prev -- [newest buffer (head)]
// start=1000 start=1512 start=2024
// data=[v0...v511] data=[v0...v511] data=[v0...v42]
//
// When the head buffer reaches capacity (BufferCap = 512), a new buffer becomes
// the new head and the old head is linked via prev.
//
// # Pooling Strategy
//
// sync.Pool reduces GC pressure for the common case (BufferCap-sized allocations).
// Non-standard capacity buffers are not pooled (e.g., from checkpoint deserialization).
//
// # Time Alignment
//
// Timestamps are aligned to measurement frequency intervals:
//
// index = (timestamp - buffer.start) / buffer.frequency
// actualTime = buffer.start + (frequency / 2) + (index * frequency)
//
// Missing data points are represented as NaN values. The read() function performs
// linear interpolation where possible.
package metricstore
import (
@@ -27,14 +62,30 @@ var bufferPool sync.Pool = sync.Pool{
}
var (
ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level")
// ErrNoData indicates no time-series data exists for the requested metric/level.
ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level")
// ErrDataDoesNotAlign indicates that aggregated data from child scopes
// does not align with the parent scope's expected timestamps/intervals.
ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
)
// Each metric on each level has it's own buffer.
// This is where the actual values go.
// If `cap(data)` is reached, a new buffer is created and
// becomes the new head of a buffer list.
// buffer stores time-series data for a single metric at a specific hierarchical level.
//
// Buffers form doubly-linked chains ordered by time. When capacity is reached,
// a new buffer becomes the head and the old head is linked via prev/next.
//
// Fields:
// - prev: Link to older buffer in the chain (nil if this is oldest)
// - next: Link to newer buffer in the chain (nil if this is newest/head)
// - data: Time-series values (schema.Float supports NaN for missing data)
// - frequency: Measurement interval in seconds
// - start: Start timestamp (adjusted by -frequency/2 for alignment)
// - archived: True if data has been persisted to disk archive
// - closed: True if buffer is no longer accepting writes
//
// Index calculation: index = (timestamp - start) / frequency
// Actual data timestamp: start + (frequency / 2) + (index * frequency)
type buffer struct {
prev *buffer
next *buffer
@@ -57,10 +108,22 @@ func newBuffer(ts, freq int64) *buffer {
return b
}
// If a new buffer was created, the new head is returnd.
// Otherwise, the existing buffer is returnd.
// Normaly, only "newer" data should be written, but if the value would
// end up in the same buffer anyways it is allowed.
// write appends a timestamped value to the buffer chain.
//
// Returns the head buffer (which may be newly created if capacity was reached).
// Timestamps older than the buffer's start are rejected. If the calculated index
// exceeds capacity, a new buffer is allocated and linked as the new head.
//
// Missing timestamps are automatically filled with NaN values to maintain alignment.
// Overwrites are allowed if the index is already within the existing data slice.
//
// Parameters:
// - ts: Unix timestamp in seconds
// - value: Metric value (can be schema.NaN for missing data)
//
// Returns:
// - *buffer: The new head buffer (same as b if no new buffer created)
// - error: Non-nil if timestamp is before buffer start
func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
if ts < b.start {
return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
@@ -99,13 +162,27 @@ func (b *buffer) firstWrite() int64 {
return b.start + (b.frequency / 2)
}
// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Simple linear interpolation is done between the two neighboring cells if possible.
// If values at the start or end are missing, instead of NaN values, the second and thrid
// return values contain the actual `from`/`to`.
// This function goes back the buffer chain if `from` is older than the currents buffer start.
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
// If `data` is not long enough to hold all values, this function will panic!
// read retrieves time-series data from the buffer chain for the specified time range.
//
// Traverses the buffer chain backwards (via prev links) if 'from' precedes the current
// buffer's start. Missing data points are represented as NaN. Values are accumulated
// into the provided 'data' slice (using +=, so caller must zero-initialize if needed).
//
// The function adjusts the actual time range returned if data is unavailable at the
// boundaries (returned via adjusted from/to timestamps).
//
// Parameters:
// - from: Start timestamp (Unix seconds)
// - to: End timestamp (Unix seconds, exclusive)
// - data: Pre-allocated slice to accumulate results (must be large enough)
//
// Returns:
// - []schema.Float: Slice of data (may be shorter than input 'data' slice)
// - int64: Actual start timestamp with available data
// - int64: Actual end timestamp (exclusive)
// - error: Non-nil on failure
//
// Panics if 'data' slice is too small to hold all values in [from, to).
func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
if from < b.firstWrite() {
if b.prev != nil {
@@ -142,7 +219,18 @@ func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int6
return data[:i], from, t, nil
}
// Returns true if this buffer needs to be freed.
// free removes buffers older than the specified timestamp from the chain.
//
// Recursively traverses backwards (via prev) and unlinks buffers whose end time
// is before the retention threshold. Freed buffers are returned to the pool if
// they have the standard capacity (BufferCap).
//
// Parameters:
// - t: Retention threshold timestamp (Unix seconds)
//
// Returns:
// - delme: True if the current buffer itself should be deleted by caller
// - n: Number of buffers freed in this subtree
func (b *buffer) free(t int64) (delme bool, n int) {
if b.prev != nil {
delme, m := b.prev.free(t)
@@ -196,7 +284,19 @@ func (b *buffer) forceFreeOldest() (delme bool, n int) {
return true, 1
}
// Call `callback` on every buffer that contains data in the range from `from` to `to`.
// iterFromTo invokes callback on every buffer in the chain that overlaps [from, to].
//
// Traverses backwards (via prev) first, then processes current buffer if it overlaps
// the time range. Used for checkpoint/archive operations that need to serialize buffers
// within a specific time window.
//
// Parameters:
// - from: Start timestamp (Unix seconds, inclusive)
// - to: End timestamp (Unix seconds, inclusive)
// - callback: Function to invoke on each overlapping buffer
//
// Returns:
// - error: First error returned by callback, or nil if all succeeded
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
if b == nil {
return nil