Add documentation

2026-01-16 09:41:47 +01:00 · 2026-01-16 08:27:46 +01:00
parent 93dcfee8c5
commit 9a97d0e8eb
6 changed files with 782 additions and 111 deletions
--- a/internal/metricstore/buffer.go
+++ b/internal/metricstore/buffer.go
@@ -3,6 +3,41 @@
 // Use of this source code is governed by a MIT-style
 // license that can be found in the LICENSE file.

+// Package metricstore provides buffer.go: Time-series data buffer implementation.
+//
+// # Buffer Architecture
+//
+// Each metric at each hierarchical level (cluster/host/cpu/etc.) uses a linked-list
+// chain of fixed-size buffers to store time-series data. This design:
+//
+//   - Avoids reallocation/copying when growing (new links added instead)
+//   - Enables efficient pooling (buffers returned to sync.Pool)
+//   - Supports traversal back in time (via prev pointers)
+//   - Maintains temporal ordering (newer data in later buffers)
+//
+// # Buffer Chain Example
+//
+//	[oldest buffer] <- prev -- [older] <- prev -- [newest buffer (head)]
+//	  start=1000               start=1512           start=2024
+//	  data=[v0...v511]         data=[v0...v511]     data=[v0...v42]
+//
+// When the head buffer reaches capacity (BufferCap = 512), a new buffer becomes
+// the new head and the old head is linked via prev.
+//
+// # Pooling Strategy
+//
+// sync.Pool reduces GC pressure for the common case (BufferCap-sized allocations).
+// Non-standard capacity buffers are not pooled (e.g., from checkpoint deserialization).
+//
+// # Time Alignment
+//
+// Timestamps are aligned to measurement frequency intervals:
+//
+//	index = (timestamp - buffer.start) / buffer.frequency
+//	actualTime = buffer.start + (frequency / 2) + (index * frequency)
+//
+// Missing data points are represented as NaN values. The read() function performs
+// linear interpolation where possible.
 package metricstore

 import (
@@ -27,14 +62,30 @@ var bufferPool sync.Pool = sync.Pool{
 }

 var (
-	ErrNoData           error = errors.New("[METRICSTORE]> no data for this metric/level")
+	// ErrNoData indicates no time-series data exists for the requested metric/level.
+	ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level")
+
+	// ErrDataDoesNotAlign indicates that aggregated data from child scopes
+	// does not align with the parent scope's expected timestamps/intervals.
 	ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
 )

-// Each metric on each level has it's own buffer.
-// This is where the actual values go.
-// If `cap(data)` is reached, a new buffer is created and
-// becomes the new head of a buffer list.
+// buffer stores time-series data for a single metric at a specific hierarchical level.
+//
+// Buffers form doubly-linked chains ordered by time. When capacity is reached,
+// a new buffer becomes the head and the old head is linked via prev/next.
+//
+// Fields:
+//   - prev:      Link to older buffer in the chain (nil if this is oldest)
+//   - next:      Link to newer buffer in the chain (nil if this is newest/head)
+//   - data:      Time-series values (schema.Float supports NaN for missing data)
+//   - frequency: Measurement interval in seconds
+//   - start:     Start timestamp (adjusted by -frequency/2 for alignment)
+//   - archived:  True if data has been persisted to disk archive
+//   - closed:    True if buffer is no longer accepting writes
+//
+// Index calculation: index = (timestamp - start) / frequency
+// Actual data timestamp: start + (frequency / 2) + (index * frequency)
 type buffer struct {
 	prev      *buffer
 	next      *buffer
@@ -57,10 +108,22 @@ func newBuffer(ts, freq int64) *buffer {
 	return b
 }

-// If a new buffer was created, the new head is returnd.
-// Otherwise, the existing buffer is returnd.
-// Normaly, only "newer" data should be written, but if the value would
-// end up in the same buffer anyways it is allowed.
+// write appends a timestamped value to the buffer chain.
+//
+// Returns the head buffer (which may be newly created if capacity was reached).
+// Timestamps older than the buffer's start are rejected. If the calculated index
+// exceeds capacity, a new buffer is allocated and linked as the new head.
+//
+// Missing timestamps are automatically filled with NaN values to maintain alignment.
+// Overwrites are allowed if the index is already within the existing data slice.
+//
+// Parameters:
+//   - ts:    Unix timestamp in seconds
+//   - value: Metric value (can be schema.NaN for missing data)
+//
+// Returns:
+//   - *buffer: The new head buffer (same as b if no new buffer created)
+//   - error:   Non-nil if timestamp is before buffer start
 func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
 	if ts < b.start {
 		return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
@@ -99,13 +162,27 @@ func (b *buffer) firstWrite() int64 {
 	return b.start + (b.frequency / 2)
 }

-// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
-// Simple linear interpolation is done between the two neighboring cells if possible.
-// If values at the start or end are missing, instead of NaN values, the second and thrid
-// return values contain the actual `from`/`to`.
-// This function goes back the buffer chain if `from` is older than the currents buffer start.
-// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
-// If `data` is not long enough to hold all values, this function will panic!
+// read retrieves time-series data from the buffer chain for the specified time range.
+//
+// Traverses the buffer chain backwards (via prev links) if 'from' precedes the current
+// buffer's start. Missing data points are represented as NaN. Values are accumulated
+// into the provided 'data' slice (using +=, so caller must zero-initialize if needed).
+//
+// The function adjusts the actual time range returned if data is unavailable at the
+// boundaries (returned via adjusted from/to timestamps).
+//
+// Parameters:
+//   - from: Start timestamp (Unix seconds)
+//   - to:   End timestamp (Unix seconds, exclusive)
+//   - data: Pre-allocated slice to accumulate results (must be large enough)
+//
+// Returns:
+//   - []schema.Float: Slice of data (may be shorter than input 'data' slice)
+//   - int64:          Actual start timestamp with available data
+//   - int64:          Actual end timestamp (exclusive)
+//   - error:          Non-nil on failure
+//
+// Panics if 'data' slice is too small to hold all values in [from, to).
 func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
 	if from < b.firstWrite() {
 		if b.prev != nil {
@@ -142,7 +219,18 @@ func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int6
 	return data[:i], from, t, nil
 }

-// Returns true if this buffer needs to be freed.
+// free removes buffers older than the specified timestamp from the chain.
+//
+// Recursively traverses backwards (via prev) and unlinks buffers whose end time
+// is before the retention threshold. Freed buffers are returned to the pool if
+// they have the standard capacity (BufferCap).
+//
+// Parameters:
+//   - t: Retention threshold timestamp (Unix seconds)
+//
+// Returns:
+//   - delme: True if the current buffer itself should be deleted by caller
+//   - n:     Number of buffers freed in this subtree
 func (b *buffer) free(t int64) (delme bool, n int) {
 	if b.prev != nil {
 		delme, m := b.prev.free(t)
@@ -196,7 +284,19 @@ func (b *buffer) forceFreeOldest() (delme bool, n int) {
 	return true, 1
 }

-// Call `callback` on every buffer that contains data in the range from `from` to `to`.
+// iterFromTo invokes callback on every buffer in the chain that overlaps [from, to].
+//
+// Traverses backwards (via prev) first, then processes current buffer if it overlaps
+// the time range. Used for checkpoint/archive operations that need to serialize buffers
+// within a specific time window.
+//
+// Parameters:
+//   - from:     Start timestamp (Unix seconds, inclusive)
+//   - to:       End timestamp (Unix seconds, inclusive)
+//   - callback: Function to invoke on each overlapping buffer
+//
+// Returns:
+//   - error: First error returned by callback, or nil if all succeeded
 func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
 	if b == nil {
 		return nil