	Start working on streaming checkpoints
internal/memstore/chunks.go (new file, 197 lines)
@@ -0,0 +1,197 @@
package memstore

import (
	"errors"
	"math"

	"github.com/ClusterCockpit/cc-metric-store/internal/types"
)

type chunk struct {
	frequency    int64         // Time between two "slots"
	start        int64         // Timestamp of when `data[0]` was written.
	prev, next   *chunk        // `prev` contains older data, `next` newer data.
	data         []types.Float // The slice should never reallocate as `cap(data)` is respected.
	checkpointed bool          // If true, this buffer is already in a checkpoint on disk and full.
}

func newChunk(ts, freq int64) *chunk {
	b := &chunk{}
	b.frequency = freq
	b.start = ts - (freq / 2)
	b.prev = nil
	b.next = nil
	b.checkpointed = false
	b.data = RequestFloatSlice(bufferSizeInFloats)
	b.data = b.data[:0]
	return b
}

func freeChunk(c *chunk) {
	ReleaseFloatSlice(c.data)
}

// If a new buffer was created, the new head is returned.
// Otherwise, the existing buffer is returned.
// Normally, only "newer" data should be written, but if the value would
// end up in the same buffer anyway, it is allowed.
func (c *chunk) write(ts int64, value types.Float) (*chunk, error) {
	if ts < c.start {
		return nil, errors.New("cannot write value to buffer from past")
	}

	// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
	idx := int((ts - c.start) / c.frequency)
	if idx >= cap(c.data) {
		newchunk := newChunk(ts, c.frequency)
		newchunk.prev = c
		c.next = newchunk
		c = newchunk
		idx = 0
	}

	// Overwriting value or writing value from past
	if idx < len(c.data) {
		c.data[idx] = value
		return c, nil
	}

	// Fill up unwritten slots with NaN
	for i := len(c.data); i < idx; i++ {
		c.data = append(c.data, types.NaN)
	}

	c.data = append(c.data, value)
	return c, nil
}

func (c *chunk) end() int64 {
	return c.firstWrite() + int64(len(c.data))*c.frequency
}

func (c *chunk) firstWrite() int64 {
	return c.start + (c.frequency / 2)
}

func (c *chunk) close() {}

// func interpolate(idx int, data []Float) Float {
// 	if idx == 0 || idx+1 == len(data) {
// 		return NaN
// 	}
// 	return (data[idx-1] + data[idx+1]) / 2.0
// }

// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Simple linear interpolation is done between the two neighboring cells if possible.
// If values at the start or end are missing, instead of NaN values, the second and third
// return values contain the actual `from`/`to`.
// This function goes back the buffer chain if `from` is older than the current buffer's start.
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
// If `data` is not long enough to hold all values, this function will panic!
func (c *chunk) read(from, to int64, data []types.Float) ([]types.Float, int64, int64, error) {
	if from < c.firstWrite() {
		if c.prev != nil {
			return c.prev.read(from, to, data)
		}
		from = c.firstWrite()
	}

	var i int = 0
	var t int64 = from
	for ; t < to; t += c.frequency {
		idx := int((t - c.start) / c.frequency)
		if idx >= cap(c.data) {
			if c.next == nil {
				break
			}
			c = c.next
			idx = 0
		}

		if idx >= len(c.data) {
			if c.next == nil || to <= c.next.start {
				break
			}
			data[i] += types.NaN
		} else if t < c.start {
			data[i] += types.NaN
			// } else if b.data[idx].IsNaN() {
			// 	data[i] += interpolate(idx, b.data)
		} else {
			data[i] += c.data[idx]
		}
		i++
	}

	return data[:i], from, t, nil
}

func (c *chunk) stats(from, to int64, data []types.Float) (types.Stats, int64, int64, error) {
	stats := types.Stats{
		Samples: 0,
		Min:     types.Float(math.MaxFloat64),
		Avg:     0.0,
		Max:     types.Float(-math.MaxFloat64),
	}

	if from < c.firstWrite() {
		if c.prev != nil {
			return c.prev.stats(from, to, data)
		}
		from = c.firstWrite()
	}

	var i int = 0
	var t int64 = from
	for ; t < to; t += c.frequency {
		idx := int((t - c.start) / c.frequency)
		if idx >= cap(c.data) {
			if c.next == nil {
				break
			}
			c = c.next
			idx = 0
		}

		if idx >= len(c.data) {
			if c.next == nil || to <= c.next.start {
				break
			}
		} else if t >= c.start && !c.data[idx].IsNaN() {
			x := c.data[idx]
			stats.Samples += 1
			stats.Avg += x
			stats.Max = types.Float(math.Max(float64(x), float64(stats.Max)))
			stats.Min = types.Float(math.Min(float64(x), float64(stats.Min)))
		}
		i++
	}

	stats.Avg /= types.Float(stats.Samples)
	if stats.Samples == 0 {
		stats.Max = types.NaN
		stats.Min = types.NaN
	}
	return stats, from, t, nil
}

// Returns true if this buffer needs to be freed.
func (c *chunk) free(t int64) (delme bool, n int) {
	if c.prev != nil {
		delme, m := c.prev.free(t)
		n += m
		if delme {
			c.prev.next = nil
			freeChunk(c.prev)
			c.prev = nil
		}
	}

	end := c.end()
	if end < t {
		return true, n + 1
	}

	return false, n
}
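
RequestFloatSlice, ReleaseFloatSlice and bufferSizeInFloats are called above but are not part of this commit. The sketch below shows one plausible sync.Pool-backed version of them, plus how the chunk chain is then written to and read back; every name and constant outside the diff is an assumption, not the project's actual code.

package memstore

import (
	"sync"

	"github.com/ClusterCockpit/cc-metric-store/internal/types"
)

// Assumed: capacity of a single chunk's data slice.
const bufferSizeInFloats = 512

// Assumed: a simple pool so chunk buffers are reused instead of reallocated.
var floatSlicePool = sync.Pool{
	New: func() interface{} {
		return make([]types.Float, 0, bufferSizeInFloats)
	},
}

func RequestFloatSlice(n int) []types.Float {
	s := floatSlicePool.Get().([]types.Float)
	if cap(s) < n {
		return make([]types.Float, 0, n)
	}
	return s[:0]
}

func ReleaseFloatSlice(s []types.Float) {
	if s != nil {
		floatSlicePool.Put(s[:0])
	}
}

// Illustrative use of the chunk chain: write four samples, then read them back.
func exampleChunkRoundTrip() ([]types.Float, error) {
	head := newChunk(1000, 10) // first sample at t=1000, one slot every 10 seconds
	var err error
	for i, v := range []types.Float{1, 2, 3, 4} {
		// write returns the (possibly new) head of the chain.
		if head, err = head.write(1000+int64(i)*10, v); err != nil {
			return nil, err
		}
	}

	// read accumulates into the passed buffer, so it must be zeroed and large enough.
	out := make([]types.Float, 4)
	data, _, _, err := head.read(1000, 1040, out)
	return data, err // data == [1 2 3 4]
}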
							
								
								
									
internal/memstore/memstore.go (new file, 102 lines)
@@ -0,0 +1,102 @@
package memstore

import "sync"

// Could also be called "node" as this forms a node in a tree structure.
// Called level because "node" might be confusing here.
// Can be either a leaf or an inner node. In this tree structure, inner nodes can
// also hold data (in `metrics`).
type level struct {
	lock      sync.RWMutex
	metrics   []*chunk          // Every level can store metrics.
	sublevels map[string]*level // Lower levels.
}

// Find the correct level for the given selector, creating it if
// it does not exist. An example selector in the context of
// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }.
// This function would probably benefit a lot from `level.sublevels` being a `sync.Map`?
func (l *level) findLevelOrCreate(selector []string, nMetrics int) *level {
	if len(selector) == 0 {
		return l
	}

	// Allow concurrent reads:
	l.lock.RLock()
	var child *level
	var ok bool
	if l.sublevels == nil {
		// sublevels map needs to be created...
		l.lock.RUnlock()
	} else {
		child, ok = l.sublevels[selector[0]]
		l.lock.RUnlock()
		if ok {
			return child.findLevelOrCreate(selector[1:], nMetrics)
		}
	}

	// The level does not exist, take the write lock for unique access:
	l.lock.Lock()
	// While this thread waited for the write lock, another thread
	// could have created the child node.
	if l.sublevels != nil {
		child, ok = l.sublevels[selector[0]]
		if ok {
			l.lock.Unlock()
			return child.findLevelOrCreate(selector[1:], nMetrics)
		}
	}

	child = &level{
		metrics:   make([]*chunk, nMetrics),
		sublevels: nil,
	}

	if l.sublevels != nil {
		l.sublevels[selector[0]] = child
	} else {
		l.sublevels = map[string]*level{selector[0]: child}
	}
	l.lock.Unlock()
	return child.findLevelOrCreate(selector[1:], nMetrics)
}

func (l *level) free(t int64) (delme bool, n int) {
	l.lock.Lock()
	defer l.lock.Unlock()

	for i, c := range l.metrics {
		if c != nil {
			delchunk, m := c.free(t)
			n += m
			if delchunk {
				freeChunk(c)
				l.metrics[i] = nil
			}
		}
	}

	for key, sublevel := range l.sublevels {
		delsublevel, m := sublevel.free(t)
		n += m
		if delsublevel {
			delete(l.sublevels, key)
		}
	}

	return len(l.metrics) == 0 && len(l.sublevels) == 0, n
}

type MemoryStore struct {
	root level // root of the tree structure
	// TODO...
}

func (ms *MemoryStore) GetOffset(metric string) int {
	return -1 // TODO!
}

func (ms *MemoryStore) GetMetricForOffset(offset int) string {
	return "" // TODO!
}
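
GetOffset and GetMetricForOffset are still stubs. The sketch below shows one way they could later be backed by a fixed metric list, together with a comment showing how a selector reaches its level; metricRegistry and all other identifiers not in the diff are hypothetical.

package memstore

// Hypothetical helper: a bidirectional metric <-> offset mapping that the
// GetOffset/GetMetricForOffset stubs could be backed by.
type metricRegistry struct {
	offsets map[string]int // metric name -> index into level.metrics
	names   []string       // index -> metric name
}

func newMetricRegistry(metricNames []string) *metricRegistry {
	r := &metricRegistry{offsets: make(map[string]int, len(metricNames))}
	for _, name := range metricNames {
		r.offsets[name] = len(r.names)
		r.names = append(r.names, name)
	}
	return r
}

func (r *metricRegistry) offset(metric string) int {
	if off, ok := r.offsets[metric]; ok {
		return off
	}
	return -1
}

func (r *metricRegistry) metricForOffset(offset int) string {
	if offset < 0 || offset >= len(r.names) {
		return ""
	}
	return r.names[offset]
}

// With nMetrics == len(r.names), a selector then finds or creates its node like:
//   lvl := ms.root.findLevelOrCreate([]string{"emmy", "host123", "cpu0"}, len(r.names))
//   c := lvl.metrics[r.offset("flops_any")] // chunk chain for that metric on cpu0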
							
								
								
									
internal/memstore/streaming-checkpoint.go (new file, 62 lines)
@@ -0,0 +1,62 @@
package memstore

import (
	"io"
	"reflect"
	"unsafe"

	"github.com/ClusterCockpit/cc-metric-store/internal/types"
)

func (l *level) streamingCheckpoint(ms *MemoryStore, from, to int64, w io.Writer, buf []byte, metricsbuf []types.Float) ([]byte, error) {
	var err error
	l.lock.RLock()
	defer l.lock.RUnlock()

	buf = encodeBytes(buf, nil) // Reserved

	// Metrics:
	buf = encodeUint32(buf, uint32(len(l.metrics)))
	for i, c := range l.metrics {
		key := ms.GetMetricForOffset(i)
		buf = encodeString(buf, key)

		// Metric
		buf = encodeBytes(buf, nil) // Reserved

		metricsbuf = metricsbuf[:(to-from)/c.frequency+1]
		var cfrom int64
		if metricsbuf, cfrom, _, err = c.read(from, to, metricsbuf); err != nil {
			return nil, err
		}
		buf = encodeUint64(buf, uint64(c.frequency))
		buf = encodeUint64(buf, uint64(cfrom))
		buf = encodeUint32(buf, uint32(len(metricsbuf)))

		var x types.Float
		elmsize := unsafe.Sizeof(x)
		sh := (*reflect.SliceHeader)(unsafe.Pointer(&metricsbuf))
		bytes := unsafe.Slice((*byte)(unsafe.Pointer(sh.Data)), sh.Len*int(elmsize))
		buf = append(buf, bytes...)

		if len(buf) >= writeBufferSize {
			if _, err = w.Write(buf); err != nil {
				return nil, err
			}

			buf = buf[:0]
		}
	}

	// Sublevels:
	buf = encodeUint32(buf, uint32(len(l.sublevels)))
	for key, sublevel := range l.sublevels {
		buf = encodeString(buf, key)
		buf, err = sublevel.streamingCheckpoint(ms, from, to, w, buf, metricsbuf)
		if err != nil {
			return nil, err
		}
	}

	return buf, nil
}
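
encodeBytes, encodeString, encodeUint32, encodeUint64 and writeBufferSize are used here but not defined in this commit. Below is a minimal sketch of length-prefixed, little-endian helpers they could correspond to; the byte layout, the constant value and the driver comment at the end are assumptions.

package memstore

import "encoding/binary"

// Assumed flush threshold in bytes for the streaming writer.
const writeBufferSize = 4096

// encodeUint32 appends x in little-endian byte order.
func encodeUint32(buf []byte, x uint32) []byte {
	var tmp [4]byte
	binary.LittleEndian.PutUint32(tmp[:], x)
	return append(buf, tmp[:]...)
}

// encodeUint64 appends x in little-endian byte order.
func encodeUint64(buf []byte, x uint64) []byte {
	var tmp [8]byte
	binary.LittleEndian.PutUint64(tmp[:], x)
	return append(buf, tmp[:]...)
}

// encodeBytes appends a uint32 length prefix followed by the raw bytes.
func encodeBytes(buf []byte, b []byte) []byte {
	buf = encodeUint32(buf, uint32(len(b)))
	return append(buf, b...)
}

// encodeString appends a uint32 length prefix followed by the string bytes.
func encodeString(buf []byte, s string) []byte {
	buf = encodeUint32(buf, uint32(len(s)))
	return append(buf, s...)
}

// A checkpoint of the whole tree could then be driven roughly like:
//   buf := make([]byte, 0, 2*writeBufferSize)
//   floats := make([]types.Float, maxPointsPerMetric) // sized for (to-from)/minFrequency+1
//   buf, err := ms.root.streamingCheckpoint(ms, from, to, file, buf, floats)
//   if err == nil && len(buf) > 0 {
//       _, err = file.Write(buf) // flush whatever is left in the buffer
//   }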