mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-28 21:37:31 +01:00
788 lines
22 KiB
Go
788 lines
22 KiB
Go
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||
// All rights reserved. This file is part of cc-backend.
|
||
// Use of this source code is governed by a MIT-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
// Package metricstore provides walCheckpoint.go: WAL-based checkpoint implementation.
|
||
//
|
||
// This replaces the Avro shadow tree with an append-only Write-Ahead Log (WAL)
|
||
// per host, eliminating the extra memory overhead of the AvroStore and providing
|
||
// truly continuous (per-write) crash safety.
|
||
//
|
||
// # Architecture
|
||
//
|
||
// Metric write (DecodeLine)
|
||
// │
|
||
// ├─► WriteToLevel() → main MemoryStore (unchanged)
|
||
// │
|
||
// └─► WALMessages channel
|
||
// │
|
||
// ▼
|
||
// WALStaging goroutine
|
||
// │
|
||
// ▼
|
||
// checkpoints/cluster/host/current.wal (append-only, binary)
|
||
//
|
||
// Periodic checkpoint (Checkpointing goroutine):
|
||
// 1. Write <timestamp>.bin snapshot (column-oriented, from main tree)
|
||
// 2. Signal WALStaging to truncate current.wal per host
|
||
//
|
||
// On restart (FromCheckpoint):
|
||
// 1. Load most recent <timestamp>.bin snapshot
|
||
// 2. Replay current.wal (overwrite-safe: buffer.write handles duplicate timestamps)
|
||
//
|
||
// # WAL Record Format
|
||
//
|
||
// [4B magic 0xCC1DA7A1][4B payload_len][payload][4B CRC32]
|
||
//
|
||
// payload:
|
||
// [8B timestamp int64]
|
||
// [2B metric_name_len uint16][N metric name bytes]
|
||
// [1B selector_count uint8]
|
||
// per selector: [1B selector_len uint8][M selector bytes]
|
||
// [4B value float32 bits]
|
||
//
|
||
// # Binary Snapshot Format
|
||
//
|
||
// [4B magic 0xCC5B0001][8B from int64][8B to int64]
|
||
// Level tree (recursive):
|
||
// [4B num_metrics uint32]
|
||
// per metric:
|
||
// [2B name_len uint16][N name bytes]
|
||
// [8B frequency int64][8B start int64]
|
||
// [4B num_values uint32][num_values × 4B float32]
|
||
// [4B num_children uint32]
|
||
// per child: [2B name_len uint16][N name bytes] + Level (recursive)
|
||
package metricstore
|
||
|
||
import (
|
||
"bufio"
|
||
"context"
|
||
"encoding/binary"
|
||
"fmt"
|
||
"hash/crc32"
|
||
"io"
|
||
"math"
|
||
"os"
|
||
"path"
|
||
"sync"
|
||
"sync/atomic"
|
||
|
||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||
)
|
||
|
||
// Magic numbers that identify the on-disk binary formats.
const (
	walFileMagic   uint32 = 0xCC1DA701 // written once at the start of every WAL file
	walRecordMagic uint32 = 0xCC1DA7A1 // written at the start of every WAL record
	snapFileMagic  uint32 = 0xCC5B0001 // written at the start of every binary snapshot
)
|
||
|
||
// WALMessages is the channel for sending metric writes to the WAL staging goroutine.
|
||
// Buffered to allow burst writes without blocking the metric ingestion path.
|
||
var WALMessages = make(chan *WALMessage, 4096)
|
||
|
||
// walRotateCh is used by the checkpoint goroutine to request WAL file rotation
|
||
// (close, delete, reopen) after a binary snapshot has been written.
|
||
var walRotateCh = make(chan walRotateReq, 256)
|
||
|
||
// WALMessage represents a single metric write to be appended to the WAL.
|
||
// Cluster and Node are NOT stored in the WAL record (inferred from file path).
|
||
type WALMessage struct {
|
||
MetricName string
|
||
Cluster string
|
||
Node string
|
||
Selector []string
|
||
Value schema.Float
|
||
Timestamp int64
|
||
}
|
||
|
||
// walRotateReq asks the WAL goroutine to rotate the WAL file of one host
// directory. The WAL goroutine closes done once it has handled the request.
type walRotateReq struct {
	hostDir string        // host directory whose current.wal is rotated
	done    chan struct{} // closed by the WAL goroutine when the request is handled
}
|
||
|
||
// walFileState wraps the open WAL file handle of a single host directory.
type walFileState struct {
	f *os.File // open handle of the host's current.wal
}
|
||
|
||
// WALStaging starts a background goroutine that receives WALMessage items
|
||
// and appends binary WAL records to per-host current.wal files.
|
||
// Also handles WAL rotation requests from the checkpoint goroutine.
|
||
func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||
wg.Add(1)
|
||
go func() {
|
||
defer wg.Done()
|
||
|
||
if Keys.Checkpoints.FileFormat == "json" {
|
||
return
|
||
}
|
||
|
||
hostFiles := make(map[string]*walFileState)
|
||
|
||
defer func() {
|
||
for _, ws := range hostFiles {
|
||
if ws.f != nil {
|
||
ws.f.Close()
|
||
}
|
||
}
|
||
}()
|
||
|
||
getOrOpenWAL := func(hostDir string) *os.File {
|
||
ws, ok := hostFiles[hostDir]
|
||
if ok {
|
||
return ws.f
|
||
}
|
||
|
||
if err := os.MkdirAll(hostDir, CheckpointDirPerms); err != nil {
|
||
cclog.Errorf("[METRICSTORE]> WAL: mkdir %s: %v", hostDir, err)
|
||
return nil
|
||
}
|
||
|
||
walPath := path.Join(hostDir, "current.wal")
|
||
f, err := os.OpenFile(walPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, CheckpointFilePerms)
|
||
if err != nil {
|
||
cclog.Errorf("[METRICSTORE]> WAL: open %s: %v", walPath, err)
|
||
return nil
|
||
}
|
||
|
||
// Write file header magic if file is new (empty).
|
||
info, err := f.Stat()
|
||
if err == nil && info.Size() == 0 {
|
||
var hdr [4]byte
|
||
binary.LittleEndian.PutUint32(hdr[:], walFileMagic)
|
||
if _, err := f.Write(hdr[:]); err != nil {
|
||
cclog.Errorf("[METRICSTORE]> WAL: write header %s: %v", walPath, err)
|
||
f.Close()
|
||
return nil
|
||
}
|
||
}
|
||
|
||
hostFiles[hostDir] = &walFileState{f: f}
|
||
return f
|
||
}
|
||
|
||
processMsg := func(msg *WALMessage) {
|
||
hostDir := path.Join(Keys.Checkpoints.RootDir, msg.Cluster, msg.Node)
|
||
f := getOrOpenWAL(hostDir)
|
||
if f == nil {
|
||
return
|
||
}
|
||
if err := writeWALRecord(f, msg); err != nil {
|
||
cclog.Errorf("[METRICSTORE]> WAL: write record: %v", err)
|
||
}
|
||
}
|
||
|
||
processRotate := func(req walRotateReq) {
|
||
ws, ok := hostFiles[req.hostDir]
|
||
if ok && ws.f != nil {
|
||
ws.f.Close()
|
||
walPath := path.Join(req.hostDir, "current.wal")
|
||
if err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {
|
||
cclog.Errorf("[METRICSTORE]> WAL: remove %s: %v", walPath, err)
|
||
}
|
||
delete(hostFiles, req.hostDir)
|
||
}
|
||
close(req.done)
|
||
}
|
||
|
||
drain := func() {
|
||
for {
|
||
select {
|
||
case msg, ok := <-WALMessages:
|
||
if !ok {
|
||
return
|
||
}
|
||
processMsg(msg)
|
||
case req := <-walRotateCh:
|
||
processRotate(req)
|
||
default:
|
||
return
|
||
}
|
||
}
|
||
}
|
||
|
||
for {
|
||
select {
|
||
case <-ctx.Done():
|
||
drain()
|
||
return
|
||
case msg, ok := <-WALMessages:
|
||
if !ok {
|
||
return
|
||
}
|
||
processMsg(msg)
|
||
case req := <-walRotateCh:
|
||
processRotate(req)
|
||
}
|
||
}
|
||
}()
|
||
}
|
||
|
||
// RotateWALFiles sends rotation requests for the given host directories
|
||
// and blocks until all rotations complete.
|
||
func RotateWALFiles(hostDirs []string) {
|
||
dones := make([]chan struct{}, len(hostDirs))
|
||
for i, dir := range hostDirs {
|
||
dones[i] = make(chan struct{})
|
||
walRotateCh <- walRotateReq{hostDir: dir, done: dones[i]}
|
||
}
|
||
for _, done := range dones {
|
||
<-done
|
||
}
|
||
}
|
||
|
||
// buildWALPayload encodes a WALMessage into a binary payload (without magic/length/CRC).
|
||
func buildWALPayload(msg *WALMessage) []byte {
|
||
size := 8 + 2 + len(msg.MetricName) + 1 + 4
|
||
for _, s := range msg.Selector {
|
||
size += 1 + len(s)
|
||
}
|
||
|
||
buf := make([]byte, 0, size)
|
||
|
||
// Timestamp (8 bytes, little-endian int64)
|
||
var ts [8]byte
|
||
binary.LittleEndian.PutUint64(ts[:], uint64(msg.Timestamp))
|
||
buf = append(buf, ts[:]...)
|
||
|
||
// Metric name (2-byte length prefix + bytes)
|
||
var mLen [2]byte
|
||
binary.LittleEndian.PutUint16(mLen[:], uint16(len(msg.MetricName)))
|
||
buf = append(buf, mLen[:]...)
|
||
buf = append(buf, msg.MetricName...)
|
||
|
||
// Selector count (1 byte)
|
||
buf = append(buf, byte(len(msg.Selector)))
|
||
|
||
// Selectors (1-byte length prefix + bytes each)
|
||
for _, sel := range msg.Selector {
|
||
buf = append(buf, byte(len(sel)))
|
||
buf = append(buf, sel...)
|
||
}
|
||
|
||
// Value (4 bytes, float32 bit representation)
|
||
var val [4]byte
|
||
binary.LittleEndian.PutUint32(val[:], math.Float32bits(float32(msg.Value)))
|
||
buf = append(buf, val[:]...)
|
||
|
||
return buf
|
||
}
|
||
|
||
// writeWALRecord appends a binary WAL record to the file.
|
||
// Format: [4B magic][4B payload_len][payload][4B CRC32]
|
||
func writeWALRecord(f *os.File, msg *WALMessage) error {
|
||
payload := buildWALPayload(msg)
|
||
crc := crc32.ChecksumIEEE(payload)
|
||
|
||
record := make([]byte, 0, 4+4+len(payload)+4)
|
||
|
||
var magic [4]byte
|
||
binary.LittleEndian.PutUint32(magic[:], walRecordMagic)
|
||
record = append(record, magic[:]...)
|
||
|
||
var pLen [4]byte
|
||
binary.LittleEndian.PutUint32(pLen[:], uint32(len(payload)))
|
||
record = append(record, pLen[:]...)
|
||
|
||
record = append(record, payload...)
|
||
|
||
var crcBytes [4]byte
|
||
binary.LittleEndian.PutUint32(crcBytes[:], crc)
|
||
record = append(record, crcBytes[:]...)
|
||
|
||
_, err := f.Write(record)
|
||
return err
|
||
}
|
||
|
||
// readWALRecord reads one WAL record from the reader.
|
||
// Returns (nil, nil) on clean EOF. Returns error on data corruption.
|
||
// A CRC mismatch indicates a truncated trailing record (expected on crash).
|
||
func readWALRecord(r io.Reader) (*WALMessage, error) {
|
||
var magic uint32
|
||
if err := binary.Read(r, binary.LittleEndian, &magic); err != nil {
|
||
if err == io.EOF {
|
||
return nil, nil // Clean EOF
|
||
}
|
||
return nil, fmt.Errorf("read record magic: %w", err)
|
||
}
|
||
|
||
if magic != walRecordMagic {
|
||
return nil, fmt.Errorf("invalid record magic 0x%08X (expected 0x%08X)", magic, walRecordMagic)
|
||
}
|
||
|
||
var payloadLen uint32
|
||
if err := binary.Read(r, binary.LittleEndian, &payloadLen); err != nil {
|
||
return nil, fmt.Errorf("read payload length: %w", err)
|
||
}
|
||
|
||
if payloadLen > 1<<20 { // 1 MB sanity limit
|
||
return nil, fmt.Errorf("record payload too large: %d bytes", payloadLen)
|
||
}
|
||
|
||
payload := make([]byte, payloadLen)
|
||
if _, err := io.ReadFull(r, payload); err != nil {
|
||
return nil, fmt.Errorf("read payload: %w", err)
|
||
}
|
||
|
||
var storedCRC uint32
|
||
if err := binary.Read(r, binary.LittleEndian, &storedCRC); err != nil {
|
||
return nil, fmt.Errorf("read CRC: %w", err)
|
||
}
|
||
|
||
if crc32.ChecksumIEEE(payload) != storedCRC {
|
||
return nil, fmt.Errorf("CRC mismatch (truncated write or corruption)")
|
||
}
|
||
|
||
return parseWALPayload(payload)
|
||
}
|
||
|
||
// parseWALPayload decodes a binary payload into a WALMessage.
|
||
func parseWALPayload(payload []byte) (*WALMessage, error) {
|
||
if len(payload) < 8+2+1+4 {
|
||
return nil, fmt.Errorf("payload too short: %d bytes", len(payload))
|
||
}
|
||
|
||
offset := 0
|
||
|
||
// Timestamp (8 bytes)
|
||
ts := int64(binary.LittleEndian.Uint64(payload[offset : offset+8]))
|
||
offset += 8
|
||
|
||
// Metric name (2-byte length + bytes)
|
||
if offset+2 > len(payload) {
|
||
return nil, fmt.Errorf("metric name length overflows payload")
|
||
}
|
||
mLen := int(binary.LittleEndian.Uint16(payload[offset : offset+2]))
|
||
offset += 2
|
||
|
||
if offset+mLen > len(payload) {
|
||
return nil, fmt.Errorf("metric name overflows payload")
|
||
}
|
||
metricName := string(payload[offset : offset+mLen])
|
||
offset += mLen
|
||
|
||
// Selector count (1 byte)
|
||
if offset >= len(payload) {
|
||
return nil, fmt.Errorf("selector count overflows payload")
|
||
}
|
||
selCount := int(payload[offset])
|
||
offset++
|
||
|
||
selectors := make([]string, selCount)
|
||
for i := range selCount {
|
||
if offset >= len(payload) {
|
||
return nil, fmt.Errorf("selector[%d] length overflows payload", i)
|
||
}
|
||
sLen := int(payload[offset])
|
||
offset++
|
||
|
||
if offset+sLen > len(payload) {
|
||
return nil, fmt.Errorf("selector[%d] data overflows payload", i)
|
||
}
|
||
selectors[i] = string(payload[offset : offset+sLen])
|
||
offset += sLen
|
||
}
|
||
|
||
// Value (4 bytes, float32 bits)
|
||
if offset+4 > len(payload) {
|
||
return nil, fmt.Errorf("value overflows payload")
|
||
}
|
||
bits := binary.LittleEndian.Uint32(payload[offset : offset+4])
|
||
value := schema.Float(math.Float32frombits(bits))
|
||
|
||
return &WALMessage{
|
||
MetricName: metricName,
|
||
Timestamp: ts,
|
||
Selector: selectors,
|
||
Value: value,
|
||
}, nil
|
||
}
|
||
|
||
// loadWALFile reads a WAL file and replays all valid records into the Level tree.
|
||
// l is the host-level node. Corrupt or partial trailing records are silently skipped
|
||
// (expected on crash). Records older than 'from' are skipped.
|
||
func (l *Level) loadWALFile(m *MemoryStore, f *os.File, from int64) error {
|
||
br := bufio.NewReader(f)
|
||
|
||
// Verify file header magic.
|
||
var fileMagic uint32
|
||
if err := binary.Read(br, binary.LittleEndian, &fileMagic); err != nil {
|
||
if err == io.EOF {
|
||
return nil // Empty file, no data
|
||
}
|
||
return fmt.Errorf("[METRICSTORE]> WAL: read file header: %w", err)
|
||
}
|
||
|
||
if fileMagic != walFileMagic {
|
||
return fmt.Errorf("[METRICSTORE]> WAL: invalid file magic 0x%08X (expected 0x%08X)", fileMagic, walFileMagic)
|
||
}
|
||
|
||
// Cache level lookups to avoid repeated tree traversal.
|
||
lvlCache := make(map[string]*Level)
|
||
|
||
for {
|
||
msg, err := readWALRecord(br)
|
||
if err != nil {
|
||
// Truncated trailing record is expected after a crash; stop replaying.
|
||
cclog.Debugf("[METRICSTORE]> WAL: stopping replay at corrupted/partial record: %v", err)
|
||
break
|
||
}
|
||
if msg == nil {
|
||
break // Clean EOF
|
||
}
|
||
|
||
if msg.Timestamp < from {
|
||
continue // Older than retention window
|
||
}
|
||
|
||
minfo, ok := m.Metrics[msg.MetricName]
|
||
if !ok {
|
||
continue // Unknown metric (config may have changed)
|
||
}
|
||
|
||
// Cache key is the null-separated selector path.
|
||
cacheKey := joinSelector(msg.Selector)
|
||
lvl, ok := lvlCache[cacheKey]
|
||
if !ok {
|
||
lvl = l.findLevelOrCreate(msg.Selector, len(m.Metrics))
|
||
lvlCache[cacheKey] = lvl
|
||
}
|
||
|
||
// Write directly to the buffer, same as WriteToLevel but without the
|
||
// global level lookup (we already have the right level).
|
||
lvl.lock.Lock()
|
||
b := lvl.metrics[minfo.offset]
|
||
if b == nil {
|
||
b = newBuffer(msg.Timestamp, minfo.Frequency)
|
||
lvl.metrics[minfo.offset] = b
|
||
}
|
||
nb, writeErr := b.write(msg.Timestamp, msg.Value)
|
||
if writeErr == nil && b != nb {
|
||
lvl.metrics[minfo.offset] = nb
|
||
}
|
||
// Ignore write errors for timestamps before buffer start (can happen when
|
||
// replaying WAL entries that predate a loaded snapshot's start time).
|
||
lvl.lock.Unlock()
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// joinSelector turns a selector path into a single map key by joining its
// elements with a NUL byte. An empty or nil selector yields "".
func joinSelector(sel []string) string {
	key := ""
	for i, part := range sel {
		if i > 0 {
			key += "\x00"
		}
		key += part
	}
	return key
}
|
||
|
||
// ToCheckpointWAL writes binary snapshot files for all hosts in parallel.
|
||
// Returns the number of files written, the list of host directories that were
|
||
// successfully checkpointed (for WAL rotation), and any errors.
|
||
func (m *MemoryStore) ToCheckpointWAL(dir string, from, to int64) (int, []string, error) {
|
||
// Collect all cluster/host pairs.
|
||
m.root.lock.RLock()
|
||
totalHosts := 0
|
||
for _, l1 := range m.root.children {
|
||
l1.lock.RLock()
|
||
totalHosts += len(l1.children)
|
||
l1.lock.RUnlock()
|
||
}
|
||
m.root.lock.RUnlock()
|
||
|
||
levels := make([]*Level, 0, totalHosts)
|
||
selectors := make([][]string, 0, totalHosts)
|
||
|
||
m.root.lock.RLock()
|
||
for sel1, l1 := range m.root.children {
|
||
l1.lock.RLock()
|
||
for sel2, l2 := range l1.children {
|
||
levels = append(levels, l2)
|
||
selectors = append(selectors, []string{sel1, sel2})
|
||
}
|
||
l1.lock.RUnlock()
|
||
}
|
||
m.root.lock.RUnlock()
|
||
|
||
type workItem struct {
|
||
level *Level
|
||
hostDir string
|
||
selector []string
|
||
}
|
||
|
||
n, errs := int32(0), int32(0)
|
||
var successDirs []string
|
||
var successMu sync.Mutex
|
||
|
||
var wg sync.WaitGroup
|
||
wg.Add(Keys.NumWorkers)
|
||
work := make(chan workItem, Keys.NumWorkers*2)
|
||
|
||
for range Keys.NumWorkers {
|
||
go func() {
|
||
defer wg.Done()
|
||
for wi := range work {
|
||
err := wi.level.toCheckpointBinary(wi.hostDir, from, to, m)
|
||
if err != nil {
|
||
if err == ErrNoNewArchiveData {
|
||
continue
|
||
}
|
||
cclog.Errorf("[METRICSTORE]> binary checkpoint error for %s: %v", wi.hostDir, err)
|
||
atomic.AddInt32(&errs, 1)
|
||
} else {
|
||
atomic.AddInt32(&n, 1)
|
||
successMu.Lock()
|
||
successDirs = append(successDirs, wi.hostDir)
|
||
successMu.Unlock()
|
||
}
|
||
}
|
||
}()
|
||
}
|
||
|
||
for i := range levels {
|
||
hostDir := path.Join(dir, path.Join(selectors[i]...))
|
||
work <- workItem{
|
||
level: levels[i],
|
||
hostDir: hostDir,
|
||
selector: selectors[i],
|
||
}
|
||
}
|
||
close(work)
|
||
wg.Wait()
|
||
|
||
if errs > 0 {
|
||
return int(n), successDirs, fmt.Errorf("[METRICSTORE]> %d errors during binary checkpoint (%d successes)", errs, n)
|
||
}
|
||
return int(n), successDirs, nil
|
||
}
|
||
|
||
// toCheckpointBinary writes a binary snapshot file for a single host-level node.
|
||
// Uses atomic rename (write to .tmp then rename) to avoid partial reads on crash.
|
||
func (l *Level) toCheckpointBinary(dir string, from, to int64, m *MemoryStore) error {
|
||
cf, err := l.toCheckpointFile(from, to, m)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if cf == nil {
|
||
return ErrNoNewArchiveData
|
||
}
|
||
|
||
if err := os.MkdirAll(dir, CheckpointDirPerms); err != nil {
|
||
return fmt.Errorf("mkdir %s: %w", dir, err)
|
||
}
|
||
|
||
// Write to a temp file first, then rename (atomic on POSIX).
|
||
tmpPath := path.Join(dir, fmt.Sprintf("%d.bin.tmp", from))
|
||
finalPath := path.Join(dir, fmt.Sprintf("%d.bin", from))
|
||
|
||
f, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
||
if err != nil {
|
||
return fmt.Errorf("open binary snapshot %s: %w", tmpPath, err)
|
||
}
|
||
|
||
bw := bufio.NewWriter(f)
|
||
if err := writeBinarySnapshotFile(bw, cf); err != nil {
|
||
f.Close()
|
||
os.Remove(tmpPath)
|
||
return fmt.Errorf("write binary snapshot: %w", err)
|
||
}
|
||
if err := bw.Flush(); err != nil {
|
||
f.Close()
|
||
os.Remove(tmpPath)
|
||
return err
|
||
}
|
||
f.Close()
|
||
|
||
return os.Rename(tmpPath, finalPath)
|
||
}
|
||
|
||
// writeBinarySnapshotFile writes the binary snapshot file header and level tree.
|
||
func writeBinarySnapshotFile(w io.Writer, cf *CheckpointFile) error {
|
||
if err := binary.Write(w, binary.LittleEndian, snapFileMagic); err != nil {
|
||
return err
|
||
}
|
||
if err := binary.Write(w, binary.LittleEndian, cf.From); err != nil {
|
||
return err
|
||
}
|
||
if err := binary.Write(w, binary.LittleEndian, cf.To); err != nil {
|
||
return err
|
||
}
|
||
return writeBinaryLevel(w, cf)
|
||
}
|
||
|
||
// writeBinaryLevel recursively writes a CheckpointFile level in binary format.
|
||
func writeBinaryLevel(w io.Writer, cf *CheckpointFile) error {
|
||
if err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Metrics))); err != nil {
|
||
return err
|
||
}
|
||
|
||
for name, metric := range cf.Metrics {
|
||
if err := writeString16(w, name); err != nil {
|
||
return err
|
||
}
|
||
if err := binary.Write(w, binary.LittleEndian, metric.Frequency); err != nil {
|
||
return err
|
||
}
|
||
if err := binary.Write(w, binary.LittleEndian, metric.Start); err != nil {
|
||
return err
|
||
}
|
||
if err := binary.Write(w, binary.LittleEndian, uint32(len(metric.Data))); err != nil {
|
||
return err
|
||
}
|
||
for _, v := range metric.Data {
|
||
if err := binary.Write(w, binary.LittleEndian, math.Float32bits(float32(v))); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
}
|
||
|
||
if err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Children))); err != nil {
|
||
return err
|
||
}
|
||
|
||
for name, child := range cf.Children {
|
||
if err := writeString16(w, name); err != nil {
|
||
return err
|
||
}
|
||
if err := writeBinaryLevel(w, child); err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// writeString16 writes s to w as a 2-byte little-endian length prefix
// followed by the bytes of s.
//
// A string longer than 65535 bytes cannot be described by the prefix. It is
// rejected with an error and nothing is written; silently truncating only the
// prefix would corrupt everything that follows in the stream. Errors from w
// are returned unchanged.
func writeString16(w io.Writer, s string) error {
	if len(s) > math.MaxUint16 {
		return fmt.Errorf("string of %d bytes exceeds the %d-byte limit of a 16-bit length prefix", len(s), math.MaxUint16)
	}

	var prefix [2]byte
	binary.LittleEndian.PutUint16(prefix[:], uint16(len(s)))
	if _, err := w.Write(prefix[:]); err != nil {
		return err
	}

	_, err := io.WriteString(w, s)
	return err
}
|
||
|
||
// loadBinaryFile reads a binary snapshot file and loads data into the Level tree.
|
||
// The retention check (from) is applied to the file's 'to' timestamp.
|
||
func (l *Level) loadBinaryFile(m *MemoryStore, f *os.File, from int64) error {
|
||
br := bufio.NewReader(f)
|
||
|
||
var magic uint32
|
||
if err := binary.Read(br, binary.LittleEndian, &magic); err != nil {
|
||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read magic: %w", err)
|
||
}
|
||
if magic != snapFileMagic {
|
||
return fmt.Errorf("[METRICSTORE]> binary snapshot: invalid magic 0x%08X (expected 0x%08X)", magic, snapFileMagic)
|
||
}
|
||
|
||
var fileFrom, fileTo int64
|
||
if err := binary.Read(br, binary.LittleEndian, &fileFrom); err != nil {
|
||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read from: %w", err)
|
||
}
|
||
if err := binary.Read(br, binary.LittleEndian, &fileTo); err != nil {
|
||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read to: %w", err)
|
||
}
|
||
|
||
if fileTo != 0 && fileTo < from {
|
||
return nil // File is older than retention window, skip it
|
||
}
|
||
|
||
cf, err := readBinaryLevel(br)
|
||
if err != nil {
|
||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read level tree: %w", err)
|
||
}
|
||
cf.From = fileFrom
|
||
cf.To = fileTo
|
||
|
||
return l.loadFile(cf, m)
|
||
}
|
||
|
||
// readBinaryLevel recursively reads a level from the binary snapshot format.
|
||
func readBinaryLevel(r io.Reader) (*CheckpointFile, error) {
|
||
cf := &CheckpointFile{
|
||
Metrics: make(map[string]*CheckpointMetrics),
|
||
Children: make(map[string]*CheckpointFile),
|
||
}
|
||
|
||
var numMetrics uint32
|
||
if err := binary.Read(r, binary.LittleEndian, &numMetrics); err != nil {
|
||
return nil, fmt.Errorf("read num_metrics: %w", err)
|
||
}
|
||
|
||
for range numMetrics {
|
||
name, err := readString16(r)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("read metric name: %w", err)
|
||
}
|
||
|
||
var freq, start int64
|
||
if err := binary.Read(r, binary.LittleEndian, &freq); err != nil {
|
||
return nil, fmt.Errorf("read frequency for %s: %w", name, err)
|
||
}
|
||
if err := binary.Read(r, binary.LittleEndian, &start); err != nil {
|
||
return nil, fmt.Errorf("read start for %s: %w", name, err)
|
||
}
|
||
|
||
var numValues uint32
|
||
if err := binary.Read(r, binary.LittleEndian, &numValues); err != nil {
|
||
return nil, fmt.Errorf("read num_values for %s: %w", name, err)
|
||
}
|
||
|
||
data := make([]schema.Float, numValues)
|
||
for i := range numValues {
|
||
var bits uint32
|
||
if err := binary.Read(r, binary.LittleEndian, &bits); err != nil {
|
||
return nil, fmt.Errorf("read value[%d] for %s: %w", i, name, err)
|
||
}
|
||
data[i] = schema.Float(math.Float32frombits(bits))
|
||
}
|
||
|
||
cf.Metrics[name] = &CheckpointMetrics{
|
||
Frequency: freq,
|
||
Start: start,
|
||
Data: data,
|
||
}
|
||
}
|
||
|
||
var numChildren uint32
|
||
if err := binary.Read(r, binary.LittleEndian, &numChildren); err != nil {
|
||
return nil, fmt.Errorf("read num_children: %w", err)
|
||
}
|
||
|
||
for range numChildren {
|
||
childName, err := readString16(r)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("read child name: %w", err)
|
||
}
|
||
|
||
child, err := readBinaryLevel(r)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("read child %s: %w", childName, err)
|
||
}
|
||
cf.Children[childName] = child
|
||
}
|
||
|
||
return cf, nil
|
||
}
|
||
|
||
// readString16 reads a string stored as a 2-byte little-endian length prefix
// followed by that many bytes. Read errors, including a short read of either
// part, are returned unchanged together with an empty string.
func readString16(r io.Reader) (string, error) {
	var prefix [2]byte
	if _, err := io.ReadFull(r, prefix[:]); err != nil {
		return "", err
	}

	body := make([]byte, binary.LittleEndian.Uint16(prefix[:]))
	if _, err := io.ReadFull(r, body); err != nil {
		return "", err
	}
	return string(body), nil
}
|