mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-03-20 23:07:29 +01:00
fix: Shard WAL consumer for higher throughput
Entire-Checkpoint: e583b7b11439
This commit is contained in:
@@ -361,13 +361,11 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
|||||||
Value: metric.Value,
|
Value: metric.Value,
|
||||||
Timestamp: time,
|
Timestamp: time,
|
||||||
}
|
}
|
||||||
select {
|
if !SendWALMessage(msg) {
|
||||||
case WALMessages <- msg:
|
// WAL shard channel full — metric is written to memory store but not WAL.
|
||||||
default:
|
|
||||||
// WAL channel full — metric is written to memory store but not WAL.
|
|
||||||
// Next binary snapshot will capture it.
|
// Next binary snapshot will capture it.
|
||||||
if dropped := walDropped.Add(1); dropped%10000 == 1 {
|
if dropped := walDropped.Add(1); dropped%10000 == 1 {
|
||||||
cclog.Warnf("[METRICSTORE]> WAL channel full, dropped %d messages (data safe in memory, next snapshot will capture)", dropped)
|
cclog.Warnf("[METRICSTORE]> WAL shard channel full, dropped %d messages (data safe in memory, next snapshot will capture)", dropped)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -277,7 +277,9 @@ func Shutdown() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if Keys.Checkpoints.FileFormat == "wal" {
|
if Keys.Checkpoints.FileFormat == "wal" {
|
||||||
close(WALMessages)
|
for _, ch := range walShardChs {
|
||||||
|
close(ch)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
|
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ import (
|
|||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"hash/crc32"
|
"hash/crc32"
|
||||||
|
"hash/fnv"
|
||||||
"io"
|
"io"
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
@@ -80,13 +81,15 @@ const (
|
|||||||
snapFileMagic = uint32(0xCC5B0001) // Binary snapshot magic
|
snapFileMagic = uint32(0xCC5B0001) // Binary snapshot magic
|
||||||
)
|
)
|
||||||
|
|
||||||
// WALMessages is the channel for sending metric writes to the WAL staging goroutine.
|
// walShardChs holds per-shard channels for WAL messages.
|
||||||
// Buffered to allow burst writes without blocking the metric ingestion path.
|
// Initialized by WALStaging; nil when WAL is not active.
|
||||||
var WALMessages = make(chan *WALMessage, 65536)
|
var walShardChs []chan *WALMessage
|
||||||
|
|
||||||
// walRotateCh is used by the checkpoint goroutine to request WAL file rotation
|
// walShardRotateChs holds per-shard channels for WAL rotation requests.
|
||||||
// (close, delete, reopen) after a binary snapshot has been written.
|
var walShardRotateChs []chan walRotateReq
|
||||||
var walRotateCh = make(chan walRotateReq, 256)
|
|
||||||
|
// walNumShards stores the number of shards (set during WALStaging init).
|
||||||
|
var walNumShards int
|
||||||
|
|
||||||
// WALMessage represents a single metric write to be appended to the WAL.
|
// WALMessage represents a single metric write to be appended to the WAL.
|
||||||
// Cluster and Node are NOT stored in the WAL record (inferred from file path).
|
// Cluster and Node are NOT stored in the WAL record (inferred from file path).
|
||||||
@@ -112,15 +115,56 @@ type walFileState struct {
|
|||||||
w *bufio.Writer
|
w *bufio.Writer
|
||||||
}
|
}
|
||||||
|
|
||||||
// WALStaging starts a background goroutine that receives WALMessage items
|
// walShardIndex computes which shard a message belongs to based on cluster+node.
|
||||||
// and appends binary WAL records to per-host current.wal files.
|
// Uses FNV-1a hash for fast, well-distributed mapping.
|
||||||
// Also handles WAL rotation requests from the checkpoint goroutine.
|
func walShardIndex(cluster, node string) int {
|
||||||
|
h := fnv.New32a()
|
||||||
|
h.Write([]byte(cluster))
|
||||||
|
h.Write([]byte{0})
|
||||||
|
h.Write([]byte(node))
|
||||||
|
return int(h.Sum32()) % walNumShards
|
||||||
|
}
|
||||||
|
|
||||||
|
// SendWALMessage routes a WAL message to the appropriate shard channel.
|
||||||
|
// Returns false if the channel is full (message dropped).
|
||||||
|
func SendWALMessage(msg *WALMessage) bool {
|
||||||
|
if walShardChs == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
shard := walShardIndex(msg.Cluster, msg.Node)
|
||||||
|
select {
|
||||||
|
case walShardChs[shard] <- msg:
|
||||||
|
return true
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WALStaging starts N sharded background goroutines that receive WALMessage items
|
||||||
|
// and append binary WAL records to per-host current.wal files.
|
||||||
|
// Each shard owns a subset of hosts (determined by hash of cluster+node),
|
||||||
|
// parallelizing serialization and I/O while keeping per-host writes serial.
|
||||||
func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||||
wg.Go(func() {
|
|
||||||
if Keys.Checkpoints.FileFormat == "json" {
|
if Keys.Checkpoints.FileFormat == "json" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
walNumShards = max(Keys.NumWorkers, 1)
|
||||||
|
chBufSize := max(65536/walNumShards, 1024)
|
||||||
|
|
||||||
|
walShardChs = make([]chan *WALMessage, walNumShards)
|
||||||
|
walShardRotateChs = make([]chan walRotateReq, walNumShards)
|
||||||
|
|
||||||
|
for i := range walNumShards {
|
||||||
|
walShardChs[i] = make(chan *WALMessage, chBufSize)
|
||||||
|
walShardRotateChs[i] = make(chan walRotateReq, 64)
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range walNumShards {
|
||||||
|
msgCh := walShardChs[i]
|
||||||
|
rotateCh := walShardRotateChs[i]
|
||||||
|
|
||||||
|
wg.Go(func() {
|
||||||
hostFiles := make(map[string]*walFileState)
|
hostFiles := make(map[string]*walFileState)
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
@@ -175,7 +219,7 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
if ws == nil {
|
if ws == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err := writeWALRecord(ws.w, msg); err != nil {
|
if err := writeWALRecordDirect(ws.w, msg); err != nil {
|
||||||
cclog.Errorf("[METRICSTORE]> WAL: write record: %v", err)
|
cclog.Errorf("[METRICSTORE]> WAL: write record: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -194,23 +238,26 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
close(req.done)
|
close(req.done)
|
||||||
}
|
}
|
||||||
|
|
||||||
drain := func() {
|
flushAll := func() {
|
||||||
for {
|
|
||||||
select {
|
|
||||||
case msg, ok := <-WALMessages:
|
|
||||||
if !ok {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
processMsg(msg)
|
|
||||||
case req := <-walRotateCh:
|
|
||||||
processRotate(req)
|
|
||||||
default:
|
|
||||||
// Flush all buffered writers after draining remaining messages.
|
|
||||||
for _, ws := range hostFiles {
|
for _, ws := range hostFiles {
|
||||||
if ws.f != nil {
|
if ws.f != nil {
|
||||||
ws.w.Flush()
|
ws.w.Flush()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
drain := func() {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case msg, ok := <-msgCh:
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
processMsg(msg)
|
||||||
|
case req := <-rotateCh:
|
||||||
|
processRotate(req)
|
||||||
|
default:
|
||||||
|
flushAll()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -221,7 +268,7 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
drain()
|
drain()
|
||||||
return
|
return
|
||||||
case msg, ok := <-WALMessages:
|
case msg, ok := <-msgCh:
|
||||||
if !ok {
|
if !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -230,44 +277,56 @@ func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
// Drain up to 256 more messages without blocking to batch writes.
|
// Drain up to 256 more messages without blocking to batch writes.
|
||||||
for range 256 {
|
for range 256 {
|
||||||
select {
|
select {
|
||||||
case msg, ok := <-WALMessages:
|
case msg, ok := <-msgCh:
|
||||||
if !ok {
|
if !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
processMsg(msg)
|
processMsg(msg)
|
||||||
case req := <-walRotateCh:
|
case req := <-rotateCh:
|
||||||
processRotate(req)
|
processRotate(req)
|
||||||
default:
|
default:
|
||||||
goto flushed
|
goto flushed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
flushed:
|
flushed:
|
||||||
// Flush all buffered writers after processing the batch.
|
flushAll()
|
||||||
for _, ws := range hostFiles {
|
case req := <-rotateCh:
|
||||||
if ws.f != nil {
|
|
||||||
ws.w.Flush()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case req := <-walRotateCh:
|
|
||||||
processRotate(req)
|
processRotate(req)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// RotateWALFiles sends rotation requests for the given host directories
|
// RotateWALFiles sends rotation requests for the given host directories
|
||||||
// and blocks until all rotations complete.
|
// and blocks until all rotations complete. Each request is routed to the
|
||||||
|
// shard that owns the host directory.
|
||||||
func RotateWALFiles(hostDirs []string) {
|
func RotateWALFiles(hostDirs []string) {
|
||||||
|
if walShardRotateChs == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
dones := make([]chan struct{}, len(hostDirs))
|
dones := make([]chan struct{}, len(hostDirs))
|
||||||
for i, dir := range hostDirs {
|
for i, dir := range hostDirs {
|
||||||
dones[i] = make(chan struct{})
|
dones[i] = make(chan struct{})
|
||||||
walRotateCh <- walRotateReq{hostDir: dir, done: dones[i]}
|
// Extract cluster/node from hostDir to find the right shard.
|
||||||
|
// hostDir = rootDir/cluster/node
|
||||||
|
shard := walShardIndexFromDir(dir)
|
||||||
|
walShardRotateChs[shard] <- walRotateReq{hostDir: dir, done: dones[i]}
|
||||||
}
|
}
|
||||||
for _, done := range dones {
|
for _, done := range dones {
|
||||||
<-done
|
<-done
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// walShardIndexFromDir extracts cluster and node from a host directory path
|
||||||
|
// and returns the shard index. The path format is: rootDir/cluster/node
|
||||||
|
func walShardIndexFromDir(hostDir string) int {
|
||||||
|
// hostDir = .../cluster/node — extract last two path components
|
||||||
|
node := path.Base(hostDir)
|
||||||
|
cluster := path.Base(path.Dir(hostDir))
|
||||||
|
return walShardIndex(cluster, node)
|
||||||
|
}
|
||||||
|
|
||||||
// RotateWALFiles sends rotation requests for the given host directories
|
// RotateWALFiles sends rotation requests for the given host directories
|
||||||
// and blocks until all rotations complete.
|
// and blocks until all rotations complete.
|
||||||
func RotateWALFilesAfterShutdown(hostDirs []string) {
|
func RotateWALFilesAfterShutdown(hostDirs []string) {
|
||||||
@@ -279,6 +338,81 @@ func RotateWALFilesAfterShutdown(hostDirs []string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// writeWALRecordDirect encodes a WAL record directly into the bufio.Writer,
|
||||||
|
// avoiding heap allocations by using a stack-allocated scratch buffer for
|
||||||
|
// the fixed-size header/trailer and computing CRC inline.
|
||||||
|
func writeWALRecordDirect(w *bufio.Writer, msg *WALMessage) error {
|
||||||
|
// Compute payload size.
|
||||||
|
payloadSize := 8 + 2 + len(msg.MetricName) + 1 + 4
|
||||||
|
for _, s := range msg.Selector {
|
||||||
|
payloadSize += 1 + len(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write magic + payload length (8 bytes header).
|
||||||
|
var hdr [8]byte
|
||||||
|
binary.LittleEndian.PutUint32(hdr[0:4], walRecordMagic)
|
||||||
|
binary.LittleEndian.PutUint32(hdr[4:8], uint32(payloadSize))
|
||||||
|
if _, err := w.Write(hdr[:]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// We need to compute CRC over the payload as we write it.
|
||||||
|
crc := crc32.NewIEEE()
|
||||||
|
|
||||||
|
// Timestamp (8 bytes).
|
||||||
|
var scratch [8]byte
|
||||||
|
binary.LittleEndian.PutUint64(scratch[:8], uint64(msg.Timestamp))
|
||||||
|
crc.Write(scratch[:8])
|
||||||
|
if _, err := w.Write(scratch[:8]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Metric name length (2 bytes) + metric name.
|
||||||
|
binary.LittleEndian.PutUint16(scratch[:2], uint16(len(msg.MetricName)))
|
||||||
|
crc.Write(scratch[:2])
|
||||||
|
if _, err := w.Write(scratch[:2]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
nameBytes := []byte(msg.MetricName)
|
||||||
|
crc.Write(nameBytes)
|
||||||
|
if _, err := w.Write(nameBytes); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Selector count (1 byte).
|
||||||
|
scratch[0] = byte(len(msg.Selector))
|
||||||
|
crc.Write(scratch[:1])
|
||||||
|
if _, err := w.Write(scratch[:1]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Selectors (1-byte length + bytes each).
|
||||||
|
for _, sel := range msg.Selector {
|
||||||
|
scratch[0] = byte(len(sel))
|
||||||
|
crc.Write(scratch[:1])
|
||||||
|
if _, err := w.Write(scratch[:1]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
selBytes := []byte(sel)
|
||||||
|
crc.Write(selBytes)
|
||||||
|
if _, err := w.Write(selBytes); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Value (4 bytes, float32 bits).
|
||||||
|
binary.LittleEndian.PutUint32(scratch[:4], math.Float32bits(float32(msg.Value)))
|
||||||
|
crc.Write(scratch[:4])
|
||||||
|
if _, err := w.Write(scratch[:4]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// CRC32 (4 bytes).
|
||||||
|
binary.LittleEndian.PutUint32(scratch[:4], crc.Sum32())
|
||||||
|
_, err := w.Write(scratch[:4])
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// buildWALPayload encodes a WALMessage into a binary payload (without magic/length/CRC).
|
// buildWALPayload encodes a WALMessage into a binary payload (without magic/length/CRC).
|
||||||
func buildWALPayload(msg *WALMessage) []byte {
|
func buildWALPayload(msg *WALMessage) []byte {
|
||||||
size := 8 + 2 + len(msg.MetricName) + 1 + 4
|
size := 8 + 2 + len(msg.MetricName) + 1 + 4
|
||||||
|
|||||||
Reference in New Issue
Block a user