mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-28 13:27:30 +01:00
Merge branch 'optimize-checkpoint-wal' of github.com:ClusterCockpit/cc-backend into optimize-checkpoint-wal
This commit is contained in:
@@ -43,6 +43,7 @@ package metricstore
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
)
|
)
|
||||||
@@ -53,12 +54,102 @@ import (
|
|||||||
// of data or reallocation needs to happen on writes.
|
// of data or reallocation needs to happen on writes.
|
||||||
const BufferCap int = DefaultBufferCapacity
|
const BufferCap int = DefaultBufferCapacity
|
||||||
|
|
||||||
var bufferPool sync.Pool = sync.Pool{
|
// BufferPool is the global instance.
|
||||||
New: func() any {
|
// It is initialized immediately when the package loads.
|
||||||
|
var bufferPool = NewPersistentBufferPool()
|
||||||
|
|
||||||
|
type PersistentBufferPool struct {
|
||||||
|
pool []*buffer
|
||||||
|
mu sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewPersistentBufferPool creates a dynamic pool for buffers.
|
||||||
|
func NewPersistentBufferPool() *PersistentBufferPool {
|
||||||
|
return &PersistentBufferPool{
|
||||||
|
pool: make([]*buffer, 0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *PersistentBufferPool) Get() *buffer {
|
||||||
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
|
||||||
|
n := len(p.pool)
|
||||||
|
if n == 0 {
|
||||||
|
// Pool is empty, allocate a new one
|
||||||
return &buffer{
|
return &buffer{
|
||||||
data: make([]schema.Float, 0, BufferCap),
|
data: make([]schema.Float, 0, BufferCap),
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
|
|
||||||
|
// Reuse existing buffer from the pool
|
||||||
|
b := p.pool[n-1]
|
||||||
|
p.pool[n-1] = nil // Avoid memory leak
|
||||||
|
p.pool = p.pool[:n-1]
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *PersistentBufferPool) Put(b *buffer) {
|
||||||
|
// Reset the buffer before putting it back
|
||||||
|
b.data = b.data[:0]
|
||||||
|
|
||||||
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
p.pool = append(p.pool, b)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetSize returns the exact number of buffers currently sitting in the pool.
|
||||||
|
func (p *PersistentBufferPool) GetSize() int {
|
||||||
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
return len(p.pool)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear drains all buffers currently in the pool, allowing the GC to collect them.
|
||||||
|
func (p *PersistentBufferPool) Clear() {
|
||||||
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
for i := range p.pool {
|
||||||
|
p.pool[i] = nil
|
||||||
|
}
|
||||||
|
p.pool = p.pool[:0]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean removes buffers from the pool that haven't been used in the given duration.
|
||||||
|
// It uses a simple LRU approach based on the lastUsed timestamp.
|
||||||
|
func (p *PersistentBufferPool) Clean(threshold int64) {
|
||||||
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
|
||||||
|
// Filter in place
|
||||||
|
active := p.pool[:0]
|
||||||
|
for _, b := range p.pool {
|
||||||
|
if b.lastUsed >= threshold {
|
||||||
|
active = append(active, b)
|
||||||
|
} else {
|
||||||
|
// Buffer is older than the threshold, let it be collected by GC
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nullify the rest to prevent memory leaks
|
||||||
|
for i := len(active); i < len(p.pool); i++ {
|
||||||
|
p.pool[i] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
p.pool = active
|
||||||
|
}
|
||||||
|
|
||||||
|
// CleanAll removes all buffers from the pool.
|
||||||
|
func (p *PersistentBufferPool) CleanAll() {
|
||||||
|
p.mu.Lock()
|
||||||
|
defer p.mu.Unlock()
|
||||||
|
|
||||||
|
// Nullify all buffers to prevent memory leaks
|
||||||
|
for i := range p.pool {
|
||||||
|
p.pool[i] = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
p.pool = p.pool[:0]
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -94,10 +185,11 @@ type buffer struct {
|
|||||||
start int64
|
start int64
|
||||||
archived bool
|
archived bool
|
||||||
closed bool
|
closed bool
|
||||||
|
lastUsed int64
|
||||||
}
|
}
|
||||||
|
|
||||||
func newBuffer(ts, freq int64) *buffer {
|
func newBuffer(ts, freq int64) *buffer {
|
||||||
b := bufferPool.Get().(*buffer)
|
b := bufferPool.Get()
|
||||||
b.frequency = freq
|
b.frequency = freq
|
||||||
b.start = ts - (freq / 2)
|
b.start = ts - (freq / 2)
|
||||||
b.prev = nil
|
b.prev = nil
|
||||||
@@ -240,6 +332,7 @@ func (b *buffer) free(t int64) (delme bool, n int) {
|
|||||||
if cap(b.prev.data) != BufferCap {
|
if cap(b.prev.data) != BufferCap {
|
||||||
b.prev.data = make([]schema.Float, 0, BufferCap)
|
b.prev.data = make([]schema.Float, 0, BufferCap)
|
||||||
}
|
}
|
||||||
|
b.prev.lastUsed = time.Now().Unix()
|
||||||
bufferPool.Put(b.prev)
|
bufferPool.Put(b.prev)
|
||||||
b.prev = nil
|
b.prev = nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ package metricstore
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
@@ -192,6 +193,7 @@ func (l *Level) free(t int64) (int, error) {
|
|||||||
if cap(b.data) != BufferCap {
|
if cap(b.data) != BufferCap {
|
||||||
b.data = make([]schema.Float, 0, BufferCap)
|
b.data = make([]schema.Float, 0, BufferCap)
|
||||||
}
|
}
|
||||||
|
b.lastUsed = time.Now().Unix()
|
||||||
bufferPool.Put(b)
|
bufferPool.Put(b)
|
||||||
l.metrics[i] = nil
|
l.metrics[i] = nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -357,6 +357,9 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
state.mu.Unlock()
|
state.mu.Unlock()
|
||||||
|
|
||||||
|
// Clean up the buffer pool
|
||||||
|
bufferPool.Clean(state.lastRetentionTime)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
@@ -425,6 +428,9 @@ func MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
runtime.ReadMemStats(&mem)
|
runtime.ReadMemStats(&mem)
|
||||||
actualMemoryGB = float64(mem.Alloc) / 1e9
|
actualMemoryGB = float64(mem.Alloc) / 1e9
|
||||||
|
|
||||||
|
bufferPool.CleanAll()
|
||||||
|
cclog.Infof("[METRICSTORE]> Cleaned up bufferPool\n")
|
||||||
|
|
||||||
if actualMemoryGB > float64(Keys.MemoryCap) {
|
if actualMemoryGB > float64(Keys.MemoryCap) {
|
||||||
cclog.Warnf("[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing", actualMemoryGB, Keys.MemoryCap)
|
cclog.Warnf("[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing", actualMemoryGB, Keys.MemoryCap)
|
||||||
|
|
||||||
|
|||||||
@@ -464,3 +464,53 @@ func TestBufferHealthChecks(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBufferPoolClean(t *testing.T) {
|
||||||
|
// Use a fresh pool for testing
|
||||||
|
pool := NewPersistentBufferPool()
|
||||||
|
|
||||||
|
now := time.Now().Unix()
|
||||||
|
|
||||||
|
// Create some buffers and put them in the pool with different lastUsed times
|
||||||
|
b1 := &buffer{lastUsed: now - 3600, data: make([]schema.Float, 0)} // 1 hour ago
|
||||||
|
b2 := &buffer{lastUsed: now - 7200, data: make([]schema.Float, 0)} // 2 hours ago
|
||||||
|
b3 := &buffer{lastUsed: now - 180000, data: make([]schema.Float, 0)} // 50 hours ago
|
||||||
|
b4 := &buffer{lastUsed: now - 200000, data: make([]schema.Float, 0)} // 55 hours ago
|
||||||
|
b5 := &buffer{lastUsed: now, data: make([]schema.Float, 0)}
|
||||||
|
|
||||||
|
pool.Put(b1)
|
||||||
|
pool.Put(b2)
|
||||||
|
pool.Put(b3)
|
||||||
|
pool.Put(b4)
|
||||||
|
pool.Put(b5)
|
||||||
|
|
||||||
|
if pool.GetSize() != 5 {
|
||||||
|
t.Fatalf("Expected pool size 5, got %d", pool.GetSize())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean buffers older than 48 hours
|
||||||
|
timeUpdate := time.Now().Add(-48 * time.Hour).Unix()
|
||||||
|
pool.Clean(timeUpdate)
|
||||||
|
|
||||||
|
// Expected: b1, b2, b5 should remain. b3, b4 should be cleaned.
|
||||||
|
if pool.GetSize() != 3 {
|
||||||
|
t.Fatalf("Expected pool size 3 after clean, got %d", pool.GetSize())
|
||||||
|
}
|
||||||
|
|
||||||
|
validBufs := map[int64]bool{
|
||||||
|
b1.lastUsed: true,
|
||||||
|
b2.lastUsed: true,
|
||||||
|
b5.lastUsed: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
b := pool.Get()
|
||||||
|
if !validBufs[b.lastUsed] {
|
||||||
|
t.Errorf("Found unexpected buffer with lastUsed %d", b.lastUsed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if pool.GetSize() != 0 {
|
||||||
|
t.Fatalf("Expected pool to be empty, got %d", pool.GetSize())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user