Make checkpointInterval an option config option again.

Also applies small fixes

Entire-Checkpoint: c11d1a65fae4
This commit is contained in:
2026-03-13 09:07:38 +01:00
parent b214e1755a
commit 39ab12784c
7 changed files with 43 additions and 20 deletions

View File

@@ -127,7 +127,7 @@ func (data *APIMetricData) AddStats() {
// This is commonly used for unit conversion (e.g., bytes to gigabytes).
// Scaling by 0 or 1 is a no-op for performance reasons.
func (data *APIMetricData) ScaleBy(f schema.Float) {
if f == 0 || f == 1 {
if f == 1 {
return
}

View File

@@ -100,8 +100,15 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
wg.Go(func() {
const checkpointInterval = 12 * time.Hour
d := checkpointInterval
d := 12 * time.Hour // default checkpoint interval
if Keys.CheckpointInterval != "" {
parsed, err := time.ParseDuration(Keys.CheckpointInterval)
if err != nil {
cclog.Errorf("[METRICSTORE]> invalid checkpoint-interval %q: %s, using default 12h", Keys.CheckpointInterval, err)
} else {
d = parsed
}
}
ticker := time.NewTicker(d)
defer ticker.Stop()

View File

@@ -54,6 +54,10 @@ const configSchema = `{
"description": "Keep the metrics within memory for given time interval. Retention for X hours, then the metrics would be freed.",
"type": "string"
},
"checkpoint-interval": {
"description": "Interval between checkpoints as a Go duration string (e.g., '12h', '6h', '30m'). Default is '12h'.",
"type": "string"
},
"memory-cap": {
"description": "Upper memory capacity limit used by metricstore in GB",
"type": "integer"

View File

@@ -66,7 +66,10 @@ func (l *Level) collectMetricStatus(m *MemoryStore, expectedMetrics []string, he
if degraded[metricName] {
continue // already degraded, cannot improve
}
mc := m.Metrics[metricName]
mc, ok := m.Metrics[metricName]
if !ok {
continue // unknown metric, will be reported as missing
}
b := l.metrics[mc.offset]
if b.bufferExists() {
if !b.isBufferHealthy() {

View File

@@ -181,6 +181,14 @@ func (l *Level) collectPaths(currentDepth, targetDepth int, currentPath []string
// - int: Total number of buffers freed in this subtree
// - error: Non-nil on failure (propagated from children)
func (l *Level) free(t int64) (int, error) {
n, _, err := l.freeAndCheckEmpty(t)
return n, err
}
// freeAndCheckEmpty performs free() and atomically checks if the level is empty
// while still holding its own lock, avoiding a TOCTOU race between free() and
// a separate isEmpty() call.
func (l *Level) freeAndCheckEmpty(t int64) (int, bool, error) {
l.lock.Lock()
defer l.lock.Unlock()
@@ -201,30 +209,28 @@ func (l *Level) free(t int64) (int, error) {
}
for key, child := range l.children {
m, err := child.free(t)
m, empty, err := child.freeAndCheckEmpty(t)
n += m
if err != nil {
return n, err
return n, false, err
}
if child.isEmpty() {
if empty {
delete(l.children, key)
}
}
return n, nil
}
// isEmpty returns true if this level has no metrics and no children.
func (l *Level) isEmpty() bool {
l.lock.RLock()
defer l.lock.RUnlock()
// Check emptiness while still holding the lock
empty := len(l.children) == 0
if empty {
for _, b := range l.metrics {
if b != nil {
return false
empty = false
break
}
}
return len(l.children) == 0
}
return n, empty, nil
}
// forceFree removes the oldest buffer from each metric chain in the subtree.

View File

@@ -754,6 +754,9 @@ func (m *MemoryStore) ForceFree() (int, error) {
}
func (m *MemoryStore) FreeAll() error {
m.root.lock.Lock()
defer m.root.lock.Unlock()
for k := range m.root.children {
delete(m.root.children, k)
}

View File

@@ -42,7 +42,7 @@ func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {
if b == nil {
break
}
idx = 0
idx = int((t - b.start) / b.frequency)
}
if t < b.start || idx >= len(b.data) {