mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-28 21:37:31 +01:00
Compare commits
8 Commits
optimize-c
...
optimize-c
| Author | SHA1 | Date | |
|---|---|---|---|
| 23ce1722a9 | |||
| 9c0104a252 | |||
| 3555fb6255 | |||
| 62d2143979 | |||
| 9fc1836c30 | |||
| b6c574c7ec | |||
|
8b7cb587c5
|
|||
|
415467967d
|
2
go.mod
2
go.mod
@@ -28,7 +28,6 @@ require (
|
||||
github.com/gorilla/sessions v1.4.0
|
||||
github.com/jmoiron/sqlx v1.4.0
|
||||
github.com/joho/godotenv v1.5.1
|
||||
github.com/linkedin/goavro/v2 v2.15.0
|
||||
github.com/mattn/go-sqlite3 v1.14.34
|
||||
github.com/parquet-go/parquet-go v0.27.0
|
||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||
@@ -80,7 +79,6 @@ require (
|
||||
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
|
||||
github.com/goccy/go-yaml v1.19.2 // indirect
|
||||
github.com/golang/snappy v1.0.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||
github.com/gorilla/websocket v1.5.3 // indirect
|
||||
|
||||
10
go.sum
10
go.sum
@@ -149,9 +149,8 @@ github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63Y
|
||||
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
|
||||
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
|
||||
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
|
||||
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/go-tpm v0.9.7 h1:u89J4tUUeDTlH8xxC3CTW7OHZjbjKoHdQ9W7gCUhtxA=
|
||||
@@ -214,8 +213,6 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm
|
||||
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
|
||||
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||
github.com/linkedin/goavro/v2 v2.15.0 h1:pDj1UrjUOO62iXhgBiE7jQkpNIc5/tA5eZsgolMjgVI=
|
||||
github.com/linkedin/goavro/v2 v2.15.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
|
||||
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk=
|
||||
@@ -276,14 +273,11 @@ github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKk
|
||||
github.com/stmcginnis/gofish v0.21.3 h1:EBLCHfORnbx7MPw7lplOOVe9QAD1T3XRVz6+a1Z4z5Q=
|
||||
github.com/stmcginnis/gofish v0.21.3/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
|
||||
|
||||
@@ -1,481 +0,0 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package metricstore
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/linkedin/goavro/v2"
|
||||
)
|
||||
|
||||
var (
|
||||
NumAvroWorkers int = DefaultAvroWorkers
|
||||
startUp bool = true
|
||||
)
|
||||
|
||||
func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
|
||||
levels := make([]*AvroLevel, 0)
|
||||
selectors := make([][]string, 0)
|
||||
as.root.lock.RLock()
|
||||
// Cluster
|
||||
for sel1, l1 := range as.root.children {
|
||||
l1.lock.RLock()
|
||||
// Node
|
||||
for sel2, l2 := range l1.children {
|
||||
l2.lock.RLock()
|
||||
// Frequency
|
||||
for sel3, l3 := range l2.children {
|
||||
levels = append(levels, l3)
|
||||
selectors = append(selectors, []string{sel1, sel2, sel3})
|
||||
}
|
||||
l2.lock.RUnlock()
|
||||
}
|
||||
l1.lock.RUnlock()
|
||||
}
|
||||
as.root.lock.RUnlock()
|
||||
|
||||
type workItem struct {
|
||||
level *AvroLevel
|
||||
dir string
|
||||
selector []string
|
||||
}
|
||||
|
||||
n, errs := int32(0), int32(0)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(NumAvroWorkers)
|
||||
work := make(chan workItem, NumAvroWorkers*2)
|
||||
for range NumAvroWorkers {
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
for workItem := range work {
|
||||
from := getTimestamp(workItem.dir)
|
||||
|
||||
if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
|
||||
if err == ErrNoNewArchiveData {
|
||||
continue
|
||||
}
|
||||
|
||||
cclog.Errorf("error while checkpointing %#v: %s", workItem.selector, err.Error())
|
||||
atomic.AddInt32(&errs, 1)
|
||||
} else {
|
||||
atomic.AddInt32(&n, 1)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
for i := range len(levels) {
|
||||
dir := path.Join(dir, path.Join(selectors[i]...))
|
||||
work <- workItem{
|
||||
level: levels[i],
|
||||
dir: dir,
|
||||
selector: selectors[i],
|
||||
}
|
||||
}
|
||||
|
||||
close(work)
|
||||
wg.Wait()
|
||||
|
||||
if errs > 0 {
|
||||
return int(n), fmt.Errorf("%d errors happend while creating avro checkpoints (%d successes)", errs, n)
|
||||
}
|
||||
|
||||
startUp = false
|
||||
|
||||
return int(n), nil
|
||||
}
|
||||
|
||||
// getTimestamp returns the timestamp from the directory name
|
||||
func getTimestamp(dir string) int64 {
|
||||
// Extract the resolution and timestamp from the directory name
|
||||
// The existing avro file will be in epoch timestamp format
|
||||
// iterate over all the files in the directory and find the maximum timestamp
|
||||
// and return it
|
||||
|
||||
resolution := path.Base(dir)
|
||||
dir = path.Dir(dir)
|
||||
|
||||
files, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
var maxTS int64 = 0
|
||||
|
||||
if len(files) == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
if file.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := file.Name()
|
||||
|
||||
if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
|
||||
continue
|
||||
}
|
||||
|
||||
ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
|
||||
if err != nil {
|
||||
fmt.Printf("error while parsing timestamp: %s\n", err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
if ts > maxTS {
|
||||
maxTS = ts
|
||||
}
|
||||
}
|
||||
|
||||
interval, _ := time.ParseDuration(Keys.Checkpoints.Interval)
|
||||
updateTime := time.Unix(maxTS, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()
|
||||
|
||||
if startUp {
|
||||
return 0
|
||||
}
|
||||
|
||||
if updateTime < time.Now().Unix() {
|
||||
return 0
|
||||
}
|
||||
|
||||
return maxTS
|
||||
}
|
||||
|
||||
func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
|
||||
l.lock.Lock()
|
||||
defer l.lock.Unlock()
|
||||
|
||||
// fmt.Printf("Checkpointing directory: %s\n", dir)
|
||||
// filepath contains the resolution
|
||||
intRes, _ := strconv.Atoi(path.Base(dir))
|
||||
|
||||
// find smallest overall timestamp in l.data map and delete it from l.data
|
||||
minTS := int64(1<<63 - 1)
|
||||
for ts, dat := range l.data {
|
||||
if ts < minTS && len(dat) != 0 {
|
||||
minTS = ts
|
||||
}
|
||||
}
|
||||
|
||||
if from == 0 && minTS != int64(1<<63-1) {
|
||||
from = minTS
|
||||
}
|
||||
|
||||
if from == 0 {
|
||||
return ErrNoNewArchiveData
|
||||
}
|
||||
|
||||
var schema string
|
||||
var codec *goavro.Codec
|
||||
recordList := make([]map[string]any, 0)
|
||||
|
||||
var f *os.File
|
||||
|
||||
filePath := dir + fmt.Sprintf("_%d.avro", from)
|
||||
|
||||
var err error
|
||||
|
||||
fp_, err_ := os.Stat(filePath)
|
||||
if errors.Is(err_, os.ErrNotExist) {
|
||||
err = os.MkdirAll(path.Dir(dir), 0o755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
} else if fp_.Size() != 0 {
|
||||
f, err = os.Open(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open existing avro file: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
br := bufio.NewReader(f)
|
||||
|
||||
reader, err := goavro.NewOCFReader(br)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create OCF reader: %v", err)
|
||||
}
|
||||
codec = reader.Codec()
|
||||
schema = codec.Schema()
|
||||
}
|
||||
|
||||
timeRef := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()
|
||||
|
||||
if dumpAll {
|
||||
timeRef = time.Now().Unix()
|
||||
}
|
||||
|
||||
// Empty values
|
||||
if len(l.data) == 0 {
|
||||
// we checkpoint avro files every 60 seconds
|
||||
repeat := 60 / intRes
|
||||
|
||||
for range repeat {
|
||||
recordList = append(recordList, make(map[string]any))
|
||||
}
|
||||
}
|
||||
|
||||
readFlag := true
|
||||
|
||||
for ts := range l.data {
|
||||
flag := false
|
||||
if ts < timeRef {
|
||||
data := l.data[ts]
|
||||
|
||||
schemaGen, err := generateSchema(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
flag, schema, err = compareSchema(schema, schemaGen)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to compare read and generated schema: %v", err)
|
||||
}
|
||||
if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {
|
||||
// Use closure to ensure file is closed even on error
|
||||
err := func() error {
|
||||
f2, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open Avro file: %v", err)
|
||||
}
|
||||
defer f2.Close()
|
||||
|
||||
br := bufio.NewReader(f2)
|
||||
|
||||
ocfReader, err := goavro.NewOCFReader(br)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
|
||||
}
|
||||
|
||||
for ocfReader.Scan() {
|
||||
record, err := ocfReader.Read()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read record: %v", err)
|
||||
}
|
||||
|
||||
recordList = append(recordList, record.(map[string]any))
|
||||
}
|
||||
|
||||
return nil
|
||||
}()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = os.Remove(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to delete file: %v", err)
|
||||
}
|
||||
|
||||
readFlag = false
|
||||
}
|
||||
codec, err = goavro.NewCodec(schema)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create codec after merged schema: %v", err)
|
||||
}
|
||||
|
||||
recordList = append(recordList, generateRecord(data))
|
||||
delete(l.data, ts)
|
||||
}
|
||||
}
|
||||
|
||||
if len(recordList) == 0 {
|
||||
return ErrNoNewArchiveData
|
||||
}
|
||||
|
||||
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to append new avro file: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
// fmt.Printf("Codec : %#v\n", codec)
|
||||
|
||||
writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
|
||||
W: f,
|
||||
Codec: codec,
|
||||
CompressionName: goavro.CompressionDeflateLabel,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create OCF writer: %v", err)
|
||||
}
|
||||
|
||||
// Append the new record
|
||||
if err := writer.Append(recordList); err != nil {
|
||||
return fmt.Errorf("failed to append record: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
|
||||
var genSchema, readSchema AvroSchema
|
||||
|
||||
if schemaRead == "" {
|
||||
return false, schemaGen, nil
|
||||
}
|
||||
|
||||
// Unmarshal the schema strings into AvroSchema structs
|
||||
if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
|
||||
return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
|
||||
}
|
||||
if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
|
||||
return false, "", fmt.Errorf("failed to parse read schema: %v", err)
|
||||
}
|
||||
|
||||
sort.Slice(genSchema.Fields, func(i, j int) bool {
|
||||
return genSchema.Fields[i].Name < genSchema.Fields[j].Name
|
||||
})
|
||||
|
||||
sort.Slice(readSchema.Fields, func(i, j int) bool {
|
||||
return readSchema.Fields[i].Name < readSchema.Fields[j].Name
|
||||
})
|
||||
|
||||
// Check if schemas are identical
|
||||
schemasEqual := true
|
||||
if len(genSchema.Fields) <= len(readSchema.Fields) {
|
||||
|
||||
for i := range genSchema.Fields {
|
||||
if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
|
||||
schemasEqual = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If schemas are identical, return the read schema
|
||||
if schemasEqual {
|
||||
return false, schemaRead, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Create a map to hold unique fields from both schemas
|
||||
fieldMap := make(map[string]AvroField)
|
||||
|
||||
// Add fields from the read schema
|
||||
for _, field := range readSchema.Fields {
|
||||
fieldMap[field.Name] = field
|
||||
}
|
||||
|
||||
// Add or update fields from the generated schema
|
||||
for _, field := range genSchema.Fields {
|
||||
fieldMap[field.Name] = field
|
||||
}
|
||||
|
||||
// Create a union schema by collecting fields from the map
|
||||
var mergedFields []AvroField
|
||||
for _, field := range fieldMap {
|
||||
mergedFields = append(mergedFields, field)
|
||||
}
|
||||
|
||||
// Sort fields by name for consistency
|
||||
sort.Slice(mergedFields, func(i, j int) bool {
|
||||
return mergedFields[i].Name < mergedFields[j].Name
|
||||
})
|
||||
|
||||
// Create the merged schema
|
||||
mergedSchema := AvroSchema{
|
||||
Type: "record",
|
||||
Name: genSchema.Name,
|
||||
Fields: mergedFields,
|
||||
}
|
||||
|
||||
// Check if schemas are identical
|
||||
schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
|
||||
if schemasEqual {
|
||||
for i := range mergedSchema.Fields {
|
||||
if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
|
||||
schemasEqual = false
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if schemasEqual {
|
||||
return false, schemaRead, nil
|
||||
}
|
||||
}
|
||||
|
||||
// Marshal the merged schema back to JSON
|
||||
mergedSchemaJSON, err := json.Marshal(mergedSchema)
|
||||
if err != nil {
|
||||
return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
|
||||
}
|
||||
|
||||
return true, string(mergedSchemaJSON), nil
|
||||
}
|
||||
|
||||
func generateSchema(data map[string]schema.Float) (string, error) {
|
||||
// Define the Avro schema structure
|
||||
schema := map[string]any{
|
||||
"type": "record",
|
||||
"name": "DataRecord",
|
||||
"fields": []map[string]any{},
|
||||
}
|
||||
|
||||
fieldTracker := make(map[string]struct{})
|
||||
|
||||
for key := range data {
|
||||
if _, exists := fieldTracker[key]; !exists {
|
||||
key = correctKey(key)
|
||||
|
||||
field := map[string]any{
|
||||
"name": key,
|
||||
"type": "double",
|
||||
"default": -1.0,
|
||||
}
|
||||
schema["fields"] = append(schema["fields"].([]map[string]any), field)
|
||||
fieldTracker[key] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
schemaString, err := json.Marshal(schema)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to marshal schema: %v", err)
|
||||
}
|
||||
|
||||
return string(schemaString), nil
|
||||
}
|
||||
|
||||
func generateRecord(data map[string]schema.Float) map[string]any {
|
||||
record := make(map[string]any)
|
||||
|
||||
// Iterate through each map in data
|
||||
for key, value := range data {
|
||||
key = correctKey(key)
|
||||
|
||||
// Set the value in the record
|
||||
// avro only accepts basic types
|
||||
record[key] = value.Double()
|
||||
}
|
||||
|
||||
return record
|
||||
}
|
||||
|
||||
// correctKey escapes the characters '_', ':' and '.' in key as "_0x5F_",
// "_0x3A_" and "_0x2E_" respectively; every other byte is copied unchanged.
func correctKey(key string) string {
	var out strings.Builder
	for i := 0; i < len(key); i++ {
		switch c := key[i]; c {
		case '_':
			out.WriteString("_0x5F_")
		case ':':
			out.WriteString("_0x3A_")
		case '.':
			out.WriteString("_0x2E_")
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
|
||||
|
||||
// avroKeyUnescaper maps the escape sequences written by correctKey back to
// the characters they stand for. A Replacer scans the input once from left
// to right, which is what makes decoding the exact inverse of the encoding.
var avroKeyUnescaper = strings.NewReplacer(
	"_0x5F_", "_",
	"_0x3A_", ":",
	"_0x2E_", ".",
)

// ReplaceKey reverses correctKey: "_0x5F_", "_0x3A_" and "_0x2E_" are turned
// back into '_', ':' and '.'.
//
// Decoding happens in a single pass. Replacing the sequences one after the
// other would mis-decode keys whose original text already looked like an
// escape sequence: the encoded form of "_0x2E_" is "_0x5F_0x2E_0x5F_", which
// a sequential replacement turns into "_0x5F.0x5F_".
func ReplaceKey(key string) string {
	return avroKeyUnescaper.Replace(key)
}
|
||||
@@ -1,130 +0,0 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package metricstore
|
||||
|
||||
import (
|
||||
"context"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
)
|
||||
|
||||
func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
|
||||
if Keys.Checkpoints.FileFormat == "json" {
|
||||
return
|
||||
}
|
||||
|
||||
ms := GetMemoryStore()
|
||||
var avroLevel *AvroLevel
|
||||
oldSelector := make([]string, 0)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
// Drain any remaining messages in channel before exiting
|
||||
for {
|
||||
select {
|
||||
case val, ok := <-LineProtocolMessages:
|
||||
if !ok {
|
||||
// Channel closed
|
||||
return
|
||||
}
|
||||
// Process remaining message
|
||||
freq, err := ms.GetMetricFrequency(val.MetricName)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
var metricName strings.Builder
|
||||
for _, selectorName := range val.Selector {
|
||||
metricName.WriteString(selectorName + SelectorDelimiter)
|
||||
}
|
||||
metricName.WriteString(val.MetricName)
|
||||
|
||||
var selector []string
|
||||
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
|
||||
|
||||
if !stringSlicesEqual(oldSelector, selector) {
|
||||
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
|
||||
if avroLevel == nil {
|
||||
cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
|
||||
}
|
||||
oldSelector = slices.Clone(selector)
|
||||
}
|
||||
|
||||
if avroLevel != nil {
|
||||
avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq))
|
||||
}
|
||||
default:
|
||||
// No more messages, exit
|
||||
return
|
||||
}
|
||||
}
|
||||
case val, ok := <-LineProtocolMessages:
|
||||
if !ok {
|
||||
// Channel closed, exit gracefully
|
||||
return
|
||||
}
|
||||
|
||||
// Fetch the frequency of the metric from the global configuration
|
||||
freq, err := ms.GetMetricFrequency(val.MetricName)
|
||||
if err != nil {
|
||||
cclog.Errorf("Error fetching metric frequency: %s\n", err)
|
||||
continue
|
||||
}
|
||||
|
||||
var metricName strings.Builder
|
||||
|
||||
for _, selectorName := range val.Selector {
|
||||
metricName.WriteString(selectorName + SelectorDelimiter)
|
||||
}
|
||||
|
||||
metricName.WriteString(val.MetricName)
|
||||
|
||||
// Create a new selector for the Avro level
|
||||
// The selector is a slice of strings that represents the path to the
|
||||
// Avro level. It is created by appending the cluster, node, and metric
|
||||
// name to the selector.
|
||||
var selector []string
|
||||
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
|
||||
|
||||
if !stringSlicesEqual(oldSelector, selector) {
|
||||
// Get the Avro level for the metric
|
||||
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
|
||||
|
||||
// If the Avro level is nil, create a new one
|
||||
if avroLevel == nil {
|
||||
cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
|
||||
}
|
||||
oldSelector = slices.Clone(selector)
|
||||
}
|
||||
|
||||
if avroLevel != nil {
|
||||
avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq))
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// stringSlicesEqual reports whether a and b have the same length and hold
// equal strings at every index. A nil slice equals an empty one.
func stringSlicesEqual(a, b []string) bool {
	return slices.Equal(a, b)
}
|
||||
@@ -1,167 +0,0 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package metricstore
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
var (
|
||||
LineProtocolMessages = make(chan *AvroStruct)
|
||||
// SelectorDelimiter separates hierarchical selector components in metric names for Avro encoding
|
||||
SelectorDelimiter = "_SEL_"
|
||||
)
|
||||
|
||||
var CheckpointBufferMinutes = DefaultCheckpointBufferMin
|
||||
|
||||
type AvroStruct struct {
|
||||
MetricName string
|
||||
Cluster string
|
||||
Node string
|
||||
Selector []string
|
||||
Value schema.Float
|
||||
Timestamp int64
|
||||
}
|
||||
|
||||
type AvroStore struct {
|
||||
root AvroLevel
|
||||
}
|
||||
|
||||
var avroStore AvroStore
|
||||
|
||||
type AvroLevel struct {
|
||||
children map[string]*AvroLevel
|
||||
data map[int64]map[string]schema.Float
|
||||
lock sync.RWMutex
|
||||
}
|
||||
|
||||
type AvroField struct {
|
||||
Name string `json:"name"`
|
||||
Type any `json:"type"`
|
||||
Default any `json:"default,omitempty"`
|
||||
}
|
||||
|
||||
type AvroSchema struct {
|
||||
Type string `json:"type"`
|
||||
Name string `json:"name"`
|
||||
Fields []AvroField `json:"fields"`
|
||||
}
|
||||
|
||||
func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
|
||||
if len(selector) == 0 {
|
||||
return l
|
||||
}
|
||||
|
||||
// Allow concurrent reads:
|
||||
l.lock.RLock()
|
||||
var child *AvroLevel
|
||||
var ok bool
|
||||
if l.children == nil {
|
||||
// Children map needs to be created...
|
||||
l.lock.RUnlock()
|
||||
} else {
|
||||
child, ok := l.children[selector[0]]
|
||||
l.lock.RUnlock()
|
||||
if ok {
|
||||
return child.findAvroLevelOrCreate(selector[1:])
|
||||
}
|
||||
}
|
||||
|
||||
// The level does not exist, take write lock for unique access:
|
||||
l.lock.Lock()
|
||||
// While this thread waited for the write lock, another thread
|
||||
// could have created the child node.
|
||||
if l.children != nil {
|
||||
child, ok = l.children[selector[0]]
|
||||
if ok {
|
||||
l.lock.Unlock()
|
||||
return child.findAvroLevelOrCreate(selector[1:])
|
||||
}
|
||||
}
|
||||
|
||||
child = &AvroLevel{
|
||||
data: make(map[int64]map[string]schema.Float, 0),
|
||||
children: nil,
|
||||
}
|
||||
|
||||
if l.children != nil {
|
||||
l.children[selector[0]] = child
|
||||
} else {
|
||||
l.children = map[string]*AvroLevel{selector[0]: child}
|
||||
}
|
||||
l.lock.Unlock()
|
||||
return child.findAvroLevelOrCreate(selector[1:])
|
||||
}
|
||||
|
||||
func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
|
||||
l.lock.Lock()
|
||||
defer l.lock.Unlock()
|
||||
|
||||
KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)
|
||||
|
||||
// Create keys in advance for the given amount of time
|
||||
if len(l.data) != KeyCounter {
|
||||
if len(l.data) == 0 {
|
||||
for i := range KeyCounter {
|
||||
l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
|
||||
}
|
||||
} else {
|
||||
// Get the last timestamp
|
||||
var lastTS int64
|
||||
for ts := range l.data {
|
||||
if ts > lastTS {
|
||||
lastTS = ts
|
||||
}
|
||||
}
|
||||
// Create keys for the next KeyCounter timestamps
|
||||
l.data[lastTS+int64(Freq)] = make(map[string]schema.Float, 0)
|
||||
}
|
||||
}
|
||||
|
||||
closestTS := int64(0)
|
||||
minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
|
||||
found := false
|
||||
|
||||
// Iterate over timestamps and choose the one which is within range.
|
||||
// Since its epoch time, we check if the difference is less than 60 seconds.
|
||||
for ts, dat := range l.data {
|
||||
// Check if timestamp is within range
|
||||
diff := timestamp - ts
|
||||
if diff < -int64(Freq) || diff > int64(Freq) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Metric already present at this timestamp — skip
|
||||
if _, ok := dat[metricName]; ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if this is the closest timestamp so far
|
||||
if Abs(diff) < minDiff {
|
||||
minDiff = Abs(diff)
|
||||
closestTS = ts
|
||||
found = true
|
||||
}
|
||||
}
|
||||
|
||||
if found {
|
||||
l.data[closestTS][metricName] = value
|
||||
}
|
||||
}
|
||||
|
||||
func GetAvroStore() *AvroStore {
|
||||
return &avroStore
|
||||
}
|
||||
|
||||
// Abs returns the magnitude of x. The smallest int64 has no positive
// counterpart and is returned unchanged.
func Abs(x int64) int64 {
	if x >= 0 {
		return x
	}
	return -x
}
|
||||
274
pkg/metricstore/binaryCheckpoint.go
Normal file
274
pkg/metricstore/binaryCheckpoint.go
Normal file
@@ -0,0 +1,274 @@
|
||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||
// All rights reserved. This file is part of cc-backend.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This file implements the binary checkpoint format for fast loading.
|
||||
//
|
||||
// The binary format stores metric data in column-oriented layout (per-metric
|
||||
// float64 arrays) for maximum load speed. Float64 arrays are read/written
|
||||
// as raw bytes, avoiding per-element parsing overhead.
|
||||
//
|
||||
// File format:
|
||||
//
|
||||
// Header (24 bytes):
|
||||
// magic: [4]byte "CCMS"
|
||||
// version: uint32 LE
|
||||
// from: int64 LE
|
||||
// to: int64 LE
|
||||
//
|
||||
// Body (recursive):
|
||||
// nmetrics: uint32 LE
|
||||
// Per metric:
|
||||
// name_len: uint16 LE
|
||||
// name: []byte
|
||||
// freq: int64 LE
|
||||
// start: int64 LE
|
||||
// nvalues: uint32 LE
|
||||
// data: []float64 LE (NaN = missing)
|
||||
// nchildren: uint32 LE
|
||||
// Per child:
|
||||
// name_len: uint16 LE
|
||||
// name: []byte
|
||||
// (recursive body)
|
||||
package metricstore
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
)
|
||||
|
||||
var (
|
||||
binaryMagic = [4]byte{'C', 'C', 'M', 'S'}
|
||||
binaryVersion = uint32(1)
|
||||
binaryByteOrder = binary.LittleEndian
|
||||
floatSize = int(unsafe.Sizeof(schema.Float(0))) // schema.Float is float64
|
||||
)
|
||||
|
||||
// writeBinaryCheckpoint writes a CheckpointFile to a binary checkpoint file on disk.
|
||||
func writeBinaryCheckpoint(filePath string, cf *CheckpointFile) error {
|
||||
f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
if err2 := os.MkdirAll(path.Dir(filePath), CheckpointDirPerms); err2 != nil {
|
||||
return err2
|
||||
}
|
||||
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bw := bufio.NewWriter(f)
|
||||
|
||||
// Write header
|
||||
if _, err := bw.Write(binaryMagic[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := binary.Write(bw, binaryByteOrder, binaryVersion); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := binary.Write(bw, binaryByteOrder, cf.From); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := binary.Write(bw, binaryByteOrder, cf.To); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write body (metrics + children recursively)
|
||||
if err := writeBinaryBody(bw, cf); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return bw.Flush()
|
||||
}
|
||||
|
||||
// writeBinaryBody writes the metrics and children of a CheckpointFile.
|
||||
func writeBinaryBody(w io.Writer, cf *CheckpointFile) error {
|
||||
if err := binary.Write(w, binaryByteOrder, uint32(len(cf.Metrics))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for name, metric := range cf.Metrics {
|
||||
nameBytes := []byte(name)
|
||||
if err := binary.Write(w, binaryByteOrder, uint16(len(nameBytes))); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.Write(nameBytes); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := binary.Write(w, binaryByteOrder, metric.Frequency); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := binary.Write(w, binaryByteOrder, metric.Start); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := binary.Write(w, binaryByteOrder, uint32(len(metric.Data))); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFloatArray(w, metric.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := binary.Write(w, binaryByteOrder, uint32(len(cf.Children))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for name, child := range cf.Children {
|
||||
nameBytes := []byte(name)
|
||||
if err := binary.Write(w, binaryByteOrder, uint16(len(nameBytes))); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.Write(nameBytes); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeBinaryBody(w, child); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeFloatArray writes a schema.Float slice as raw little-endian float64 bytes.
|
||||
func writeFloatArray(w io.Writer, data []schema.Float) error {
|
||||
if len(data) == 0 {
|
||||
return nil
|
||||
}
|
||||
buf := unsafe.Slice((*byte)(unsafe.Pointer(&data[0])), len(data)*floatSize)
|
||||
_, err := w.Write(buf)
|
||||
return err
|
||||
}
|
||||
|
||||
// loadBinaryFile reads a binary checkpoint file into a CheckpointFile.
|
||||
func loadBinaryFile(filePath string) (*CheckpointFile, error) {
|
||||
f, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
br := bufio.NewReader(f)
|
||||
|
||||
var magic [4]byte
|
||||
if _, err := io.ReadFull(br, magic[:]); err != nil {
|
||||
return nil, fmt.Errorf("reading magic: %w", err)
|
||||
}
|
||||
if magic != binaryMagic {
|
||||
return nil, fmt.Errorf("[METRICSTORE]> invalid binary checkpoint magic in %s", filePath)
|
||||
}
|
||||
|
||||
var version uint32
|
||||
if err := binary.Read(br, binaryByteOrder, &version); err != nil {
|
||||
return nil, fmt.Errorf("reading version: %w", err)
|
||||
}
|
||||
if version != binaryVersion {
|
||||
return nil, fmt.Errorf("[METRICSTORE]> unsupported binary checkpoint version %d in %s", version, filePath)
|
||||
}
|
||||
|
||||
cf := &CheckpointFile{}
|
||||
if err := binary.Read(br, binaryByteOrder, &cf.From); err != nil {
|
||||
return nil, fmt.Errorf("reading from: %w", err)
|
||||
}
|
||||
if err := binary.Read(br, binaryByteOrder, &cf.To); err != nil {
|
||||
return nil, fmt.Errorf("reading to: %w", err)
|
||||
}
|
||||
|
||||
if err := readBinaryBody(br, cf); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cf, nil
|
||||
}
|
||||
|
||||
// readBinaryBody reads the metrics and children of a CheckpointFile.
|
||||
func readBinaryBody(r io.Reader, cf *CheckpointFile) error {
|
||||
var nmetrics uint32
|
||||
if err := binary.Read(r, binaryByteOrder, &nmetrics); err != nil {
|
||||
return fmt.Errorf("reading metric count: %w", err)
|
||||
}
|
||||
|
||||
cf.Metrics = make(map[string]*CheckpointMetrics, nmetrics)
|
||||
|
||||
for range nmetrics {
|
||||
var nameLen uint16
|
||||
if err := binary.Read(r, binaryByteOrder, &nameLen); err != nil {
|
||||
return fmt.Errorf("reading metric name length: %w", err)
|
||||
}
|
||||
nameBytes := make([]byte, nameLen)
|
||||
if _, err := io.ReadFull(r, nameBytes); err != nil {
|
||||
return fmt.Errorf("reading metric name: %w", err)
|
||||
}
|
||||
|
||||
cm := &CheckpointMetrics{}
|
||||
if err := binary.Read(r, binaryByteOrder, &cm.Frequency); err != nil {
|
||||
return fmt.Errorf("reading frequency: %w", err)
|
||||
}
|
||||
if err := binary.Read(r, binaryByteOrder, &cm.Start); err != nil {
|
||||
return fmt.Errorf("reading start: %w", err)
|
||||
}
|
||||
|
||||
var nvalues uint32
|
||||
if err := binary.Read(r, binaryByteOrder, &nvalues); err != nil {
|
||||
return fmt.Errorf("reading value count: %w", err)
|
||||
}
|
||||
|
||||
var err error
|
||||
cm.Data, err = readFloatArray(r, int(nvalues))
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading data for %s: %w", string(nameBytes), err)
|
||||
}
|
||||
|
||||
cf.Metrics[string(nameBytes)] = cm
|
||||
}
|
||||
|
||||
var nchildren uint32
|
||||
if err := binary.Read(r, binaryByteOrder, &nchildren); err != nil {
|
||||
return fmt.Errorf("reading children count: %w", err)
|
||||
}
|
||||
|
||||
cf.Children = make(map[string]*CheckpointFile, nchildren)
|
||||
|
||||
for range nchildren {
|
||||
var nameLen uint16
|
||||
if err := binary.Read(r, binaryByteOrder, &nameLen); err != nil {
|
||||
return fmt.Errorf("reading child name length: %w", err)
|
||||
}
|
||||
nameBytes := make([]byte, nameLen)
|
||||
if _, err := io.ReadFull(r, nameBytes); err != nil {
|
||||
return fmt.Errorf("reading child name: %w", err)
|
||||
}
|
||||
|
||||
child := &CheckpointFile{}
|
||||
if err := readBinaryBody(r, child); err != nil {
|
||||
return fmt.Errorf("reading child %s: %w", string(nameBytes), err)
|
||||
}
|
||||
|
||||
cf.Children[string(nameBytes)] = child
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// readFloatArray reads n float32 values from raw little-endian bytes.
|
||||
func readFloatArray(r io.Reader, n int) ([]schema.Float, error) {
|
||||
if n == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
data := make([]schema.Float, n)
|
||||
buf := unsafe.Slice((*byte)(unsafe.Pointer(&data[0])), n*floatSize)
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
@@ -6,15 +6,16 @@
|
||||
// This file implements checkpoint persistence for the in-memory metric store.
|
||||
//
|
||||
// Checkpoints enable graceful restarts by periodically saving in-memory metric
|
||||
// data to disk in either JSON or Avro format. The checkpoint system:
|
||||
// data to disk. The checkpoint system supports two write formats:
|
||||
// - binary (default): fast loading via raw float32 arrays
|
||||
// - json: human-readable, slightly slower to load
|
||||
//
|
||||
// Key Features:
|
||||
// - Periodic background checkpointing via the Checkpointing() worker
|
||||
// - Two formats: JSON (human-readable) and Avro (compact, efficient)
|
||||
// - Parallel checkpoint creation and loading using worker pools
|
||||
// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{json|avro}
|
||||
// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{bin|json}
|
||||
// - Only saves unarchived data (archived data is already persisted elsewhere)
|
||||
// - Automatic format detection and fallback during loading
|
||||
// - Automatic format detection during loading (supports bin, json, and legacy avro)
|
||||
// - GC optimization during loading to prevent excessive heap growth
|
||||
//
|
||||
// Checkpoint Workflow:
|
||||
@@ -27,8 +28,8 @@
|
||||
// checkpoints/
|
||||
// cluster1/
|
||||
// host001/
|
||||
// 1234567890.json (timestamp = checkpoint start time)
|
||||
// 1234567950.json
|
||||
// 1234567890.bin (timestamp = checkpoint start time)
|
||||
// 1234567950.bin
|
||||
// host002/
|
||||
// ...
|
||||
package metricstore
|
||||
@@ -52,13 +53,11 @@ import (
|
||||
|
||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||
"github.com/linkedin/goavro/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
CheckpointFilePerms = 0o644 // File permissions for checkpoint files
|
||||
CheckpointDirPerms = 0o755 // Directory permissions for checkpoint directories
|
||||
GCTriggerInterval = DefaultGCTriggerInterval // Interval for triggering GC during checkpoint loading
|
||||
CheckpointFilePerms = 0o644 // File permissions for checkpoint files
|
||||
CheckpointDirPerms = 0o755 // Directory permissions for checkpoint directories
|
||||
)
|
||||
|
||||
// CheckpointMetrics represents metric data in a checkpoint file.
|
||||
@@ -86,83 +85,200 @@ var (
|
||||
|
||||
// Checkpointing starts a background worker that periodically saves metric data to disk.
|
||||
//
|
||||
// The behavior depends on the configured file format:
|
||||
// - JSON: Periodic checkpointing based on Keys.Checkpoints.Interval
|
||||
// - Avro: Initial delay + periodic checkpointing at DefaultAvroCheckpointInterval
|
||||
//
|
||||
// The worker respects context cancellation and signals completion via the WaitGroup.
|
||||
// Checkpoints are written at the configured interval (Keys.Checkpoints.Interval) in
|
||||
// either binary or JSON format. The worker respects context cancellation and signals
|
||||
// completion via the WaitGroup.
|
||||
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
|
||||
lastCheckpointMu.Lock()
|
||||
lastCheckpoint = time.Now()
|
||||
lastCheckpointMu.Unlock()
|
||||
|
||||
if Keys.Checkpoints.FileFormat == "json" {
|
||||
ms := GetMemoryStore()
|
||||
ms := GetMemoryStore()
|
||||
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
d, err := time.ParseDuration(Keys.Checkpoints.Interval)
|
||||
if err != nil {
|
||||
cclog.Fatalf("[METRICSTORE]> invalid checkpoint interval '%s': %s", Keys.Checkpoints.Interval, err.Error())
|
||||
}
|
||||
if d <= 0 {
|
||||
cclog.Warnf("[METRICSTORE]> checkpoint interval is zero or negative (%s), checkpointing disabled", d)
|
||||
return
|
||||
}
|
||||
wg.Go(func() {
|
||||
d, err := time.ParseDuration(Keys.Checkpoints.Interval)
|
||||
if err != nil {
|
||||
cclog.Fatalf("[METRICSTORE]> invalid checkpoint interval '%s': %s", Keys.Checkpoints.Interval, err.Error())
|
||||
}
|
||||
if d <= 0 {
|
||||
cclog.Warnf("[METRICSTORE]> checkpoint interval is zero or negative (%s), checkpointing disabled", d)
|
||||
return
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
lastCheckpointMu.Lock()
|
||||
from := lastCheckpoint
|
||||
lastCheckpointMu.Unlock()
|
||||
|
||||
cclog.Infof("[METRICSTORE]> start checkpointing (starting at %s)...", from.Format(time.RFC3339))
|
||||
now := time.Now()
|
||||
n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir,
|
||||
from.Unix(), now.Unix())
|
||||
if err != nil {
|
||||
cclog.Errorf("[METRICSTORE]> checkpointing failed: %s", err.Error())
|
||||
} else {
|
||||
cclog.Infof("[METRICSTORE]> done: %d checkpoint files created", n)
|
||||
lastCheckpointMu.Lock()
|
||||
lastCheckpoint = now
|
||||
lastCheckpointMu.Unlock()
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
} else {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
ticker := time.NewTicker(d)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(time.Duration(CheckpointBufferMinutes) * time.Minute):
|
||||
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
|
||||
}
|
||||
case <-ticker.C:
|
||||
lastCheckpointMu.Lock()
|
||||
from := lastCheckpoint
|
||||
lastCheckpointMu.Unlock()
|
||||
|
||||
ticker := time.NewTicker(DefaultAvroCheckpointInterval)
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
|
||||
cclog.Infof("[METRICSTORE]> start checkpointing (starting at %s)...", from.Format(time.RFC3339))
|
||||
now := time.Now()
|
||||
n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir,
|
||||
from.Unix(), now.Unix())
|
||||
if err != nil {
|
||||
cclog.Errorf("[METRICSTORE]> checkpointing failed: %s", err.Error())
|
||||
} else {
|
||||
cclog.Infof("[METRICSTORE]> done: %d checkpoint files created", n)
|
||||
lastCheckpointMu.Lock()
|
||||
lastCheckpoint = now
|
||||
lastCheckpointMu.Unlock()
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// UnmarshalJSON provides optimized JSON decoding for CheckpointMetrics.
|
||||
//
|
||||
// Mirrors the optimized MarshalJSON by manually parsing JSON to avoid
|
||||
// per-element interface dispatch and allocation overhead of the generic
|
||||
// json.Unmarshal path for []schema.Float.
|
||||
func (cm *CheckpointMetrics) UnmarshalJSON(input []byte) error {
|
||||
// Minimal manual JSON parsing for the known structure:
|
||||
// {"frequency":N,"start":N,"data":[...]}
|
||||
// Field order may vary, so we parse field names.
|
||||
|
||||
if len(input) < 2 || input[0] != '{' {
|
||||
return fmt.Errorf("expected JSON object")
|
||||
}
|
||||
|
||||
i := 1 // skip '{'
|
||||
for i < len(input) {
|
||||
// Skip whitespace
|
||||
for i < len(input) && (input[i] == ' ' || input[i] == '\t' || input[i] == '\n' || input[i] == '\r') {
|
||||
i++
|
||||
}
|
||||
if i >= len(input) || input[i] == '}' {
|
||||
break
|
||||
}
|
||||
if input[i] == ',' {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse field name
|
||||
if input[i] != '"' {
|
||||
return fmt.Errorf("expected field name at pos %d", i)
|
||||
}
|
||||
i++
|
||||
nameStart := i
|
||||
for i < len(input) && input[i] != '"' {
|
||||
i++
|
||||
}
|
||||
fieldName := string(input[nameStart:i])
|
||||
i++ // skip closing '"'
|
||||
|
||||
// Skip ':'
|
||||
for i < len(input) && (input[i] == ' ' || input[i] == ':') {
|
||||
i++
|
||||
}
|
||||
|
||||
switch fieldName {
|
||||
case "frequency":
|
||||
numStart := i
|
||||
for i < len(input) && input[i] != ',' && input[i] != '}' {
|
||||
i++
|
||||
}
|
||||
v, err := strconv.ParseInt(string(input[numStart:i]), 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid frequency: %w", err)
|
||||
}
|
||||
cm.Frequency = v
|
||||
|
||||
case "start":
|
||||
numStart := i
|
||||
for i < len(input) && input[i] != ',' && input[i] != '}' {
|
||||
i++
|
||||
}
|
||||
v, err := strconv.ParseInt(string(input[numStart:i]), 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid start: %w", err)
|
||||
}
|
||||
cm.Start = v
|
||||
|
||||
case "data":
|
||||
if input[i] != '[' {
|
||||
return fmt.Errorf("expected '[' for data array at pos %d", i)
|
||||
}
|
||||
i++ // skip '['
|
||||
|
||||
cm.Data = make([]schema.Float, 0, 256)
|
||||
for i < len(input) {
|
||||
// Skip whitespace
|
||||
for i < len(input) && (input[i] == ' ' || input[i] == '\t' || input[i] == '\n' || input[i] == '\r') {
|
||||
i++
|
||||
}
|
||||
if i >= len(input) {
|
||||
break
|
||||
}
|
||||
if input[i] == ']' {
|
||||
i++
|
||||
break
|
||||
}
|
||||
if input[i] == ',' {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse value: number or null
|
||||
if input[i] == 'n' {
|
||||
// "null"
|
||||
cm.Data = append(cm.Data, schema.NaN)
|
||||
i += 4
|
||||
} else {
|
||||
numStart := i
|
||||
for i < len(input) && input[i] != ',' && input[i] != ']' && input[i] != ' ' {
|
||||
i++
|
||||
}
|
||||
v, err := strconv.ParseFloat(string(input[numStart:i]), 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid data value: %w", err)
|
||||
}
|
||||
cm.Data = append(cm.Data, schema.Float(v))
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
// Skip unknown field value
|
||||
depth := 0
|
||||
inStr := false
|
||||
for i < len(input) {
|
||||
if inStr {
|
||||
if input[i] == '\\' {
|
||||
i++
|
||||
} else if input[i] == '"' {
|
||||
inStr = false
|
||||
}
|
||||
} else {
|
||||
switch input[i] {
|
||||
case '"':
|
||||
inStr = true
|
||||
case '{', '[':
|
||||
depth++
|
||||
case '}', ']':
|
||||
if depth == 0 {
|
||||
goto doneSkip
|
||||
}
|
||||
depth--
|
||||
case ',':
|
||||
if depth == 0 {
|
||||
goto doneSkip
|
||||
}
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
doneSkip:
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalJSON provides optimized JSON encoding for CheckpointMetrics.
|
||||
@@ -337,7 +453,8 @@ func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil
|
||||
return retval, nil
|
||||
}
|
||||
|
||||
// toCheckpoint writes a Level's data to a JSON checkpoint file.
|
||||
// toCheckpoint writes a Level's data to a checkpoint file.
|
||||
// The format (binary or JSON) is determined by Keys.Checkpoints.FileFormat.
|
||||
// Creates directory if needed. Returns ErrNoNewArchiveData if nothing to save.
|
||||
func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
|
||||
cf, err := l.toCheckpointFile(from, to, m)
|
||||
@@ -349,12 +466,23 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
|
||||
return ErrNoNewArchiveData
|
||||
}
|
||||
|
||||
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
|
||||
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
||||
if Keys.Checkpoints.FileFormat == "json" {
|
||||
return writeJSONCheckpoint(dir, from, cf)
|
||||
}
|
||||
|
||||
// Default: binary format
|
||||
filePath := path.Join(dir, fmt.Sprintf("%d.bin", from))
|
||||
return writeBinaryCheckpoint(filePath, cf)
|
||||
}
|
||||
|
||||
// writeJSONCheckpoint writes a CheckpointFile in JSON format.
|
||||
func writeJSONCheckpoint(dir string, from int64, cf *CheckpointFile) error {
|
||||
filePath := path.Join(dir, fmt.Sprintf("%d.json", from))
|
||||
f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
||||
if err != nil && os.IsNotExist(err) {
|
||||
err = os.MkdirAll(dir, CheckpointDirPerms)
|
||||
if err == nil {
|
||||
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
||||
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
@@ -371,51 +499,56 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
|
||||
}
|
||||
|
||||
// enqueueCheckpointHosts traverses checkpoint directory and enqueues cluster/host pairs.
|
||||
// Returns error if directory structure is invalid.
|
||||
func enqueueCheckpointHosts(dir string, work chan<- [2]string) error {
|
||||
// Returns the set of cluster names found and any error if directory structure is invalid.
|
||||
func enqueueCheckpointHosts(dir string, work chan<- [2]string) (map[string]struct{}, error) {
|
||||
clustersDir, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
gcCounter := 0
|
||||
clusters := make(map[string]struct{}, len(clustersDir))
|
||||
|
||||
for _, clusterDir := range clustersDir {
|
||||
if !clusterDir.IsDir() {
|
||||
return errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
|
||||
return nil, errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
|
||||
}
|
||||
|
||||
clusters[clusterDir.Name()] = struct{}{}
|
||||
|
||||
hostsDir, err := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, hostDir := range hostsDir {
|
||||
if !hostDir.IsDir() {
|
||||
return errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
|
||||
return nil, errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
|
||||
}
|
||||
|
||||
gcCounter++
|
||||
// if gcCounter%GCTriggerInterval == 0 {
|
||||
// Forcing garbage collection runs here regulary during the loading of checkpoints
|
||||
// will decrease the total heap size after loading everything back to memory is done.
|
||||
// While loading data, the heap will grow fast, so the GC target size will double
|
||||
// almost always. By forcing GCs here, we can keep it growing more slowly so that
|
||||
// at the end, less memory is wasted.
|
||||
// runtime.GC()
|
||||
// }
|
||||
|
||||
work <- [2]string{clusterDir.Name(), hostDir.Name()}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
return clusters, nil
|
||||
}
|
||||
|
||||
// FromCheckpoint loads checkpoint files from disk into memory in parallel.
|
||||
//
|
||||
// Uses worker pool to load cluster/host combinations. Periodically triggers GC
|
||||
// to prevent excessive heap growth. Returns number of files loaded and any errors.
|
||||
// Pre-creates cluster-level entries to reduce lock contention during parallel loading.
|
||||
// Uses worker pool to load cluster/host combinations. Returns number of files loaded and any errors.
|
||||
func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
|
||||
// Pre-create cluster-level entries to eliminate write-lock contention on m.root
|
||||
// during parallel loading. Workers only contend at the cluster level (independent).
|
||||
clusterDirs, err := os.ReadDir(dir)
|
||||
if err != nil && !os.IsNotExist(err) {
|
||||
return 0, err
|
||||
}
|
||||
for _, d := range clusterDirs {
|
||||
if d.IsDir() {
|
||||
m.root.findLevelOrCreate([]string{d.Name()}, len(m.Metrics))
|
||||
}
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
work := make(chan [2]string, Keys.NumWorkers*4)
|
||||
n, errs := int32(0), int32(0)
|
||||
@@ -436,7 +569,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
|
||||
}()
|
||||
}
|
||||
|
||||
err := enqueueCheckpointHosts(dir, work)
|
||||
_, err = enqueueCheckpointHosts(dir, work)
|
||||
close(work)
|
||||
wg.Wait()
|
||||
|
||||
@@ -452,7 +585,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
|
||||
|
||||
// FromCheckpointFiles is the main entry point for loading checkpoints at startup.
|
||||
//
|
||||
// Automatically detects checkpoint format (JSON vs Avro) and falls back if needed.
|
||||
// Automatically detects checkpoint format (binary, JSON, or legacy Avro).
|
||||
// Creates checkpoint directory if it doesn't exist. This function must be called
|
||||
// before any writes or reads, and can only be called once.
|
||||
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
||||
@@ -468,150 +601,11 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
||||
return m.FromCheckpoint(dir, from)
|
||||
}
|
||||
|
||||
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
|
||||
br := bufio.NewReader(f)
|
||||
|
||||
fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
|
||||
resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
|
||||
// loadBinaryCheckpointFile loads a binary checkpoint file into the Level tree.
|
||||
// Binary files are decoded in the same way as JSON files (via loadFile).
|
||||
func (l *Level) loadBinaryCheckpointFile(m *MemoryStore, filePath string, from int64) error {
|
||||
cf, err := loadBinaryFile(filePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err)
|
||||
}
|
||||
|
||||
fromTimestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)
|
||||
|
||||
// Same logic according to lineprotocol
|
||||
fromTimestamp -= (resolution / 2)
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err)
|
||||
}
|
||||
|
||||
// fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)
|
||||
|
||||
var recordCounter int64 = 0
|
||||
|
||||
// Create a new OCF reader from the buffered reader
|
||||
ocfReader, err := goavro.NewOCFReader(br)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[METRICSTORE]> error creating OCF reader: %w", err)
|
||||
}
|
||||
|
||||
metricsData := make(map[string]schema.FloatArray)
|
||||
|
||||
for ocfReader.Scan() {
|
||||
datum, err := ocfReader.Read()
|
||||
if err != nil {
|
||||
return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err)
|
||||
}
|
||||
|
||||
record, ok := datum.(map[string]any)
|
||||
if !ok {
|
||||
return fmt.Errorf("[METRICSTORE]> failed to assert datum as map[string]interface{}")
|
||||
}
|
||||
|
||||
for key, value := range record {
|
||||
metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64)))
|
||||
}
|
||||
|
||||
recordCounter += 1
|
||||
}
|
||||
|
||||
to := (fromTimestamp + (recordCounter / (60 / resolution) * 60))
|
||||
if to < from {
|
||||
return nil
|
||||
}
|
||||
|
||||
for key, floatArray := range metricsData {
|
||||
metricName := ReplaceKey(key)
|
||||
|
||||
if strings.Contains(metricName, SelectorDelimiter) {
|
||||
subString := strings.Split(metricName, SelectorDelimiter)
|
||||
|
||||
lvl := l
|
||||
|
||||
for i := 0; i < len(subString)-1; i++ {
|
||||
|
||||
sel := subString[i]
|
||||
|
||||
if lvl.children == nil {
|
||||
lvl.children = make(map[string]*Level)
|
||||
}
|
||||
|
||||
child, ok := lvl.children[sel]
|
||||
if !ok {
|
||||
child = &Level{
|
||||
metrics: make([]*buffer, len(m.Metrics)),
|
||||
children: nil,
|
||||
}
|
||||
lvl.children[sel] = child
|
||||
}
|
||||
lvl = child
|
||||
}
|
||||
|
||||
leafMetricName := subString[len(subString)-1]
|
||||
err = lvl.createBuffer(m, leafMetricName, floatArray, fromTimestamp, resolution)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
|
||||
}
|
||||
} else {
|
||||
err = l.createBuffer(m, metricName, floatArray, fromTimestamp, resolution)
|
||||
if err != nil {
|
||||
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
|
||||
n := len(floatArray)
|
||||
b := &buffer{
|
||||
frequency: resolution,
|
||||
start: from,
|
||||
data: floatArray[0:n:n],
|
||||
prev: nil,
|
||||
next: nil,
|
||||
archived: true,
|
||||
}
|
||||
|
||||
minfo, ok := m.Metrics[metricName]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
prev := l.metrics[minfo.offset]
|
||||
if prev == nil {
|
||||
l.metrics[minfo.offset] = b
|
||||
} else {
|
||||
if prev.start > b.start {
|
||||
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
|
||||
}
|
||||
|
||||
b.prev = prev
|
||||
prev.next = b
|
||||
|
||||
missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
|
||||
if missingCount > 0 {
|
||||
missingCount /= int(b.frequency)
|
||||
|
||||
for range missingCount {
|
||||
prev.data = append(prev.data, schema.NaN)
|
||||
}
|
||||
|
||||
prev.data = prev.data[0:len(prev.data):len(prev.data)]
|
||||
}
|
||||
}
|
||||
l.metrics[minfo.offset] = b
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {
|
||||
br := bufio.NewReader(f)
|
||||
cf := &CheckpointFile{}
|
||||
if err := json.NewDecoder(br).Decode(cf); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -619,11 +613,7 @@ func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := l.loadFile(cf, m); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return l.loadFile(cf, m)
|
||||
}
|
||||
|
||||
func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
|
||||
@@ -689,26 +679,14 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, err
|
||||
return 0, err
|
||||
}
|
||||
|
||||
allFiles := make([]fs.DirEntry, 0)
|
||||
allFiles := make([]fs.DirEntry, 0, len(direntries))
|
||||
filesLoaded := 0
|
||||
for _, e := range direntries {
|
||||
if e.IsDir() {
|
||||
child := &Level{
|
||||
metrics: make([]*buffer, len(m.Metrics)),
|
||||
children: make(map[string]*Level),
|
||||
}
|
||||
|
||||
files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from)
|
||||
filesLoaded += files
|
||||
if err != nil {
|
||||
return filesLoaded, err
|
||||
}
|
||||
|
||||
l.children[e.Name()] = child
|
||||
} else if strings.HasSuffix(e.Name(), ".json") || strings.HasSuffix(e.Name(), ".avro") {
|
||||
allFiles = append(allFiles, e)
|
||||
} else {
|
||||
cclog.Warnf("[METRICSTORE]> unexpected subdirectory '%s' in checkpoint dir '%s', skipping", e.Name(), dir)
|
||||
continue
|
||||
} else if strings.HasSuffix(e.Name(), ".bin") || strings.HasSuffix(e.Name(), ".json") {
|
||||
allFiles = append(allFiles, e)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -717,95 +695,190 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, err
|
||||
return filesLoaded, err
|
||||
}
|
||||
|
||||
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
|
||||
".json": l.loadJSONFile,
|
||||
".avro": l.loadAvroFile,
|
||||
if len(files) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Separate files by type
|
||||
var binFiles, jsonFiles []string
|
||||
for _, filename := range files {
|
||||
ext := filepath.Ext(filename)
|
||||
loader := loaders[ext]
|
||||
if loader == nil {
|
||||
cclog.Warnf("Unknown extension for file %s", filename)
|
||||
continue
|
||||
switch filepath.Ext(filename) {
|
||||
case ".bin":
|
||||
binFiles = append(binFiles, filename)
|
||||
case ".json":
|
||||
jsonFiles = append(jsonFiles, filename)
|
||||
default:
|
||||
cclog.Warnf("[METRICSTORE]> unknown extension for file %s", filename)
|
||||
}
|
||||
}
|
||||
|
||||
// Parallel binary decoding: decode files concurrently, then apply sequentially
|
||||
if len(binFiles) > 0 {
|
||||
type decodedFile struct {
|
||||
cf *CheckpointFile
|
||||
err error
|
||||
}
|
||||
|
||||
// Use a closure to ensure file is closed immediately after use
|
||||
err := func() error {
|
||||
f, err := os.Open(path.Join(dir, filename))
|
||||
if err != nil {
|
||||
return err
|
||||
decoded := make([]decodedFile, len(binFiles))
|
||||
var decodeWg sync.WaitGroup
|
||||
|
||||
for i, filename := range binFiles {
|
||||
decodeWg.Add(1)
|
||||
go func(idx int, fname string) {
|
||||
defer decodeWg.Done()
|
||||
cf, err := loadBinaryFile(path.Join(dir, fname))
|
||||
if err != nil {
|
||||
decoded[idx] = decodedFile{err: fmt.Errorf("decoding %s: %w", fname, err)}
|
||||
return
|
||||
}
|
||||
decoded[idx] = decodedFile{cf: cf}
|
||||
}(i, filename)
|
||||
}
|
||||
|
||||
decodeWg.Wait()
|
||||
|
||||
for i, d := range decoded {
|
||||
if d.err != nil {
|
||||
return filesLoaded, d.err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return loader(m, f, from)
|
||||
}()
|
||||
if err != nil {
|
||||
return filesLoaded, err
|
||||
if d.cf.To != 0 && d.cf.To < from {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := l.loadFile(d.cf, m); err != nil {
|
||||
return filesLoaded, fmt.Errorf("loading %s: %w", binFiles[i], err)
|
||||
}
|
||||
filesLoaded++
|
||||
}
|
||||
}
|
||||
|
||||
// Parallel JSON decoding: decode files concurrently, then apply sequentially
|
||||
if len(jsonFiles) > 0 {
|
||||
type decodedFile struct {
|
||||
cf *CheckpointFile
|
||||
err error
|
||||
}
|
||||
|
||||
filesLoaded += 1
|
||||
decoded := make([]decodedFile, len(jsonFiles))
|
||||
var decodeWg sync.WaitGroup
|
||||
|
||||
for i, filename := range jsonFiles {
|
||||
decodeWg.Add(1)
|
||||
go func(idx int, fname string) {
|
||||
defer decodeWg.Done()
|
||||
f, err := os.Open(path.Join(dir, fname))
|
||||
if err != nil {
|
||||
decoded[idx] = decodedFile{err: err}
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
cf := &CheckpointFile{}
|
||||
if err := json.NewDecoder(bufio.NewReader(f)).Decode(cf); err != nil {
|
||||
decoded[idx] = decodedFile{err: fmt.Errorf("decoding %s: %w", fname, err)}
|
||||
return
|
||||
}
|
||||
|
||||
decoded[idx] = decodedFile{cf: cf}
|
||||
}(i, filename)
|
||||
}
|
||||
|
||||
decodeWg.Wait()
|
||||
|
||||
for i, d := range decoded {
|
||||
if d.err != nil {
|
||||
return filesLoaded, d.err
|
||||
}
|
||||
|
||||
if d.cf.To != 0 && d.cf.To < from {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := l.loadFile(d.cf, m); err != nil {
|
||||
return filesLoaded, fmt.Errorf("loading %s: %w", jsonFiles[i], err)
|
||||
}
|
||||
filesLoaded++
|
||||
}
|
||||
}
|
||||
|
||||
return filesLoaded, nil
|
||||
}
|
||||
|
||||
// This will probably get very slow over time!
|
||||
// A solution could be some sort of an index file in which all other files
|
||||
// and the timespan they contain is listed.
|
||||
// NOTE: This now assumes that you have distinct timestamps for json and avro files
|
||||
// Also, it assumes that the timestamps are not overlapping/self-modified.
|
||||
// findFiles filters and sorts checkpoint files by timestamp.
//
// Only entries whose extension is ".bin" or ".json" are considered; all other
// entries are skipped. The timestamp is parsed from the file name, which is
// either "TIMESTAMP.ext" or, in the legacy layout, "PREFIX_TIMESTAMP.ext"
// (everything up to and including the first underscore is ignored).
//
// When findMoreRecentFiles is true, returns files with timestamp >= t (for loading),
// plus the most recent file with timestamp < t, since its contents may
// straddle the boundary.
// When false, returns files with timestamp <= t (for cleanup); files whose
// timestamp parses to 0 are never returned in this mode.
//
// The returned names are ordered by ascending timestamp, with ties broken by
// name so the result is deterministic. It returns (nil, nil) if nothing
// matches and a non-nil error if a considered file name does not contain a
// valid integer timestamp.
//
// Filters before sorting so only relevant files are sorted, keeping the sort
// cost proportional to the number of matching files rather than to the size
// of the directory.
func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {
	type fileEntry struct {
		name string
		ts   int64
	}

	var (
		candidates []fileEntry
		// preceding is the newest file strictly before the cutoff; it is only
		// tracked when findMoreRecentFiles is set.
		preceding    fileEntry
		hasPreceding bool
	)

	// Parse timestamps and pre-filter in a single pass.
	for _, e := range direntries {
		name := e.Name()
		ext := filepath.Ext(name)
		if ext != ".bin" && ext != ".json" {
			continue
		}

		// Strip the extension and, for legacy names, the "PREFIX_" part.
		base := name[:len(name)-len(ext)]
		if idx := strings.Index(base, "_"); idx >= 0 {
			base = base[idx+1:]
		}

		ts, err := strconv.ParseInt(base, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("parsing checkpoint timestamp from %q: %w", name, err)
		}

		switch {
		case findMoreRecentFiles && ts >= t:
			candidates = append(candidates, fileEntry{name: name, ts: ts})
		case findMoreRecentFiles:
			// ts < t: remember only the newest such file. The strict
			// comparison keeps the first one seen when timestamps are equal.
			if !hasPreceding || ts > preceding.ts {
				preceding, hasPreceding = fileEntry{name: name, ts: ts}, true
			}
		case ts <= t && ts != 0:
			candidates = append(candidates, fileEntry{name: name, ts: ts})
		}
	}

	// The boundary-straddling file is included whether or not any file at or
	// after the cutoff exists.
	if hasPreceding {
		candidates = append(candidates, preceding)
	}
	if len(candidates) == 0 {
		return nil, nil
	}

	// Sort only the filtered candidates.
	sort.Slice(candidates, func(i, j int) bool {
		if candidates[i].ts != candidates[j].ts {
			return candidates[i].ts < candidates[j].ts
		}
		return candidates[i].name < candidates[j].name
	})

	filenames := make([]string, len(candidates))
	for i, c := range candidates {
		filenames[i] = c.name
	}
	return filenames, nil
}
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
// ├─ RetentionInMemory: How long to keep data in RAM
|
||||
// ├─ MemoryCap: Memory limit in bytes (triggers forceFree)
|
||||
// ├─ Checkpoints: Persistence configuration
|
||||
// │ ├─ FileFormat: "avro" or "json"
|
||||
// │ ├─ FileFormat: "binary" or "json"
|
||||
// │ ├─ Interval: How often to save (e.g., "1h")
|
||||
// │ └─ RootDir: Checkpoint storage path
|
||||
// ├─ Cleanup: Long-term storage configuration
|
||||
@@ -54,17 +54,13 @@ import (
|
||||
const (
|
||||
DefaultMaxWorkers = 10
|
||||
DefaultBufferCapacity = 512
|
||||
DefaultGCTriggerInterval = 100
|
||||
DefaultAvroWorkers = 4
|
||||
DefaultCheckpointBufferMin = 3
|
||||
DefaultAvroCheckpointInterval = time.Minute
|
||||
DefaultMemoryUsageTrackerInterval = 1 * time.Hour
|
||||
)
|
||||
|
||||
// Checkpoints configures periodic persistence of in-memory metric data.
|
||||
//
|
||||
// Fields:
|
||||
// - FileFormat: "avro" (default, binary, compact) or "json" (human-readable, slower)
|
||||
// - FileFormat: "binary" (default, fast loading) or "json" (human-readable)
|
||||
// - Interval: Duration string (e.g., "1h", "30m") between checkpoint saves
|
||||
// - RootDir: Filesystem path for checkpoint files (created if missing)
|
||||
type Checkpoints struct {
|
||||
@@ -144,7 +140,7 @@ type MetricStoreConfig struct {
|
||||
// Accessed by Init(), Checkpointing(), and other lifecycle functions.
|
||||
var Keys MetricStoreConfig = MetricStoreConfig{
|
||||
Checkpoints: Checkpoints{
|
||||
FileFormat: "json",
|
||||
FileFormat: "binary",
|
||||
RootDir: "./var/checkpoints",
|
||||
},
|
||||
Cleanup: &Cleanup{
|
||||
|
||||
@@ -18,8 +18,9 @@ const configSchema = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"file-format": {
|
||||
"description": "Specify the format for checkpoint files. There are 2 variants: 'avro' and 'json'. If nothing is specified, 'avro' is default.",
|
||||
"type": "string"
|
||||
"description": "Specify the format for checkpoint files: 'binary' (default, fast loading) or 'json' (human-readable).",
|
||||
"type": "string",
|
||||
"enum": ["binary", "json"]
|
||||
},
|
||||
"interval": {
|
||||
"description": "Interval at which the metrics should be checkpointed.",
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
package metricstore
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
@@ -94,6 +95,28 @@ func reorder(buf, prefix []byte) []byte {
|
||||
}
|
||||
}
|
||||
|
||||
// decodeState bundles the scratch buffers that DecodeLine reuses from one
// line to the next, so that they can be recycled across calls through
// decodeStatePool instead of being allocated on every call.
type decodeState struct {
	// metricBuf holds a copy of the raw measurement name of the current line.
	metricBuf []byte
	// selector is the reusable selector slice handed to the memory store.
	selector []string
	// typeBuf accumulates the "type" and "type-id" tag values of the current line.
	typeBuf []byte
	// subTypeBuf accumulates the "subtype" and "stype-id" tag values of the current line.
	subTypeBuf []byte

	// prevTypeBytes/prevTypeStr cache the most recent typeBuf content and its
	// string form, so an unchanged type does not need a new string conversion.
	prevTypeBytes []byte
	prevTypeStr   string
	// prevSubTypeBytes/prevSubTypeStr are the same cache for subTypeBuf.
	prevSubTypeBytes []byte
	prevSubTypeStr   string
}

// decodeStatePool recycles decodeState values. New states start with empty
// buffers that have a small pre-allocated capacity; the cache fields start
// out at their zero values.
var decodeStatePool = sync.Pool{
	New: func() any {
		return &decodeState{
			metricBuf:  make([]byte, 0, 16),
			selector:   make([]string, 0, 4),
			typeBuf:    make([]byte, 0, 16),
			subTypeBuf: make([]byte, 0, 16),
		}
	},
}
|
||||
|
||||
// Decode lines using dec and make write calls to the MemoryStore.
|
||||
// If a line is missing its cluster tag, use clusterDefault as default.
|
||||
func DecodeLine(dec *lineprotocol.Decoder,
|
||||
@@ -102,9 +125,9 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
||||
) error {
|
||||
// Reduce allocations in loop:
|
||||
t := time.Now()
|
||||
metric, metricBuf := Metric{}, make([]byte, 0, 16)
|
||||
selector := make([]string, 0, 4)
|
||||
typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)
|
||||
metric := Metric{}
|
||||
st := decodeStatePool.Get().(*decodeState)
|
||||
defer decodeStatePool.Put(st)
|
||||
|
||||
// Optimize for the case where all lines in a "batch" are about the same
|
||||
// cluster and host. By using `WriteToLevel` (level = host), we do not need
|
||||
@@ -121,7 +144,7 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
||||
|
||||
// Needs to be copied because another call to dec.* would
|
||||
// invalidate the returned slice.
|
||||
metricBuf = append(metricBuf[:0], rawmeasurement...)
|
||||
st.metricBuf = append(st.metricBuf[:0], rawmeasurement...)
|
||||
|
||||
// The go compiler optimizes map[string(byteslice)] lookups:
|
||||
metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
|
||||
@@ -129,7 +152,7 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
||||
continue
|
||||
}
|
||||
|
||||
typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0]
|
||||
st.typeBuf, st.subTypeBuf = st.typeBuf[:0], st.subTypeBuf[:0]
|
||||
cluster, host := clusterDefault, ""
|
||||
for {
|
||||
key, val, err := dec.NextTag()
|
||||
@@ -162,41 +185,49 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
||||
}
|
||||
|
||||
// We cannot be sure that the "type" tag comes before the "type-id" tag:
|
||||
if len(typeBuf) == 0 {
|
||||
typeBuf = append(typeBuf, val...)
|
||||
if len(st.typeBuf) == 0 {
|
||||
st.typeBuf = append(st.typeBuf, val...)
|
||||
} else {
|
||||
typeBuf = reorder(typeBuf, val)
|
||||
st.typeBuf = reorder(st.typeBuf, val)
|
||||
}
|
||||
case "type-id":
|
||||
typeBuf = append(typeBuf, val...)
|
||||
st.typeBuf = append(st.typeBuf, val...)
|
||||
case "subtype":
|
||||
// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
|
||||
if len(subTypeBuf) == 0 {
|
||||
subTypeBuf = append(subTypeBuf, val...)
|
||||
if len(st.subTypeBuf) == 0 {
|
||||
st.subTypeBuf = append(st.subTypeBuf, val...)
|
||||
} else {
|
||||
subTypeBuf = reorder(subTypeBuf, val)
|
||||
// subTypeBuf = reorder(typeBuf, val)
|
||||
st.subTypeBuf = reorder(st.subTypeBuf, val)
|
||||
}
|
||||
case "stype-id":
|
||||
subTypeBuf = append(subTypeBuf, val...)
|
||||
st.subTypeBuf = append(st.subTypeBuf, val...)
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// If the cluster or host changed, the lvl was set to nil
|
||||
if lvl == nil {
|
||||
selector = selector[:2]
|
||||
selector[0], selector[1] = cluster, host
|
||||
lvl = ms.GetLevel(selector)
|
||||
st.selector = st.selector[:2]
|
||||
st.selector[0], st.selector[1] = cluster, host
|
||||
lvl = ms.GetLevel(st.selector)
|
||||
prevCluster, prevHost = cluster, host
|
||||
}
|
||||
|
||||
// subtypes:
|
||||
selector = selector[:0]
|
||||
if len(typeBuf) > 0 {
|
||||
selector = append(selector, string(typeBuf)) // <- Allocation :(
|
||||
if len(subTypeBuf) > 0 {
|
||||
selector = append(selector, string(subTypeBuf))
|
||||
// subtypes: cache []byte→string conversions; messages in a batch typically
|
||||
// share the same type/subtype so the hit rate is very high.
|
||||
st.selector = st.selector[:0]
|
||||
if len(st.typeBuf) > 0 {
|
||||
if !bytes.Equal(st.typeBuf, st.prevTypeBytes) {
|
||||
st.prevTypeBytes = append(st.prevTypeBytes[:0], st.typeBuf...)
|
||||
st.prevTypeStr = string(st.typeBuf)
|
||||
}
|
||||
st.selector = append(st.selector, st.prevTypeStr)
|
||||
if len(st.subTypeBuf) > 0 {
|
||||
if !bytes.Equal(st.subTypeBuf, st.prevSubTypeBytes) {
|
||||
st.prevSubTypeBytes = append(st.prevSubTypeBytes[:0], st.subTypeBuf...)
|
||||
st.prevSubTypeStr = string(st.subTypeBuf)
|
||||
}
|
||||
st.selector = append(st.selector, st.prevSubTypeStr)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -244,18 +275,7 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
||||
|
||||
time := t.Unix()
|
||||
|
||||
if Keys.Checkpoints.FileFormat != "json" {
|
||||
LineProtocolMessages <- &AvroStruct{
|
||||
MetricName: string(metricBuf),
|
||||
Cluster: cluster,
|
||||
Node: host,
|
||||
Selector: append([]string{}, selector...),
|
||||
Value: metric.Value,
|
||||
Timestamp: time,
|
||||
}
|
||||
}
|
||||
|
||||
if err := ms.WriteToLevel(lvl, selector, time, []Metric{metric}); err != nil {
|
||||
if err := ms.WriteToLevel(lvl, st.selector, time, []Metric{metric}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
//
|
||||
// The package organizes metrics in a tree structure (cluster → host → component) and
|
||||
// provides concurrent read/write access to metric data with configurable aggregation strategies.
|
||||
// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,
|
||||
// Background goroutines handle periodic checkpointing (binary or JSON format), archiving old data,
|
||||
// and enforcing retention policies.
|
||||
//
|
||||
// Key features:
|
||||
@@ -151,6 +151,12 @@ func Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.W
|
||||
|
||||
restoreFrom := startupTime.Add(-d)
|
||||
cclog.Infof("[METRICSTORE]> Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
|
||||
|
||||
// Lower GC target during loading to prevent excessive heap growth.
|
||||
// During checkpoint loading the heap grows rapidly, causing the GC to
|
||||
// double its target repeatedly. A lower percentage keeps it tighter.
|
||||
oldGCPercent := debug.SetGCPercent(20)
|
||||
|
||||
files, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())
|
||||
loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
|
||||
if err != nil {
|
||||
@@ -159,20 +165,16 @@ func Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.W
|
||||
cclog.Infof("[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds())
|
||||
}
|
||||
|
||||
// Try to use less memory by forcing a GC run here and then
|
||||
// lowering the target percentage. The default of 100 means
|
||||
// that only once the ratio of new allocations exceeds the
|
||||
// previously active heap, a GC is triggered.
|
||||
// Forcing a GC here will set the "previously active heap"
|
||||
// to a minimum.
|
||||
// runtime.GC()
|
||||
// Restore GC target and force a collection to set a tight baseline
|
||||
// for the "previously active heap" size, reducing long-term memory waste.
|
||||
debug.SetGCPercent(oldGCPercent)
|
||||
runtime.GC()
|
||||
|
||||
ctx, shutdown := context.WithCancel(context.Background())
|
||||
|
||||
Retention(wg, ctx)
|
||||
Checkpointing(wg, ctx)
|
||||
CleanUp(wg, ctx)
|
||||
DataStaging(wg, ctx)
|
||||
MemoryUsageTracker(wg, ctx)
|
||||
|
||||
// Note: Signal handling has been removed from this function.
|
||||
@@ -276,22 +278,10 @@ func Shutdown() {
|
||||
shutdownFunc()
|
||||
}
|
||||
|
||||
if Keys.Checkpoints.FileFormat != "json" {
|
||||
close(LineProtocolMessages)
|
||||
}
|
||||
|
||||
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
|
||||
var files int
|
||||
var err error
|
||||
|
||||
ms := GetMemoryStore()
|
||||
|
||||
if Keys.Checkpoints.FileFormat == "json" {
|
||||
files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
|
||||
} else {
|
||||
files, err = GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true)
|
||||
}
|
||||
|
||||
files, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
|
||||
if err != nil {
|
||||
cclog.Errorf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user