Mirror of https://github.com/ClusterCockpit/cc-metric-store.git, synced 2025-07-23 05:11:41 +02:00
Intermediate state
Not working yet
424	internal/api/api.go (new file)
@@ -0,0 +1,424 @@
package api

import (
	"bufio"
	"context"
	"crypto/ed25519"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"math"
	"net/http"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-metric-store/internal/config"
	"github.com/golang-jwt/jwt/v4"
	"github.com/gorilla/mux"
	"github.com/influxdata/line-protocol/v2/lineprotocol"
)

type ApiMetricData struct {
	Error *string    `json:"error,omitempty"`
	From  int64      `json:"from"`
	To    int64      `json:"to"`
	Data  FloatArray `json:"data,omitempty"`
	Avg   Float      `json:"avg"`
	Min   Float      `json:"min"`
	Max   Float      `json:"max"`
}

// TODO: Optimize this, just like the stats endpoint!
func (data *ApiMetricData) AddStats() {
	n := 0
	sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
	for _, x := range data.Data {
		if x.IsNaN() {
			continue
		}

		n += 1
		sum += float64(x)
		min = math.Min(min, float64(x))
		max = math.Max(max, float64(x))
	}

	if n > 0 {
		avg := sum / float64(n)
		data.Avg = Float(avg)
		data.Min = Float(min)
		data.Max = Float(max)
	} else {
		data.Avg, data.Min, data.Max = NaN, NaN, NaN
	}
}

func (data *ApiMetricData) ScaleBy(f Float) {
	if f == 0 || f == 1 {
		return
	}

	data.Avg *= f
	data.Min *= f
	data.Max *= f
	for i := 0; i < len(data.Data); i++ {
		data.Data[i] *= f
	}
}

func (data *ApiMetricData) PadDataWithNull(from, to int64, metric string) {
	minfo, ok := memoryStore.metrics[metric]
	if !ok {
		return
	}

	if (data.From / minfo.Frequency) > (from / minfo.Frequency) {
		padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))
		ndata := make([]Float, 0, padfront+len(data.Data))
		for i := 0; i < padfront; i++ {
			ndata = append(ndata, NaN)
		}
		ndata = append(ndata, data.Data...)
		data.Data = ndata
	}
}

func handleFree(rw http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(rw, "Method Not Allowed", http.StatusMethodNotAllowed)
		return
	}

	rawTo := r.URL.Query().Get("to")
	if rawTo == "" {
		http.Error(rw, "'to' is a required query parameter", http.StatusBadRequest)
		return
	}

	to, err := strconv.ParseInt(rawTo, 10, 64)
	if err != nil {
		http.Error(rw, err.Error(), http.StatusBadRequest)
		return
	}

	// TODO: lastCheckpoint might be modified by different goroutines.
	// Load it using the sync/atomic package?
	freeUpTo := lastCheckpoint.Unix()
	if to < freeUpTo {
		freeUpTo = to
	}

	bodyDec := json.NewDecoder(r.Body)
	var selectors [][]string
	if err := bodyDec.Decode(&selectors); err != nil {
		http.Error(rw, err.Error(), http.StatusBadRequest)
		return
	}

	n := 0
	for _, sel := range selectors {
		bn, err := memoryStore.Free(sel, freeUpTo)
		if err != nil {
			http.Error(rw, err.Error(), http.StatusInternalServerError)
			return
		}

		n += bn
	}

	rw.WriteHeader(http.StatusOK)
	fmt.Fprintf(rw, "buffers freed: %d\n", n)
}

func handleWrite(rw http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodPost {
		http.Error(rw, "Method Not Allowed", http.StatusMethodNotAllowed)
		return
	}

	bytes, err := io.ReadAll(r.Body)
	if err != nil {
		log.Printf("error while reading request body: %s", err.Error())
		http.Error(rw, err.Error(), http.StatusInternalServerError)
		return
	}

	if debugDump != io.Discard {
		now := time.Now()
		msg := make([]byte, 0, 512)
		msg = append(msg, "\n--- local unix time: "...)
		msg = strconv.AppendInt(msg, now.Unix(), 10)
		msg = append(msg, " ---\n"...)

		debugDumpLock.Lock()
		defer debugDumpLock.Unlock()
		if _, err := debugDump.Write(msg); err != nil {
			log.Printf("error while writing to debug dump: %s", err.Error())
		}
		if _, err := debugDump.Write(bytes); err != nil {
			log.Printf("error while writing to debug dump: %s", err.Error())
		}
		return
	}

	dec := lineprotocol.NewDecoderWithBytes(bytes)
	if err := decodeLine(dec, memoryStore, r.URL.Query().Get("cluster")); err != nil {
		log.Printf("/api/write error: %s", err.Error())
		http.Error(rw, err.Error(), http.StatusBadRequest)
		return
	}
	rw.WriteHeader(http.StatusOK)
}

type ApiQueryRequest struct {
	Cluster     string     `json:"cluster"`
	From        int64      `json:"from"`
	To          int64      `json:"to"`
	WithStats   bool       `json:"with-stats"`
	WithData    bool       `json:"with-data"`
	WithPadding bool       `json:"with-padding"`
	Queries     []ApiQuery `json:"queries"`
	ForAllNodes []string   `json:"for-all-nodes"`
}

type ApiQueryResponse struct {
	Queries []ApiQuery        `json:"queries,omitempty"`
	Results [][]ApiMetricData `json:"results"`
}

type ApiQuery struct {
	Metric      string   `json:"metric"`
	Hostname    string   `json:"host"`
	Aggregate   bool     `json:"aggreg"`
	ScaleFactor Float    `json:"scale-by,omitempty"`
	Type        *string  `json:"type,omitempty"`
	TypeIds     []string `json:"type-ids,omitempty"`
	SubType     *string  `json:"subtype,omitempty"`
	SubTypeIds  []string `json:"subtype-ids,omitempty"`
}

func handleQuery(rw http.ResponseWriter, r *http.Request) {
	var err error
	req := ApiQueryRequest{WithStats: true, WithData: true, WithPadding: true}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(rw, err.Error(), http.StatusBadRequest)
		return
	}

	response := ApiQueryResponse{
		Results: make([][]ApiMetricData, 0, len(req.Queries)),
	}
	if req.ForAllNodes != nil {
		nodes := memoryStore.ListChildren([]string{req.Cluster})
		for _, node := range nodes {
			for _, metric := range req.ForAllNodes {
				q := ApiQuery{
					Metric:   metric,
					Hostname: node,
				}
				req.Queries = append(req.Queries, q)
				response.Queries = append(response.Queries, q)
			}
		}
	}

	for _, query := range req.Queries {
		sels := make([]Selector, 0, 1)
		if query.Aggregate || query.Type == nil {
			sel := Selector{{String: req.Cluster}, {String: query.Hostname}}
			if query.Type != nil {
				if len(query.TypeIds) == 1 {
					sel = append(sel, SelectorElement{String: *query.Type + query.TypeIds[0]})
				} else {
					ids := make([]string, len(query.TypeIds))
					for i, id := range query.TypeIds {
						ids[i] = *query.Type + id
					}
					sel = append(sel, SelectorElement{Group: ids})
				}

				if query.SubType != nil {
					if len(query.SubTypeIds) == 1 {
						sel = append(sel, SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
					} else {
						ids := make([]string, len(query.SubTypeIds))
						for i, id := range query.SubTypeIds {
							ids[i] = *query.SubType + id
						}
						sel = append(sel, SelectorElement{Group: ids})
					}
				}
			}
			sels = append(sels, sel)
		} else {
			for _, typeId := range query.TypeIds {
				if query.SubType != nil {
					for _, subTypeId := range query.SubTypeIds {
						sels = append(sels, Selector{
							{String: req.Cluster},
							{String: query.Hostname},
							{String: *query.Type + typeId},
							{String: *query.SubType + subTypeId},
						})
					}
				} else {
					sels = append(sels, Selector{
						{String: req.Cluster},
						{String: query.Hostname},
						{String: *query.Type + typeId},
					})
				}
			}
		}

		// log.Printf("query: %#v\n", query)
		// log.Printf("sels: %#v\n", sels)

		res := make([]ApiMetricData, 0, len(sels))
		for _, sel := range sels {
			data := ApiMetricData{}
			data.Data, data.From, data.To, err = memoryStore.Read(sel, query.Metric, req.From, req.To)
			// log.Printf("data: %#v, %#v, %#v, %#v", data.Data, data.From, data.To, err)
			if err != nil {
				msg := err.Error()
				data.Error = &msg
				res = append(res, data)
				continue
			}

			if req.WithStats {
				data.AddStats()
			}
			if query.ScaleFactor != 0 {
				data.ScaleBy(query.ScaleFactor)
			}
			if req.WithPadding {
				data.PadDataWithNull(req.From, req.To, query.Metric)
			}
			if !req.WithData {
				data.Data = nil
			}
			res = append(res, data)
		}
		response.Results = append(response.Results, res)
	}

	rw.Header().Set("Content-Type", "application/json")
	bw := bufio.NewWriter(rw)
	defer bw.Flush()
	if err := json.NewEncoder(bw).Encode(response); err != nil {
		log.Print(err)
		return
	}
}

func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler {
	cacheLock := sync.RWMutex{}
	cache := map[string]*jwt.Token{}

	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
		authheader := r.Header.Get("Authorization")
		if authheader == "" || !strings.HasPrefix(authheader, "Bearer ") {
			http.Error(rw, "Use JWT Authentication", http.StatusUnauthorized)
			return
		}

		rawtoken := authheader[len("Bearer "):]
		cacheLock.RLock()
		token, ok := cache[rawtoken]
		cacheLock.RUnlock()
		if ok && token.Claims.Valid() == nil {
			next.ServeHTTP(rw, r)
			return
		}

		// The actual token is ignored for now.
		// In case expiration and so on are specified, the Parse function
		// already returns an error for expired tokens.
		var err error
		token, err = jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) {
			if t.Method != jwt.SigningMethodEdDSA {
				return nil, errors.New("only Ed25519/EdDSA supported")
			}

			return publicKey, nil
		})
		if err != nil {
			http.Error(rw, err.Error(), http.StatusUnauthorized)
			return
		}

		cacheLock.Lock()
		cache[rawtoken] = token
		cacheLock.Unlock()

		// Let the request through...
		next.ServeHTTP(rw, r)
	})
}

func StartApiServer(ctx context.Context, httpConfig *config.HttpConfig) error {
	r := mux.NewRouter()

	r.HandleFunc("/api/free", handleFree)
	r.HandleFunc("/api/write", handleWrite)
	r.HandleFunc("/api/query", handleQuery)
	r.HandleFunc("/api/debug", func(rw http.ResponseWriter, r *http.Request) {
		raw := r.URL.Query().Get("selector")
		selector := []string{}
		if len(raw) != 0 {
			selector = strings.Split(raw, ":")
		}

		if err := memoryStore.DebugDump(bufio.NewWriter(rw), selector); err != nil {
			rw.WriteHeader(http.StatusBadRequest)
			rw.Write([]byte(err.Error()))
		}
	})

	server := &http.Server{
		Handler:      r,
		Addr:         httpConfig.Address,
		WriteTimeout: 30 * time.Second,
		ReadTimeout:  30 * time.Second,
	}

	if len(conf.JwtPublicKey) > 0 {
		buf, err := base64.StdEncoding.DecodeString(conf.JwtPublicKey)
		if err != nil {
			return err
		}
		publicKey := ed25519.PublicKey(buf)
		server.Handler = authentication(server.Handler, publicKey)
	}

	go func() {
		if httpConfig.CertFile != "" && httpConfig.KeyFile != "" {
			log.Printf("API https endpoint listening on '%s'\n", httpConfig.Address)
			err := server.ListenAndServeTLS(httpConfig.CertFile, httpConfig.KeyFile)
			if err != nil && err != http.ErrServerClosed {
				log.Println(err)
			}
		} else {
			log.Printf("API http endpoint listening on '%s'\n", httpConfig.Address)
			err := server.ListenAndServe()
			if err != nil && err != http.ErrServerClosed {
				log.Println(err)
			}
		}
	}()

	<-ctx.Done()
	err := server.Shutdown(context.Background())
	log.Println("API server shut down")
	return err
}
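For reference, a minimal client sketch for the /api/query endpoint above. The address localhost:8081, the cluster "emmy", the host "host123" and the metric "flops_any" are made-up placeholders, and JWT authentication is assumed to be disabled; the JSON field names follow the ApiQueryRequest and ApiQuery structs.

package main

import (
	"fmt"
	"io"
	"net/http"
	"strings"
)

func main() {
	// Query one metric, aggregated over two (hypothetical) CPUs of one host.
	body := `{
		"cluster": "emmy", "from": 1640000000, "to": 1640000300,
		"with-stats": true, "with-data": true,
		"queries": [{"metric": "flops_any", "host": "host123",
		             "aggreg": true, "type": "cpu", "type-ids": ["0", "1"]}]
	}`
	resp, err := http.Post("http://localhost:8081/api/query", "application/json", strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	answer, _ := io.ReadAll(resp.Body)
	// e.g. {"results":[[{"from":...,"to":...,"data":[...],"avg":...,"min":...,"max":...}]]}
	fmt.Println(string(answer))
}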
316	internal/api/lineprotocol.go (new file)
@@ -0,0 +1,316 @@
package api

import (
	"context"
	"errors"
	"fmt"
	"log"
	"net"
	"sync"
	"time"

	"github.com/ClusterCockpit/cc-metric-store/internal/config"
	"github.com/ClusterCockpit/cc-metric-store/internal/memstore"
	"github.com/ClusterCockpit/cc-metric-store/internal/util"
	"github.com/influxdata/line-protocol/v2/lineprotocol"
	"github.com/nats-io/nats.go"
)

type Metric struct {
	Name  string
	Value util.Float
	mc    config.MetricConfig
}

// Currently unused, could be used to send messages via raw TCP.
// Each connection is handled in its own goroutine. This is a blocking function.
func ReceiveRaw(ctx context.Context, listener net.Listener, handleLine func(*lineprotocol.Decoder, string) error) error {
	var wg sync.WaitGroup

	wg.Add(1)
	go func() {
		defer wg.Done()
		<-ctx.Done()
		if err := listener.Close(); err != nil {
			log.Printf("listener.Close(): %s", err.Error())
		}
	}()

	for {
		conn, err := listener.Accept()
		if err != nil {
			if errors.Is(err, net.ErrClosed) {
				break
			}

			log.Printf("listener.Accept(): %s", err.Error())
			continue
		}

		wg.Add(2)
		go func() {
			defer wg.Done()
			defer conn.Close()

			dec := lineprotocol.NewDecoder(conn)
			connctx, cancel := context.WithCancel(context.Background())
			defer cancel()
			go func() {
				defer wg.Done()
				select {
				case <-connctx.Done():
					conn.Close()
				case <-ctx.Done():
					conn.Close()
				}
			}()

			if err := handleLine(dec, "default"); err != nil {
				if errors.Is(err, net.ErrClosed) {
					return
				}

				log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
				errmsg := make([]byte, 0, 128)
				errmsg = append(errmsg, `error: `...)
				errmsg = append(errmsg, err.Error()...)
				errmsg = append(errmsg, '\n')
				conn.Write(errmsg)
			}
		}()
	}

	wg.Wait()
	return nil
}

// Connect to a nats server and subscribe to "updates". This is a blocking
// function. handleLine will be called for each line received via nats.
// Cancel the context for graceful termination.
func ReceiveNats(conf *config.NatsConfig, handleLine func(*lineprotocol.Decoder, string) error, workers int, ctx context.Context) error {
	var opts []nats.Option
	if conf.Username != "" && conf.Password != "" {
		opts = append(opts, nats.UserInfo(conf.Username, conf.Password))
	}

	nc, err := nats.Connect(conf.Address, opts...)
	if err != nil {
		return err
	}
	defer nc.Close()

	var wg sync.WaitGroup
	var subs []*nats.Subscription

	msgs := make(chan *nats.Msg, workers*2)

	for _, sc := range conf.Subscriptions {
		clusterTag := sc.ClusterTag
		var sub *nats.Subscription
		if workers > 1 {
			wg.Add(workers)

			for i := 0; i < workers; i++ {
				go func() {
					defer wg.Done()
					for m := range msgs {
						dec := lineprotocol.NewDecoderWithBytes(m.Data)
						if err := handleLine(dec, clusterTag); err != nil {
							log.Printf("error: %s\n", err.Error())
						}
					}
				}()
			}

			sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
				msgs <- m
			})
		} else {
			sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
				dec := lineprotocol.NewDecoderWithBytes(m.Data)
				if err := handleLine(dec, clusterTag); err != nil {
					log.Printf("error: %s\n", err.Error())
				}
			})
		}

		if err != nil {
			return err
		}
		log.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address)
		subs = append(subs, sub)
	}

	<-ctx.Done()
	for _, sub := range subs {
		if err = sub.Unsubscribe(); err != nil {
			log.Printf("NATS unsubscribe failed: %s", err.Error())
		}
	}
	close(msgs)
	wg.Wait()

	nc.Close()
	log.Println("NATS connection closed")
	return nil
}

// Place `prefix` in front of `buf`, doing so in place in `buf` if possible.
func reorder(buf, prefix []byte) []byte {
	n := len(prefix)
	m := len(buf)
	if cap(buf) < m+n {
		return append(prefix[:n:n], buf...)
	} else {
		buf = buf[:n+m]
		for i := m - 1; i >= 0; i-- {
			buf[i+n] = buf[i]
		}
		for i := 0; i < n; i++ {
			buf[i] = prefix[i]
		}
		return buf
	}
}

// Decode lines using dec and make write calls to the MemoryStore.
// If a line is missing its cluster tag, use clusterDefault as the default.
func decodeLine(dec *lineprotocol.Decoder, memoryStore *memstore.MemoryStore, clusterDefault string) error {
	// Reduce allocations in loop:
	t := time.Now()
	metric, metricBuf := Metric{}, make([]byte, 0, 16)
	selector := make([]string, 0, 4)
	typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)

	// Optimize for the case where all lines in a "batch" are about the same
	// cluster and host. By using `WriteToLevel` (level = host), we do not need
	// to take the root- and cluster-level lock as often.
	var lvl *level = nil
	var prevCluster, prevHost string = "", ""

	var ok bool
	for dec.Next() {
		rawmeasurement, err := dec.Measurement()
		if err != nil {
			return err
		}

		// Needs to be copied because another call to dec.* would
		// invalidate the returned slice.
		metricBuf = append(metricBuf[:0], rawmeasurement...)

		// The go compiler optimizes map[string(byteslice)] lookups:
		metric.mc, ok = memoryStore.metrics[string(rawmeasurement)]
		if !ok {
			continue
		}

		typeBuf, subTypeBuf = typeBuf[:0], subTypeBuf[:0]
		cluster, host := clusterDefault, ""
		for {
			key, val, err := dec.NextTag()
			if err != nil {
				return err
			}
			if key == nil {
				break
			}

			// The go compiler optimizes string([]byte{...}) == "...":
			switch string(key) {
			case "cluster":
				if string(val) == prevCluster {
					cluster = prevCluster
				} else {
					cluster = string(val)
					lvl = nil
				}
			case "hostname", "host":
				if string(val) == prevHost {
					host = prevHost
				} else {
					host = string(val)
					lvl = nil
				}
			case "type":
				if string(val) == "node" {
					break
				}

				// We cannot be sure that the "type" tag comes before the "type-id" tag:
				if len(typeBuf) == 0 {
					typeBuf = append(typeBuf, val...)
				} else {
					typeBuf = reorder(typeBuf, val)
				}
			case "type-id":
				typeBuf = append(typeBuf, val...)
			case "subtype":
				// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
				if len(subTypeBuf) == 0 {
					subTypeBuf = append(subTypeBuf, val...)
				} else {
					subTypeBuf = reorder(subTypeBuf, val)
				}
			case "stype-id":
				subTypeBuf = append(subTypeBuf, val...)
			default:
				// Ignore unknown tags (cc-metric-collector might, for example, send a unit that we do not need).
				// return fmt.Errorf("unknown tag: '%s' (value: '%s')", string(key), string(val))
			}
		}

		// If the cluster or host changed, lvl was set to nil:
		if lvl == nil {
			selector = selector[:2]
			selector[0], selector[1] = cluster, host
			lvl = memoryStore.GetLevel(selector)
			prevCluster, prevHost = cluster, host
		}

		// subtypes:
		selector = selector[:0]
		if len(typeBuf) > 0 {
			selector = append(selector, string(typeBuf)) // <- Allocation :(
			if len(subTypeBuf) > 0 {
				selector = append(selector, string(subTypeBuf))
			}
		}

		for {
			key, val, err := dec.NextField()
			if err != nil {
				return err
			}

			if key == nil {
				break
			}

			if string(key) != "value" {
				return fmt.Errorf("unknown field: '%s' (value: %#v)", string(key), val)
			}

			if val.Kind() == lineprotocol.Float {
				metric.Value = util.Float(val.FloatV())
			} else if val.Kind() == lineprotocol.Int {
				metric.Value = util.Float(val.IntV())
			} else if val.Kind() == lineprotocol.Uint {
				metric.Value = util.Float(val.UintV())
			} else {
				return fmt.Errorf("unsupported value type in message: %s", val.Kind().String())
			}
		}

		if t, err = dec.Time(lineprotocol.Second, t); err != nil {
			return err
		}

		if err := memoryStore.WriteToLevel(lvl, selector, t.Unix(), []Metric{metric}); err != nil {
			return err
		}
	}
	return nil
}
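A sketch of the wire format decodeLine consumes: InfluxDB line protocol with cluster/hostname tags, optional type/type-id tags, a single "value" field and a timestamp in seconds. The metric name, tag values and sample value below are invented for illustration.

// Hypothetical input for decodeLine; the tags shown are the ones the
// switch statement above recognizes. "type=cpu" and "type-id=0" are
// merged into the single selector element "cpu0" via reorder().
line := []byte("flops_any,cluster=emmy,hostname=host123,type=cpu,type-id=0 value=42.0 1640000000\n")
dec := lineprotocol.NewDecoderWithBytes(line)
if err := decodeLine(dec, memoryStore, "emmy"); err != nil {
	log.Printf("decode failed: %s", err.Error())
}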
112	internal/config/config.go (new file)
@@ -0,0 +1,112 @@
package config

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
)

// For aggregation over multiple values at different cpus/sockets/..., not time!
type AggregationStrategy int

const (
	NoAggregation AggregationStrategy = iota
	SumAggregation
	AvgAggregation
)

func (as *AggregationStrategy) UnmarshalJSON(data []byte) error {
	var str string
	if err := json.Unmarshal(data, &str); err != nil {
		return err
	}

	switch str {
	case "":
		*as = NoAggregation
	case "sum":
		*as = SumAggregation
	case "avg":
		*as = AvgAggregation
	default:
		return fmt.Errorf("invalid aggregation strategy: %#v", str)
	}
	return nil
}

type MetricConfig struct {
	// Interval in seconds at which measurements will arrive.
	Frequency int64 `json:"frequency"`

	// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
	Aggregation AggregationStrategy `json:"aggregation"`

	// Private, used internally...
	offset int
}

type HttpConfig struct {
	// Address to bind to, for example "0.0.0.0:8081"
	Address string `json:"address"`

	// If not the empty string, use https with this as the certificate file
	CertFile string `json:"https-cert-file"`

	// If not the empty string, use https with this as the key file
	KeyFile string `json:"https-key-file"`
}

type NatsConfig struct {
	// Address of the nats server
	Address string `json:"address"`

	// Username/Password, optional
	Username string `json:"username"`
	Password string `json:"password"`

	Subscriptions []struct {
		// Channel name
		SubscribeTo string `json:"subscribe-to"`

		// Allow lines without a cluster tag, use this as default, optional
		ClusterTag string `json:"cluster-tag"`
	} `json:"subscriptions"`
}

type Config struct {
	Metrics     map[string]MetricConfig `json:"metrics"`
	HttpConfig  *HttpConfig             `json:"http-api"`
	Checkpoints struct {
		Interval string `json:"interval"`
		RootDir  string `json:"directory"`
		Restore  string `json:"restore"`
	} `json:"checkpoints"`
	Debug struct {
		DumpToFile string `json:"dump-to-file"`
		EnableGops bool   `json:"gops"`
	} `json:"debug"`
	RetentionInMemory string `json:"retention-in-memory"`
	JwtPublicKey      string `json:"jwt-public-key"`
	Archive           struct {
		Interval      string `json:"interval"`
		RootDir       string `json:"directory"`
		DeleteInstead bool   `json:"delete-instead"`
	} `json:"archive"`
	Nats []*NatsConfig `json:"nats"`
}

func LoadConfiguration(file string) Config {
	var config Config
	configFile, err := os.Open(file)
	if err != nil {
		log.Fatal(err)
	}
	defer configFile.Close()
	dec := json.NewDecoder(configFile)
	dec.DisallowUnknownFields()
	if err := dec.Decode(&config); err != nil {
		log.Fatal(err)
	}
	return config
}
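A sketch of a configuration file that LoadConfiguration would accept; all values are illustrative. Note that dec.DisallowUnknownFields() makes the decoder reject any field not declared in the Config struct.

// Example configuration matching the JSON tags above (invented values).
const exampleConfig = `{
	"metrics": { "flops_any": { "frequency": 60, "aggregation": "avg" } },
	"http-api": { "address": "0.0.0.0:8081", "https-cert-file": "", "https-key-file": "" },
	"checkpoints": { "interval": "12h", "directory": "./var/checkpoints", "restore": "48h" },
	"archive": { "interval": "168h", "directory": "./var/archive", "delete-instead": false },
	"retention-in-memory": "48h",
	"jwt-public-key": "",
	"debug": { "dump-to-file": "", "gops": false },
	"nats": [{
		"address": "nats://localhost:4222", "username": "", "password": "",
		"subscriptions": [{ "subscribe-to": "updates", "cluster-tag": "emmy" }]
	}]
}`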
597	internal/memstore/archive.go (new file)
@@ -0,0 +1,597 @@
package memstore

import (
	"archive/zip"
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log"
	"os"
	"path"
	"path/filepath"
	"runtime"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"

	"github.com/ClusterCockpit/cc-metric-store/internal/util"
)

// Whenever changed, update MarshalJSON as well!
type CheckpointMetrics struct {
	Frequency int64        `json:"frequency"`
	Start     int64        `json:"start"`
	Data      []util.Float `json:"data"`
}

// As `Float` implements a custom MarshalJSON() function,
// serializing an array of such types has more overhead
// than one would assume (because of extra allocations, interfaces and so on).
func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
	buf := make([]byte, 0, 128+len(cm.Data)*8)
	buf = append(buf, `{"frequency":`...)
	buf = strconv.AppendInt(buf, cm.Frequency, 10)
	buf = append(buf, `,"start":`...)
	buf = strconv.AppendInt(buf, cm.Start, 10)
	buf = append(buf, `,"data":[`...)
	for i, x := range cm.Data {
		if i != 0 {
			buf = append(buf, ',')
		}
		if x.IsNaN() {
			buf = append(buf, `null`...)
		} else {
			buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)
		}
	}
	buf = append(buf, `]}`...)
	return buf, nil
}

type CheckpointFile struct {
	From     int64                         `json:"from"`
	To       int64                         `json:"to"`
	Metrics  map[string]*CheckpointMetrics `json:"metrics"`
	Children map[string]*CheckpointFile    `json:"children"`
}

var ErrNoNewData error = errors.New("all data already archived")

var NumWorkers int = 4

func init() {
	maxWorkers := 10
	NumWorkers = runtime.NumCPU()/2 + 1
	if NumWorkers > maxWorkers {
		NumWorkers = maxWorkers
	}
}

// Metrics stored at the lowest 2 levels are not stored away (root and cluster)!
// On a per-host basis a new JSON file is created. I have no idea if this will scale.
// The good thing: Only one host at a time is locked, so this function can run
// in parallel to writes/reads.
func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
	levels := make([]*level, 0)
	selectors := make([][]string, 0)
	m.root.lock.RLock()
	for sel1, l1 := range m.root.children {
		l1.lock.RLock()
		for sel2, l2 := range l1.children {
			levels = append(levels, l2)
			selectors = append(selectors, []string{sel1, sel2})
		}
		l1.lock.RUnlock()
	}
	m.root.lock.RUnlock()

	type workItem struct {
		level    *level
		dir      string
		selector []string
	}

	n, errs := int32(0), int32(0)

	var wg sync.WaitGroup
	wg.Add(NumWorkers)
	work := make(chan workItem, NumWorkers*2)
	for worker := 0; worker < NumWorkers; worker++ {
		go func() {
			defer wg.Done()

			for workItem := range work {
				if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {
					if err == ErrNoNewData {
						continue
					}

					log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error())
					atomic.AddInt32(&errs, 1)
				} else {
					atomic.AddInt32(&n, 1)
				}
			}
		}()
	}

	for i := 0; i < len(levels); i++ {
		dir := path.Join(dir, path.Join(selectors[i]...))
		work <- workItem{
			level:    levels[i],
			dir:      dir,
			selector: selectors[i],
		}
	}

	close(work)
	wg.Wait()

	if errs > 0 {
		return int(n), fmt.Errorf("%d errors happened while creating checkpoints (%d successes)", errs, n)
	}
	return int(n), nil
}

func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
	l.lock.RLock()
	defer l.lock.RUnlock()

	retval := &CheckpointFile{
		From:     from,
		To:       to,
		Metrics:  make(map[string]*CheckpointMetrics),
		Children: make(map[string]*CheckpointFile),
	}

	for metric, minfo := range m.metrics {
		b := l.metrics[minfo.offset]
		if b == nil {
			continue
		}

		allArchived := true
		b.iterFromTo(from, to, func(b *buffer) error {
			if !b.archived {
				allArchived = false
			}
			return nil
		})

		if allArchived {
			continue
		}

		data := make([]util.Float, (to-from)/b.frequency+1)
		data, start, end, err := b.read(from, to, data)
		if err != nil {
			return nil, err
		}

		for i := int((end - start) / b.frequency); i < len(data); i++ {
			data[i] = util.NaN
		}

		retval.Metrics[metric] = &CheckpointMetrics{
			Frequency: b.frequency,
			Start:     start,
			Data:      data,
		}
	}

	for name, child := range l.children {
		val, err := child.toCheckpointFile(from, to, m)
		if err != nil {
			return nil, err
		}

		if val != nil {
			retval.Children[name] = val
		}
	}

	if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
		return nil, nil
	}

	return retval, nil
}

func (l *level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
	cf, err := l.toCheckpointFile(from, to, m)
	if err != nil {
		return err
	}

	if cf == nil {
		return ErrNoNewData
	}

	filename := path.Join(dir, fmt.Sprintf("%d.json", from))
	f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil && os.IsNotExist(err) {
		err = os.MkdirAll(dir, 0755)
		if err == nil {
			f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644)
		}
	}
	if err != nil {
		return err
	}
	defer f.Close()

	bw := bufio.NewWriter(f)
	if err = json.NewEncoder(bw).Encode(cf); err != nil {
		return err
	}

	return bw.Flush()
}

// Metrics stored at the lowest 2 levels are not loaded (root and cluster)!
// This function can only be called once and before the very first write or read.
// Different hosts' data is loaded into memory in parallel.
func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
	var wg sync.WaitGroup
	work := make(chan [2]string, NumWorkers)
	n, errs := int32(0), int32(0)

	wg.Add(NumWorkers)
	for worker := 0; worker < NumWorkers; worker++ {
		go func() {
			defer wg.Done()
			for host := range work {
				lvl := m.root.findLevelOrCreate(host[:], len(m.metrics))
				nn, err := lvl.fromCheckpoint(filepath.Join(dir, host[0], host[1]), from, m)
				if err != nil {
					// log.Fatalf here would kill the process before the error count is returned.
					log.Printf("error while loading checkpoints: %s", err.Error())
					atomic.AddInt32(&errs, 1)
				}
				atomic.AddInt32(&n, int32(nn))
			}
		}()
	}

	i := 0
	clustersDir, err := os.ReadDir(dir)
	for _, clusterDir := range clustersDir {
		if !clusterDir.IsDir() {
			err = errors.New("expected only directories at first level of checkpoints/ directory")
			goto done
		}

		hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
		if e != nil {
			err = e
			goto done
		}

		for _, hostDir := range hostsDir {
			if !hostDir.IsDir() {
				err = errors.New("expected only directories at second level of checkpoints/ directory")
				goto done
			}

			i++
			if i%NumWorkers == 0 && i > 100 {
				// Forcing garbage collection runs here regularly during the loading of checkpoints
				// will decrease the total heap size after loading everything back to memory is done.
				// While loading data, the heap will grow fast, so the GC target size will double
				// almost always. By forcing GCs here, we can keep it growing more slowly so that
				// at the end, less memory is wasted.
				runtime.GC()
			}

			work <- [2]string{clusterDir.Name(), hostDir.Name()}
		}
	}
done:
	close(work)
	wg.Wait()

	if err != nil {
		return int(n), err
	}

	if errs > 0 {
		return int(n), fmt.Errorf("%d errors happened while loading checkpoints (%d successes)", errs, n)
	}
	return int(n), nil
}

func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
	for name, metric := range cf.Metrics {
		n := len(metric.Data)
		b := &buffer{
			frequency: metric.Frequency,
			start:     metric.Start,
			data:      metric.Data[0:n:n], // Space is wasted here :(
			prev:      nil,
			next:      nil,
			archived:  true,
		}
		b.close()

		minfo, ok := m.metrics[name]
		if !ok {
			continue
			// return errors.New("unknown metric: " + name)
		}

		prev := l.metrics[minfo.offset]
		if prev != nil {
			if prev.start > b.start {
				return errors.New("checkpoint files are not in chronological order")
			}

			b.prev = prev
			prev.next = b
		}
		l.metrics[minfo.offset] = b
	}

	if len(cf.Children) > 0 && l.children == nil {
		l.children = make(map[string]*level)
	}

	for sel, childCf := range cf.Children {
		child, ok := l.children[sel]
		if !ok {
			child = &level{
				metrics:  make([]*buffer, len(m.metrics)),
				children: nil,
			}
			l.children[sel] = child
		}

		if err := child.loadFile(childCf, m); err != nil {
			return err
		}
	}

	return nil
}

func (l *level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, error) {
	direntries, err := os.ReadDir(dir)
	if err != nil {
		if os.IsNotExist(err) {
			return 0, nil
		}

		return 0, err
	}

	jsonFiles := make([]fs.DirEntry, 0)
	filesLoaded := 0
	for _, e := range direntries {
		if e.IsDir() {
			child := &level{
				metrics:  make([]*buffer, len(m.metrics)),
				children: make(map[string]*level),
			}

			files, err := child.fromCheckpoint(path.Join(dir, e.Name()), from, m)
			filesLoaded += files
			if err != nil {
				return filesLoaded, err
			}

			l.children[e.Name()] = child
		} else if strings.HasSuffix(e.Name(), ".json") {
			jsonFiles = append(jsonFiles, e)
		} else {
			return filesLoaded, errors.New("unexpected file: " + dir + "/" + e.Name())
		}
	}

	files, err := findFiles(jsonFiles, from, true)
	if err != nil {
		return filesLoaded, err
	}

	for _, filename := range files {
		f, err := os.Open(path.Join(dir, filename))
		if err != nil {
			return filesLoaded, err
		}
		defer f.Close()

		br := bufio.NewReader(f)
		cf := &CheckpointFile{}
		if err = json.NewDecoder(br).Decode(cf); err != nil {
			return filesLoaded, err
		}

		if cf.To != 0 && cf.To < from {
			continue
		}

		if err = l.loadFile(cf, m); err != nil {
			return filesLoaded, err
		}

		filesLoaded += 1
	}

	return filesLoaded, nil
}

// This will probably get very slow over time!
// A solution could be some sort of an index file in which all other files
// and the timespan they contain is listed.
func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {
	nums := map[string]int64{}
	for _, e := range direntries {
		ts, err := strconv.ParseInt(strings.TrimSuffix(e.Name(), ".json"), 10, 64)
		if err != nil {
			return nil, err
		}
		nums[e.Name()] = ts
	}

	sort.Slice(direntries, func(i, j int) bool {
		a, b := direntries[i], direntries[j]
		return nums[a.Name()] < nums[b.Name()]
	})

	filenames := make([]string, 0)
	for i := 0; i < len(direntries); i++ {
		e := direntries[i]
		ts1 := nums[e.Name()]

		if (findMoreRecentFiles && t <= ts1) || i == len(direntries)-1 {
			filenames = append(filenames, e.Name())
			continue
		}

		enext := direntries[i+1]
		ts2 := nums[enext.Name()]

		if findMoreRecentFiles {
			if ts1 < t && t < ts2 {
				filenames = append(filenames, e.Name())
			}
		} else {
			if ts2 < t {
				filenames = append(filenames, e.Name())
			}
		}
	}

	return filenames, nil
}

// ZIP all checkpoint files older than `from` together and write them to the `archiveDir`,
// deleting them from the `checkpointsDir`.
func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) {
	entries1, err := os.ReadDir(checkpointsDir)
	if err != nil {
		return 0, err
	}

	type workItem struct {
		cdir, adir    string
		cluster, host string
	}

	var wg sync.WaitGroup
	n, errs := int32(0), int32(0)
	work := make(chan workItem, NumWorkers)

	wg.Add(NumWorkers)
	for worker := 0; worker < NumWorkers; worker++ {
		go func() {
			defer wg.Done()
			for workItem := range work {
				m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead)
				if err != nil {
					log.Printf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
					atomic.AddInt32(&errs, 1)
				}
				atomic.AddInt32(&n, int32(m))
			}
		}()
	}

	for _, de1 := range entries1 {
		entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
		if e != nil {
			err = e
			continue
		}

		for _, de2 := range entries2 {
			cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
			adir := filepath.Join(archiveDir, de1.Name(), de2.Name())
			work <- workItem{
				adir: adir, cdir: cdir,
				cluster: de1.Name(), host: de2.Name(),
			}
		}
	}

	close(work)
	wg.Wait()

	if err != nil {
		return int(n), err
	}

	if errs > 0 {
		return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
	}
	return int(n), nil
}

// Helper function for `ArchiveCheckpoints`.
func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return 0, err
	}

	files, err := findFiles(entries, from, false)
	if err != nil {
		return 0, err
	}

	if deleteInstead {
		n := 0
		for _, checkpoint := range files {
			filename := filepath.Join(dir, checkpoint)
			if err = os.Remove(filename); err != nil {
				return n, err
			}
			n += 1
		}
		return n, nil
	}

	filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
	f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil && os.IsNotExist(err) {
		err = os.MkdirAll(archiveDir, 0755)
		if err == nil {
			f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644)
		}
	}
	if err != nil {
		return 0, err
	}
	defer f.Close()
	bw := bufio.NewWriter(f)
	defer bw.Flush()
	zw := zip.NewWriter(bw)
	defer zw.Close()

	n := 0
	for _, checkpoint := range files {
		filename := filepath.Join(dir, checkpoint)
		r, err := os.Open(filename)
		if err != nil {
			return n, err
		}
		defer r.Close()

		w, err := zw.Create(checkpoint)
		if err != nil {
			return n, err
		}

		if _, err = io.Copy(w, r); err != nil {
			return n, err
		}

		if err = os.Remove(filename); err != nil {
			return n, err
		}
		n += 1
	}

	return n, nil
}
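The <from>.json files written by toCheckpoint serialize the CheckpointFile structure above. A hand-written sketch of one such file (all names and numbers invented), e.g. checkpoints/emmy/host123/1640000000.json:

// Hypothetical checkpoint file contents; "null" entries are NaN slots.
const exampleCheckpointFile = `{
	"from": 1640000000,
	"to": 1640043200,
	"metrics": {
		"mem_used": { "frequency": 60, "start": 1640000000, "data": [42.0, null, 43.5] }
	},
	"children": {
		"cpu0": {
			"from": 1640000000, "to": 1640043200,
			"metrics": { "flops_any": { "frequency": 60, "start": 1640000000, "data": [1.0, 2.0] } },
			"children": {}
		}
	}
}`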
542	internal/memstore/memstore.go (new file)
@@ -0,0 +1,542 @@
|
||||
package memstore
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ClusterCockpit/cc-metric-store/internal/api"
|
||||
"github.com/ClusterCockpit/cc-metric-store/internal/config"
|
||||
"github.com/ClusterCockpit/cc-metric-store/internal/util"
|
||||
)
|
||||
|
||||
// Default buffer capacity.
|
||||
// `buffer.data` will only ever grow up to it's capacity and a new link
|
||||
// in the buffer chain will be created if needed so that no copying
|
||||
// of data or reallocation needs to happen on writes.
|
||||
const (
|
||||
BUFFER_CAP int = 512
|
||||
)
|
||||
|
||||
// So that we can reuse allocations
|
||||
var bufferPool sync.Pool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &buffer{
|
||||
data: make([]util.Float, 0, BUFFER_CAP),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
var (
|
||||
ErrNoData error = errors.New("no data for this metric/level")
|
||||
ErrDataDoesNotAlign error = errors.New("data from lower granularities does not align")
|
||||
)
|
||||
|
||||
// Each metric on each level has it's own buffer.
|
||||
// This is where the actual values go.
|
||||
// If `cap(data)` is reached, a new buffer is created and
|
||||
// becomes the new head of a buffer list.
|
||||
type buffer struct {
|
||||
frequency int64 // Time between two "slots"
|
||||
start int64 // Timestamp of when `data[0]` was written.
|
||||
data []util.Float // The slice should never reallocacte as `cap(data)` is respected.
|
||||
prev, next *buffer // `prev` contains older data, `next` newer data.
|
||||
archived bool // If true, this buffer is already archived
|
||||
|
||||
closed bool
|
||||
/*
|
||||
statisticts struct {
|
||||
samples int
|
||||
min Float
|
||||
max Float
|
||||
avg Float
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
func newBuffer(ts, freq int64) *buffer {
|
||||
b := bufferPool.Get().(*buffer)
|
||||
b.frequency = freq
|
||||
b.start = ts - (freq / 2)
|
||||
b.prev = nil
|
||||
b.next = nil
|
||||
b.archived = false
|
||||
b.closed = false
|
||||
b.data = b.data[:0]
|
||||
return b
|
||||
}
|
||||
|
||||
// If a new buffer was created, the new head is returnd.
|
||||
// Otherwise, the existing buffer is returnd.
|
||||
// Normaly, only "newer" data should be written, but if the value would
|
||||
// end up in the same buffer anyways it is allowed.
|
||||
func (b *buffer) write(ts int64, value util.Float) (*buffer, error) {
|
||||
if ts < b.start {
|
||||
return nil, errors.New("cannot write value to buffer from past")
|
||||
}
|
||||
|
||||
// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
|
||||
idx := int((ts - b.start) / b.frequency)
|
||||
if idx >= cap(b.data) {
|
||||
newbuf := newBuffer(ts, b.frequency)
|
||||
newbuf.prev = b
|
||||
b.next = newbuf
|
||||
b.close()
|
||||
b = newbuf
|
||||
idx = 0
|
||||
}
|
||||
|
||||
// Overwriting value or writing value from past
|
||||
if idx < len(b.data) {
|
||||
b.data[idx] = value
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// Fill up unwritten slots with NaN
|
||||
for i := len(b.data); i < idx; i++ {
|
||||
b.data = append(b.data, util.NaN)
|
||||
}
|
||||
|
||||
b.data = append(b.data, value)
|
||||
return b, nil
|
||||
}
|
||||
|
||||
func (b *buffer) end() int64 {
|
||||
return b.firstWrite() + int64(len(b.data))*b.frequency
|
||||
}
|
||||
|
||||
func (b *buffer) firstWrite() int64 {
|
||||
return b.start + (b.frequency / 2)
|
||||
}
|
||||
|
||||
func (b *buffer) close() {}
|
||||
|
||||
/*
|
||||
func (b *buffer) close() {
|
||||
if b.closed {
|
||||
return
|
||||
}
|
||||
|
||||
b.closed = true
|
||||
n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64
|
||||
for _, x := range b.data {
|
||||
if x.IsNaN() {
|
||||
continue
|
||||
}
|
||||
|
||||
n += 1
|
||||
f := float64(x)
|
||||
sum += f
|
||||
min = math.Min(min, f)
|
||||
max = math.Max(max, f)
|
||||
}
|
||||
|
||||
b.statisticts.samples = n
|
||||
if n > 0 {
|
||||
b.statisticts.avg = Float(sum / float64(n))
|
||||
b.statisticts.min = Float(min)
|
||||
b.statisticts.max = Float(max)
|
||||
} else {
|
||||
b.statisticts.avg = NaN
|
||||
b.statisticts.min = NaN
|
||||
b.statisticts.max = NaN
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// func interpolate(idx int, data []Float) Float {
|
||||
// if idx == 0 || idx+1 == len(data) {
|
||||
// return NaN
|
||||
// }
|
||||
// return (data[idx-1] + data[idx+1]) / 2.0
|
||||
// }
|
||||
|
||||
// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
|
||||
// Simple linear interpolation is done between the two neighboring cells if possible.
|
||||
// If values at the start or end are missing, instead of NaN values, the second and thrid
|
||||
// return values contain the actual `from`/`to`.
|
||||
// This function goes back the buffer chain if `from` is older than the currents buffer start.
|
||||
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
|
||||
// If `data` is not long enough to hold all values, this function will panic!
|
||||
func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, int64, error) {
|
||||
if from < b.firstWrite() {
|
||||
if b.prev != nil {
|
||||
return b.prev.read(from, to, data)
|
||||
}
|
||||
from = b.firstWrite()
|
||||
}
|
||||
|
||||
var i int = 0
|
||||
var t int64 = from
|
||||
for ; t < to; t += b.frequency {
|
||||
idx := int((t - b.start) / b.frequency)
|
||||
if idx >= cap(b.data) {
|
||||
if b.next == nil {
|
||||
break
|
||||
}
|
||||
b = b.next
|
||||
idx = 0
|
||||
}
|
||||
|
||||
if idx >= len(b.data) {
|
||||
if b.next == nil || to <= b.next.start {
|
||||
break
|
||||
}
|
||||
data[i] += util.NaN
|
||||
} else if t < b.start {
|
||||
data[i] += util.NaN
|
||||
// } else if b.data[idx].IsNaN() {
|
||||
// data[i] += interpolate(idx, b.data)
|
||||
} else {
|
||||
data[i] += b.data[idx]
|
||||
}
|
||||
i++
|
||||
}
|
||||
|
||||
return data[:i], from, t, nil
|
||||
}
|
||||
|
||||
// Returns true if this buffer needs to be freed.
|
||||
func (b *buffer) free(t int64) (delme bool, n int) {
|
||||
if b.prev != nil {
|
||||
delme, m := b.prev.free(t)
|
||||
n += m
|
||||
if delme {
|
||||
b.prev.next = nil
|
||||
if cap(b.prev.data) == BUFFER_CAP {
|
||||
bufferPool.Put(b.prev)
|
||||
}
|
||||
b.prev = nil
|
||||
}
|
||||
}
|
||||
|
||||
end := b.end()
|
||||
if end < t {
|
||||
return true, n + 1
|
||||
}
|
||||
|
||||
return false, n
|
||||
}
|
||||
|
||||
// Call `callback` on every buffer that contains data in the range from `from` to `to`.
|
||||
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
|
||||
if b == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := b.prev.iterFromTo(from, to, callback); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if from <= b.end() && b.start <= to {
|
||||
return callback(b)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *buffer) count() int64 {
|
||||
res := int64(len(b.data))
|
||||
if b.prev != nil {
|
||||
res += b.prev.count()
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
// Could also be called "node" as this forms a node in a tree structure.
|
||||
// Called level because "node" might be confusing here.
|
||||
// Can be both a leaf or a inner node. In this tree structue, inner nodes can
|
||||
// also hold data (in `metrics`).
|
||||
type level struct {
|
||||
lock sync.RWMutex
|
||||
metrics []*buffer // Every level can store metrics.
|
||||
children map[string]*level // Lower levels.
|
||||
}
|
||||
|
||||
// Find the correct level for the given selector, creating it if
|
||||
// it does not exist. Example selector in the context of the
|
||||
// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }.
|
||||
// This function would probably benefit a lot from `level.children` beeing a `sync.Map`?
|
||||
func (l *level) findLevelOrCreate(selector []string, nMetrics int) *level {
|
||||
if len(selector) == 0 {
|
||||
return l
|
||||
}
|
||||
|
||||
// Allow concurrent reads:
|
||||
l.lock.RLock()
|
||||
var child *level
|
||||
var ok bool
|
||||
if l.children == nil {
|
||||
// Children map needs to be created...
|
||||
l.lock.RUnlock()
|
||||
} else {
|
||||
child, ok := l.children[selector[0]]
|
||||
l.lock.RUnlock()
|
||||
if ok {
|
||||
return child.findLevelOrCreate(selector[1:], nMetrics)
|
||||
}
|
||||
}
|
||||
|
||||
// The level does not exist, take write lock for unqiue access:
|
||||
l.lock.Lock()
|
||||
// While this thread waited for the write lock, another thread
|
||||
// could have created the child node.
|
||||
if l.children != nil {
|
||||
child, ok = l.children[selector[0]]
|
||||
if ok {
|
||||
l.lock.Unlock()
|
||||
return child.findLevelOrCreate(selector[1:], nMetrics)
|
||||
}
|
||||
}
|
||||
|
||||
child = &level{
|
||||
metrics: make([]*buffer, nMetrics),
|
||||
children: nil,
|
||||
}
|
||||
|
||||
if l.children != nil {
|
||||
l.children[selector[0]] = child
|
||||
} else {
|
||||
l.children = map[string]*level{selector[0]: child}
|
||||
}
|
||||
l.lock.Unlock()
|
||||
return child.findLevelOrCreate(selector[1:], nMetrics)
|
||||
}
|
||||
|
||||
func (l *level) free(t int64) (int, error) {
|
||||
l.lock.Lock()
|
||||
defer l.lock.Unlock()
|
||||
|
||||
n := 0
|
||||
for i, b := range l.metrics {
|
||||
if b != nil {
|
||||
delme, m := b.free(t)
|
||||
n += m
|
||||
if delme {
|
||||
if cap(b.data) == BUFFER_CAP {
|
||||
bufferPool.Put(b)
|
||||
}
|
||||
l.metrics[i] = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, l := range l.children {
|
||||
m, err := l.free(t)
|
||||
n += m
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
}
|
||||
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (l *level) sizeInBytes() int64 {
|
||||
l.lock.RLock()
|
||||
defer l.lock.RUnlock()
|
||||
size := int64(0)
|
||||
|
||||
for _, b := range l.metrics {
|
||||
if b != nil {
|
||||
size += b.count() * int64(unsafe.Sizeof(util.Float(0)))
|
||||
}
|
||||
}
|
||||
|
||||
for _, child := range l.children {
|
||||
size += child.sizeInBytes()
|
||||
}
|
||||
|
||||
return size
|
||||
}
|
||||
|
||||
type MemoryStore struct {
|
||||
root level // root of the tree structure
|
||||
metrics map[string]config.MetricConfig
|
||||
}
|
||||
|
||||
// Return a new, initialized instance of a MemoryStore.
|
||||
// Will panic if values in the metric configurations are invalid.
|
||||
func NewMemoryStore(metrics map[string]config.MetricConfig) *MemoryStore {
|
||||
offset := 0
|
||||
for key, config := range metrics {
|
||||
if config.Frequency == 0 {
|
||||
panic("invalid frequency")
|
||||
}
|
||||
|
||||
metrics[key] = MetricConfig{
|
||||
Frequency: config.Frequency,
|
||||
Aggregation: config.Aggregation,
|
||||
offset: offset,
|
||||
}
|
||||
offset += 1
|
||||
}
|
||||
|
||||
return &MemoryStore{
|
||||
root: level{
|
||||
metrics: make([]*buffer, len(metrics)),
|
||||
children: make(map[string]*level),
|
||||
},
|
||||
metrics: metrics,
|
||||
}
|
||||
}
|
||||
|
||||
// Write all values in `metrics` to the level specified by `selector` for time `ts`.
|
||||
// Look at `findLevelOrCreate` for how selectors work.
|
||||
func (m *MemoryStore) Write(selector []string, ts int64, metrics []api.Metric) error {
|
||||
var ok bool
|
||||
for i, metric := range metrics {
|
||||
if metric.mc.Frequency == 0 {
|
||||
metric.mc, ok = m.metrics[metric.Name]
|
||||
if !ok {
|
||||
metric.mc.Frequency = 0
|
||||
}
|
||||
metrics[i] = metric
|
||||
}
|
||||
}
|
||||
|
||||
return m.WriteToLevel(&m.root, selector, ts, metrics)
|
||||
}
|
||||
|
||||
func (m *MemoryStore) GetLevel(selector []string) *level {
|
||||
return m.root.findLevelOrCreate(selector, len(m.metrics))
|
||||
}
|
||||
|
||||
// Assumes that `minfo` in `metrics` is filled in!
|
||||
func (m *MemoryStore) WriteToLevel(l *level, selector []string, ts int64, metrics []api.Metric) error {
|
||||
l = l.findLevelOrCreate(selector, len(m.metrics))
|
||||
l.lock.Lock()
|
||||
defer l.lock.Unlock()
|
||||
|
||||
for _, metric := range metrics {
|
||||
if metric.mc.Frequency == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
b := l.metrics[metric.mc.offset]
|
||||
if b == nil {
|
||||
// First write to this metric and level
|
||||
b = newBuffer(ts, metric.mc.Frequency)
|
||||
l.metrics[metric.mc.offset] = b
|
||||
}
|
||||
|
||||
nb, err := b.write(ts, metric.Value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Last write created a new buffer...
|
||||
if b != nb {
|
||||
l.metrics[metric.mc.offset] = nb
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}

// Returns all values for metric `metric` from `from` to `to` for the selected level(s).
// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
// The second and third return values are the actual from/to for the data. Those can be different from
// the range asked for if no data was available.
func (m *MemoryStore) Read(selector Selector, metric string, from, to int64) ([]Float, int64, int64, error) {
	if from > to {
		return nil, 0, 0, errors.New("invalid time range")
	}

	minfo, ok := m.metrics[metric]
	if !ok {
		return nil, 0, 0, errors.New("unknown metric: " + metric)
	}

	n, data := 0, make([]Float, (to-from)/minfo.Frequency+1)
	err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {
		cdata, cfrom, cto, err := b.read(from, to, data)
		if err != nil {
			return err
		}

		if n == 0 {
			from, to = cfrom, cto
		} else if from != cfrom || to != cto || len(data) != len(cdata) {
			missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)
			if missingfront != 0 {
				return ErrDataDoesNotAlign
			}

			newlen := len(cdata) - missingback
			if newlen < 1 {
				return ErrDataDoesNotAlign
			}
			cdata = cdata[0:newlen]
			if len(cdata) != len(data) {
				return ErrDataDoesNotAlign
			}

			from, to = cfrom, cto
		}

		data = cdata
		n += 1
		return nil
	})

	if err != nil {
		return nil, 0, 0, err
	} else if n == 0 {
		return nil, 0, 0, errors.New("metric or host not found")
	} else if n > 1 {
		if minfo.Aggregation == AvgAggregation {
			normalize := 1. / Float(n)
			for i := 0; i < len(data); i++ {
				data[i] *= normalize
			}
		} else if minfo.Aggregation != SumAggregation {
			return nil, 0, 0, errors.New("invalid aggregation")
		}
	}

	return data, from, to, nil
}
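
To illustrate the selector semantics end to end, a read sketch (cluster and metric names invented; assumes "flops" uses AvgAggregation, so fanning out over all hosts yields an average):

// Hypothetical: average "flops" across every child of cluster "alex"
// over the last hour.
to := time.Now().Unix()
sel := Selector{{String: "alex"}, {Any: true}}
data, realFrom, realTo, err := store.Read(sel, "flops", to-3600, to)
if err != nil {
	log.Fatal(err)
}
fmt.Printf("%d points covering [%d, %d]\n", len(data), realFrom, realTo)

Note that realFrom/realTo come from the buffers actually hit, so they can be narrower than the requested range.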

// Release all buffers for the selected level and all its children that contain only
// values older than `t`.
func (m *MemoryStore) Free(selector []string, t int64) (int, error) {
	return m.GetLevel(selector).free(t)
}

func (m *MemoryStore) FreeAll() error {
	for k := range m.root.children {
		delete(m.root.children, k)
	}

	return nil
}

func (m *MemoryStore) SizeInBytes() int64 {
	return m.root.sizeInBytes()
}

// Given a selector, return a list of all children of the level selected.
func (m *MemoryStore) ListChildren(selector []string) []string {
	lvl := &m.root
	for lvl != nil && len(selector) != 0 {
		lvl.lock.RLock()
		next := lvl.children[selector[0]]
		lvl.lock.RUnlock()
		lvl = next
		selector = selector[1:]
	}

	if lvl == nil {
		return nil
	}

	lvl.lock.RLock()
	defer lvl.lock.RUnlock()

	children := make([]string, 0, len(lvl.children))
	for child := range lvl.children {
		children = append(children, child)
	}

	return children
}
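For example, enumerating the hosts currently stored below a (hypothetical) cluster level:

// Hypothetical: list all hosts known for cluster "alex".
for _, host := range store.ListChildren([]string{"alex"}) {
	fmt.Println(host)
}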
123
internal/memstore/selector.go
Normal file
@@ -0,0 +1,123 @@
package memstore

import (
	"encoding/json"
	"errors"
)

type SelectorElement struct {
	Any    bool
	String string
	Group  []string
}

func (se *SelectorElement) UnmarshalJSON(input []byte) error {
	if input[0] == '"' {
		if err := json.Unmarshal(input, &se.String); err != nil {
			return err
		}

		if se.String == "*" {
			se.Any = true
			se.String = ""
		}

		return nil
	}

	if input[0] == '[' {
		return json.Unmarshal(input, &se.Group)
	}

	return errors.New("the Go SelectorElement type can only be a string or an array of strings")
}

func (se *SelectorElement) MarshalJSON() ([]byte, error) {
	if se.Any {
		return []byte("\"*\""), nil
	}

	if se.String != "" {
		return json.Marshal(se.String)
	}

	if se.Group != nil {
		return json.Marshal(se.Group)
	}

	return nil, errors.New("a Go Selector must be a non-empty string or a non-empty slice of strings")
}

type Selector []SelectorElement
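
In JSON, a selector is a flat array mixing all three element kinds. A hypothetical payload and how it decodes, given the UnmarshalJSON above:

// ["alex", ["host1", "host2"], "*"] selects cluster "alex",
// then only host1 and host2, then any child below those.
var sel Selector
if err := json.Unmarshal([]byte(`["alex", ["host1", "host2"], "*"]`), &sel); err != nil {
	log.Fatal(err)
}
// sel[0].String == "alex"
// sel[1].Group == []string{"host1", "host2"}
// sel[2].Any == true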

func (l *level) findLevel(selector []string) *level {
	if len(selector) == 0 {
		return l
	}

	l.lock.RLock()
	defer l.lock.RUnlock()

	lvl := l.children[selector[0]]
	if lvl == nil {
		return nil
	}

	return lvl.findLevel(selector[1:])
}

func (l *level) findBuffers(selector Selector, offset int, f func(b *buffer) error) error {
	l.lock.RLock()
	defer l.lock.RUnlock()

	if len(selector) == 0 {
		b := l.metrics[offset]
		if b != nil {
			return f(b)
		}

		for _, lvl := range l.children {
			err := lvl.findBuffers(nil, offset, f)
			if err != nil {
				return err
			}
		}
		return nil
	}

	sel := selector[0]
	if len(sel.String) != 0 && l.children != nil {
		lvl, ok := l.children[sel.String]
		if ok {
			err := lvl.findBuffers(selector[1:], offset, f)
			if err != nil {
				return err
			}
		}
		return nil
	}

	if sel.Group != nil && l.children != nil {
		for _, key := range sel.Group {
			lvl, ok := l.children[key]
			if ok {
				err := lvl.findBuffers(selector[1:], offset, f)
				if err != nil {
					return err
				}
			}
		}
		return nil
	}

	if sel.Any && l.children != nil {
		for _, lvl := range l.children {
			if err := lvl.findBuffers(selector[1:], offset, f); err != nil {
				return err
			}
		}
		return nil
	}

	return nil
}
64
internal/util/float.go
Normal file
@@ -0,0 +1,64 @@
package util

import (
	"math"
	"strconv"
)

// Go's JSON encoder for floats does not support NaN (https://github.com/golang/go/issues/3480).
// This program uses NaN as a signal for missing data.
// For the HTTP JSON API to be able to handle NaN values,
// we have to use our own type which implements encoding/json.Marshaler itself.
type Float float64

var (
	NaN         Float  = Float(math.NaN())
	nullAsBytes []byte = []byte("null")
)

func (f Float) IsNaN() bool {
	return math.IsNaN(float64(f))
}

func (f Float) MarshalJSON() ([]byte, error) {
	if math.IsNaN(float64(f)) {
		return nullAsBytes, nil
	}

	return strconv.AppendFloat(make([]byte, 0, 10), float64(f), 'f', 3, 64), nil
}

func (f *Float) UnmarshalJSON(input []byte) error {
	if string(input) == "null" {
		*f = NaN
		return nil
	}

	val, err := strconv.ParseFloat(string(input), 64)
	if err != nil {
		return err
	}
	*f = Float(val)
	return nil
}

// Same as `[]Float`, but can be marshaled to JSON with fewer allocations.
type FloatArray []Float

func (fa FloatArray) MarshalJSON() ([]byte, error) {
	buf := make([]byte, 0, 2+len(fa)*8)
	buf = append(buf, '[')
	for i := 0; i < len(fa); i++ {
		if i != 0 {
			buf = append(buf, ',')
		}

		if fa[i].IsNaN() {
			buf = append(buf, `null`...)
		} else {
			buf = strconv.AppendFloat(buf, float64(fa[i]), 'f', 3, 64)
		}
	}
	buf = append(buf, ']')
	return buf, nil
}
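A quick round-trip sketch of the NaN handling (assumes encoding/json and fmt are imported; the three-digit output follows the precision chosen above):

out, _ := json.Marshal(FloatArray{1, NaN, 2.5})
fmt.Println(string(out)) // [1.000,null,2.500]

var back FloatArray
_ = json.Unmarshal([]byte(`[1.000,null,2.500]`), &back)
fmt.Println(back[1].IsNaN()) // true: null decodes back to NaN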
117
internal/util/stats.go
Normal file
@@ -0,0 +1,117 @@
package util

import (
	"errors"
	"math"
)

type Stats struct {
	Samples int
	Avg     Float
	Min     Float
	Max     Float
}

func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {
	if from < b.start {
		if b.prev != nil {
			return b.prev.stats(from, to)
		}
		from = b.start
	}

	// TODO: Check if b.closed and if so and the full buffer is queried,
	// use b.statistics instead of iterating over the buffer.

	samples := 0
	sum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32

	var t int64
	for t = from; t < to; t += b.frequency {
		idx := int((t - b.start) / b.frequency)
		if idx >= cap(b.data) {
			b = b.next
			if b == nil {
				break
			}
			idx = 0
		}

		if t < b.start || idx >= len(b.data) {
			continue
		}

		xf := float64(b.data[idx])
		if math.IsNaN(xf) {
			continue
		}

		samples += 1
		sum += xf
		min = math.Min(min, xf)
		max = math.Max(max, xf)
	}

	return Stats{
		Samples: samples,
		Avg:     Float(sum) / Float(samples),
		Min:     Float(min),
		Max:     Float(max),
	}, from, t, nil
}

// Returns statistics for the requested metric on the selected node/level.
// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.
// If `Stats.Samples` is zero, the statistics should not be considered as valid.
func (m *MemoryStore) Stats(selector Selector, metric string, from, to int64) (*Stats, int64, int64, error) {
	if from > to {
		return nil, 0, 0, errors.New("invalid time range")
	}

	minfo, ok := m.metrics[metric]
	if !ok {
		return nil, 0, 0, errors.New("unknown metric: " + metric)
	}

	n, samples := 0, 0
	avg, min, max := Float(0), math.MaxFloat32, -math.MaxFloat32
	err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {
		stats, cfrom, cto, err := b.stats(from, to)
		if err != nil {
			return err
		}

		if n == 0 {
			from, to = cfrom, cto
		} else if from != cfrom || to != cto {
			return ErrDataDoesNotAlign
		}

		samples += stats.Samples
		avg += stats.Avg
		min = math.Min(min, float64(stats.Min))
		max = math.Max(max, float64(stats.Max))
		n += 1
		return nil
	})
	if err != nil {
		return nil, 0, 0, err
	}

	if n == 0 {
		return nil, 0, 0, ErrNoData
	}

	if minfo.Aggregation == AvgAggregation {
		avg /= Float(n)
	} else if n > 1 && minfo.Aggregation != SumAggregation {
		return nil, 0, 0, errors.New("invalid aggregation")
	}

	return &Stats{
		Samples: samples,
		Avg:     avg,
		Min:     Float(min),
		Max:     Float(max),
	}, from, to, nil
}
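
Finally, a stats-query sketch mirroring the read example (names invented; assumes the store was populated as in the Write sketch earlier):

// Hypothetical: min/avg/max of "flops" on one node over the last hour.
to := time.Now().Unix()
sel := Selector{{String: "alex"}, {String: "node001"}}
stats, sfrom, sto, err := store.Stats(sel, "flops", to-3600, to)
if err != nil {
	log.Fatal(err)
}
if stats.Samples > 0 {
	fmt.Printf("[%d, %d]: avg=%.2f min=%.2f max=%.2f\n",
		sfrom, sto, stats.Avg, stats.Min, stats.Max)
}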