Start a new api package

Lou Knauer
2022-07-14 12:34:02 +02:00
parent ef6e09c3e2
commit eb319aee36
5 changed files with 168 additions and 38 deletions

View File

@@ -0,0 +1,326 @@
package api
import (
"context"
"errors"
"fmt"
"log"
"net"
"sync"
"time"
"github.com/ClusterCockpit/cc-metric-store/internal/memstore"
"github.com/ClusterCockpit/cc-metric-store/internal/types"
"github.com/influxdata/line-protocol/v2/lineprotocol"
"github.com/nats-io/nats.go"
)
type NatsConfig struct {
// Address of the nats server
Address string `json:"address"`
// Username/Password, optional
Username string `json:"username"`
Password string `json:"password"`
Subscriptions []struct {
// Channel name
SubscribeTo string `json:"subscribe-to"`
// Used as the default cluster for lines without a cluster tag, optional
ClusterTag string `json:"cluster-tag"`
} `json:"subscriptions"`
}
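As an illustration only, a matching JSON configuration could be decoded like this. This sketch is not part of the commit: the address, credentials and subject are placeholders, and it assumes `encoding/json` is added to the imports.

// Hypothetical example: decode a NatsConfig using the struct tags above.
// Assumes "encoding/json" is imported; all values are placeholders.
func loadNatsConfigExample() (*NatsConfig, error) {
    raw := []byte(`{
        "address": "nats://localhost:4222",
        "username": "",
        "password": "",
        "subscriptions": [
            { "subscribe-to": "updates", "cluster-tag": "fallback-cluster" }
        ]
    }`)
    conf := &NatsConfig{}
    if err := json.Unmarshal(raw, conf); err != nil {
        return nil, err
    }
    return conf, nil
}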
// Currently unused; would allow clients to send lines via raw TCP.
// Each connection is handled in its own goroutine. This is a blocking function.
func ReceiveRaw(ctx context.Context, listener net.Listener, handleLine func(*lineprotocol.Decoder, string) error) error {
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
<-ctx.Done()
if err := listener.Close(); err != nil {
log.Printf("listener.Close(): %s", err.Error())
}
}()
for {
conn, err := listener.Accept()
if err != nil {
if errors.Is(err, net.ErrClosed) {
break
}
log.Printf("listener.Accept(): %s", err.Error())
}
wg.Add(2)
go func() {
defer wg.Done()
defer conn.Close()
dec := lineprotocol.NewDecoder(conn)
connctx, cancel := context.WithCancel(context.Background())
defer cancel()
go func() {
defer wg.Done()
select {
case <-connctx.Done():
conn.Close()
case <-ctx.Done():
conn.Close()
}
}()
if err := handleLine(dec, "default"); err != nil {
if errors.Is(err, net.ErrClosed) {
return
}
log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
errmsg := make([]byte, 128)
errmsg = append(errmsg, `error: `...)
errmsg = append(errmsg, err.Error()...)
errmsg = append(errmsg, '\n')
conn.Write(errmsg)
}
}()
}
wg.Wait()
return nil
}
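Since ReceiveRaw is currently unused, the following is only a sketch of how a caller might wire it up; the listen address and the use of decodeLine as the line handler are assumptions for illustration.

// Hypothetical usage sketch (not part of this commit): accept raw TCP
// line-protocol input and feed it into the MemoryStore via decodeLine.
func listenRawExample(ctx context.Context, ms *memstore.MemoryStore) error {
    ln, err := net.Listen("tcp", ":8094") // placeholder port
    if err != nil {
        return err
    }
    return ReceiveRaw(ctx, ln, func(dec *lineprotocol.Decoder, clusterDefault string) error {
        return decodeLine(ms, dec, clusterDefault)
    })
}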
// Connect to a NATS server and subscribe to the configured subjects. This is a
// blocking function. handleLine will be called for each line received via NATS.
// Cancel the passed context for graceful termination.
func ReceiveNats(conf *NatsConfig, handleLine func(*lineprotocol.Decoder, string) error, workers int, ctx context.Context) error {
var opts []nats.Option
if conf.Username != "" && conf.Password != "" {
opts = append(opts, nats.UserInfo(conf.Username, conf.Password))
}
nc, err := nats.Connect(conf.Address, opts...)
if err != nil {
return err
}
defer nc.Close()
var wg sync.WaitGroup
var subs []*nats.Subscription
msgs := make(chan *nats.Msg, workers*2)
for _, sc := range conf.Subscriptions {
clusterTag := sc.ClusterTag
var sub *nats.Subscription
if workers > 1 {
wg.Add(workers)
for i := 0; i < workers; i++ {
go func() {
for m := range msgs {
dec := lineprotocol.NewDecoderWithBytes(m.Data)
if err := handleLine(dec, clusterTag); err != nil {
log.Printf("error: %s\n", err.Error())
}
}
wg.Done()
}()
}
sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
msgs <- m
})
} else {
sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
dec := lineprotocol.NewDecoderWithBytes(m.Data)
if err := handleLine(dec, clusterTag); err != nil {
log.Printf("error: %s\n", err.Error())
}
})
}
if err != nil {
return err
}
log.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address)
subs = append(subs, sub)
}
<-ctx.Done()
for _, sub := range subs {
err = sub.Unsubscribe()
if err != nil {
log.Printf("NATS unsubscribe failed: %s", err.Error())
}
}
close(msgs)
wg.Wait()
nc.Close()
log.Println("NATS connection closed")
return nil
}
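A sketch of how ReceiveNats could be driven; the worker count of 4 and the signal handling are illustrative assumptions, and "os/signal" and "syscall" would need to be imported.

// Hypothetical usage sketch (not part of this commit): consume NATS messages
// until SIGINT/SIGTERM, decoding every line into the given MemoryStore.
func runNatsExample(conf *NatsConfig, ms *memstore.MemoryStore) error {
    ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    defer stop()
    handleLine := func(dec *lineprotocol.Decoder, clusterDefault string) error {
        return decodeLine(ms, dec, clusterDefault)
    }
    return ReceiveNats(conf, handleLine, 4, ctx)
}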
// Place `prefix` in front of `buf`, doing so in place in `buf` if its capacity allows.
func reorder(buf, prefix []byte) []byte {
n := len(prefix)
m := len(buf)
if cap(buf) < m+n {
return append(prefix[:n:n], buf...)
} else {
buf = buf[:n+m]
for i := m - 1; i >= 0; i-- {
buf[i+n] = buf[i]
}
for i := 0; i < n; i++ {
buf[i] = prefix[i]
}
return buf
}
}
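A tiny illustration of reorder's behavior (not part of this commit); the values mirror the type/type-id handling below.

// Illustrative only: prepend the "type" value to an already collected "type-id".
func reorderExample() {
    buf := append(make([]byte, 0, 16), '0') // e.g. the "type-id" value "0"
    buf = reorder(buf, []byte("core"))      // prepend the "type" value
    fmt.Println(string(buf))                // prints "core0", shifted in place
}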
// Decode lines using dec and make write calls to the MemoryStore.
// If a line is missing its cluster tag, use clusterDefault as default.
func decodeLine(memoryStore *memstore.MemoryStore, dec *lineprotocol.Decoder, clusterDefault string) error {
// Reduce allocations in loop:
t := time.Now()
metric, metricBuf := types.Metric{}, make([]byte, 0, 16)
selector := make([]string, 0, 4)
typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)
// Optimize for the case where all lines in a "batch" are about the same
// cluster and host. By using `WriteToLevel` (level = host), we do not need
// to take the root- and cluster-level lock as often.
var lvl *memstore.Level = nil
var prevCluster, prevHost string = "", ""
var ok bool
for dec.Next() {
rawmeasurement, err := dec.Measurement()
if err != nil {
return err
}
// Needs to be copied because another call to dec.* would
// invalidate the returned slice.
metricBuf = append(metricBuf[:0], rawmeasurement...)
// The Go compiler optimizes map[string(byteslice)] lookups:
metric.Conf, ok = memoryStore.GetMetricConf(string(rawmeasurement))
if !ok {
continue
}
typeBuf, subTypeBuf = typeBuf[:0], subTypeBuf[:0] // reuse the buffers declared outside the loop (no shadowing)
cluster, host := clusterDefault, ""
for {
key, val, err := dec.NextTag()
if err != nil {
return err
}
if key == nil {
break
}
// The Go compiler optimizes string([]byte{...}) == "...":
switch string(key) {
case "cluster":
if string(val) == prevCluster {
cluster = prevCluster
} else {
cluster = string(val)
lvl = nil
}
case "hostname", "host":
if string(val) == prevHost {
host = prevHost
} else {
host = string(val)
lvl = nil
}
case "type":
if string(val) == "node" {
break
}
// We cannot be sure that the "type" tag comes before the "type-id" tag:
if len(typeBuf) == 0 {
typeBuf = append(typeBuf, val...)
} else {
typeBuf = reorder(typeBuf, val)
}
case "type-id":
typeBuf = append(typeBuf, val...)
case "subtype":
// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
if len(subTypeBuf) == 0 {
subTypeBuf = append(subTypeBuf, val...)
} else {
subTypeBuf = reorder(subTypeBuf, val)
}
case "stype-id":
subTypeBuf = append(subTypeBuf, val...)
default:
// Ignore unknown tags (cc-metric-collector might, for example, send a unit tag that we do not need)
// return fmt.Errorf("unknown tag: '%s' (value: '%s')", string(key), string(val))
}
}
// If the cluster or host changed, the lvl was set to nil
if lvl == nil {
selector = selector[:2]
selector[0], selector[1] = cluster, host
lvl = memoryStore.GetLevel(selector)
prevCluster, prevHost = cluster, host
}
// subtypes:
selector = selector[:0]
if len(typeBuf) > 0 {
selector = append(selector, string(typeBuf)) // <- Allocation :(
if len(subTypeBuf) > 0 {
selector = append(selector, string(subTypeBuf))
}
}
for {
key, val, err := dec.NextField()
if err != nil {
return err
}
if key == nil {
break
}
if string(key) != "value" {
return fmt.Errorf("unkown field: '%s' (value: %#v)", string(key), val)
}
if val.Kind() == lineprotocol.Float {
metric.Value = types.Float(val.FloatV())
} else if val.Kind() == lineprotocol.Int {
metric.Value = types.Float(val.IntV())
} else if val.Kind() == lineprotocol.Uint {
metric.Value = types.Float(val.UintV())
} else {
return fmt.Errorf("unsupported value type in message: %s", val.Kind().String())
}
}
if t, err = dec.Time(lineprotocol.Second, t); err != nil {
return err
}
if err := memoryStore.WriteToLevel(lvl, selector, t.Unix(), []types.Metric{metric}); err != nil {
return err
}
}
return nil
}

View File

@@ -0,0 +1,147 @@
package api
import (
"bytes"
"strconv"
"testing"
"github.com/ClusterCockpit/cc-metric-store/internal/memstore"
"github.com/ClusterCockpit/cc-metric-store/internal/types"
"github.com/influxdata/line-protocol/v2/lineprotocol"
)
const TestDataClassicFormat string = `
m1,cluster=ctest,hostname=htest1,type=node value=1 123456789
m2,cluster=ctest,hostname=htest1,type=node value=2 123456789
m3,hostname=htest2,type=node value=3 123456789
m4,cluster=ctest,hostname=htest2,type=core,type-id=1 value=4 123456789
m4,cluster=ctest,hostname=htest2,type-id=2,type=core value=5 123456789
`
const BenchmarkLineBatch string = `
nm1,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm2,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm3,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm4,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm5,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm6,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm7,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm8,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
nm9,cluster=ctest,hostname=htest1,type=node value=123.0 123456789
cm1,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm2,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm3,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm4,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm5,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm6,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm7,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm8,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm9,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789
cm1,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm2,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm3,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm4,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm5,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm6,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm7,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm8,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm9,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789
cm1,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm2,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm3,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm4,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm5,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm6,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm7,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm8,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm9,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789
cm1,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm2,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm3,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm4,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm5,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm6,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm7,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm8,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
cm9,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789
`
/*
func TestLineprotocolDecoder(t *testing.T) {
prevMemoryStore := memoryStore
t.Cleanup(func() {
memoryStore = prevMemoryStore
})
memoryStore = NewMemoryStore(map[string]MetricConfig{
"m1": {Frequency: 1},
"m2": {Frequency: 1},
"m3": {Frequency: 1},
"m4": {Frequency: 1},
})
dec := lineprotocol.NewDecoderWithBytes([]byte(TestDataClassicFormat))
if err := decodeLine(dec, "ctest"); err != nil {
log.Fatal(err)
}
// memoryStore.DebugDump(bufio.NewWriter(os.Stderr))
h1 := memoryStore.GetLevel([]string{"ctest", "htest1"})
h1b1 := h1.metrics[memoryStore.metrics["m1"].offset]
h1b2 := h1.metrics[memoryStore.metrics["m2"].offset]
if h1b1.data[0] != 1.0 || h1b2.data[0] != 2.0 {
log.Fatal()
}
h2 := memoryStore.GetLevel([]string{"ctest", "htest2"})
h2b3 := h2.metrics[memoryStore.metrics["m3"].offset]
if h2b3.data[0] != 3.0 {
log.Fatal()
}
h2c1 := memoryStore.GetLevel([]string{"ctest", "htest2", "core1"})
h2c1b4 := h2c1.metrics[memoryStore.metrics["m4"].offset]
h2c2 := memoryStore.GetLevel([]string{"ctest", "htest2", "core2"})
h2c2b4 := h2c2.metrics[memoryStore.metrics["m4"].offset]
if h2c1b4.data[0] != 4.0 || h2c2b4.data[0] != 5.0 {
log.Fatal()
}
}
*/
func BenchmarkLineprotocolDecoder(b *testing.B) {
b.StopTimer()
memoryStore := memstore.NewMemoryStore(map[string]types.MetricConfig{
"nm1": {Frequency: 1},
"nm2": {Frequency: 1},
"nm3": {Frequency: 1},
"nm4": {Frequency: 1},
"nm5": {Frequency: 1},
"nm6": {Frequency: 1},
"nm7": {Frequency: 1},
"nm8": {Frequency: 1},
"nm9": {Frequency: 1},
"cm1": {Frequency: 1},
"cm2": {Frequency: 1},
"cm3": {Frequency: 1},
"cm4": {Frequency: 1},
"cm5": {Frequency: 1},
"cm6": {Frequency: 1},
"cm7": {Frequency: 1},
"cm8": {Frequency: 1},
"cm9": {Frequency: 1},
})
for i := 0; i < b.N; i++ {
data := []byte(BenchmarkLineBatch)
data = bytes.ReplaceAll(data, []byte("123456789"), []byte(strconv.Itoa(i+123456789)))
dec := lineprotocol.NewDecoderWithBytes(data)
b.StartTimer()
if err := decodeLine(memoryStore, dec, "ctest"); err != nil {
b.Fatal(err)
}
b.StopTimer()
}
}
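A hypothetical minimal test for tag ordering could complement the benchmark: the "type-id" tag arriving before the "type" tag forces decodeLine through reorder. It only checks that decoding succeeds, since the chunk internals are not exported.

// Hypothetical test sketch (not part of this commit).
func TestDecodeLineTagOrder(t *testing.T) {
    ms := memstore.NewMemoryStore(map[string]types.MetricConfig{
        "m1": {Frequency: 1},
    })
    line := "m1,cluster=ctest,hostname=htest1,type-id=0,type=core value=42 123456789\n"
    dec := lineprotocol.NewDecoderWithBytes([]byte(line))
    if err := decodeLine(ms, dec, "ctest"); err != nil {
        t.Fatal(err)
    }
}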

View File

@@ -1,29 +1,33 @@
package memstore
import "sync"
import (
"sync"
"github.com/ClusterCockpit/cc-metric-store/internal/types"
)
// Could also be called "node" as this forms a node in a tree structure.
// Called level because "node" might be confusing here.
// Can be either a leaf or an inner node. In this tree structure, inner nodes can
// also hold data (in `metrics`).
type level struct {
type Level struct {
lock sync.RWMutex
metrics []*chunk // Every level can store metrics.
sublevels map[string]*level // Lower levels.
sublevels map[string]*Level // Lower levels.
}
// Find the correct level for the given selector, creating it if
// it does not exist. Example selector in the context of the
// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }.
// This function would probably benefit a lot from `Level.sublevels` being a `sync.Map`.
func (l *level) findLevelOrCreate(selector []string, nMetrics int) *level {
func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
if len(selector) == 0 {
return l
}
// Allow concurrent reads:
l.lock.RLock()
var child *level
var child *Level
var ok bool
if l.sublevels == nil {
// sublevels map needs to be created...
@@ -48,7 +52,7 @@ func (l *level) findLevelOrCreate(selector []string, nMetrics int) *level {
}
}
child = &level{
child = &Level{
metrics: make([]*chunk, nMetrics),
sublevels: nil,
}
@@ -56,13 +60,13 @@ func (l *level) findLevelOrCreate(selector []string, nMetrics int) *level {
if l.sublevels != nil {
l.sublevels[selector[0]] = child
} else {
l.sublevels = map[string]*level{selector[0]: child}
l.sublevels = map[string]*Level{selector[0]: child}
}
l.lock.Unlock()
return child.findLevelOrCreate(selector[1:], nMetrics)
}
func (l *level) free(t int64) (delme bool, n int) {
func (l *Level) free(t int64) (delme bool, n int) {
l.lock.Lock()
defer l.lock.Unlock()
@@ -89,14 +93,41 @@ func (l *level) free(t int64) (delme bool, n int) {
}
type MemoryStore struct {
root level // root of the tree structure
root Level // root of the tree structure
// TODO...
metrics map[string]int // TODO...
metrics map[string]types.MetricConfig // TODO...
}
func (ms *MemoryStore) GetOffset(metric string) int {
return -1 // TODO!
// Return a new, initialized instance of a MemoryStore.
// Will panic if values in the metric configurations are invalid.
func NewMemoryStore(metrics map[string]types.MetricConfig) *MemoryStore {
offset := 0
for key, config := range metrics {
if config.Frequency == 0 {
panic("invalid frequency")
}
metrics[key] = types.MetricConfig{
Frequency: config.Frequency,
Aggregation: config.Aggregation,
Offset: offset,
}
offset += 1
}
return &MemoryStore{
root: Level{
metrics: make([]*chunk, len(metrics)),
sublevels: make(map[string]*Level),
},
metrics: metrics,
}
}
func (ms *MemoryStore) GetMetricConf(metric string) (types.MetricConfig, bool) {
conf, ok := ms.metrics[metric]
return conf, ok
}
func (ms *MemoryStore) GetMetricForOffset(offset int) string {
@@ -106,3 +137,37 @@ func (ms *MemoryStore) GetMetricForOffset(offset int) string {
func (ms *MemoryStore) MinFrequency() int64 {
return 10 // TODO
}
func (m *MemoryStore) GetLevel(selector []string) *Level {
return m.root.findLevelOrCreate(selector, len(m.metrics))
}
func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []types.Metric) error {
l = l.findLevelOrCreate(selector, len(m.metrics))
l.lock.Lock()
defer l.lock.Unlock()
for _, metric := range metrics {
if metric.Conf.Frequency == 0 {
continue
}
c := l.metrics[metric.Conf.Offset]
if c == nil {
// First write to this metric and level
c = newChunk(ts, metric.Conf.Frequency)
l.metrics[metric.Conf.Offset] = c
}
nc, err := c.write(ts, metric.Value)
if err != nil {
return err
}
// Last write started a new chunk...
if c != nc {
l.metrics[metric.Conf.Offset] = nc
}
}
return nil
}
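Taken together, a caller inside this package could use the new API roughly as follows; the metric name, selector strings and value are placeholders, and "time" would need to be imported.

// Hypothetical usage sketch (not part of this commit).
func exampleWrite() error {
    ms := NewMemoryStore(map[string]types.MetricConfig{
        "flops": {Frequency: 10},
    })
    conf, _ := ms.GetMetricConf("flops")
    host := ms.GetLevel([]string{"emmy", "host123"})
    return ms.WriteToLevel(host, []string{"cpu0"}, time.Now().Unix(),
        []types.Metric{{Name: "flops", Value: types.Float(42.0), Conf: conf}})
}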

View File

@@ -31,7 +31,7 @@ func (ms *MemoryStore) SaveCheckpoint(from, to int64, w io.Writer) error {
return nil
}
func (l *level) saveCheckpoint(ms *MemoryStore, from, to int64, w io.Writer, buf []byte, metricsbuf []types.Float) ([]byte, error) {
func (l *Level) saveCheckpoint(ms *MemoryStore, from, to int64, w io.Writer, buf []byte, metricsbuf []types.Float) ([]byte, error) {
var err error
l.lock.RLock()
defer l.lock.RUnlock()
@@ -112,7 +112,7 @@ func (ms *MemoryStore) LoadCheckpoint(r io.Reader) error {
}
// Blocks all other accesses for this level and all its sublevels!
func (l *level) loadCheckpoint(ms *MemoryStore, r io.Reader, buf []byte) error {
func (l *Level) loadCheckpoint(ms *MemoryStore, r io.Reader, buf []byte) error {
l.lock.Lock()
defer l.lock.Unlock()
@@ -158,8 +158,8 @@ func (l *level) loadCheckpoint(ms *MemoryStore, r io.Reader, buf []byte) error {
return fmt.Errorf("loading metric %#v: %w", key, err)
}
offset := ms.GetOffset(key)
if offset == -1 {
metricConf, ok := ms.GetMetricConf(key)
if !ok {
// Skip unknown metrics
ReleaseBytes(bytes)
continue
@@ -175,7 +175,7 @@ func (l *level) loadCheckpoint(ms *MemoryStore, r io.Reader, buf []byte) error {
checkpointed: true,
}
if prevchunk := l.metrics[offset]; prevchunk != nil {
if prevchunk := l.metrics[metricConf.Offset]; prevchunk != nil {
if prevchunk.end() > chunk.start {
return fmt.Errorf(
"loading metric %#v: loaded checkpoint overlaps with other chunks or is not loaded in correct order (%d - %d)",
@@ -183,9 +183,9 @@ func (l *level) loadCheckpoint(ms *MemoryStore, r io.Reader, buf []byte) error {
}
prevchunk.next = chunk
chunk.prev = prevchunk
l.metrics[offset] = chunk
l.metrics[metricConf.Offset] = chunk
} else {
l.metrics[offset] = chunk
l.metrics[metricConf.Offset] = chunk
}
}
@@ -198,11 +198,11 @@ func (l *level) loadCheckpoint(ms *MemoryStore, r io.Reader, buf []byte) error {
return err
}
if l.sublevels == nil {
l.sublevels = make(map[string]*level, n)
l.sublevels = make(map[string]*Level, n)
}
sublevel, ok := l.sublevels[key]
if !ok {
sublevel = &level{}
sublevel = &Level{}
}
if err = sublevel.loadCheckpoint(ms, r, buf); err != nil {

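Based on the two signatures touched above, a checkpoint round trip could be sketched like this; the file path is a placeholder and "os" would need to be imported.

// Hypothetical round-trip sketch (not part of this commit).
func checkpointRoundTripExample(ms *MemoryStore, from, to int64) error {
    f, err := os.Create("/tmp/checkpoint.bin") // placeholder path
    if err != nil {
        return err
    }
    if err := ms.SaveCheckpoint(from, to, f); err != nil {
        f.Close()
        return err
    }
    if err := f.Close(); err != nil {
        return err
    }
    g, err := os.Open("/tmp/checkpoint.bin")
    if err != nil {
        return err
    }
    defer g.Close()
    return ms.LoadCheckpoint(g)
}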
View File

@@ -1,8 +1,58 @@
package types
import (
"encoding/json"
"fmt"
)
type Stats struct {
Samples int `json:"samples"`
Min Float `json:"min"`
Avg Float `json:"avg"`
Max Float `json:"max"`
}
type MetricConfig struct {
// Interval in seconds at which measurements will arrive.
Frequency int64 `json:"frequency"`
// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
Aggregation AggregationStrategy `json:"aggregation"`
// Private, used internally...
Offset int
}
type Metric struct {
Name string
Value Float
Conf MetricConfig
}
// For aggregation over multiple values at different cpus/sockets/..., not time!
type AggregationStrategy int
const (
NoAggregation AggregationStrategy = iota
SumAggregation
AvgAggregation
)
func (as *AggregationStrategy) UnmarshalJSON(data []byte) error {
var str string
if err := json.Unmarshal(data, &str); err != nil {
return err
}
switch str {
case "":
*as = NoAggregation
case "sum":
*as = SumAggregation
case "avg":
*as = AvgAggregation
default:
return fmt.Errorf("invalid aggregation strategy: %#v", str)
}
return nil
}
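As a small sketch of how the custom unmarshaller behaves (not part of this commit): "avg" maps to AvgAggregation, an empty string to NoAggregation, anything else is an error; the frequency value is arbitrary.

// Illustrative only.
func exampleMetricConfig() (MetricConfig, error) {
    var mc MetricConfig
    err := json.Unmarshal([]byte(`{"frequency": 60, "aggregation": "avg"}`), &mc)
    return mc, err
}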