Intermediate state

Not working yet
2025-10-24 14:55:07 +02:00 · 2024-05-03 21:08:01 +02:00
parent 61e9191d4d
commit e1e6694656
14 changed files with 193 additions and 1330 deletions
--- a/internal/api/api.go
+++ b/internal/api/api.go
@@ -0,0 +1,424 @@
+package api
+
+import (
+	"bufio"
+	"context"
+	"crypto/ed25519"
+	"encoding/base64"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"math"
+	"net/http"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/golang-jwt/jwt/v4"
+	"github.com/gorilla/mux"
+	"github.com/influxdata/line-protocol/v2/lineprotocol"
+)
+
+// ApiMetricData is the JSON shape of a single time series in a query
+// response: the samples themselves plus optional summary statistics.
+type ApiMetricData struct {
+	// Error, when non-nil, carries the message of the error that prevented
+	// this series from being read (handleQuery sets it from memoryStore.Read).
+	Error *string    `json:"error,omitempty"`
+	// From and To are filled by handleQuery from the values returned by
+	// memoryStore.Read (presumably unix timestamps in seconds -- TODO confirm).
+	From  int64      `json:"from"`
+	To    int64      `json:"to"`
+	// Data holds the samples; handleQuery clears it when "with-data" is false.
+	Data  FloatArray `json:"data,omitempty"`
+	// Avg, Min and Max are computed by AddStats over the non-NaN samples.
+	Avg   Float      `json:"avg"`
+	Min   Float      `json:"min"`
+	Max   Float      `json:"max"`
+}
+
+// AddStats fills Avg, Min and Max from the samples in Data. NaN samples are
+// skipped; if no usable sample remains, all three statistics become NaN.
+//
+// TODO: Optimize this, just like the stats endpoint!
+func (data *ApiMetricData) AddStats() {
+	count := 0
+	total, lowest, highest := 0.0, math.MaxFloat64, -math.MaxFloat64
+	for i := range data.Data {
+		if data.Data[i].IsNaN() {
+			continue
+		}
+
+		v := float64(data.Data[i])
+		count++
+		total += v
+		lowest = math.Min(lowest, v)
+		highest = math.Max(highest, v)
+	}
+
+	// Without a single valid sample there is nothing meaningful to report.
+	if count == 0 {
+		data.Avg, data.Min, data.Max = NaN, NaN, NaN
+		return
+	}
+
+	data.Avg = Float(total / float64(count))
+	data.Min = Float(lowest)
+	data.Max = Float(highest)
+}
+
+// ScaleBy multiplies the statistics and every sample by f. Factors of 0 and 1
+// are treated as "no scaling requested" and leave the data untouched.
+func (data *ApiMetricData) ScaleBy(f Float) {
+	switch f {
+	case 0, 1:
+		return
+	}
+
+	data.Avg, data.Min, data.Max = data.Avg*f, data.Min*f, data.Max*f
+	for i := range data.Data {
+		data.Data[i] *= f
+	}
+}
+
+// PadDataWithNull prepends NaN samples to Data when the series starts later
+// than the requested `from`, one NaN per missing step of the metric's
+// configured frequency. Unknown metrics are left untouched. Only the front is
+// padded; `to` is currently not used.
+func (data *ApiMetricData) PadDataWithNull(from, to int64, metric string) {
+	minfo, known := memoryStore.metrics[metric]
+	if !known {
+		return
+	}
+
+	// Compare in units of whole frequency steps.
+	firstStep := data.From / minfo.Frequency
+	wantedStep := from / minfo.Frequency
+	if firstStep <= wantedStep {
+		return
+	}
+
+	missing := int(firstStep - wantedStep)
+	padded := make([]Float, missing, missing+len(data.Data))
+	for i := range padded {
+		padded[i] = NaN
+	}
+	data.Data = append(padded, data.Data...)
+}
+
+// handleFree handles requests that release buffers from the memory store.
+// The required query parameter 'to' is an integer timestamp; buffers are
+// freed up to the smaller of 'to' and the last checkpoint time. The POST body
+// is a JSON array of selectors (each an array of strings). The response body
+// reports how many buffers were freed.
+func handleFree(rw http.ResponseWriter, r *http.Request) {
+	toParam := r.URL.Query().Get("to")
+	if len(toParam) == 0 {
+		http.Error(rw, "'to' is a required query parameter", http.StatusBadRequest)
+		return
+	}
+
+	to, parseErr := strconv.ParseInt(toParam, 10, 64)
+	if parseErr != nil {
+		http.Error(rw, parseErr.Error(), http.StatusBadRequest)
+		return
+	}
+
+	// TODO: lastCheckpoint might be modified by different go-routines.
+	// Load it using the sync/atomic package?
+	// Never free anything that is newer than the last checkpoint.
+	freeUpTo := to
+	if checkpointed := lastCheckpoint.Unix(); checkpointed <= to {
+		freeUpTo = checkpointed
+	}
+
+	if r.Method != http.MethodPost {
+		http.Error(rw, "Method Not Allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	var selectors [][]string
+	if decodeErr := json.NewDecoder(r.Body).Decode(&selectors); decodeErr != nil {
+		http.Error(rw, decodeErr.Error(), http.StatusBadRequest)
+		return
+	}
+
+	freed := 0
+	for i := range selectors {
+		count, freeErr := memoryStore.Free(selectors[i], freeUpTo)
+		if freeErr != nil {
+			http.Error(rw, freeErr.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		freed += count
+	}
+
+	rw.WriteHeader(http.StatusOK)
+	fmt.Fprintf(rw, "buffers freed: %d\n", freed)
+}
+
+// handleWrite accepts a POST body in InfluxDB line protocol and hands it to
+// decodeLine. The optional query parameter 'cluster' is passed along as the
+// default cluster for lines that do not carry a cluster tag.
+//
+// When a debug dump target is configured (debugDump != io.Discard) the raw
+// body is additionally appended to it, preceded by a timestamp header.
+// Dumping is best-effort: failures are logged and the request is still
+// processed normally.
+func handleWrite(rw http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodPost {
+		http.Error(rw, "Method Not Allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	body, err := io.ReadAll(r.Body)
+	if err != nil {
+		log.Printf("error while reading request body: %s", err.Error())
+		http.Error(rw, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	if debugDump != io.Discard {
+		now := time.Now()
+		msg := make([]byte, 0, 512)
+		msg = append(msg, "\n--- local unix time: "...)
+		msg = strconv.AppendInt(msg, now.Unix(), 10)
+		msg = append(msg, " ---\n"...)
+
+		// Hold the lock only while writing so that header and body of one
+		// request stay together, but decoding below is not serialized.
+		debugDumpLock.Lock()
+		if _, err := debugDump.Write(msg); err != nil {
+			log.Printf("error while writing to debug dump: %s", err.Error())
+		}
+		if _, err := debugDump.Write(body); err != nil {
+			log.Printf("error while writing to debug dump: %s", err.Error())
+		}
+		debugDumpLock.Unlock()
+		// Fall through: the dump is a side channel, the data must still be
+		// decoded and stored.
+	}
+
+	dec := lineprotocol.NewDecoderWithBytes(body)
+	if err := decodeLine(dec, r.URL.Query().Get("cluster")); err != nil {
+		log.Printf("/api/write error: %s", err.Error())
+		http.Error(rw, err.Error(), http.StatusBadRequest)
+		return
+	}
+	rw.WriteHeader(http.StatusOK)
+}
+
+// ApiQueryRequest is the JSON body accepted by handleQuery.
+type ApiQueryRequest struct {
+	// Cluster is the first selector element of every query in this request.
+	Cluster     string     `json:"cluster"`
+	// From and To are passed unchanged to memoryStore.Read.
+	From        int64      `json:"from"`
+	To          int64      `json:"to"`
+	// WithStats, WithData and WithPadding all default to true in handleQuery
+	// when they are absent from the request body.
+	WithStats   bool       `json:"with-stats"`
+	WithData    bool       `json:"with-data"`
+	WithPadding bool       `json:"with-padding"`
+	// Queries are the explicitly requested series.
+	Queries     []ApiQuery `json:"queries"`
+	// ForAllNodes, when non-nil, lists metric names; handleQuery appends one
+	// query per (child of Cluster, metric) pair to Queries.
+	ForAllNodes []string   `json:"for-all-nodes"`
+}
+
+// ApiQueryResponse is the JSON body written by handleQuery.
+type ApiQueryResponse struct {
+	// Queries echoes the queries that handleQuery generated for
+	// "for-all-nodes"; it is omitted when none were generated.
+	Queries []ApiQuery        `json:"queries,omitempty"`
+	// Results has one entry per processed query, in the same order; each
+	// entry has one ApiMetricData per selector built for that query.
+	Results [][]ApiMetricData `json:"results"`
+}
+
+// ApiQuery describes one requested metric on one host, optionally narrowed
+// down to (sub-)components of that host.
+type ApiQuery struct {
+	// Metric is the metric name passed to memoryStore.Read.
+	Metric      string   `json:"metric"`
+	// Hostname is the second selector element (after the cluster).
+	Hostname    string   `json:"host"`
+	// Aggregate makes handleQuery build a single selector covering all
+	// TypeIds/SubTypeIds instead of one selector per id combination.
+	Aggregate   bool     `json:"aggreg"`
+	// ScaleFactor, when non-zero, is applied to the result via ScaleBy.
+	ScaleFactor Float    `json:"scale-by,omitempty"`
+	// Type and TypeIds are concatenated (*Type + id) to form the third
+	// selector element; SubType and SubTypeIds likewise form the fourth.
+	Type        *string  `json:"type,omitempty"`
+	TypeIds     []string `json:"type-ids,omitempty"`
+	SubType     *string  `json:"subtype,omitempty"`
+	SubTypeIds  []string `json:"subtype-ids,omitempty"`
+}
+
+// handleQuery decodes an ApiQueryRequest from the request body, reads the
+// requested series from memoryStore and writes an ApiQueryResponse as JSON.
+//
+// A failure to read an individual series does not fail the request: the
+// error message is stored in that series' Error field instead. Only a
+// malformed request body results in a 400 response.
+func handleQuery(rw http.ResponseWriter, r *http.Request) {
+	var err error
+	// Stats, data and padding are opt-out: they stay enabled unless the
+	// request body explicitly sets them to false.
+	var req ApiQueryRequest = ApiQueryRequest{WithStats: true, WithData: true, WithPadding: true}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		http.Error(rw, err.Error(), http.StatusBadRequest)
+		return
+	}
+
+	response := ApiQueryResponse{
+		Results: make([][]ApiMetricData, 0, len(req.Queries)),
+	}
+	// Expand "for-all-nodes" into one query per (node, metric) pair. The
+	// generated queries are echoed in the response so the caller can match
+	// results to hosts.
+	if req.ForAllNodes != nil {
+		nodes := memoryStore.ListChildren([]string{req.Cluster})
+		for _, node := range nodes {
+			for _, metric := range req.ForAllNodes {
+				q := ApiQuery{
+					Metric:   metric,
+					Hostname: node,
+				}
+				req.Queries = append(req.Queries, q)
+				response.Queries = append(response.Queries, q)
+			}
+		}
+	}
+
+	for _, query := range req.Queries {
+		// Build the selectors for this query. Each selector yields one
+		// ApiMetricData in the result.
+		sels := make([]Selector, 0, 1)
+		if query.Aggregate || query.Type == nil {
+			// Single selector: either the whole host (no type given) or a
+			// group of type/subtype ids that is read as one series.
+			sel := Selector{{String: req.Cluster}, {String: query.Hostname}}
+			if query.Type != nil {
+				if len(query.TypeIds) == 1 {
+					sel = append(sel, SelectorElement{String: *query.Type + query.TypeIds[0]})
+				} else {
+					ids := make([]string, len(query.TypeIds))
+					for i, id := range query.TypeIds {
+						ids[i] = *query.Type + id
+					}
+					sel = append(sel, SelectorElement{Group: ids})
+				}
+
+				if query.SubType != nil {
+					if len(query.SubTypeIds) == 1 {
+						sel = append(sel, SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
+					} else {
+						ids := make([]string, len(query.SubTypeIds))
+						for i, id := range query.SubTypeIds {
+							ids[i] = *query.SubType + id
+						}
+						sel = append(sel, SelectorElement{Group: ids})
+					}
+				}
+			}
+			sels = append(sels, sel)
+		} else {
+			// No aggregation: one selector per type id, or per
+			// (type id, subtype id) combination when a subtype is given.
+			for _, typeId := range query.TypeIds {
+				if query.SubType != nil {
+					for _, subTypeId := range query.SubTypeIds {
+						sels = append(sels, Selector{
+							{String: req.Cluster},
+							{String: query.Hostname},
+							{String: *query.Type + typeId},
+							{String: *query.SubType + subTypeId},
+						})
+					}
+				} else {
+					sels = append(sels, Selector{
+						{String: req.Cluster},
+						{String: query.Hostname},
+						{String: *query.Type + typeId},
+					})
+				}
+			}
+		}
+
+		// log.Printf("query: %#v\n", query)
+		// log.Printf("sels: %#v\n", sels)
+
+		res := make([]ApiMetricData, 0, len(sels))
+		for _, sel := range sels {
+			data := ApiMetricData{}
+			data.Data, data.From, data.To, err = memoryStore.Read(sel, query.Metric, req.From, req.To)
+			// log.Printf("data: %#v, %#v, %#v, %#v", data.Data, data.From, data.To, err)
+			if err != nil {
+				// Report the failure in-band and carry on with the
+				// remaining selectors.
+				msg := err.Error()
+				data.Error = &msg
+				res = append(res, data)
+				continue
+			}
+
+			// Statistics are computed before scaling; ScaleBy scales the
+			// statistics together with the samples.
+			if req.WithStats {
+				data.AddStats()
+			}
+			if query.ScaleFactor != 0 {
+				data.ScaleBy(query.ScaleFactor)
+			}
+			if req.WithPadding {
+				data.PadDataWithNull(req.From, req.To, query.Metric)
+			}
+			// Drop the samples last so that the statistics above could
+			// still be computed from them.
+			if !req.WithData {
+				data.Data = nil
+			}
+			res = append(res, data)
+		}
+		response.Results = append(response.Results, res)
+	}
+
+	rw.Header().Set("Content-Type", "application/json")
+	bw := bufio.NewWriter(rw)
+	defer bw.Flush()
+	if err := json.NewEncoder(bw).Encode(response); err != nil {
+		log.Print(err)
+		return
+	}
+}
+
+// authentication wraps next in a handler that only lets requests through
+// which carry an "Authorization: Bearer <JWT>" header whose token is signed
+// with EdDSA and verifies against publicKey. Successfully parsed tokens are
+// remembered by their raw string so later requests can skip signature
+// verification; a remembered token is re-parsed once its claims are no
+// longer valid. Entries are never removed from the cache.
+func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler {
+	const scheme = "Bearer "
+
+	var (
+		mu       sync.RWMutex
+		verified = map[string]*jwt.Token{}
+	)
+
+	// Only EdDSA tokens are accepted; every token is checked against the
+	// same public key.
+	keyFunc := func(t *jwt.Token) (interface{}, error) {
+		if t.Method != jwt.SigningMethodEdDSA {
+			return nil, errors.New("only Ed25519/EdDSA supported")
+		}
+
+		return publicKey, nil
+	}
+
+	return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
+		header := r.Header.Get("Authorization")
+		if !strings.HasPrefix(header, scheme) {
+			http.Error(rw, "Use JWT Authentication", http.StatusUnauthorized)
+			return
+		}
+		raw := header[len(scheme):]
+
+		mu.RLock()
+		cached, hit := verified[raw]
+		mu.RUnlock()
+
+		if !hit || cached.Claims.Valid() != nil {
+			// The actual token is ignored for now.
+			// In case expiration and so on are specified, the Parse function
+			// already returns an error for expired tokens.
+			parsed, err := jwt.Parse(raw, keyFunc)
+			if err != nil {
+				http.Error(rw, err.Error(), http.StatusUnauthorized)
+				return
+			}
+
+			mu.Lock()
+			verified[raw] = parsed
+			mu.Unlock()
+		}
+
+		// Let request through...
+		next.ServeHTTP(rw, r)
+	})
+}
+
+func StartApiServer(ctx context.Context, httpConfig *HttpConfig) error {
+	r := mux.NewRouter()
+
+	r.HandleFunc("/api/free", handleFree)
+	r.HandleFunc("/api/write", handleWrite)
+	r.HandleFunc("/api/query", handleQuery)
+	r.HandleFunc("/api/debug", func(rw http.ResponseWriter, r *http.Request) {
+		raw := r.URL.Query().Get("selector")
+		selector := []string{}
+		if len(raw) != 0 {
+			selector = strings.Split(raw, ":")
+		}
+
+		if err := memoryStore.DebugDump(bufio.NewWriter(rw), selector); err != nil {
+			rw.WriteHeader(http.StatusBadRequest)
+			rw.Write([]byte(err.Error()))
+		}
+	})
+
+	server := &http.Server{
+		Handler:      r,
+		Addr:         httpConfig.Address,
+		WriteTimeout: 30 * time.Second,
+		ReadTimeout:  30 * time.Second,
+	}
+
+	if len(conf.JwtPublicKey) > 0 {
+		buf, err := base64.StdEncoding.DecodeString(conf.JwtPublicKey)
+		if err != nil {
+			return err
+		}
+		publicKey := ed25519.PublicKey(buf)
+		server.Handler = authentication(server.Handler, publicKey)
+	}
+
+	go func() {
+		if httpConfig.CertFile != "" && httpConfig.KeyFile != "" {
+			log.Printf("API https endpoint listening on '%s'\n", httpConfig.Address)
+			err := server.ListenAndServeTLS(httpConfig.CertFile, httpConfig.KeyFile)
+			if err != nil && err != http.ErrServerClosed {
+				log.Println(err)
+			}
+		} else {
+			log.Printf("API http endpoint listening on '%s'\n", httpConfig.Address)
+			err := server.ListenAndServe()
+			if err != nil && err != http.ErrServerClosed {
+				log.Println(err)
+			}
+		}
+	}()
+
+	for {
+		<-ctx.Done()
+		err := server.Shutdown(context.Background())
+		log.Println("API server shut down")
+		return err
+	}
+}
--- a/internal/api/lineprotocol.go
+++ b/internal/api/lineprotocol.go
@@ -0,0 +1,316 @@
+package api
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"net"
+	"sync"
+	"time"
+
+	"github.com/ClusterCockpit/cc-metric-store/internal/config"
+	"github.com/ClusterCockpit/cc-metric-store/internal/memstore"
+	"github.com/ClusterCockpit/cc-metric-store/internal/util"
+	"github.com/influxdata/line-protocol/v2/lineprotocol"
+	"github.com/nats-io/nats.go"
+)
+
+// Metric is a single decoded measurement handed to the memory store.
+type Metric struct {
+	// Name of the metric.
+	Name  string
+	// Value decoded from the line's "value" field (see decodeLine).
+	Value util.Float
+	// mc is the configuration looked up for this metric in decodeLine.
+	mc    config.MetricConfig
+}
+
+// ReceiveRaw accepts connections on listener and feeds the line protocol
+// data arriving on each of them to handleLine (with "default" as the cluster
+// default). Currently unused, could be used to send messages via raw TCP.
+//
+// Each connection is handled in its own goroutine. This is a blocking
+// function: it returns nil once ctx has been cancelled, the listener has been
+// closed and all connection goroutines have finished. If handleLine fails for
+// a connection, the error is logged and reported back to the peer as an
+// "error: ..." line before the connection is closed.
+func ReceiveRaw(ctx context.Context, listener net.Listener, handleLine func(*lineprotocol.Decoder, string) error) error {
+	var wg sync.WaitGroup
+
+	// Closing the listener on cancellation unblocks Accept below.
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		<-ctx.Done()
+		if err := listener.Close(); err != nil {
+			log.Printf("listener.Close(): %s", err.Error())
+		}
+	}()
+
+	for {
+		conn, err := listener.Accept()
+		if err != nil {
+			if errors.Is(err, net.ErrClosed) || ctx.Err() != nil {
+				break
+			}
+
+			// There is no connection to serve after a failed Accept; try
+			// again instead of continuing with a nil conn.
+			log.Printf("listener.Accept(): %s", err.Error())
+			continue
+		}
+
+		// Two goroutines per connection: the handler and a watcher that
+		// closes the connection when either the handler is done or ctx is
+		// cancelled.
+		wg.Add(2)
+		go func() {
+			defer wg.Done()
+			defer conn.Close()
+
+			dec := lineprotocol.NewDecoder(conn)
+			connctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+			go func() {
+				defer wg.Done()
+				select {
+				case <-connctx.Done():
+					conn.Close()
+				case <-ctx.Done():
+					conn.Close()
+				}
+			}()
+
+			if err := handleLine(dec, "default"); err != nil {
+				if errors.Is(err, net.ErrClosed) {
+					return
+				}
+
+				log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
+				// Length 0, capacity 128: the reply must start with
+				// "error: ", not with 128 zero bytes.
+				errmsg := make([]byte, 0, 128)
+				errmsg = append(errmsg, `error: `...)
+				errmsg = append(errmsg, err.Error()...)
+				errmsg = append(errmsg, '\n')
+				if _, werr := conn.Write(errmsg); werr != nil {
+					log.Printf("%s: %s", conn.RemoteAddr().String(), werr.Error())
+				}
+			}
+		}()
+	}
+
+	wg.Wait()
+	return nil
+}
+
+// ReceiveNats connects to the NATS server described by conf and subscribes
+// to every subject listed in conf.Subscriptions. This is a blocking function.
+// handleLine is called for each message received via NATS, with the
+// subscription's ClusterTag as the default cluster.
+//
+// If workers is greater than one, messages are queued and decoded by that
+// many worker goroutines shared by all subscriptions; otherwise they are
+// decoded directly in the NATS callback. Cancel ctx for graceful termination:
+// the subscriptions are removed, queued messages are processed and the
+// connection is closed. An error is returned if connecting or subscribing
+// fails.
+func ReceiveNats(conf *config.NatsConfig, handleLine func(*lineprotocol.Decoder, string) error, workers int, ctx context.Context) error {
+	var opts []nats.Option
+	if conf.Username != "" && conf.Password != "" {
+		opts = append(opts, nats.UserInfo(conf.Username, conf.Password))
+	}
+
+	nc, err := nats.Connect(conf.Address, opts...)
+	if err != nil {
+		return err
+	}
+	defer nc.Close()
+
+	// A queued message has to remember which subscription it came from: all
+	// workers read from the same queue, so the cluster tag cannot be tied to
+	// the worker.
+	type taggedMsg struct {
+		msg        *nats.Msg
+		clusterTag string
+	}
+
+	handle := func(data []byte, clusterTag string) {
+		dec := lineprotocol.NewDecoderWithBytes(data)
+		if err := handleLine(dec, clusterTag); err != nil {
+			log.Printf("error: %s\n", err.Error())
+		}
+	}
+
+	var wg sync.WaitGroup
+	var subs []*nats.Subscription
+	var msgs chan taggedMsg
+
+	// One worker pool for all subscriptions.
+	if workers > 1 {
+		msgs = make(chan taggedMsg, workers*2)
+		wg.Add(workers)
+		for i := 0; i < workers; i++ {
+			go func() {
+				defer wg.Done()
+				for m := range msgs {
+					handle(m.msg.Data, m.clusterTag)
+				}
+			}()
+		}
+	}
+
+	// stop removes all subscriptions established so far and waits for the
+	// workers to finish the messages that are already queued.
+	// NOTE(review): this assumes no subscription callback is still sending
+	// after Unsubscribe returned (a send on the closed queue would panic) --
+	// confirm against the nats.go documentation.
+	stop := func() {
+		for _, sub := range subs {
+			if err := sub.Unsubscribe(); err != nil {
+				log.Printf("NATS unsubscribe failed: %s", err.Error())
+			}
+		}
+		if msgs != nil {
+			close(msgs)
+		}
+		wg.Wait()
+	}
+
+	for _, sc := range conf.Subscriptions {
+		clusterTag := sc.ClusterTag
+		var sub *nats.Subscription
+		if msgs != nil {
+			sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
+				msgs <- taggedMsg{msg: m, clusterTag: clusterTag}
+			})
+		} else {
+			sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
+				handle(m.Data, clusterTag)
+			})
+		}
+
+		if err != nil {
+			// Do not leave workers and earlier subscriptions behind.
+			stop()
+			return err
+		}
+		log.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address)
+		subs = append(subs, sub)
+	}
+
+	<-ctx.Done()
+	stop()
+
+	nc.Close()
+	log.Println("NATS connection closed")
+	return nil
+}
+
+// reorder returns a slice holding `prefix` followed by the contents of `buf`.
+// If `buf` has enough spare capacity, the work is done in place in `buf`'s
+// backing array; otherwise a new slice is allocated. `prefix` and `buf` are
+// expected not to overlap.
+func reorder(buf, prefix []byte) []byte {
+	prefixLen, bufLen := len(prefix), len(buf)
+	total := prefixLen + bufLen
+	if total > cap(buf) {
+		// The full slice expression caps prefix at its length, so append
+		// cannot write into memory behind prefix.
+		return append(prefix[:prefixLen:prefixLen], buf...)
+	}
+
+	buf = buf[:total]
+	// Shift the old contents back (copy handles the overlap), then put the
+	// prefix into the gap at the front.
+	copy(buf[prefixLen:], buf[:bufLen])
+	copy(buf, prefix)
+	return buf
+}
+
+// decodeLine decodes lines using dec and makes write calls to the MemoryStore.
+// If a line is missing its cluster tag, clusterDefault is used as default.
+//
+// Lines for metrics that are not configured in the store are skipped. Of the
+// tags, "cluster", "hostname"/"host", "type", "type-id", "subtype" and
+// "stype-id" are evaluated, all others are ignored. Type and type-id (and
+// likewise subtype and stype-id) are concatenated into one selector element.
+// The only accepted field is "value" (float, int or uint).
+//
+// Decoding stops at the first error, which is returned; lines decoded before
+// that have already been written.
+func decodeLine(dec *lineprotocol.Decoder, memoryStore *memstore.MemoryStore, clusterDefault string) error {
+	// Reduce allocations in loop:
+	t := time.Now()
+	metric, metricBuf := Metric{}, make([]byte, 0, 16)
+	selector := make([]string, 0, 4)
+	typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)
+
+	// Optimize for the case where all lines in a "batch" are about the same
+	// cluster and host. By using `WriteToLevel` (level = host), we do not need
+	// to take the root- and cluster-level lock as often.
+	var lvl *level = nil
+	var prevCluster, prevHost string = "", ""
+
+	var ok bool
+	for dec.Next() {
+		rawmeasurement, err := dec.Measurement()
+		if err != nil {
+			return err
+		}
+
+		// Needs to be copied because another call to dec.* would
+		// invalidate the returned slice.
+		metricBuf = append(metricBuf[:0], rawmeasurement...)
+
+		// The go compiler optimizes map[string(byteslice)] lookups:
+		metric.mc, ok = memoryStore.metrics[string(rawmeasurement)]
+		if !ok {
+			continue
+		}
+
+		// Reset the scratch buffers. This is a plain assignment on purpose:
+		// capacity grown while handling one line is kept for the next one.
+		typeBuf, subTypeBuf = typeBuf[:0], subTypeBuf[:0]
+		cluster, host := clusterDefault, ""
+		for {
+			key, val, err := dec.NextTag()
+			if err != nil {
+				return err
+			}
+			if key == nil {
+				break
+			}
+
+			// The go compiler optimizes string([]byte{...}) == "...":
+			switch string(key) {
+			case "cluster":
+				if string(val) == prevCluster {
+					cluster = prevCluster
+				} else {
+					cluster = string(val)
+					lvl = nil
+				}
+			case "hostname", "host":
+				if string(val) == prevHost {
+					host = prevHost
+				} else {
+					host = string(val)
+					lvl = nil
+				}
+			case "type":
+				if string(val) == "node" {
+					break
+				}
+
+				// We cannot be sure that the "type" tag comes before the "type-id" tag:
+				if len(typeBuf) == 0 {
+					typeBuf = append(typeBuf, val...)
+				} else {
+					typeBuf = reorder(typeBuf, val)
+				}
+			case "type-id":
+				typeBuf = append(typeBuf, val...)
+			case "subtype":
+				// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
+				if len(subTypeBuf) == 0 {
+					subTypeBuf = append(subTypeBuf, val...)
+				} else {
+					// The subtype has to be placed in front of the stype-id
+					// collected so far, not in front of the type buffer.
+					subTypeBuf = reorder(subTypeBuf, val)
+				}
+			case "stype-id":
+				subTypeBuf = append(subTypeBuf, val...)
+			default:
+				// Ignore unknown tags (cc-metric-collector might send us a unit for example that we do not need)
+				// return fmt.Errorf("unkown tag: '%s' (value: '%s')", string(key), string(val))
+			}
+		}
+
+		// If the cluster or host changed, the lvl was set to nil
+		if lvl == nil {
+			selector = selector[:2]
+			selector[0], selector[1] = cluster, host
+			lvl = memoryStore.GetLevel(selector)
+			prevCluster, prevHost = cluster, host
+		}
+
+		// subtypes:
+		selector = selector[:0]
+		if len(typeBuf) > 0 {
+			selector = append(selector, string(typeBuf)) // <- Allocation :(
+			if len(subTypeBuf) > 0 {
+				selector = append(selector, string(subTypeBuf))
+			}
+		}
+
+		for {
+			key, val, err := dec.NextField()
+			if err != nil {
+				return err
+			}
+
+			if key == nil {
+				break
+			}
+
+			if string(key) != "value" {
+				return fmt.Errorf("unkown field: '%s' (value: %#v)", string(key), val)
+			}
+
+			if val.Kind() == lineprotocol.Float {
+				metric.Value = util.Float(val.FloatV())
+			} else if val.Kind() == lineprotocol.Int {
+				metric.Value = util.Float(val.IntV())
+			} else if val.Kind() == lineprotocol.Uint {
+				metric.Value = util.Float(val.UintV())
+			} else {
+				return fmt.Errorf("unsupported value type in message: %s", val.Kind().String())
+			}
+		}
+
+		// t is passed as the default, so a line without a timestamp reuses
+		// the time of the previous line (initially: now).
+		if t, err = dec.Time(lineprotocol.Second, t); err != nil {
+			return err
+		}
+
+		if err := memoryStore.WriteToLevel(lvl, selector, t.Unix(), []Metric{metric}); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -0,0 +1,112 @@
+package config
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"os"
+)
+
+// AggregationStrategy describes how values of the same metric are combined.
+// For aggregation over multiple values at different cpus/sockets/..., not time!
+type AggregationStrategy int
+
+// The JSON spellings accepted by UnmarshalJSON are given in parentheses.
+const (
+	// NoAggregation is the zero value (JSON: "").
+	NoAggregation AggregationStrategy = iota
+	// SumAggregation (JSON: "sum").
+	SumAggregation
+	// AvgAggregation (JSON: "avg").
+	AvgAggregation
+)
+
+// UnmarshalJSON decodes an aggregation strategy from its JSON string form:
+// "" selects NoAggregation, "sum" SumAggregation and "avg" AvgAggregation.
+// Any other string, or a value that does not decode into a string, yields
+// an error and leaves the receiver unchanged.
+func (as *AggregationStrategy) UnmarshalJSON(data []byte) error {
+	var name string
+	if err := json.Unmarshal(data, &name); err != nil {
+		return err
+	}
+
+	strategies := map[string]AggregationStrategy{
+		"":    NoAggregation,
+		"sum": SumAggregation,
+		"avg": AvgAggregation,
+	}
+
+	strategy, known := strategies[name]
+	if !known {
+		return fmt.Errorf("invalid aggregation strategy: %#v", name)
+	}
+
+	*as = strategy
+	return nil
+}
+
+// MetricConfig is the per-metric configuration, keyed by metric name in
+// Config.Metrics.
+type MetricConfig struct {
+	// Interval in seconds at which measurements will arrive.
+	Frequency int64 `json:"frequency"`
+
+	// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
+	Aggregation AggregationStrategy `json:"aggregation"`
+
+	// Private, used internally...
+	// Not part of the JSON configuration (unexported field).
+	offset int
+}
+
+// HttpConfig holds the settings of the HTTP API (the "http-api" section of
+// the configuration file).
+type HttpConfig struct {
+	// Address to bind to, for example "0.0.0.0:8081"
+	Address string `json:"address"`
+
+	// If not the empty string, use https with this as the certificate file
+	CertFile string `json:"https-cert-file"`
+
+	// If not the empty string, use https with this as the key file
+	KeyFile string `json:"https-key-file"`
+}
+
+// NatsConfig describes one NATS server to receive metrics from (one entry
+// of the "nats" list in the configuration file).
+type NatsConfig struct {
+	// Address of the nats server
+	Address string `json:"address"`
+
+	// Username/Password, optional
+	Username string `json:"username"`
+	Password string `json:"password"`
+
+	// Subjects to subscribe to on this server.
+	Subscriptions []struct {
+		// Channel name
+		SubscribeTo string `json:"subscribe-to"`
+
+		// Allow lines without a cluster tag, use this as default, optional
+		ClusterTag string `json:"cluster-tag"`
+	} `json:"subscriptions"`
+}
+
+// Config mirrors the structure of the JSON configuration file read by
+// LoadConfiguration. Unknown keys in the file are rejected there.
+type Config struct {
+	// Metrics maps a metric name to its configuration.
+	Metrics     map[string]MetricConfig `json:"metrics"`
+	// HttpConfig holds the HTTP API settings.
+	HttpConfig  *HttpConfig             `json:"http-api"`
+	// Checkpoints configures checkpointing.
+	// NOTE(review): Interval and Restore are kept as strings, presumably
+	// durations parsed elsewhere -- confirm against their users.
+	Checkpoints struct {
+		Interval string `json:"interval"`
+		RootDir  string `json:"directory"`
+		Restore  string `json:"restore"`
+	} `json:"checkpoints"`
+	// Debug holds debugging switches.
+	Debug struct {
+		DumpToFile string `json:"dump-to-file"`
+		EnableGops bool   `json:"gops"`
+	} `json:"debug"`
+	RetentionInMemory string `json:"retention-in-memory"`
+	// JwtPublicKey is the public key used for JWT authentication.
+	JwtPublicKey      string `json:"jwt-public-key"`
+	// Archive configures archiving.
+	Archive           struct {
+		Interval      string `json:"interval"`
+		RootDir       string `json:"directory"`
+		DeleteInstead bool   `json:"delete-instead"`
+	} `json:"archive"`
+	// Nats lists the NATS servers to receive metrics from.
+	Nats []*NatsConfig `json:"nats"`
+}
+
+// LoadConfiguration reads the JSON configuration stored in file and returns
+// it. Keys that do not correspond to a field of Config are treated as an
+// error. Any failure (file cannot be opened, invalid JSON, unknown key)
+// terminates the program via log.Fatal.
+func LoadConfiguration(file string) Config {
+	f, err := os.Open(file)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+
+	decoder := json.NewDecoder(f)
+	decoder.DisallowUnknownFields()
+
+	var cfg Config
+	if err := decoder.Decode(&cfg); err != nil {
+		log.Fatal(err)
+	}
+	return cfg
+}
--- a/internal/memstore/archive.go
+++ b/internal/memstore/archive.go
@@ -0,0 +1,597 @@
+package memstore
+
+import (
+	"archive/zip"
+	"bufio"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"log"
+	"os"
+	"path"
+	"path/filepath"
+	"runtime"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+)
+
+// CheckpointMetrics is the serialized form of one metric's data inside a
+// checkpoint file.
+//
+// Whenever changed, update MarshalJSON as well!
+type CheckpointMetrics struct {
+	// Frequency is copied from the buffer's frequency when checkpointing.
+	Frequency int64   `json:"frequency"`
+	// Start is the start time reported by the buffer read.
+	Start     int64   `json:"start"`
+	// Data holds the samples; NaN values are written as JSON null.
+	Data      []Float `json:"data"`
+}
+
+// MarshalJSON writes the checkpoint metrics as a JSON object by hand.
+// As `Float` implements a custom MarshalJSON() function, serializing an
+// array of such types through encoding/json has more overhead than one
+// would assume (because of extra allocations, interfaces and so on).
+//
+// NaN samples become `null`; all other samples are written in fixed-point
+// notation with one digit after the decimal point.
+func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
+	out := make([]byte, 0, 128+len(cm.Data)*8)
+
+	out = append(out, `{"frequency":`...)
+	out = strconv.AppendInt(out, cm.Frequency, 10)
+	out = append(out, `,"start":`...)
+	out = strconv.AppendInt(out, cm.Start, 10)
+	out = append(out, `,"data":[`...)
+
+	for i := 0; i < len(cm.Data); i++ {
+		if i > 0 {
+			out = append(out, ',')
+		}
+
+		switch sample := cm.Data[i]; {
+		case sample.IsNaN():
+			out = append(out, `null`...)
+		default:
+			out = strconv.AppendFloat(out, float64(sample), 'f', 1, 32)
+		}
+	}
+
+	out = append(out, `]}`...)
+	return out, nil
+}
+
+// CheckpointFile is the JSON document written per host by a checkpoint. It
+// is recursive: Children holds the same structure for every child level.
+type CheckpointFile struct {
+	// From and To are the time bounds the checkpoint was requested for.
+	From     int64                         `json:"from"`
+	To       int64                         `json:"to"`
+	// Metrics maps a metric name to its data at this level.
+	Metrics  map[string]*CheckpointMetrics `json:"metrics"`
+	// Children maps a child level's name to its checkpoint data.
+	Children map[string]*CheckpointFile    `json:"children"`
+}
+
+// ErrNoNewData is returned by (*level).toCheckpoint when a level has nothing
+// to write. ToCheckpoint does not count it as an error.
+var ErrNoNewData error = errors.New("all data already archived")
+
+// NumWorkers is the number of goroutines ToCheckpoint and FromCheckpoint
+// use. The initial value is replaced in init() based on the CPU count.
+var NumWorkers int = 4
+
+func init() {
+	maxWorkers := 10
+	NumWorkers = runtime.NumCPU()/2 + 1
+	if NumWorkers > maxWorkers {
+		NumWorkers = maxWorkers
+	}
+}
+
+// ToCheckpoint writes one checkpoint per host level (the children of the
+// children of the root) for the time range from..to below dir, using
+// NumWorkers goroutines. It returns the number of hosts for which a
+// checkpoint was written and, if any host failed, an error stating how many
+// did. Hosts without new data (ErrNoNewData) count as neither.
+//
+// Metrics stored at the top 2 levels are not stored away (root and cluster)!
+// On a per-host basis a new JSON file is created. I have no idea if this will scale.
+// The good thing: Only a host at a time is locked, so this function can run
+// in parallel to writes/reads.
+func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
+	// Collect all host levels first, holding the root and cluster locks only
+	// while walking the tree, not while writing files.
+	levels := make([]*level, 0)
+	selectors := make([][]string, 0)
+	m.root.lock.RLock()
+	for sel1, l1 := range m.root.children {
+		l1.lock.RLock()
+		for sel2, l2 := range l1.children {
+			levels = append(levels, l2)
+			selectors = append(selectors, []string{sel1, sel2})
+		}
+		l1.lock.RUnlock()
+	}
+	m.root.lock.RUnlock()
+
+	type workItem struct {
+		level    *level
+		dir      string
+		selector []string
+	}
+
+	// Updated atomically by the workers.
+	n, errs := int32(0), int32(0)
+
+	var wg sync.WaitGroup
+	wg.Add(NumWorkers)
+	work := make(chan workItem, NumWorkers*2)
+	for worker := 0; worker < NumWorkers; worker++ {
+		go func() {
+			defer wg.Done()
+
+			for workItem := range work {
+				if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {
+					if err == ErrNoNewData {
+						continue
+					}
+
+					log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error())
+					atomic.AddInt32(&errs, 1)
+				} else {
+					atomic.AddInt32(&n, 1)
+				}
+			}
+		}()
+	}
+
+	for i := 0; i < len(levels); i++ {
+		// Target directory: <dir>/<cluster>/<host>
+		dir := path.Join(dir, path.Join(selectors[i]...))
+		work <- workItem{
+			level:    levels[i],
+			dir:      dir,
+			selector: selectors[i],
+		}
+	}
+
+	// No more work: let the workers drain the channel and exit.
+	close(work)
+	wg.Wait()
+
+	if errs > 0 {
+		return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n)
+	}
+	return int(n), nil
+}
+
+// toCheckpointFile builds the in-memory checkpoint representation of this
+// level and, recursively, of all its children for the time range from..to.
+// The level's read lock is held for the whole call. It returns nil, nil if
+// neither this level nor any child has anything to write.
+func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
+	l.lock.RLock()
+	defer l.lock.RUnlock()
+
+	retval := &CheckpointFile{
+		From:     from,
+		To:       to,
+		Metrics:  make(map[string]*CheckpointMetrics),
+		Children: make(map[string]*CheckpointFile),
+	}
+
+	for metric, minfo := range m.metrics {
+		// No buffer means no data for this metric at this level.
+		b := l.metrics[minfo.offset]
+		if b == nil {
+			continue
+		}
+
+		// Skip the metric if every buffer visited for the range is already
+		// marked as archived.
+		allArchived := true
+		b.iterFromTo(from, to, func(b *buffer) error {
+			if !b.archived {
+				allArchived = false
+			}
+			return nil
+		})
+
+		if allArchived {
+			continue
+		}
+
+		data := make([]Float, (to-from)/b.frequency+1)
+		data, start, end, err := b.read(from, to, data)
+		if err != nil {
+			return nil, err
+		}
+
+		// Everything past the range that was actually read is marked as
+		// missing.
+		for i := int((end - start) / b.frequency); i < len(data); i++ {
+			data[i] = NaN
+		}
+
+		retval.Metrics[metric] = &CheckpointMetrics{
+			Frequency: b.frequency,
+			Start:     start,
+			Data:      data,
+		}
+	}
+
+	for name, child := range l.children {
+		val, err := child.toCheckpointFile(from, to, m)
+		if err != nil {
+			return nil, err
+		}
+
+		// Children without data are left out of the file.
+		if val != nil {
+			retval.Children[name] = val
+		}
+	}
+
+	if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
+		return nil, nil
+	}
+
+	return retval, nil
+}
+
+func (l *level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
+	cf, err := l.toCheckpointFile(from, to, m)
+	if err != nil {
+		return err
+	}
+
+	if cf == nil {
+		return ErrNoNewData
+	}
+
+	filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
+	f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil && os.IsNotExist(err) {
+		err = os.MkdirAll(dir, 0755)
+		if err == nil {
+			f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644)
+		}
+	}
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	bw := bufio.NewWriter(f)
+	if err = json.NewEncoder(bw).Encode(cf); err != nil {
+		return err
+	}
+
+	return bw.Flush()
+}
+
+// FromCheckpoint loads the checkpoint files found below dir into the store.
+// dir is expected to contain one directory per cluster, each containing one
+// directory per host; anything else at these two levels is an error.
+//
+// Metrics stored at the top 2 levels are not loaded (root and cluster)!
+// This function can only be called once and before the very first write or read.
+// Different host's data is loaded to memory in parallel.
+//
+// It returns the number of files loaded. A failure to load a host's
+// checkpoints is logged and reported in the returned error, but does not
+// prevent the remaining hosts from being loaded.
+func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
+	var wg sync.WaitGroup
+	work := make(chan [2]string, NumWorkers)
+	// Updated atomically by the workers.
+	n, errs := int32(0), int32(0)
+
+	wg.Add(NumWorkers)
+	for worker := 0; worker < NumWorkers; worker++ {
+		go func() {
+			defer wg.Done()
+			for host := range work {
+				lvl := m.root.findLevelOrCreate(host[:], len(m.metrics))
+				nn, err := lvl.fromCheckpoint(filepath.Join(dir, host[0], host[1]), from, m)
+				if err != nil {
+					// Do not terminate the process here: the failure is
+					// counted and returned to the caller below.
+					log.Printf("error while loading checkpoints: %s", err.Error())
+					atomic.AddInt32(&errs, 1)
+				}
+				atomic.AddInt32(&n, int32(nn))
+			}
+		}()
+	}
+
+	// Walk <dir>/<cluster>/<host> and hand every host to the workers.
+	walkErr := func() error {
+		clustersDir, err := os.ReadDir(dir)
+		if err != nil {
+			return err
+		}
+
+		i := 0
+		for _, clusterDir := range clustersDir {
+			if !clusterDir.IsDir() {
+				return errors.New("expected only directories at first level of checkpoints/ directory")
+			}
+
+			hostsDir, err := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
+			if err != nil {
+				return err
+			}
+
+			for _, hostDir := range hostsDir {
+				if !hostDir.IsDir() {
+					return errors.New("expected only directories at second level of checkpoints/ directory")
+				}
+
+				i++
+				if i%NumWorkers == 0 && i > 100 {
+					// Forcing garbage collection runs here regularly during the loading of checkpoints
+					// will decrease the total heap size after loading everything back to memory is done.
+					// While loading data, the heap will grow fast, so the GC target size will double
+					// almost always. By forcing GCs here, we can keep it growing more slowly so that
+					// at the end, less memory is wasted.
+					runtime.GC()
+				}
+
+				work <- [2]string{clusterDir.Name(), hostDir.Name()}
+			}
+		}
+		return nil
+	}()
+
+	// Always let the workers finish what has been queued, even if the walk
+	// was aborted.
+	close(work)
+	wg.Wait()
+
+	if walkErr != nil {
+		return int(n), walkErr
+	}
+
+	if errs > 0 {
+		return int(n), fmt.Errorf("%d errors happened while loading checkpoints (%d files loaded)", errs, n)
+	}
+	return int(n), nil
+}
+
+// loadFile inserts the contents of a decoded checkpoint file into this level
+// and, recursively, into its children (which are created on demand). Each
+// metric becomes a new buffer marked as archived and is appended after the
+// buffer already stored for that metric, if any. Metrics that are not
+// configured in m are ignored. An error is returned if a buffer to be
+// appended starts before the one already present.
+func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
+	for name, metric := range cf.Metrics {
+		size := len(metric.Data)
+		loaded := &buffer{
+			frequency: metric.Frequency,
+			start:     metric.Start,
+			data:      metric.Data[0:size:size], // Space is wasted here :(
+			prev:      nil,
+			next:      nil,
+			archived:  true,
+		}
+		loaded.close()
+
+		minfo, configured := m.metrics[name]
+		if !configured {
+			continue
+			// return errors.New("Unkown metric: " + name)
+		}
+
+		// Chain the new buffer behind the current one; files have to be
+		// loaded in chronological order for this to work.
+		if current := l.metrics[minfo.offset]; current != nil {
+			if current.start > loaded.start {
+				return errors.New("wooops")
+			}
+
+			loaded.prev = current
+			current.next = loaded
+		}
+		l.metrics[minfo.offset] = loaded
+	}
+
+	if l.children == nil && len(cf.Children) > 0 {
+		l.children = make(map[string]*level)
+	}
+
+	for sel, childCf := range cf.Children {
+		child, exists := l.children[sel]
+		if !exists {
+			child = &level{
+				metrics:  make([]*buffer, len(m.metrics)),
+				children: nil,
+			}
+			l.children[sel] = child
+		}
+
+		if err := child.loadFile(childCf, m); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// fromCheckpoint loads the checkpoint files stored in dir into this level.
+// Sub-directories are loaded recursively into child levels named after the
+// directory. Of the "*.json" files in dir, those selected by findFiles for
+// the time `from` are decoded; files whose `to` lies before `from` are
+// skipped. Any other file in dir is an error. A missing dir is not an error.
+//
+// It returns the number of files loaded, also when an error is returned.
+func (l *level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, error) {
+	direntries, err := os.ReadDir(dir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return 0, nil
+		}
+
+		return 0, err
+	}
+
+	jsonFiles := make([]fs.DirEntry, 0)
+	filesLoaded := 0
+	for _, e := range direntries {
+		if e.IsDir() {
+			child := &level{
+				metrics:  make([]*buffer, len(m.metrics)),
+				children: make(map[string]*level),
+			}
+
+			files, err := child.fromCheckpoint(path.Join(dir, e.Name()), from, m)
+			filesLoaded += files
+			if err != nil {
+				return filesLoaded, err
+			}
+
+			// Writing to a nil map panics; create it on demand like
+			// loadFile does.
+			if l.children == nil {
+				l.children = make(map[string]*level)
+			}
+			l.children[e.Name()] = child
+		} else if strings.HasSuffix(e.Name(), ".json") {
+			jsonFiles = append(jsonFiles, e)
+		} else {
+			return filesLoaded, errors.New("unexpected file: " + dir + "/" + e.Name())
+		}
+	}
+
+	files, err := findFiles(jsonFiles, from, true)
+	if err != nil {
+		return filesLoaded, err
+	}
+
+	// readCheckpoint decodes one file and closes it right away, so that only
+	// one file is open at a time no matter how many checkpoints exist.
+	readCheckpoint := func(filename string) (*CheckpointFile, error) {
+		f, err := os.Open(path.Join(dir, filename))
+		if err != nil {
+			return nil, err
+		}
+		defer f.Close()
+
+		cf := &CheckpointFile{}
+		if err := json.NewDecoder(bufio.NewReader(f)).Decode(cf); err != nil {
+			return nil, err
+		}
+		return cf, nil
+	}
+
+	for _, filename := range files {
+		cf, err := readCheckpoint(filename)
+		if err != nil {
+			return filesLoaded, err
+		}
+
+		// The file ends before the time we are interested in.
+		if cf.To != 0 && cf.To < from {
+			continue
+		}
+
+		if err = l.loadFile(cf, m); err != nil {
+			return filesLoaded, err
+		}
+
+		filesLoaded += 1
+	}
+
+	return filesLoaded, nil
+}
+
+// findFiles selects checkpoint files from `direntries` relative to the
+// timestamp `t`. Every entry must be named `<unix-timestamp>.json`, otherwise
+// the parse error is returned. `direntries` is sorted in place by timestamp.
+//
+// With `findMoreRecentFiles` set (loading), the result contains every file
+// whose timestamp is not before `t`, the file whose range up to its successor
+// contains `t`, and always the newest file.
+//
+// Without it (archiving/deleting), the result contains only files whose
+// successor starts before `t`. The newest file has no successor and is
+// therefore never selected, so the most recent checkpoint stays in place.
+//
+// This will probably get very slow over time!
+// A solution could be some sort of an index file in which all other files
+// and the timespan they contain is listed.
+func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {
+	nums := make(map[string]int64, len(direntries))
+	for _, e := range direntries {
+		ts, err := strconv.ParseInt(strings.TrimSuffix(e.Name(), ".json"), 10, 64)
+		if err != nil {
+			return nil, err
+		}
+		nums[e.Name()] = ts
+	}
+
+	sort.Slice(direntries, func(i, j int) bool {
+		a, b := direntries[i], direntries[j]
+		return nums[a.Name()] < nums[b.Name()]
+	})
+
+	filenames := make([]string, 0)
+	for i, e := range direntries {
+		ts1 := nums[e.Name()]
+		isLast := i == len(direntries)-1
+
+		if findMoreRecentFiles {
+			if t <= ts1 || isLast {
+				filenames = append(filenames, e.Name())
+				continue
+			}
+
+			// ts1 < t holds here; keep the file if `t` falls before its successor.
+			if ts2 := nums[direntries[i+1].Name()]; t < ts2 {
+				filenames = append(filenames, e.Name())
+			}
+			continue
+		}
+
+		// Archive mode: without a successor the end of the newest file's
+		// range is unknown, so it is never considered to be older than `t`.
+		if isLast {
+			continue
+		}
+
+		if ts2 := nums[direntries[i+1].Name()]; ts2 < t {
+			filenames = append(filenames, e.Name())
+		}
+	}
+
+	return filenames, nil
+}
+
+// ArchiveCheckpoints walks the two directory levels below `checkpointsDir`
+// (cluster, then host) and hands every host directory to a pool of
+// `NumWorkers` goroutines, each of which runs `archiveCheckpoints` on it.
+// Checkpoint files older than `from` are zipped into the matching directory
+// below `archiveDir` and removed from `checkpointsDir`; with `deleteInstead`
+// they are only removed.
+//
+// The returned count is the number of files processed. Failures of single
+// hosts are logged and reported as one summarizing error.
+func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) {
+	clusterEntries, err := os.ReadDir(checkpointsDir)
+	if err != nil {
+		return 0, err
+	}
+
+	type workItem struct {
+		cdir, adir    string
+		cluster, host string
+	}
+
+	var (
+		wg               sync.WaitGroup
+		archived, failed int32
+	)
+	queue := make(chan workItem, NumWorkers)
+
+	wg.Add(NumWorkers)
+	for i := 0; i < NumWorkers; i++ {
+		go func() {
+			defer wg.Done()
+			for item := range queue {
+				count, err := archiveCheckpoints(item.cdir, item.adir, from, deleteInstead)
+				if err != nil {
+					log.Printf("error while archiving %s/%s: %s", item.cluster, item.host, err.Error())
+					atomic.AddInt32(&failed, 1)
+				}
+				atomic.AddInt32(&archived, int32(count))
+			}
+		}()
+	}
+
+	for _, clusterDir := range clusterEntries {
+		clusterName := clusterDir.Name()
+		hostEntries, readErr := os.ReadDir(filepath.Join(checkpointsDir, clusterName))
+		if readErr != nil {
+			// Remember the failure, but keep going with the other clusters.
+			err = readErr
+		}
+
+		for _, hostDir := range hostEntries {
+			hostName := hostDir.Name()
+			queue <- workItem{
+				cdir:    filepath.Join(checkpointsDir, clusterName, hostName),
+				adir:    filepath.Join(archiveDir, clusterName, hostName),
+				cluster: clusterName,
+				host:    hostName,
+			}
+		}
+	}
+
+	close(queue)
+	wg.Wait()
+
+	switch {
+	case err != nil:
+		return int(archived), err
+	case failed > 0:
+		return int(archived), fmt.Errorf("%d errors happend while archiving (%d successes)", failed, archived)
+	}
+	return int(archived), nil
+}
+
+// Helper function for `ArchiveCheckpoints`.
+func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) {
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		return 0, err
+	}
+
+	files, err := findFiles(entries, from, false)
+	if err != nil {
+		return 0, err
+	}
+
+	if deleteInstead {
+		n := 0
+		for _, checkpoint := range files {
+			filename := filepath.Join(dir, checkpoint)
+			if err = os.Remove(filename); err != nil {
+				return n, err
+			}
+			n += 1
+		}
+		return n, nil
+	}
+
+	filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
+	f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil && os.IsNotExist(err) {
+		err = os.MkdirAll(archiveDir, 0755)
+		if err == nil {
+			f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644)
+		}
+	}
+	if err != nil {
+		return 0, err
+	}
+	defer f.Close()
+	bw := bufio.NewWriter(f)
+	defer bw.Flush()
+	zw := zip.NewWriter(bw)
+	defer zw.Close()
+
+	n := 0
+	for _, checkpoint := range files {
+		filename := filepath.Join(dir, checkpoint)
+		r, err := os.Open(filename)
+		if err != nil {
+			return n, err
+		}
+		defer r.Close()
+
+		w, err := zw.Create(checkpoint)
+		if err != nil {
+			return n, err
+		}
+
+		if _, err = io.Copy(w, r); err != nil {
+			return n, err
+		}
+
+		if err = os.Remove(filename); err != nil {
+			return n, err
+		}
+		n += 1
+	}
+
+	return n, nil
+}
--- a/internal/memstore/memstore.go
+++ b/internal/memstore/memstore.go
@@ -0,0 +1,542 @@
+package memstore
+
+import (
+	"errors"
+	"sync"
+	"unsafe"
+
+	"github.com/ClusterCockpit/cc-metric-store/internal/api"
+	"github.com/ClusterCockpit/cc-metric-store/internal/config"
+	"github.com/ClusterCockpit/cc-metric-store/internal/util"
+)
+
+// Default buffer capacity.
+// `buffer.data` will only ever grow up to its capacity and a new link
+// in the buffer chain will be created if needed so that no copying
+// of data or reallocation needs to happen on writes.
+const (
+	BUFFER_CAP int = 512
+)
+
+// bufferPool recycles buffers so that allocations can be reused.
+// New buffers handed out by the pool start with an empty `data` slice
+// of capacity `BUFFER_CAP`.
+var bufferPool sync.Pool = sync.Pool{
+	New: func() interface{} {
+		return &buffer{
+			data: make([]util.Float, 0, BUFFER_CAP),
+		}
+	},
+}
+
+// Sentinel errors returned by the read paths of the memory store.
+var (
+	// ErrNoData signals that no buffer was found for the requested metric/level.
+	ErrNoData           error = errors.New("no data for this metric/level")
+	// ErrDataDoesNotAlign signals that buffers which have to be aggregated
+	// do not cover the same time range.
+	ErrDataDoesNotAlign error = errors.New("data from lower granularities does not align")
+)
+
+// Each metric on each level has its own buffer.
+// This is where the actual values go.
+// If `cap(data)` is reached, a new buffer is created and
+// becomes the new head of a buffer list.
+type buffer struct {
+	frequency  int64        // Time between two "slots"
+	start      int64        // Half a `frequency` before the timestamp of the first write (see `newBuffer`/`firstWrite`).
+	data       []util.Float // The slice should never reallocate as `cap(data)` is respected.
+	prev, next *buffer      // `prev` contains older data, `next` newer data.
+	archived   bool         // If true, this buffer is already archived
+
+	// Reset by `newBuffer`; only the commented-out version of `close` sets it.
+	closed bool
+	/*
+		statisticts struct {
+			samples int
+			min     Float
+			max     Float
+			avg     Float
+		}
+	*/
+}
+
+// newBuffer takes a buffer from `bufferPool` and resets it for reuse.
+// The buffer starts half a `freq` before `ts`, is not linked to any
+// other buffer and contains no values.
+func newBuffer(ts, freq int64) *buffer {
+	fresh := bufferPool.Get().(*buffer)
+
+	fresh.frequency, fresh.start = freq, ts-(freq/2)
+	fresh.prev, fresh.next = nil, nil
+	fresh.archived, fresh.closed = false, false
+
+	// Keep the backing array, drop the old contents.
+	fresh.data = fresh.data[:0]
+	return fresh
+}
+
+// write stores `value` in the slot that belongs to the timestamp `ts`.
+// If a new buffer had to be created, the new head is returned.
+// Otherwise, the existing buffer is returned.
+// Normally, only "newer" data should be written, but if the value would
+// end up in the same buffer anyway it is allowed.
+// Timestamps before the start of the buffer are rejected with an error.
+func (b *buffer) write(ts int64, value util.Float) (*buffer, error) {
+	if ts < b.start {
+		return nil, errors.New("cannot write value to buffer from past")
+	}
+
+	target := b
+	// slot := int((ts - b.start + (b.frequency / 3)) / b.frequency)
+	slot := int((ts - b.start) / b.frequency)
+	if slot >= cap(b.data) {
+		// This buffer is full: chain a new one that starts at `ts`.
+		target = newBuffer(ts, b.frequency)
+		target.prev = b
+		b.next = target
+		b.close()
+		slot = 0
+	}
+
+	// Overwriting value or writing value from past
+	if slot < len(target.data) {
+		target.data[slot] = value
+		return target, nil
+	}
+
+	// Fill up unwritten slots with NaN
+	for len(target.data) < slot {
+		target.data = append(target.data, util.NaN)
+	}
+
+	target.data = append(target.data, value)
+	return target, nil
+}
+
+// end returns the timestamp one slot after the last written slot,
+// counted from `firstWrite`.
+func (b *buffer) end() int64 {
+	covered := int64(len(b.data)) * b.frequency
+	return b.firstWrite() + covered
+}
+
+// firstWrite returns `start` shifted forward by half a `frequency`,
+// which undoes the shift applied in `newBuffer`.
+func (b *buffer) firstWrite() int64 {
+	halfStep := b.frequency / 2
+	return b.start + halfStep
+}
+
+// close is currently a no-op. The commented-out implementation below
+// would pre-compute statistics for the buffer.
+func (b *buffer) close() {}
+
+/*
+func (b *buffer) close() {
+	if b.closed {
+		return
+	}
+
+	b.closed = true
+	n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64
+	for _, x := range b.data {
+		if x.IsNaN() {
+			continue
+		}
+
+		n += 1
+		f := float64(x)
+		sum += f
+		min = math.Min(min, f)
+		max = math.Max(max, f)
+	}
+
+	b.statisticts.samples = n
+	if n > 0 {
+		b.statisticts.avg = Float(sum / float64(n))
+		b.statisticts.min = Float(min)
+		b.statisticts.max = Float(max)
+	} else {
+		b.statisticts.avg = NaN
+		b.statisticts.min = NaN
+		b.statisticts.max = NaN
+	}
+}
+*/
+
+// func interpolate(idx int, data []Float) Float {
+// 	if idx == 0 || idx+1 == len(data) {
+// 		return NaN
+// 	}
+// 	return (data[idx-1] + data[idx+1]) / 2.0
+// }
+
+// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
+// Interpolation between neighboring cells is currently disabled (see the commented-out code).
+// If values at the start or end are missing, instead of NaN values, the second and third
+// return values contain the actual `from`/`to`.
+// This function goes back the buffer chain if `from` is older than the current buffer's first write.
+// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
+// If `data` is not long enough to hold all values, this function will panic!
+func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, int64, error) {
+	if from < b.firstWrite() {
+		// Older data lives further back in the chain; let the older buffer
+		// do the whole read (it walks forward again via `next`).
+		if b.prev != nil {
+			return b.prev.read(from, to, data)
+		}
+		// Nothing older available: clamp `from` to what is actually there.
+		from = b.firstWrite()
+	}
+
+	var i int = 0
+	var t int64 = from
+	for ; t < to; t += b.frequency {
+		idx := int((t - b.start) / b.frequency)
+		if idx >= cap(b.data) {
+			// Past the capacity of this buffer: continue in the next one.
+			if b.next == nil {
+				break
+			}
+			b = b.next
+			idx = 0
+		}
+
+		if idx >= len(b.data) {
+			// Slot not written yet. Stop unless a newer buffer holds data
+			// before `to`, in which case this is a gap.
+			if b.next == nil || to <= b.next.start {
+				break
+			}
+			data[i] += util.NaN
+		} else if t < b.start {
+			data[i] += util.NaN
+			// } else if b.data[idx].IsNaN() {
+			// 	data[i] += interpolate(idx, b.data)
+		} else {
+			// `+=` and not `=`: values are added to whatever `data` already contains.
+			data[i] += b.data[idx]
+		}
+		i++
+	}
+
+	return data[:i], from, t, nil
+}
+
+// free releases all buffers further back in the chain that end before `t`.
+// Released buffers with the default capacity go back into `bufferPool`.
+// `delme` is true if this buffer itself ends before `t` and has to be
+// released by the caller; `n` is the number of buffers that can go,
+// including this one.
+func (b *buffer) free(t int64) (delme bool, n int) {
+	if older := b.prev; older != nil {
+		dropOlder, freed := older.free(t)
+		n += freed
+		if dropOlder {
+			older.next = nil
+			if cap(older.data) == BUFFER_CAP {
+				bufferPool.Put(older)
+			}
+			b.prev = nil
+		}
+	}
+
+	if b.end() < t {
+		return true, n + 1
+	}
+
+	return false, n
+}
+
+// iterFromTo walks the buffer chain from the oldest buffer to this one and
+// invokes `callback` for every buffer that overlaps the range `from`..`to`.
+// The walk stops at the first error returned by `callback`.
+// Calling it on a nil buffer is allowed and does nothing.
+func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
+	if b == nil {
+		return nil
+	}
+
+	// Older buffers first.
+	if err := b.prev.iterFromTo(from, to, callback); err != nil {
+		return err
+	}
+
+	overlaps := from <= b.end() && b.start <= to
+	if !overlaps {
+		return nil
+	}
+
+	return callback(b)
+}
+
+// count returns the number of slots in use in this buffer
+// and all older buffers of the chain.
+func (b *buffer) count() int64 {
+	total := int64(len(b.data))
+	for older := b.prev; older != nil; older = older.prev {
+		total += int64(len(older.data))
+	}
+	return total
+}
+
+// Could also be called "node" as this forms a node in a tree structure.
+// Called level because "node" might be confusing here.
+// Can be both a leaf or an inner node. In this tree structure, inner nodes can
+// also hold data (in `metrics`).
+type level struct {
+	lock     sync.RWMutex      // Guards `metrics` and `children`.
+	metrics  []*buffer         // Every level can store metrics. Indexed by metric offset; nil if nothing was written.
+	children map[string]*level // Lower levels. May be nil until the first child is created.
+}
+
+// Find the correct level for the given selector, creating it if
+// it does not exist. Example selector in the context of the
+// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }.
+// This function would probably benefit a lot from `level.children` beeing a `sync.Map`?
+func (l *level) findLevelOrCreate(selector []string, nMetrics int) *level {
+	if len(selector) == 0 {
+		return l
+	}
+
+	// Allow concurrent reads:
+	l.lock.RLock()
+	var child *level
+	var ok bool
+	if l.children == nil {
+		// Children map needs to be created...
+		l.lock.RUnlock()
+	} else {
+		child, ok := l.children[selector[0]]
+		l.lock.RUnlock()
+		if ok {
+			return child.findLevelOrCreate(selector[1:], nMetrics)
+		}
+	}
+
+	// The level does not exist, take write lock for unqiue access:
+	l.lock.Lock()
+	// While this thread waited for the write lock, another thread
+	// could have created the child node.
+	if l.children != nil {
+		child, ok = l.children[selector[0]]
+		if ok {
+			l.lock.Unlock()
+			return child.findLevelOrCreate(selector[1:], nMetrics)
+		}
+	}
+
+	child = &level{
+		metrics:  make([]*buffer, nMetrics),
+		children: nil,
+	}
+
+	if l.children != nil {
+		l.children[selector[0]] = child
+	} else {
+		l.children = map[string]*level{selector[0]: child}
+	}
+	l.lock.Unlock()
+	return child.findLevelOrCreate(selector[1:], nMetrics)
+}
+
+// free releases, on this level and all levels below it, the buffers that
+// only contain values older than `t`. Buffer chains that are released
+// completely leave a nil entry in `metrics`.
+// Returns the number of buffers released.
+func (l *level) free(t int64) (int, error) {
+	l.lock.Lock()
+	defer l.lock.Unlock()
+
+	freed := 0
+	for idx := range l.metrics {
+		head := l.metrics[idx]
+		if head == nil {
+			continue
+		}
+
+		dropHead, count := head.free(t)
+		freed += count
+		if !dropHead {
+			continue
+		}
+
+		// Only buffers with the default capacity belong into the pool.
+		if cap(head.data) == BUFFER_CAP {
+			bufferPool.Put(head)
+		}
+		l.metrics[idx] = nil
+	}
+
+	for _, child := range l.children {
+		count, err := child.free(t)
+		freed += count
+		if err != nil {
+			return freed, err
+		}
+	}
+
+	return freed, nil
+}
+
+func (l *level) sizeInBytes() int64 {
+	l.lock.RLock()
+	defer l.lock.RUnlock()
+	size := int64(0)
+
+	for _, b := range l.metrics {
+		if b != nil {
+			size += b.count() * int64(unsafe.Sizeof(util.Float(0)))
+		}
+	}
+
+	for _, child := range l.children {
+		size += child.sizeInBytes()
+	}
+
+	return size
+}
+
+// MemoryStore holds the tree of levels together with the configuration
+// of all metrics that can be stored in it.
+type MemoryStore struct {
+	root    level // root of the tree structure
+	metrics map[string]config.MetricConfig // metric name -> configuration
+}
+
+// Return a new, initialized instance of a MemoryStore.
+// Will panic if values in the metric configurations are invalid
+// (a frequency of zero).
+//
+// Every metric is assigned an offset, which is its index into the `metrics`
+// slice of each level. Offsets are handed out in map iteration order.
+// The entries of the passed map are replaced and the map itself is kept by
+// the returned store.
+func NewMemoryStore(metrics map[string]config.MetricConfig) *MemoryStore {
+	offset := 0
+	// NOTE(review): the loop variable `config` shadows the imported package
+	// of the same name inside the loop body.
+	for key, config := range metrics {
+		if config.Frequency == 0 {
+			panic("invalid frequency")
+		}
+
+		// NOTE(review): `MetricConfig` is not qualified here; presumably
+		// `config.MetricConfig` is meant - confirm once the package split is done.
+		metrics[key] = MetricConfig{
+			Frequency:   config.Frequency,
+			Aggregation: config.Aggregation,
+			offset:      offset,
+		}
+		offset += 1
+	}
+
+	return &MemoryStore{
+		root: level{
+			metrics:  make([]*buffer, len(metrics)),
+			children: make(map[string]*level),
+		},
+		metrics: metrics,
+	}
+}
+
+// Write all values in `metrics` to the level specified by `selector` for time `ts`.
+// Look at `findLevelOrCreate` for how selectors work.
+func (m *MemoryStore) Write(selector []string, ts int64, metrics []api.Metric) error {
+	var ok bool
+	for i, metric := range metrics {
+		if metric.mc.Frequency == 0 {
+			metric.mc, ok = m.metrics[metric.Name]
+			if !ok {
+				metric.mc.Frequency = 0
+			}
+			metrics[i] = metric
+		}
+	}
+
+	return m.WriteToLevel(&m.root, selector, ts, metrics)
+}
+
+// GetLevel returns the level selected by `selector`, starting at the root.
+// Levels that do not exist yet are created.
+func (m *MemoryStore) GetLevel(selector []string) *level {
+	nMetrics := len(m.metrics)
+	return m.root.findLevelOrCreate(selector, nMetrics)
+}
+
+// WriteToLevel writes all values in `metrics` at time `ts` to the level
+// reached from `l` via `selector`, creating that level if needed.
+// Assumes that the metric configuration in `metrics` is filled in!
+// Metrics with a frequency of zero are skipped.
+func (m *MemoryStore) WriteToLevel(l *level, selector []string, ts int64, metrics []api.Metric) error {
+	l = l.findLevelOrCreate(selector, len(m.metrics))
+	l.lock.Lock()
+	defer l.lock.Unlock()
+
+	for i := range metrics {
+		mc := metrics[i].mc
+		if mc.Frequency == 0 {
+			continue
+		}
+
+		head := l.metrics[mc.offset]
+		if head == nil {
+			// First write to this metric and level
+			head = newBuffer(ts, mc.Frequency)
+			l.metrics[mc.offset] = head
+		}
+
+		newHead, err := head.write(ts, metrics[i].Value)
+		if err != nil {
+			return err
+		}
+
+		// Last write created a new buffer...
+		if newHead != head {
+			l.metrics[mc.offset] = newHead
+		}
+	}
+	return nil
+}
+
+// Returns all values for metric `metric` from `from` to `to` for the selected level(s).
+// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
+// The second and third return value are the actual from/to for the data. Those can be different from
+// the range asked for if no data was available.
+func (m *MemoryStore) Read(selector Selector, metric string, from, to int64) ([]Float, int64, int64, error) {
+	if from > to {
+		return nil, 0, 0, errors.New("invalid time range")
+	}
+
+	minfo, ok := m.metrics[metric]
+	if !ok {
+		return nil, 0, 0, errors.New("unkown metric: " + metric)
+	}
+
+	// `n` counts the buffers read. `data` has one slot per frequency step and
+	// is shared by all buffers: `buffer.read` adds its values onto it.
+	n, data := 0, make([]Float, (to-from)/minfo.Frequency+1)
+	err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {
+		cdata, cfrom, cto, err := b.read(from, to, data)
+		if err != nil {
+			return err
+		}
+
+		if n == 0 {
+			// The first buffer defines the time range all others have to match.
+			from, to = cfrom, cto
+		} else if from != cfrom || to != cto || len(data) != len(cdata) {
+			// Number of slots this buffer is off at the front/back.
+			missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)
+			if missingfront != 0 {
+				return ErrDataDoesNotAlign
+			}
+
+			newlen := len(cdata) - missingback
+			if newlen < 1 {
+				return ErrDataDoesNotAlign
+			}
+			cdata = cdata[0:newlen]
+			if len(cdata) != len(data) {
+				return ErrDataDoesNotAlign
+			}
+
+			from, to = cfrom, cto
+		}
+
+		data = cdata
+		n += 1
+		return nil
+	})
+
+	if err != nil {
+		return nil, 0, 0, err
+	} else if n == 0 {
+		return nil, 0, 0, errors.New("metric or host not found")
+	} else if n > 1 {
+		// `data` holds the sum over all buffers at this point.
+		if minfo.Aggregation == AvgAggregation {
+			normalize := 1. / Float(n)
+			for i := 0; i < len(data); i++ {
+				data[i] *= normalize
+			}
+		} else if minfo.Aggregation != SumAggregation {
+			return nil, 0, 0, errors.New("invalid aggregation")
+		}
+	}
+
+	return data, from, to, nil
+}
+
+// Free releases all buffers of the selected level and all its children
+// that contain only values older than `t`.
+// Returns the number of buffers released.
+func (m *MemoryStore) Free(selector []string, t int64) (int, error) {
+	lvl := m.GetLevel(selector)
+	return lvl.free(t)
+}
+
+// FreeAll drops all children of the root level and with them every buffer
+// stored below the root. It always returns nil.
+//
+// The root's write lock is held while the children map is emptied, as every
+// other access to `children` in this package is guarded by the level lock.
+func (m *MemoryStore) FreeAll() error {
+	m.root.lock.Lock()
+	defer m.root.lock.Unlock()
+
+	for k := range m.root.children {
+		delete(m.root.children, k)
+	}
+
+	return nil
+}
+
+// SizeInBytes returns the memory used by all values in the store,
+// as computed by `level.sizeInBytes` for the root level.
+func (m *MemoryStore) SizeInBytes() int64 {
+	root := &m.root
+	return root.sizeInBytes()
+}
+
+// ListChildren returns the names of all children of the level selected by
+// `selector`, in no particular order. If the selected level does not exist,
+// nil is returned. No levels are created.
+func (m *MemoryStore) ListChildren(selector []string) []string {
+	lvl := &m.root
+	for _, key := range selector {
+		if lvl == nil {
+			break
+		}
+
+		lvl.lock.RLock()
+		next := lvl.children[key]
+		lvl.lock.RUnlock()
+		lvl = next
+	}
+
+	if lvl == nil {
+		return nil
+	}
+
+	lvl.lock.RLock()
+	defer lvl.lock.RUnlock()
+
+	names := make([]string, 0, len(lvl.children))
+	for name := range lvl.children {
+		names = append(names, name)
+	}
+
+	return names
+}
--- a/internal/memstore/selector.go
+++ b/internal/memstore/selector.go
@@ -0,0 +1,123 @@
+package memstore
+
+import (
+	"encoding/json"
+	"errors"
+)
+
+// SelectorElement is one step of a Selector. Exactly one of the three
+// fields is expected to be set.
+type SelectorElement struct {
+	Any    bool     // Matches every child, written as "*" in JSON.
+	String string   // Matches the child with exactly this name.
+	Group  []string // Matches every child whose name is in the list.
+}
+
+// UnmarshalJSON decodes a JSON string (a single name, or "*" for any) or a
+// JSON array of strings (a group). Anything else, including empty input, is
+// rejected. On success all fields are replaced, so no state of a previous
+// value survives; on error `se` is left untouched.
+func (se *SelectorElement) UnmarshalJSON(input []byte) error {
+	if len(input) == 0 {
+		return errors.New("the Go SelectorElement type can only be a string or an array of strings")
+	}
+
+	switch input[0] {
+	case '"':
+		var name string
+		if err := json.Unmarshal(input, &name); err != nil {
+			return err
+		}
+
+		if name == "*" {
+			*se = SelectorElement{Any: true}
+		} else {
+			*se = SelectorElement{String: name}
+		}
+		return nil
+	case '[':
+		var group []string
+		if err := json.Unmarshal(input, &group); err != nil {
+			return err
+		}
+
+		*se = SelectorElement{Group: group}
+		return nil
+	}
+
+	return errors.New("the Go SelectorElement type can only be a string or an array of strings")
+}
+
+// MarshalJSON encodes the element the way UnmarshalJSON reads it:
+// "*" for `Any`, a JSON string for `String` and a JSON array for `Group`,
+// checked in that order. An element with nothing set is an error.
+func (se *SelectorElement) MarshalJSON() ([]byte, error) {
+	if se.Any {
+		return []byte("\"*\""), nil
+	}
+
+	if se.String != "" {
+		return json.Marshal(se.String)
+	}
+
+	if se.Group != nil {
+		return json.Marshal(se.Group)
+	}
+
+	return nil, errors.New("a Go Selector must be a non-empty string or a non-empty slice of strings")
+}
+
+// Selector is a path into the level tree, one SelectorElement per tree depth.
+type Selector []SelectorElement
+
+// findLevel descends the tree along `selector` and returns the level it
+// ends at, or nil if one of the levels does not exist.
+// Unlike `findLevelOrCreate` it never creates levels.
+func (l *level) findLevel(selector []string) *level {
+	if len(selector) == 0 {
+		return l
+	}
+
+	l.lock.RLock()
+	defer l.lock.RUnlock()
+
+	if child := l.children[selector[0]]; child != nil {
+		return child.findLevel(selector[1:])
+	}
+
+	return nil
+}
+
+// findBuffers calls `f` for the buffers at index `offset` of all levels that
+// `selector` matches, stopping at the first error returned by `f`.
+//
+// Once the selector is used up, a level that holds a buffer itself yields
+// that buffer; otherwise the search continues in all of its children.
+// A selector element matches by name (`String`), by a list of names
+// (`Group`) or matches every child (`Any`), checked in that order.
+func (l *level) findBuffers(selector Selector, offset int, f func(b *buffer) error) error {
+	l.lock.RLock()
+	defer l.lock.RUnlock()
+
+	if len(selector) == 0 {
+		if b := l.metrics[offset]; b != nil {
+			return f(b)
+		}
+
+		// Nothing stored here, collect from the levels below.
+		for _, child := range l.children {
+			if err := child.findBuffers(nil, offset, f); err != nil {
+				return err
+			}
+		}
+		return nil
+	}
+
+	// Without children there is nothing any kind of selector could match.
+	if l.children == nil {
+		return nil
+	}
+
+	head, rest := selector[0], selector[1:]
+	switch {
+	case len(head.String) != 0:
+		if child, ok := l.children[head.String]; ok {
+			return child.findBuffers(rest, offset, f)
+		}
+	case head.Group != nil:
+		for _, key := range head.Group {
+			child, ok := l.children[key]
+			if !ok {
+				continue
+			}
+			if err := child.findBuffers(rest, offset, f); err != nil {
+				return err
+			}
+		}
+	case head.Any:
+		for _, child := range l.children {
+			if err := child.findBuffers(rest, offset, f); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
--- a/internal/util/float.go
+++ b/internal/util/float.go
@@ -0,0 +1,64 @@
+package util
+
+import (
+	"math"
+	"strconv"
+)
+
+// Go's JSON encoder for floats does not support NaN (https://github.com/golang/go/issues/3480).
+// This program uses NaN as a signal for missing data.
+// For the HTTP JSON API to be able to handle NaN values,
+// we have to use our own type which implements encoding/json.Marshaler itself.
+// NaN is written as JSON `null` and `null` is read back as NaN.
+type Float float64
+
+var (
+	// NaN is the value used to signal missing data.
+	NaN         Float  = Float(math.NaN())
+	// nullAsBytes is the JSON encoding of NaN.
+	nullAsBytes []byte = []byte("null")
+)
+
+// IsNaN reports whether `f` is "not a number", i.e. stands for missing data.
+func (f Float) IsNaN() bool {
+	raw := float64(f)
+	return math.IsNaN(raw)
+}
+
+// MarshalJSON encodes NaN as `null` and every other value as a decimal
+// number with three digits after the decimal point.
+func (f Float) MarshalJSON() ([]byte, error) {
+	raw := float64(f)
+	if math.IsNaN(raw) {
+		return nullAsBytes, nil
+	}
+
+	out := make([]byte, 0, 10)
+	out = strconv.AppendFloat(out, raw, 'f', 3, 64)
+	return out, nil
+}
+
+// UnmarshalJSON decodes `null` as NaN and everything else as a
+// floating point number. On a parse error `f` is left unchanged.
+func (f *Float) UnmarshalJSON(input []byte) error {
+	text := string(input)
+	if text == "null" {
+		*f = NaN
+		return nil
+	}
+
+	parsed, err := strconv.ParseFloat(text, 64)
+	if err != nil {
+		return err
+	}
+
+	*f = Float(parsed)
+	return nil
+}
+
+// Same as `[]Float`, but can be marshaled to JSON with fewer allocations,
+// as the whole array is written into a single byte slice.
+type FloatArray []Float
+
+// MarshalJSON encodes the array as a JSON array. NaN values are written as
+// `null`, all other values with three digits after the decimal point.
+func (fa FloatArray) MarshalJSON() ([]byte, error) {
+	// Rough size estimate: brackets plus eight bytes per value.
+	out := make([]byte, 0, 2+len(fa)*8)
+	out = append(out, '[')
+	for i, value := range fa {
+		if i > 0 {
+			out = append(out, ',')
+		}
+
+		if value.IsNaN() {
+			out = append(out, `null`...)
+			continue
+		}
+
+		out = strconv.AppendFloat(out, float64(value), 'f', 3, 64)
+	}
+
+	return append(out, ']'), nil
+}
--- a/internal/util/stats.go
+++ b/internal/util/stats.go
@@ -0,0 +1,117 @@
+package util
+
+import (
+	"errors"
+	"math"
+)
+
+// Stats holds the statistics of a metric over a time range.
+// If `Samples` is zero, the other fields should not be considered valid.
+type Stats struct {
+	Samples int   // Number of values that were not NaN.
+	Avg     Float // Average of those values.
+	Min     Float // Smallest of those values.
+	Max     Float // Largest of those values.
+}
+
+// stats computes the number of samples, average, minimum and maximum of all
+// values between `from` and `to`, skipping NaN values and unwritten slots.
+// If `from` is older than the start of this buffer, the computation is
+// delegated to the previous buffer of the chain, if there is one; newer
+// buffers are reached via `next`.
+//
+// The second and third return value are the time range actually covered.
+// If no sample was found, `Samples` is zero, `Avg` is NaN and `Min`/`Max`
+// still hold their initial sentinel values.
+func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {
+	if from < b.start {
+		if b.prev != nil {
+			return b.prev.stats(from, to)
+		}
+		from = b.start
+	}
+
+	// TODO: Check if b.closed and if so and the full buffer is queried,
+	// use b.statistics instead of iterating over the buffer.
+
+	samples := 0
+	// The values are float64, so the sentinels have to be the float64 limits;
+	// float32 limits would clamp values beyond the float32 range.
+	sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
+
+	var t int64
+	for t = from; t < to; t += b.frequency {
+		idx := int((t - b.start) / b.frequency)
+		if idx >= cap(b.data) {
+			// Past the capacity of this buffer: continue in the next one.
+			b = b.next
+			if b == nil {
+				break
+			}
+			idx = 0
+		}
+
+		if t < b.start || idx >= len(b.data) {
+			continue
+		}
+
+		xf := float64(b.data[idx])
+		if math.IsNaN(xf) {
+			continue
+		}
+
+		samples += 1
+		sum += xf
+		min = math.Min(min, xf)
+		max = math.Max(max, xf)
+	}
+
+	return Stats{
+		Samples: samples,
+		Avg:     Float(sum) / Float(samples),
+		Min:     Float(min),
+		Max:     Float(max),
+	}, from, t, nil
+}
+
+// Returns statistics for the requested metric on the selected node/level.
+// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.
+// If `Stats.Samples` is zero, the statistics should not be considered as valid.
+//
+// The second and third return value are the time range actually covered.
+// `ErrNoData` is returned if no buffer matches the selector and
+// `ErrDataDoesNotAlign` if the matching buffers cover different time ranges.
+func (m *MemoryStore) Stats(selector Selector, metric string, from, to int64) (*Stats, int64, int64, error) {
+	if from > to {
+		return nil, 0, 0, errors.New("invalid time range")
+	}
+
+	minfo, ok := m.metrics[metric]
+	if !ok {
+		return nil, 0, 0, errors.New("unkown metric: " + metric)
+	}
+
+	n, samples := 0, 0
+	// The values are float64, so the sentinels have to be the float64 limits;
+	// float32 limits would clamp values beyond the float32 range.
+	avg, min, max := Float(0), math.MaxFloat64, -math.MaxFloat64
+	err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error {
+		stats, cfrom, cto, err := b.stats(from, to)
+		if err != nil {
+			return err
+		}
+
+		if n == 0 {
+			// The first buffer defines the time range all others have to match.
+			from, to = cfrom, cto
+		} else if from != cfrom || to != cto {
+			return ErrDataDoesNotAlign
+		}
+
+		samples += stats.Samples
+		avg += stats.Avg
+		min = math.Min(min, float64(stats.Min))
+		max = math.Max(max, float64(stats.Max))
+		n += 1
+		return nil
+	})
+	if err != nil {
+		return nil, 0, 0, err
+	}
+
+	if n == 0 {
+		return nil, 0, 0, ErrNoData
+	}
+
+	// `avg` is the sum of the per-buffer averages at this point.
+	if minfo.Aggregation == AvgAggregation {
+		avg /= Float(n)
+	} else if n > 1 && minfo.Aggregation != SumAggregation {
+		return nil, 0, 0, errors.New("invalid aggregation")
+	}
+
+	return &Stats{
+		Samples: samples,
+		Avg:     avg,
+		Min:     Float(min),
+		Max:     Float(max),
+	}, from, to, nil
+}