Merge branch 'dev' into add_uiconfig_schema

Authored by Jan Eitzinger, committed via GitHub on 2025-09-26 13:27:18 +02:00.
91 changed files with 9182 additions and 3159 deletions

View File

@@ -241,7 +241,7 @@ func TestRestApi(t *testing.T) {
"numNodes": 1,
"numHwthreads": 8,
"numAcc": 0,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"resources": [
@@ -297,7 +297,6 @@ func TestRestApi(t *testing.T) {
job.NumNodes != 1 ||
job.NumHWThreads != 8 ||
job.NumAcc != 0 ||
job.Exclusive != 1 ||
job.MonitoringStatus != 1 ||
job.SMT != 1 ||
!reflect.DeepEqual(job.Resources, []*schema.Resource{{Hostname: "host123", HWThreads: []int{0, 1, 2, 3, 4, 5, 6, 7}}}) ||
@@ -397,7 +396,7 @@ func TestRestApi(t *testing.T) {
"partition": "default",
"walltime": 3600,
"numNodes": 1,
"exclusive": 1,
"shared": "none",
"monitoringStatus": 1,
"smt": 1,
"resources": [

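For orientation, a hedged sketch of the payload change these test fixtures reflect: the integer exclusive field is replaced by the string shared enum, whose allowed values per the regenerated API docs below are none, single_user and multi_user. The surrounding fields are illustrative only.

// Hypothetical start_job body fragment after this change; only "shared"
// replaces the former "exclusive": 1.
const startJobFragment = `{
    "partition": "default",
    "walltime": 3600,
    "numNodes": 1,
    "shared": "none",
    "monitoringStatus": 1,
    "smt": 1
}`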
View File

@@ -1261,9 +1261,27 @@ const docTemplate = `{
"api.Node": {
"type": "object",
"properties": {
"cpusAllocated": {
"type": "integer"
},
"cpusTotal": {
"type": "integer"
},
"gpusAllocated": {
"type": "integer"
},
"gpusTotal": {
"type": "integer"
},
"hostname": {
"type": "string"
},
"memoryAllocated": {
"type": "integer"
},
"memoryTotal": {
"type": "integer"
},
"states": {
"type": "array",
"items": {
@@ -1379,19 +1397,15 @@ const docTemplate = `{
"energyFootprint": {
"type": "object",
"additionalProperties": {
"type": "number"
"type": "number",
"format": "float64"
}
},
"exclusive": {
"type": "integer",
"maximum": 2,
"minimum": 0,
"example": 1
},
"footprint": {
"type": "object",
"additionalProperties": {
"type": "number"
"type": "number",
"format": "float64"
}
},
"id": {
@@ -1403,12 +1417,18 @@ const docTemplate = `{
},
"jobState": {
"enum": [
"completed",
"failed",
"boot_fail",
"cancelled",
"stopped",
"timeout",
"out_of_memory"
"completed",
"deadline",
"failed",
"node_fail",
"out-of-memory",
"pending",
"preempted",
"running",
"suspended",
"timeout"
],
"allOf": [
{
@@ -1464,6 +1484,14 @@ const docTemplate = `{
"$ref": "#/definitions/schema.Resource"
}
},
"shared": {
"type": "string",
"enum": [
"none",
"single_user",
"multi_user"
]
},
"smt": {
"type": "integer",
"example": 4
@@ -1482,6 +1510,10 @@ const docTemplate = `{
"type": "string",
"example": "main"
},
"submitTime": {
"type": "integer",
"example": 1649723812
},
"tags": {
"type": "array",
"items": {
@@ -1547,24 +1579,32 @@ const docTemplate = `{
"schema.JobState": {
"type": "string",
"enum": [
"running",
"completed",
"failed",
"boot_fail",
"cancelled",
"stopped",
"timeout",
"completed",
"deadline",
"failed",
"node_fail",
"out_of_memory",
"pending",
"preempted",
"out_of_memory"
"running",
"suspended",
"timeout"
],
"x-enum-varnames": [
"JobStateRunning",
"JobStateCompleted",
"JobStateFailed",
"JobStateBootFail",
"JobStateCancelled",
"JobStateStopped",
"JobStateTimeout",
"JobStateCompleted",
"JobStateDeadline",
"JobStateFailed",
"JobStateNodeFail",
"JobStateOutOfMemory",
"JobStatePending",
"JobStatePreempted",
"JobStateOutOfMemory"
"JobStateRunning",
"JobStateSuspended",
"JobStateTimeout"
]
},
"schema.JobStatistics": {
@@ -1763,7 +1803,8 @@ const docTemplate = `{
"additionalProperties": {
"type": "array",
"items": {
"type": "number"
"type": "number",
"format": "float64"
}
}
}

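The regenerated schema.JobState enum and its x-enum-varnames pair up by position, which implies constant declarations along these lines in cc-lib/schema (a sketch, not the verbatim upstream source):

type JobState string

const (
    JobStateBootFail    JobState = "boot_fail"
    JobStateCancelled   JobState = "cancelled"
    JobStateCompleted   JobState = "completed"
    JobStateDeadline    JobState = "deadline"
    JobStateFailed      JobState = "failed"
    JobStateNodeFail    JobState = "node_fail"
    JobStateOutOfMemory JobState = "out_of_memory"
    JobStatePending     JobState = "pending"
    JobStatePreempted   JobState = "preempted"
    JobStateRunning     JobState = "running"
    JobStateSuspended   JobState = "suspended"
    JobStateTimeout     JobState = "timeout"
)

Note that the api.Job model above still documents the hyphenated "out-of-memory" spelling, while schema.JobState uses "out_of_memory".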
View File

@@ -17,6 +17,7 @@ import (
"time"
"github.com/ClusterCockpit/cc-backend/internal/archiver"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
"github.com/ClusterCockpit/cc-backend/internal/importer"
@@ -112,6 +113,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
for key, vals := range r.URL.Query() {
switch key {
// TODO: add project filter
case "state":
for _, s := range vals {
state := schema.JobState(s)
@@ -124,7 +126,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
}
case "cluster":
filter.Cluster = &model.StringInput{Eq: &vals[0]}
case "start-time":
case "start-time": // ?startTime=1753707480-1754053139
st := strings.Split(vals[0], "-")
if len(st) != 2 {
handleError(fmt.Errorf("invalid query parameter value: startTime"),
@@ -142,7 +144,7 @@ func (api *RestApi) getJobs(rw http.ResponseWriter, r *http.Request) {
return
}
ufrom, uto := time.Unix(from, 0), time.Unix(to, 0)
filter.StartTime = &schema.TimeRange{From: &ufrom, To: &uto}
filter.StartTime = &config.TimeRange{From: &ufrom, To: &uto}
case "page":
x, err := strconv.Atoi(vals[0])
if err != nil {
@@ -646,7 +648,7 @@ func (api *RestApi) removeTags(rw http.ResponseWriter, r *http.Request) {
// @router /api/jobs/start_job/ [post]
func (api *RestApi) startJob(rw http.ResponseWriter, r *http.Request) {
req := schema.Job{
Exclusive: 1,
Shared: "none",
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
}
if err := decode(r.Body, &req); err != nil {

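A hedged client-side sketch of the query parameters handled in getJobs above; the /api/jobs/ path and the bearer-token scheme are assumptions, while the parameter names and the start-time range format come straight from the switch statement.

package client

import (
    "fmt"
    "net/http"
)

// listRunningJobs queries the job list with state, cluster, start-time and page filters.
func listRunningJobs(token string) error {
    url := "http://localhost:8080/api/jobs/?state=running&cluster=fritz&start-time=1753707480-1754053139&page=1"
    req, err := http.NewRequest(http.MethodGet, url, nil)
    if err != nil {
        return err
    }
    req.Header.Set("Authorization", "Bearer "+token) // assumed JWT auth
    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return err
    }
    defer resp.Body.Close()
    fmt.Println(resp.Status)
    return nil
}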
View File

@@ -15,6 +15,7 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/auth"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/memorystore"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
@@ -95,6 +96,19 @@ func (api *RestApi) MountUserApiRoutes(r *mux.Router) {
r.HandleFunc("/jobs/metrics/{id}", api.getJobMetrics).Methods(http.MethodGet)
}
func (api *RestApi) MountMetricStoreApiRoutes(r *mux.Router) {
// REST API Uses TokenAuth
r.HandleFunc("/api/free", memorystore.HandleFree).Methods(http.MethodPost)
r.HandleFunc("/api/write", memorystore.HandleWrite).Methods(http.MethodPost)
r.HandleFunc("/api/debug", memorystore.HandleDebug).Methods(http.MethodGet)
r.HandleFunc("/api/healthcheck", memorystore.HandleHealthCheck).Methods(http.MethodGet)
// TODO: Refactor; trailing-slash duplicates of the routes above
r.HandleFunc("/api/free/", memorystore.HandleFree).Methods(http.MethodPost)
r.HandleFunc("/api/write/", memorystore.HandleWrite).Methods(http.MethodPost)
r.HandleFunc("/api/debug/", memorystore.HandleDebug).Methods(http.MethodGet)
r.HandleFunc("/api/healthcheck/", memorystore.HandleHealthCheck).Methods(http.MethodGet)
}
func (api *RestApi) MountConfigApiRoutes(r *mux.Router) {
r.StrictSlash(true)
// Settings Frontend Uses SessionAuth

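A wiring sketch for the new metric store routes, assuming a gorilla/mux root router and an existing RestApi instance; the /metricstore path prefix is a placeholder, only the handlers and route names come from the function above.

r := mux.NewRouter()
metricStoreRouter := r.PathPrefix("/metricstore").Subrouter()
api.MountMetricStoreApiRoutes(metricStoreRouter)
// e.g. POST /metricstore/api/write and GET /metricstore/api/healthcheck are now served.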
View File

@@ -402,7 +402,7 @@ func (auth *Authentication) AuthUserApi(
return
}
case len(user.Roles) >= 2:
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleSupport, schema.RoleAdmin}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
@@ -417,6 +417,42 @@ func (auth *Authentication) AuthUserApi(
})
}
func (auth *Authentication) AuthMetricStoreApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
user, err := auth.JwtAuth.AuthViaJWT(rw, r)
if err != nil {
cclog.Infof("auth metricstore api -> authentication failed: %s", err.Error())
onfailure(rw, r, err)
return
}
if user != nil {
switch {
case len(user.Roles) == 1:
if user.HasRole(schema.RoleApi) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
case len(user.Roles) >= 2:
if user.HasRole(schema.RoleApi) && user.HasAnyRole([]schema.Role{schema.RoleUser, schema.RoleManager, schema.RoleAdmin}) {
ctx := context.WithValue(r.Context(), repository.ContextUserKey, user)
onsuccess.ServeHTTP(rw, r.WithContext(ctx))
return
}
default:
cclog.Info("auth metricstore api -> authentication failed: missing role")
onfailure(rw, r, errors.New("unauthorized"))
}
}
cclog.Info("auth metricstore api -> authentication failed: no auth")
onfailure(rw, r, errors.New("unauthorized"))
})
}
func (auth *Authentication) AuthConfigApi(
onsuccess http.Handler,
onfailure func(rw http.ResponseWriter, r *http.Request, authErr error),
@@ -494,6 +530,7 @@ func securedCheck(user *schema.User, r *http.Request) error {
IPAddress = r.RemoteAddr
}
// FIXME: IPV6 not handled
if strings.Contains(IPAddress, ":") {
IPAddress = strings.Split(IPAddress, ":")[0]
}

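A usage sketch for the new AuthMetricStoreApi middleware, mirroring how the other Auth*Api helpers are applied; the wrapped handler, the failure callback and the route registration are placeholders.

protected := authInstance.AuthMetricStoreApi(
    http.HandlerFunc(memorystore.HandleWrite),
    func(rw http.ResponseWriter, r *http.Request, authErr error) {
        http.Error(rw, authErr.Error(), http.StatusUnauthorized)
    },
)
router.Handle("/api/write", protected).Methods(http.MethodPost)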
View File

@@ -0,0 +1,475 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"log"
"os"
"path"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/linkedin/goavro/v2"
)
var NumWorkers int = 4
var ErrNoNewData error = errors.New("no data in the pool")
func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
levels := make([]*AvroLevel, 0)
selectors := make([][]string, 0)
as.root.lock.RLock()
// Cluster
for sel1, l1 := range as.root.children {
l1.lock.RLock()
// Node
for sel2, l2 := range l1.children {
l2.lock.RLock()
// Frequency
for sel3, l3 := range l2.children {
levels = append(levels, l3)
selectors = append(selectors, []string{sel1, sel2, sel3})
}
l2.lock.RUnlock()
}
l1.lock.RUnlock()
}
as.root.lock.RUnlock()
type workItem struct {
level *AvroLevel
dir string
selector []string
}
n, errs := int32(0), int32(0)
var wg sync.WaitGroup
wg.Add(NumWorkers)
work := make(chan workItem, NumWorkers*2)
for range NumWorkers {
go func() {
defer wg.Done()
for workItem := range work {
from := getTimestamp(workItem.dir)
if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
if err == ErrNoNewData {
continue
}
log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error())
atomic.AddInt32(&errs, 1)
} else {
atomic.AddInt32(&n, 1)
}
}
}()
}
for i := range len(levels) {
dir := path.Join(dir, path.Join(selectors[i]...))
work <- workItem{
level: levels[i],
dir: dir,
selector: selectors[i],
}
}
close(work)
wg.Wait()
if errs > 0 {
return int(n), fmt.Errorf("%d errors happened while creating avro checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
// getTimestamp returns the latest checkpoint timestamp for the given directory.
// Existing avro files are named <resolution>_<epoch>.avro; we iterate over all
// files in the parent directory and return the maximum timestamp found, or 0 if
// the latest checkpoint is already older than the checkpoint buffer window.
resolution := path.Base(dir)
dir = path.Dir(dir)
files, err := os.ReadDir(dir)
if err != nil {
return 0
}
var maxTs int64 = 0
if len(files) == 0 {
return 0
}
for _, file := range files {
if file.IsDir() {
continue
}
name := file.Name()
if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
continue
}
ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
if err != nil {
fmt.Printf("error while parsing timestamp: %s\n", err.Error())
continue
}
if ts > maxTs {
maxTs = ts
}
}
interval, _ := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval)
updateTime := time.Unix(maxTs, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()
if updateTime < time.Now().Unix() {
return 0
}
return maxTs
}
func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
l.lock.Lock()
defer l.lock.Unlock()
// fmt.Printf("Checkpointing directory: %s\n", dir)
// filepath contains the resolution
int_res, _ := strconv.Atoi(path.Base(dir))
// find smallest overall timestamp in l.data map and delete it from l.data
minTs := int64(1<<63 - 1)
for ts, dat := range l.data {
if ts < minTs && len(dat) != 0 {
minTs = ts
}
}
if from == 0 && minTs != int64(1<<63-1) {
from = minTs
}
if from == 0 {
return ErrNoNewData
}
var schema string
var codec *goavro.Codec
record_list := make([]map[string]any, 0)
var f *os.File
filePath := dir + fmt.Sprintf("_%d.avro", from)
var err error
fp_, err_ := os.Stat(filePath)
if errors.Is(err_, os.ErrNotExist) {
err = os.MkdirAll(path.Dir(dir), 0o755)
if err != nil {
return fmt.Errorf("failed to create directory: %v", err)
}
} else if fp_.Size() != 0 {
f, err = os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open existing avro file: %v", err)
}
br := bufio.NewReader(f)
reader, err := goavro.NewOCFReader(br)
if err != nil {
return fmt.Errorf("failed to create OCF reader: %v", err)
}
codec = reader.Codec()
schema = codec.Schema()
f.Close()
}
time_ref := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()
if dumpAll {
time_ref = time.Now().Unix()
}
// Empty values
if len(l.data) == 0 {
// we checkpoint avro files every 60 seconds
repeat := 60 / int_res
for range repeat {
record_list = append(record_list, make(map[string]any))
}
}
readFlag := true
for ts := range l.data {
flag := false
if ts < time_ref {
data := l.data[ts]
schema_gen, err := generateSchema(data)
if err != nil {
return err
}
flag, schema, err = compareSchema(schema, schema_gen)
if err != nil {
return fmt.Errorf("failed to compare read and generated schema: %v", err)
}
if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {
f.Close()
f, err = os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open Avro file: %v", err)
}
br := bufio.NewReader(f)
ocfReader, err := goavro.NewOCFReader(br)
if err != nil {
return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
}
for ocfReader.Scan() {
record, err := ocfReader.Read()
if err != nil {
return fmt.Errorf("failed to read record: %v", err)
}
record_list = append(record_list, record.(map[string]any))
}
f.Close()
err = os.Remove(filePath)
if err != nil {
return fmt.Errorf("failed to delete file: %v", err)
}
readFlag = false
}
codec, err = goavro.NewCodec(schema)
if err != nil {
return fmt.Errorf("failed to create codec after merged schema: %v", err)
}
record_list = append(record_list, generateRecord(data))
delete(l.data, ts)
}
}
if len(record_list) == 0 {
return ErrNoNewData
}
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
if err != nil {
return fmt.Errorf("failed to append new avro file: %v", err)
}
// fmt.Printf("Codec : %#v\n", codec)
writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
W: f,
Codec: codec,
CompressionName: goavro.CompressionDeflateLabel,
})
if err != nil {
return fmt.Errorf("failed to create OCF writer: %v", err)
}
// Append the new record
if err := writer.Append(record_list); err != nil {
return fmt.Errorf("failed to append record: %v", err)
}
f.Close()
return nil
}
func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
var genSchema, readSchema AvroSchema
if schemaRead == "" {
return false, schemaGen, nil
}
// Unmarshal the schema strings into AvroSchema structs
if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
}
if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
return false, "", fmt.Errorf("failed to parse read schema: %v", err)
}
sort.Slice(genSchema.Fields, func(i, j int) bool {
return genSchema.Fields[i].Name < genSchema.Fields[j].Name
})
sort.Slice(readSchema.Fields, func(i, j int) bool {
return readSchema.Fields[i].Name < readSchema.Fields[j].Name
})
// Check if schemas are identical
schemasEqual := true
if len(genSchema.Fields) <= len(readSchema.Fields) {
for i := range genSchema.Fields {
if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
schemasEqual = false
break
}
}
// If schemas are identical, return the read schema
if schemasEqual {
return false, schemaRead, nil
}
}
// Create a map to hold unique fields from both schemas
fieldMap := make(map[string]AvroField)
// Add fields from the read schema
for _, field := range readSchema.Fields {
fieldMap[field.Name] = field
}
// Add or update fields from the generated schema
for _, field := range genSchema.Fields {
fieldMap[field.Name] = field
}
// Create a union schema by collecting fields from the map
var mergedFields []AvroField
for _, field := range fieldMap {
mergedFields = append(mergedFields, field)
}
// Sort fields by name for consistency
sort.Slice(mergedFields, func(i, j int) bool {
return mergedFields[i].Name < mergedFields[j].Name
})
// Create the merged schema
mergedSchema := AvroSchema{
Type: "record",
Name: genSchema.Name,
Fields: mergedFields,
}
// Check if schemas are identical
schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
if schemasEqual {
for i := range mergedSchema.Fields {
if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
schemasEqual = false
break
}
}
if schemasEqual {
return false, schemaRead, nil
}
}
// Marshal the merged schema back to JSON
mergedSchemaJson, err := json.Marshal(mergedSchema)
if err != nil {
return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
}
return true, string(mergedSchemaJson), nil
}
func generateSchema(data map[string]schema.Float) (string, error) {
// Define the Avro schema structure
schema := map[string]any{
"type": "record",
"name": "DataRecord",
"fields": []map[string]any{},
}
fieldTracker := make(map[string]struct{})
for key := range data {
if _, exists := fieldTracker[key]; !exists {
key = correctKey(key)
field := map[string]any{
"name": key,
"type": "double",
"default": -1.0,
}
schema["fields"] = append(schema["fields"].([]map[string]any), field)
fieldTracker[key] = struct{}{}
}
}
schemaString, err := json.Marshal(schema)
if err != nil {
return "", fmt.Errorf("failed to marshal schema: %v", err)
}
return string(schemaString), nil
}
func generateRecord(data map[string]schema.Float) map[string]any {
record := make(map[string]any)
// Iterate through each map in data
for key, value := range data {
key = correctKey(key)
// Set the value in the record
// avro only accepts basic types
record[key] = value.Double()
}
return record
}
func correctKey(key string) string {
// Replace characters that are not allowed in Avro field names:
// ':' becomes "___" and '.' becomes "__".
key = strings.ReplaceAll(key, ":", "___")
key = strings.ReplaceAll(key, ".", "__")
return key
}
func ReplaceKey(key string) string {
// Reverse the replacements done by correctKey:
// "___" becomes ':' and "__" becomes '.'.
key = strings.ReplaceAll(key, "___", ":")
key = strings.ReplaceAll(key, "__", ".")
return key
}

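A usage sketch for the checkpointing entry point defined above; the directory is a placeholder and would normally come from config.MetricStoreKeys.Checkpoints.RootDir.

n, err := avro.GetAvroStore().ToCheckpoint("./var/checkpoints", false)
if err != nil {
    log.Printf("avro checkpointing returned errors: %s", err.Error())
}
log.Printf("%d avro levels checkpointed", n)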
View File

@@ -0,0 +1,84 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro
import (
"context"
"log"
"slices"
"strconv"
"sync"
"github.com/ClusterCockpit/cc-backend/internal/config"
)
func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
// DataStaging consumes line-protocol messages and stages them into the Avro store.
go func() {
if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
wg.Done() // Mark this goroutine as done
return // Exit the goroutine
}
defer wg.Done()
var avroLevel *AvroLevel
oldSelector := make([]string, 0)
for {
select {
case <-ctx.Done():
return
case val := <-LineProtocolMessages:
// Fetch the frequency of the metric from the global configuration
freq, err := config.GetMetricFrequency(val.MetricName)
if err != nil {
log.Printf("Error fetching metric frequency: %s\n", err)
continue
}
metricName := ""
for _, selector_name := range val.Selector {
metricName += selector_name + Delimiter
}
metricName += val.MetricName
// Create a new selector for the Avro level.
// The selector is a slice of strings that represents the path to the
// Avro level: cluster, node, and metric frequency.
var selector []string
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
if !testEq(oldSelector, selector) {
// Get the Avro level for the metric
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
// If no Avro level could be found or created, log an error
if avroLevel == nil {
log.Printf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
}
oldSelector = slices.Clone(selector)
}
avroLevel.addMetric(metricName, val.Value, val.Timestamp, int(freq))
}
}
}()
}
func testEq(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}

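A start-up sketch for the staging goroutine above; note that DataStaging spawns the goroutine itself, so the caller only provides the WaitGroup and the cancellation context.

var wg sync.WaitGroup
ctx, cancel := context.WithCancel(context.Background())
wg.Add(1)
avro.DataStaging(&wg, ctx)
// ... on shutdown:
cancel()
wg.Wait()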
internal/avro/avroStruct.go
View File

@@ -0,0 +1,167 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package avro
import (
"sync"
"github.com/ClusterCockpit/cc-lib/schema"
)
var (
LineProtocolMessages = make(chan *AvroStruct)
Delimiter = "ZZZZZ"
)
// CheckpointBufferMinutes should always be in minutes.
// It controls how much data is held in memory before being checkpointed.
var CheckpointBufferMinutes = 3
type AvroStruct struct {
MetricName string
Cluster string
Node string
Selector []string
Value schema.Float
Timestamp int64
}
type AvroStore struct {
root AvroLevel
}
var avroStore AvroStore
type AvroLevel struct {
children map[string]*AvroLevel
data map[int64]map[string]schema.Float
lock sync.RWMutex
}
type AvroField struct {
Name string `json:"name"`
Type any `json:"type"`
Default any `json:"default,omitempty"`
}
type AvroSchema struct {
Type string `json:"type"`
Name string `json:"name"`
Fields []AvroField `json:"fields"`
}
func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
if len(selector) == 0 {
return l
}
// Allow concurrent reads:
l.lock.RLock()
var child *AvroLevel
var ok bool
if l.children == nil {
// Children map needs to be created...
l.lock.RUnlock()
} else {
child, ok := l.children[selector[0]]
l.lock.RUnlock()
if ok {
return child.findAvroLevelOrCreate(selector[1:])
}
}
// The level does not exist, take write lock for unique access:
l.lock.Lock()
// While this thread waited for the write lock, another thread
// could have created the child node.
if l.children != nil {
child, ok = l.children[selector[0]]
if ok {
l.lock.Unlock()
return child.findAvroLevelOrCreate(selector[1:])
}
}
child = &AvroLevel{
data: make(map[int64]map[string]schema.Float, 0),
children: nil,
}
if l.children != nil {
l.children[selector[0]] = child
} else {
l.children = map[string]*AvroLevel{selector[0]: child}
}
l.lock.Unlock()
return child.findAvroLevelOrCreate(selector[1:])
}
func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
l.lock.Lock()
defer l.lock.Unlock()
KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)
// Create keys in advance for the given amount of time
if len(l.data) != KeyCounter {
if len(l.data) == 0 {
for i := range KeyCounter {
l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
}
} else {
// Get the last timestamp
var lastTs int64
for ts := range l.data {
if ts > lastTs {
lastTs = ts
}
}
// Create a key for the next timestamp
l.data[lastTs+int64(Freq)] = make(map[string]schema.Float, 0)
}
}
closestTs := int64(0)
minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
found := false
// Iterate over timestamps and choose the one that is within range.
// Since these are epoch timestamps, "within range" means the difference is at most one frequency interval.
for ts, dat := range l.data {
// Check if timestamp is within range
diff := timestamp - ts
if diff < -int64(Freq) || diff > int64(Freq) {
continue
}
// Metric already present at this timestamp — skip
if _, ok := dat[metricName]; ok {
continue
}
// Check if this is the closest timestamp so far
if Abs(diff) < minDiff {
minDiff = Abs(diff)
closestTs = ts
found = true
}
}
if found {
l.data[closestTs][metricName] = value
}
}
func GetAvroStore() *AvroStore {
return &avroStore
}
// Abs returns the absolute value of x.
func Abs(x int64) int64 {
if x < 0 {
return -x
}
return x
}

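A hedged producer-side sketch of how a decoded metric reaches this store: the line-protocol decoder sends an AvroStruct on LineProtocolMessages, and DataStaging routes it into the matching AvroLevel. All values are illustrative.

avro.LineProtocolMessages <- &avro.AvroStruct{
    MetricName: "cpu_load",
    Cluster:    "fritz",
    Node:       "f0720",
    Selector:   []string{"fritz", "f0720"},
    Value:      schema.Float(42.0),
    Timestamp:  time.Now().Unix(),
}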
View File

@@ -162,7 +162,7 @@ func Init(mainConfig json.RawMessage, clusterConfig json.RawMessage) {
cclog.Abortf("Config Init: Could not decode config file '%s'.\nError: %s\n", mainConfig, err.Error())
}
if Clusters == nil || len(Clusters) < 1 {
if len(Clusters) < 1 {
cclog.Abort("Config Init: At least one cluster required in config. Exited with error.")
}
}

View File

@@ -0,0 +1,130 @@
package config
import (
"bytes"
"encoding/json"
"fmt"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)
var InternalCCMSFlag bool = false
// --------------------
// Metric Store config
// --------------------
type MetricStoreConfig struct {
Checkpoints struct {
FileFormat string `json:"file-format"`
Interval string `json:"interval"`
RootDir string `json:"directory"`
Restore string `json:"restore"`
} `json:"checkpoints"`
Debug struct {
DumpToFile string `json:"dump-to-file"`
EnableGops bool `json:"gops"`
} `json:"debug"`
RetentionInMemory string `json:"retention-in-memory"`
Archive struct {
Interval string `json:"interval"`
RootDir string `json:"directory"`
DeleteInstead bool `json:"delete-instead"`
} `json:"archive"`
Nats []*NatsConfig `json:"nats"`
}
type NatsConfig struct {
// Address of the nats server
Address string `json:"address"`
// Username/Password, optional
Username string `json:"username"`
Password string `json:"password"`
// Creds file path
Credsfilepath string `json:"creds-file-path"`
Subscriptions []struct {
// Channel name
SubscribeTo string `json:"subscribe-to"`
// Allow lines without a cluster tag, use this as default, optional
ClusterTag string `json:"cluster-tag"`
} `json:"subscriptions"`
}
var MetricStoreKeys MetricStoreConfig
// For aggregation over multiple values at different cpus/sockets/..., not time!
type AggregationStrategy int
const (
NoAggregation AggregationStrategy = iota
SumAggregation
AvgAggregation
)
func AssignAggregationStratergy(str string) (AggregationStrategy, error) {
switch str {
case "":
return NoAggregation, nil
case "sum":
return SumAggregation, nil
case "avg":
return AvgAggregation, nil
default:
return NoAggregation, fmt.Errorf("[METRICSTORE]> unknown aggregation strategy: %s", str)
}
}
type MetricConfig struct {
// Interval in seconds at which measurements will arrive.
Frequency int64
// Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy.
Aggregation AggregationStrategy
// Private, used internally...
Offset int
}
var Metrics map[string]MetricConfig
func InitMetricStore(msConfig json.RawMessage) {
// Validate(msConfigSchema, msConfig)
dec := json.NewDecoder(bytes.NewReader(msConfig))
dec.DisallowUnknownFields()
if err := dec.Decode(&MetricStoreKeys); err != nil {
cclog.Abortf("[METRICSTORE]> Metric Store Config Init: Could not decode config file '%s'.\nError: %s\n", msConfig, err.Error())
}
}
func GetMetricFrequency(metricName string) (int64, error) {
if metric, ok := Metrics[metricName]; ok {
return metric.Frequency, nil
}
return 0, fmt.Errorf("[METRICSTORE]> metric %s not found", metricName)
}
// AddMetric adds a metric to the Metrics map, keyed by name.
// If a metric with the same name already exists, it is kept at the maximum frequency of the two.
func AddMetric(name string, metric MetricConfig) error {
if Metrics == nil {
Metrics = make(map[string]MetricConfig, 0)
}
if existingMetric, ok := Metrics[name]; ok {
if existingMetric.Frequency != metric.Frequency {
if existingMetric.Frequency < metric.Frequency {
existingMetric.Frequency = metric.Frequency
Metrics[name] = existingMetric
}
}
} else {
Metrics[name] = metric
}
return nil
}

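A configuration sketch matching the struct tags above; the values are illustrative, and InitMetricStore aborts on unknown keys because of DisallowUnknownFields.

msConfig := json.RawMessage(`{
    "checkpoints": {
        "file-format": "avro",
        "interval": "1h",
        "directory": "./var/checkpoints",
        "restore": "48h"
    },
    "retention-in-memory": "48h",
    "archive": {
        "interval": "24h",
        "directory": "./var/archive"
    }
}`)
config.InitMetricStore(msConfig)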
View File

@@ -208,6 +208,11 @@ var uiConfigSchema = `
"properties": {
"name": {
"description": "The name of the cluster.",
"kind": {
"type": "string",
"enum": ["influxdb", "prometheus", "cc-metric-store", "cc-metric-store-internal", "test"]
},
"url": {
"type": "string"
},
"subClusters" {

File diff suppressed because it is too large

View File

@@ -3,11 +3,13 @@
package model
import (
"bytes"
"fmt"
"io"
"strconv"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
)
@@ -58,16 +60,16 @@ type JobFilter struct {
JobName *StringInput `json:"jobName,omitempty"`
Cluster *StringInput `json:"cluster,omitempty"`
Partition *StringInput `json:"partition,omitempty"`
Duration *schema.IntRange `json:"duration,omitempty"`
Duration *config.IntRange `json:"duration,omitempty"`
Energy *FloatRange `json:"energy,omitempty"`
MinRunningFor *int `json:"minRunningFor,omitempty"`
NumNodes *schema.IntRange `json:"numNodes,omitempty"`
NumAccelerators *schema.IntRange `json:"numAccelerators,omitempty"`
NumHWThreads *schema.IntRange `json:"numHWThreads,omitempty"`
StartTime *schema.TimeRange `json:"startTime,omitempty"`
NumNodes *config.IntRange `json:"numNodes,omitempty"`
NumAccelerators *config.IntRange `json:"numAccelerators,omitempty"`
NumHWThreads *config.IntRange `json:"numHWThreads,omitempty"`
StartTime *config.TimeRange `json:"startTime,omitempty"`
State []schema.JobState `json:"state,omitempty"`
MetricStats []*MetricStatItem `json:"metricStats,omitempty"`
Exclusive *int `json:"exclusive,omitempty"`
Shared *string `json:"shared,omitempty"`
Node *StringInput `json:"node,omitempty"`
}
@@ -112,6 +114,7 @@ type JobStats struct {
type JobsStatistics struct {
ID string `json:"id"`
Name string `json:"name"`
TotalUsers int `json:"totalUsers"`
TotalJobs int `json:"totalJobs"`
RunningJobs int `json:"runningJobs"`
ShortJobs int `json:"shortJobs"`
@@ -170,6 +173,7 @@ type NamedStatsWithScope struct {
type NodeFilter struct {
Hostname *StringInput `json:"hostname,omitempty"`
Cluster *StringInput `json:"cluster,omitempty"`
Subcluster *StringInput `json:"subcluster,omitempty"`
NodeState *string `json:"nodeState,omitempty"`
HealthState *schema.NodeState `json:"healthState,omitempty"`
}
@@ -185,7 +189,7 @@ type NodeStateResultList struct {
Count *int `json:"count,omitempty"`
}
type NodeStats struct {
type NodeStates struct {
State string `json:"state"`
Count int `json:"count"`
}
@@ -246,20 +250,22 @@ type User struct {
type Aggregate string
const (
AggregateUser Aggregate = "USER"
AggregateProject Aggregate = "PROJECT"
AggregateCluster Aggregate = "CLUSTER"
AggregateUser Aggregate = "USER"
AggregateProject Aggregate = "PROJECT"
AggregateCluster Aggregate = "CLUSTER"
AggregateSubcluster Aggregate = "SUBCLUSTER"
)
var AllAggregate = []Aggregate{
AggregateUser,
AggregateProject,
AggregateCluster,
AggregateSubcluster,
}
func (e Aggregate) IsValid() bool {
switch e {
case AggregateUser, AggregateProject, AggregateCluster:
case AggregateUser, AggregateProject, AggregateCluster, AggregateSubcluster:
return true
}
return false
@@ -286,11 +292,26 @@ func (e Aggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}
func (e *Aggregate) UnmarshalJSON(b []byte) error {
s, err := strconv.Unquote(string(b))
if err != nil {
return err
}
return e.UnmarshalGQL(s)
}
func (e Aggregate) MarshalJSON() ([]byte, error) {
var buf bytes.Buffer
e.MarshalGQL(&buf)
return buf.Bytes(), nil
}
type SortByAggregate string
const (
SortByAggregateTotalwalltime SortByAggregate = "TOTALWALLTIME"
SortByAggregateTotaljobs SortByAggregate = "TOTALJOBS"
SortByAggregateTotalusers SortByAggregate = "TOTALUSERS"
SortByAggregateTotalnodes SortByAggregate = "TOTALNODES"
SortByAggregateTotalnodehours SortByAggregate = "TOTALNODEHOURS"
SortByAggregateTotalcores SortByAggregate = "TOTALCORES"
@@ -302,6 +323,7 @@ const (
var AllSortByAggregate = []SortByAggregate{
SortByAggregateTotalwalltime,
SortByAggregateTotaljobs,
SortByAggregateTotalusers,
SortByAggregateTotalnodes,
SortByAggregateTotalnodehours,
SortByAggregateTotalcores,
@@ -312,7 +334,7 @@ var AllSortByAggregate = []SortByAggregate{
func (e SortByAggregate) IsValid() bool {
switch e {
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
case SortByAggregateTotalwalltime, SortByAggregateTotaljobs, SortByAggregateTotalusers, SortByAggregateTotalnodes, SortByAggregateTotalnodehours, SortByAggregateTotalcores, SortByAggregateTotalcorehours, SortByAggregateTotalaccs, SortByAggregateTotalacchours:
return true
}
return false
@@ -339,6 +361,20 @@ func (e SortByAggregate) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}
func (e *SortByAggregate) UnmarshalJSON(b []byte) error {
s, err := strconv.Unquote(string(b))
if err != nil {
return err
}
return e.UnmarshalGQL(s)
}
func (e SortByAggregate) MarshalJSON() ([]byte, error) {
var buf bytes.Buffer
e.MarshalGQL(&buf)
return buf.Bytes(), nil
}
type SortDirectionEnum string
const (
@@ -379,3 +415,17 @@ func (e *SortDirectionEnum) UnmarshalGQL(v any) error {
func (e SortDirectionEnum) MarshalGQL(w io.Writer) {
fmt.Fprint(w, strconv.Quote(e.String()))
}
func (e *SortDirectionEnum) UnmarshalJSON(b []byte) error {
s, err := strconv.Unquote(string(b))
if err != nil {
return err
}
return e.UnmarshalGQL(s)
}
func (e SortDirectionEnum) MarshalJSON() ([]byte, error) {
var buf bytes.Buffer
e.MarshalGQL(&buf)
return buf.Bytes(), nil
}

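A round-trip sketch for the added JSON (un)marshalers: the enum value is quoted exactly as in GraphQL, so the JSON form of the new SUBCLUSTER aggregate is the string "SUBCLUSTER".

b, _ := model.AggregateSubcluster.MarshalJSON() // []byte(`"SUBCLUSTER"`)

var a model.Aggregate
_ = a.UnmarshalJSON(b) // a == model.AggregateSubcluster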
View File

@@ -2,7 +2,7 @@ package graph
// This file will be automatically regenerated based on the schema, any resolver implementations
// will be copied through when generating and any unknown code will be moved to the end.
// Code generated by github.com/99designs/gqlgen version v0.17.66
// Code generated by github.com/99designs/gqlgen version v0.17.78
import (
"context"
@@ -43,7 +43,7 @@ func (r *jobResolver) Tags(ctx context.Context, obj *schema.Job) ([]*schema.Tag,
// ConcurrentJobs is the resolver for the concurrentJobs field.
func (r *jobResolver) ConcurrentJobs(ctx context.Context, obj *schema.Job) (*model.JobLinkResultList, error) {
// FIXME: Make the hardcoded duration configurable
if obj.Exclusive != 1 && obj.Duration > 600 {
if obj.Shared != "none" && obj.Duration > 600 {
return r.Repo.FindConcurrentJobs(ctx, obj)
}
@@ -305,14 +305,20 @@ func (r *mutationResolver) UpdateConfiguration(ctx context.Context, name string,
return nil, nil
}
// NodeState is the resolver for the nodeState field.
func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) {
panic(fmt.Errorf("not implemented: NodeState - nodeState"))
// RunningJobs is the resolver for the runningJobs field.
func (r *nodeResolver) RunningJobs(ctx context.Context, obj *schema.Node) (int, error) {
panic(fmt.Errorf("not implemented: RunningJobs - runningJobs"))
}
// HealthState is the resolver for the HealthState field.
// NodeState is the resolver for the nodeState field.
func (r *nodeResolver) NodeState(ctx context.Context, obj *schema.Node) (string, error) {
return string(obj.NodeState), nil
}
// HealthState is the resolver for the healthState field.
func (r *nodeResolver) HealthState(ctx context.Context, obj *schema.Node) (schema.NodeState, error) {
panic(fmt.Errorf("not implemented: HealthState - HealthState"))
// FIXME: Why is Output of schema.NodeState Type?
panic(fmt.Errorf("not implemented: HealthState - healthState"))
}
// MetaData is the resolver for the metaData field.
@@ -378,9 +384,26 @@ func (r *queryResolver) Nodes(ctx context.Context, filter []*model.NodeFilter, o
return &model.NodeStateResultList{Items: nodes, Count: &count}, err
}
// NodeStats is the resolver for the nodeStats field.
func (r *queryResolver) NodeStats(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStats, error) {
panic(fmt.Errorf("not implemented: NodeStats - nodeStats"))
// NodeStates is the resolver for the nodeStates field.
func (r *queryResolver) NodeStates(ctx context.Context, filter []*model.NodeFilter) ([]*model.NodeStates, error) {
repo := repository.GetNodeRepository()
stateCounts, serr := repo.CountNodeStates(ctx, filter)
if serr != nil {
cclog.Warnf("Error while counting nodeStates: %s", serr.Error())
return nil, serr
}
healthCounts, herr := repo.CountHealthStates(ctx, filter)
if herr != nil {
cclog.Warnf("Error while counting healthStates: %s", herr.Error())
return nil, herr
}
allCounts := make([]*model.NodeStates, 0)
allCounts = append(stateCounts, healthCounts...)
return allCounts, nil
}
// Job is the resolver for the job field.
@@ -558,7 +581,7 @@ func (r *queryResolver) JobsStatistics(ctx context.Context, filter []*model.JobF
defaultDurationBins := "1h"
defaultMetricBins := 10
if requireField(ctx, "totalJobs") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
if requireField(ctx, "totalJobs") || requireField(ctx, "totalUsers") || requireField(ctx, "totalWalltime") || requireField(ctx, "totalNodes") || requireField(ctx, "totalCores") ||
requireField(ctx, "totalAccs") || requireField(ctx, "totalNodeHours") || requireField(ctx, "totalCoreHours") || requireField(ctx, "totalAccHours") {
if groupBy == nil {
stats, err = r.Repo.JobsStats(ctx, filter)
@@ -831,3 +854,15 @@ type mutationResolver struct{ *Resolver }
type nodeResolver struct{ *Resolver }
type queryResolver struct{ *Resolver }
type subClusterResolver struct{ *Resolver }
// !!! WARNING !!!
// The code below was going to be deleted when updating resolvers. It has been copied here so you have
// one last chance to move it out of harms way if you want. There are two reasons this happens:
// - When renaming or deleting a resolver the old code will be put in here. You can safely delete
// it when you're done.
// - You have helper methods in this file. Move them out to keep these resolver files clean.
/*
func (r *jobResolver) Exclusive(ctx context.Context, obj *schema.Job) (int, error) {
panic(fmt.Errorf("not implemented: Exclusive - exclusive"))
}
*/

View File

@@ -43,7 +43,7 @@ func HandleImportFlag(flag string) error {
dec := json.NewDecoder(bytes.NewReader(raw))
dec.DisallowUnknownFields()
job := schema.Job{
Exclusive: 1,
Shared: "none",
MonitoringStatus: schema.MonitoringStatusRunningOrArchiving,
}
if err = dec.Decode(&job); err != nil {

View File

@@ -40,7 +40,7 @@ func InitDB() error {
}
tags := make(map[string]int64)
// Not using log.Print because we want the line to end with `\r` and
// Not using cclog.Print because we want the line to end with `\r` and
// this function is only ever called when a special command line flag
// is passed anyways.
fmt.Printf("%d jobs inserted...\r", 0)

View File

@@ -1 +1 @@
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"exclusive":1,"monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n 
StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}
{"jobId":398955,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","partition":"singlenode","arrayJobId":0,"numNodes":1,"numHwthreads":72,"numAcc":0,"shared":"none","monitoringStatus":1,"smt":0,"jobState":"completed","duration":260,"walltime":86340,"resources":[{"hostname":"f0720"}],"metaData":{"jobName":"ams_pipeline","jobScript":"#!/bin/bash -l\n#SBATCH --job-name=ams_pipeline\n#SBATCH --time=23:59:00\n#SBATCH --partition=singlenode\n#SBATCH --ntasks=72\n#SBATCH --hint=multithread\n#SBATCH --chdir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n#SBATCH --export=NONE\nunset SLURM_EXPORT_ENV\nuss=$(whoami)\nfind /dev/shm/ -user $uss -type f -mmin +30 -delete\ncd \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\"\nams_pipeline pipeline.json \u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.out\" 2\u003e \"/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh.err\"\n","slurmInfo":"\nJobId=398955 JobName=ams_pipeline\n UserId=k106eb10(210387) GroupId=80111\n Account=k106eb QOS=normal \n Requeue=False Restarts=0 BatchFlag=True \n TimeLimit=1439\n SubmitTime=2023-02-09T14:11:22\n Partition=singlenode \n NodeList=f0720\n NumNodes=1 NumCPUs=72 NumTasks=72 CPUs/Task=1\n NTasksPerNode:Socket:Core=0:None:None\n TRES_req=cpu=72,mem=250000M,node=1,billing=72\n TRES_alloc=cpu=72,node=1,billing=72\n Command=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11/ams_pipeline_job.sh\n WorkDir=/home/atuin/k106eb/k106eb10/ACE/Ni-Al/DFT/VASP_PBE_500_0.125_0.1_NM/AlNi/binaries/bulk/base-hcp/occ-shaken/hcp16.occ.4.shake.0/cfg/NiAl3NiAl11\n StdErr=\n 
StdOut=ams_pipeline.o%j\n"},"startTime":1675956725,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":2335.254,"min":800.418,"max":2734.922},"cpu_load":{"unit":{"base":""},"avg":52.72,"min":34.46,"max":71.91},"cpu_power":{"unit":{"base":"W"},"avg":407.767,"min":93.932,"max":497.636},"cpu_user":{"unit":{"base":""},"avg":63.678,"min":19.872,"max":96.633},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":635.672,"min":0,"max":1332.874},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":261.006,"min":0,"max":382.294},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":113.659,"min":0,"max":568.286},"ib_recv":{"unit":{"base":"B/s"},"avg":27981.111,"min":69.4,"max":48084.589},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":398.939,"min":0.5,"max":693.817},"ib_xmit":{"unit":{"base":"B/s"},"avg":188.513,"min":39.597,"max":724.568},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":0.867,"min":0.2,"max":2.933},"ipc":{"unit":{"base":"IPC"},"avg":0.944,"min":0.564,"max":1.291},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":79.565,"min":0.021,"max":116.02},"mem_power":{"unit":{"base":"W"},"avg":24.692,"min":7.883,"max":31.318},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":22.566,"min":8.225,"max":27.613},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":647,"min":0,"max":1946},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6181.6,"min":1270,"max":11411},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":22.4,"min":11,"max":29},"vectorization_ratio":{"unit":{"base":"%"},"avg":77.351,"min":0,"max":98.837}}}

View File

@@ -1 +1 @@
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"exclusive":1,"jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}
{"jobId":398764,"user":"k106eb10","project":"k106eb","cluster":"fritz","subCluster":"main","numNodes":1,"shared":"none","jobState":"completed","duration":177,"resources":[{"hostname":"f0649"}],"startTime":1675954353,"statistics":{"clock":{"unit":{"base":"Hz","prefix":"M"},"avg":1336.519,"min":801.564,"max":2348.215},"cpu_load":{"unit":{"base":""},"avg":31.64,"min":17.36,"max":45.54},"cpu_power":{"unit":{"base":"W"},"avg":150.018,"min":93.672,"max":261.592},"cpu_user":{"unit":{"base":""},"avg":28.518,"min":0.09,"max":57.343},"flops_any":{"unit":{"base":"F/s","prefix":"G"},"avg":45.012,"min":0,"max":135.037},"flops_dp":{"unit":{"base":"F/s","prefix":"G"},"avg":22.496,"min":0,"max":67.488},"flops_sp":{"unit":{"base":"F/s","prefix":"G"},"avg":0.02,"min":0,"max":0.061},"ib_recv":{"unit":{"base":"B/s"},"avg":14442.82,"min":219.998,"max":42581.368},"ib_recv_pkts":{"unit":{"base":"packets/s"},"avg":201.532,"min":1.25,"max":601.345},"ib_xmit":{"unit":{"base":"B/s"},"avg":282.098,"min":56.2,"max":569.363},"ib_xmit_pkts":{"unit":{"base":"packets/s"},"avg":1.228,"min":0.433,"max":2},"ipc":{"unit":{"base":"IPC"},"avg":0.77,"min":0.564,"max":0.906},"mem_bw":{"unit":{"base":"B/s","prefix":"G"},"avg":4.872,"min":0.025,"max":14.552},"mem_power":{"unit":{"base":"W"},"avg":7.725,"min":6.286,"max":10.556},"mem_used":{"unit":{"base":"B","prefix":"G"},"avg":6.162,"min":6.103,"max":6.226},"nfs4_read":{"unit":{"base":"B/s","prefix":"M"},"avg":1045.333,"min":311,"max":1525},"nfs4_total":{"unit":{"base":"B/s","prefix":"M"},"avg":6430,"min":2796,"max":11518},"nfs4_write":{"unit":{"base":"B/s","prefix":"M"},"avg":24.333,"min":0,"max":38},"vectorization_ratio":{"unit":{"base":"%"},"avg":25.528,"min":0,"max":76.585}}}

internal/memorystore/api.go
View File

@@ -0,0 +1,419 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package memorystore
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"io"
"log"
"math"
"net/http"
"strconv"
"strings"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
"github.com/influxdata/line-protocol/v2/lineprotocol"
)
// @title cc-metric-store REST API
// @version 1.0.0
// @description API for cc-metric-store
// @contact.name ClusterCockpit Project
// @contact.url https://clustercockpit.org
// @contact.email support@clustercockpit.org
// @license.name MIT License
// @license.url https://opensource.org/licenses/MIT
// @host localhost:8082
// @basePath /api/
// @securityDefinitions.apikey ApiKeyAuth
// @in header
// @name X-Auth-Token
// ErrorResponse model
type ErrorResponse struct {
// Status text of the HTTP error code
Status string `json:"status"`
Error string `json:"error"` // Error Message
}
type ApiMetricData struct {
Error *string `json:"error,omitempty"`
Data schema.FloatArray `json:"data,omitempty"`
From int64 `json:"from"`
To int64 `json:"to"`
Resolution int64 `json:"resolution"`
Avg schema.Float `json:"avg"`
Min schema.Float `json:"min"`
Max schema.Float `json:"max"`
}
func handleError(err error, statusCode int, rw http.ResponseWriter) {
// log.Warnf("REST ERROR : %s", err.Error())
rw.Header().Add("Content-Type", "application/json")
rw.WriteHeader(statusCode)
json.NewEncoder(rw).Encode(ErrorResponse{
Status: http.StatusText(statusCode),
Error: err.Error(),
})
}
// TODO: Optimize this, just like the stats endpoint!
func (data *ApiMetricData) AddStats() {
n := 0
sum, min, max := 0.0, math.MaxFloat64, -math.MaxFloat64
for _, x := range data.Data {
if x.IsNaN() {
continue
}
n += 1
sum += float64(x)
min = math.Min(min, float64(x))
max = math.Max(max, float64(x))
}
if n > 0 {
avg := sum / float64(n)
data.Avg = schema.Float(avg)
data.Min = schema.Float(min)
data.Max = schema.Float(max)
} else {
data.Avg, data.Min, data.Max = schema.NaN, schema.NaN, schema.NaN
}
}
func (data *ApiMetricData) ScaleBy(f schema.Float) {
if f == 0 || f == 1 {
return
}
data.Avg *= f
data.Min *= f
data.Max *= f
for i := 0; i < len(data.Data); i++ {
data.Data[i] *= f
}
}
func (data *ApiMetricData) PadDataWithNull(ms *MemoryStore, from, to int64, metric string) {
minfo, ok := ms.Metrics[metric]
if !ok {
return
}
if (data.From / minfo.Frequency) > (from / minfo.Frequency) {
padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency))
ndata := make([]schema.Float, 0, padfront+len(data.Data))
for i := 0; i < padfront; i++ {
ndata = append(ndata, schema.NaN)
}
for j := 0; j < len(data.Data); j++ {
ndata = append(ndata, data.Data[j])
}
data.Data = ndata
}
}
// handleFree godoc
// @summary Free buffers
// @tags free
// @description This endpoint allows users to free buffers in the metric store up to
// a given timestamp. Buffers can be removed per selector, which also allows pruning
// the data below a node without removing the whole node.
// @produce json
// @param to query string false "up to timestamp"
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /free/ [post]
func HandleFree(rw http.ResponseWriter, r *http.Request) {
rawTo := r.URL.Query().Get("to")
if rawTo == "" {
handleError(errors.New("'to' is a required query parameter"), http.StatusBadRequest, rw)
return
}
to, err := strconv.ParseInt(rawTo, 10, 64)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
// // TODO: lastCheckpoint might be modified by different go-routines.
// // Load it using the sync/atomic package?
// freeUpTo := lastCheckpoint.Unix()
// if to < freeUpTo {
// freeUpTo = to
// }
bodyDec := json.NewDecoder(r.Body)
var selectors [][]string
err = bodyDec.Decode(&selectors)
if err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest)
return
}
ms := GetMemoryStore()
n := 0
for _, sel := range selectors {
bn, err := ms.Free(sel, to)
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
n += bn
}
rw.WriteHeader(http.StatusOK)
fmt.Fprintf(rw, "buffers freed: %d\n", n)
}
// handleWrite godoc
// @summary Receive metrics in InfluxDB line-protocol
// @tags write
// @description Write data to the in-memory store in the InfluxDB line-protocol using [this format](https://github.com/ClusterCockpit/cc-specifications/blob/master/metrics/lineprotocol_alternative.md)
// @accept plain
// @produce json
// @param cluster query string false "If the lines in the body do not have a cluster tag, use this value instead."
// @success 200 {string} string "ok"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /write/ [post]
func HandleWrite(rw http.ResponseWriter, r *http.Request) {
bytes, err := io.ReadAll(r.Body)
rw.Header().Add("Content-Type", "application/json")
if err != nil {
handleError(err, http.StatusInternalServerError, rw)
return
}
ms := GetMemoryStore()
dec := lineprotocol.NewDecoderWithBytes(bytes)
if err := decodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
log.Printf("/api/write error: %s", err.Error())
handleError(err, http.StatusBadRequest, rw)
return
}
rw.WriteHeader(http.StatusOK)
}
type ApiQueryRequest struct {
Cluster string `json:"cluster"`
Queries []ApiQuery `json:"queries"`
ForAllNodes []string `json:"for-all-nodes"`
From int64 `json:"from"`
To int64 `json:"to"`
WithStats bool `json:"with-stats"`
WithData bool `json:"with-data"`
WithPadding bool `json:"with-padding"`
}
type ApiQueryResponse struct {
Queries []ApiQuery `json:"queries,omitempty"`
Results [][]ApiMetricData `json:"results"`
}
type ApiQuery struct {
Type *string `json:"type,omitempty"`
SubType *string `json:"subtype,omitempty"`
Metric string `json:"metric"`
Hostname string `json:"host"`
Resolution int64 `json:"resolution"`
TypeIds []string `json:"type-ids,omitempty"`
SubTypeIds []string `json:"subtype-ids,omitempty"`
ScaleFactor schema.Float `json:"scale-by,omitempty"`
Aggregate bool `json:"aggreg"`
}
func FetchData(req ApiQueryRequest) (*ApiQueryResponse, error) {
req.WithData = true
ms := GetMemoryStore()
response := ApiQueryResponse{
Results: make([][]ApiMetricData, 0, len(req.Queries)),
}
if req.ForAllNodes != nil {
nodes := ms.ListChildren([]string{req.Cluster})
for _, node := range nodes {
for _, metric := range req.ForAllNodes {
q := ApiQuery{
Metric: metric,
Hostname: node,
}
req.Queries = append(req.Queries, q)
response.Queries = append(response.Queries, q)
}
}
}
for _, query := range req.Queries {
sels := make([]util.Selector, 0, 1)
if query.Aggregate || query.Type == nil {
sel := util.Selector{{String: req.Cluster}, {String: query.Hostname}}
if query.Type != nil {
if len(query.TypeIds) == 1 {
sel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]})
} else {
ids := make([]string, len(query.TypeIds))
for i, id := range query.TypeIds {
ids[i] = *query.Type + id
}
sel = append(sel, util.SelectorElement{Group: ids})
}
if query.SubType != nil {
if len(query.SubTypeIds) == 1 {
sel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]})
} else {
ids := make([]string, len(query.SubTypeIds))
for i, id := range query.SubTypeIds {
ids[i] = *query.SubType + id
}
sel = append(sel, util.SelectorElement{Group: ids})
}
}
}
sels = append(sels, sel)
} else {
for _, typeId := range query.TypeIds {
if query.SubType != nil {
for _, subTypeId := range query.SubTypeIds {
sels = append(sels, util.Selector{
{String: req.Cluster},
{String: query.Hostname},
{String: *query.Type + typeId},
{String: *query.SubType + subTypeId},
})
}
} else {
sels = append(sels, util.Selector{
{String: req.Cluster},
{String: query.Hostname},
{String: *query.Type + typeId},
})
}
}
}
// log.Printf("query: %#v\n", query)
// log.Printf("sels: %#v\n", sels)
var err error
res := make([]ApiMetricData, 0, len(sels))
for _, sel := range sels {
data := ApiMetricData{}
data.Data, data.From, data.To, data.Resolution, err = ms.Read(sel, query.Metric, req.From, req.To, query.Resolution)
if err != nil {
msg := err.Error()
data.Error = &msg
res = append(res, data)
continue
}
if req.WithStats {
data.AddStats()
}
if query.ScaleFactor != 0 {
data.ScaleBy(query.ScaleFactor)
}
if req.WithPadding {
data.PadDataWithNull(ms, req.From, req.To, query.Metric)
}
if !req.WithData {
data.Data = nil
}
res = append(res, data)
}
response.Results = append(response.Results, res)
}
return &response, nil
}
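// Usage sketch in Go (values are placeholders; see the JSON example above for the
// equivalent request body):
//
//	resp, err := FetchData(ApiQueryRequest{
//	    Cluster: "testcluster",
//	    From:    1700000000,
//	    To:      1700003600,
//	    Queries: []ApiQuery{{Metric: "cpu_load", Hostname: "node001", Resolution: 60}},
//	})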
// handleDebug godoc
// @summary Debug endpoint
// @tags debug
// @description This endpoint allows users to print the content of
// nodes/clusters/metrics to review the state of the data.
// @produce json
// @param selector query string false "Selector"
// @success 200 {string} string "Debug dump"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /debug/ [post]
func HandleDebug(rw http.ResponseWriter, r *http.Request) {
raw := r.URL.Query().Get("selector")
rw.Header().Add("Content-Type", "application/json")
selector := []string{}
if len(raw) != 0 {
selector = strings.Split(raw, ":")
}
ms := GetMemoryStore()
if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil {
handleError(err, http.StatusBadRequest, rw)
return
}
}
// handleHealthCheck godoc
// @summary HealthCheck endpoint
// @tags healthcheck
// @description This endpoint allows users to check whether a node is healthy
// @produce json
// @param selector query string false "Selector"
// @success 200 {string} string "Debug dump"
// @failure 400 {object} api.ErrorResponse "Bad Request"
// @failure 401 {object} api.ErrorResponse "Unauthorized"
// @failure 403 {object} api.ErrorResponse "Forbidden"
// @failure 500 {object} api.ErrorResponse "Internal Server Error"
// @security ApiKeyAuth
// @router /healthcheck/ [get]
func HandleHealthCheck(rw http.ResponseWriter, r *http.Request) {
rawCluster := r.URL.Query().Get("cluster")
rawNode := r.URL.Query().Get("node")
if rawCluster == "" || rawNode == "" {
handleError(errors.New("'cluster' and 'node' are required query parameter"), http.StatusBadRequest, rw)
return
}
rw.Header().Add("Content-Type", "application/json")
selector := []string{rawCluster, rawNode}
ms := GetMemoryStore()
if err := ms.HealthCheck(bufio.NewWriter(rw), selector); err != nil {
handleError(err, http.StatusBadRequest, rw)
return
}
}

View File

@@ -0,0 +1,192 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package memorystore
import (
"archive/zip"
"bufio"
"context"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
)
func Archiving(wg *sync.WaitGroup, ctx context.Context) {
go func() {
defer wg.Done()
d, err := time.ParseDuration(config.MetricStoreKeys.Archive.Interval)
if err != nil {
log.Fatalf("[METRICSTORE]> error parsing archive interval duration: %v\n", err)
}
if d <= 0 {
return
}
ticks := func() <-chan time.Time {
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
t := time.Now().Add(-d)
log.Printf("[METRICSTORE]> start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339))
n, err := ArchiveCheckpoints(config.MetricStoreKeys.Checkpoints.RootDir,
config.MetricStoreKeys.Archive.RootDir, t.Unix(), config.MetricStoreKeys.Archive.DeleteInstead)
if err != nil {
log.Printf("[METRICSTORE]> archiving failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> done: %d files zipped and moved to archive\n", n)
}
}
}
}()
}
var ErrNoNewData error = errors.New("all data already archived")
// ZIP all checkpoint files older than `from` together and write them to the `archiveDir`,
// deleting them from the `checkpointsDir`.
func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) {
entries1, err := os.ReadDir(checkpointsDir)
if err != nil {
return 0, err
}
type workItem struct {
cdir, adir string
cluster, host string
}
var wg sync.WaitGroup
n, errs := int32(0), int32(0)
work := make(chan workItem, NumWorkers)
wg.Add(NumWorkers)
for worker := 0; worker < NumWorkers; worker++ {
go func() {
defer wg.Done()
for workItem := range work {
m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead)
if err != nil {
cclog.Errorf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
atomic.AddInt32(&errs, 1)
}
atomic.AddInt32(&n, int32(m))
}
}()
}
for _, de1 := range entries1 {
entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
if e != nil {
err = e
}
for _, de2 := range entries2 {
cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
adir := filepath.Join(archiveDir, de1.Name(), de2.Name())
work <- workItem{
adir: adir, cdir: cdir,
cluster: de1.Name(), host: de2.Name(),
}
}
}
close(work)
wg.Wait()
if err != nil {
return int(n), err
}
if errs > 0 {
return int(n), fmt.Errorf("%d errors happend while archiving (%d successes)", errs, n)
}
return int(n), nil
}
// Helper function for `ArchiveCheckpoints`.
func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead bool) (int, error) {
entries, err := os.ReadDir(dir)
if err != nil {
return 0, err
}
extension := config.MetricStoreKeys.Checkpoints.FileFormat
files, err := findFiles(entries, from, extension, false)
if err != nil {
return 0, err
}
if deleteInstead {
n := 0
for _, checkpoint := range files {
filename := filepath.Join(dir, checkpoint)
if err = os.Remove(filename); err != nil {
return n, err
}
n += 1
}
return n, nil
}
filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from))
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil && os.IsNotExist(err) {
err = os.MkdirAll(archiveDir, 0o755)
if err == nil {
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644)
}
}
if err != nil {
return 0, err
}
defer f.Close()
bw := bufio.NewWriter(f)
defer bw.Flush()
zw := zip.NewWriter(bw)
defer zw.Close()
n := 0
for _, checkpoint := range files {
filename := filepath.Join(dir, checkpoint)
r, err := os.Open(filename)
if err != nil {
return n, err
}
defer r.Close()
w, err := zw.Create(checkpoint)
if err != nil {
return n, err
}
if _, err = io.Copy(w, r); err != nil {
return n, err
}
if err = os.Remove(filename); err != nil {
return n, err
}
n += 1
}
return n, nil
}

View File

@@ -0,0 +1,233 @@
package memorystore
import (
"errors"
"sync"
"github.com/ClusterCockpit/cc-lib/schema"
)
// Default buffer capacity.
// `buffer.data` will only ever grow up to its capacity and a new link
// in the buffer chain will be created if needed so that no copying
// of data or reallocation needs to happen on writes.
const (
BUFFER_CAP int = 512
)
// So that we can reuse allocations
var bufferPool sync.Pool = sync.Pool{
New: func() interface{} {
return &buffer{
data: make([]schema.Float, 0, BUFFER_CAP),
}
},
}
var (
ErrNoData error = errors.New("[METRICSTORE]> no data for this metric/level")
ErrDataDoesNotAlign error = errors.New("[METRICSTORE]> data from lower granularities does not align")
)
// Each metric on each level has its own buffer.
// This is where the actual values go.
// If `cap(data)` is reached, a new buffer is created and
// becomes the new head of a buffer list.
type buffer struct {
prev *buffer
next *buffer
data []schema.Float
frequency int64
start int64
archived bool
closed bool
}
func newBuffer(ts, freq int64) *buffer {
b := bufferPool.Get().(*buffer)
b.frequency = freq
b.start = ts - (freq / 2)
b.prev = nil
b.next = nil
b.archived = false
b.closed = false
b.data = b.data[:0]
return b
}
// If a new buffer was created, the new head is returned.
// Otherwise, the existing buffer is returned.
// Normally, only "newer" data should be written, but if the value would
// end up in the same buffer anyway, it is allowed.
func (b *buffer) write(ts int64, value schema.Float) (*buffer, error) {
if ts < b.start {
return nil, errors.New("[METRICSTORE]> cannot write value to buffer from past")
}
// idx := int((ts - b.start + (b.frequency / 3)) / b.frequency)
idx := int((ts - b.start) / b.frequency)
if idx >= cap(b.data) {
newbuf := newBuffer(ts, b.frequency)
newbuf.prev = b
b.next = newbuf
b.close()
b = newbuf
idx = 0
}
// Overwriting value or writing value from past
if idx < len(b.data) {
b.data[idx] = value
return b, nil
}
// Fill up unwritten slots with NaN
for i := len(b.data); i < idx; i++ {
b.data = append(b.data, schema.NaN)
}
b.data = append(b.data, value)
return b, nil
}
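// Worked example (a sketch, assuming a frequency of 60 and the default BUFFER_CAP of 512):
// a buffer whose first write happened at t0 can hold samples up to roughly
// t0 + 512*60 seconds; a write with a later timestamp allocates a fresh buffer
// from bufferPool and links it in as the new head via the prev/next pointers.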
func (b *buffer) end() int64 {
return b.firstWrite() + int64(len(b.data))*b.frequency
}
func (b *buffer) firstWrite() int64 {
return b.start + (b.frequency / 2)
}
func (b *buffer) close() {}
/*
func (b *buffer) close() {
if b.closed {
return
}
b.closed = true
n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64
for _, x := range b.data {
if x.IsNaN() {
continue
}
n += 1
f := float64(x)
sum += f
min = math.Min(min, f)
max = math.Max(max, f)
}
b.statisticts.samples = n
if n > 0 {
b.statisticts.avg = Float(sum / float64(n))
b.statisticts.min = Float(min)
b.statisticts.max = Float(max)
} else {
b.statisticts.avg = NaN
b.statisticts.min = NaN
b.statisticts.max = NaN
}
}
*/
// func interpolate(idx int, data []Float) Float {
// if idx == 0 || idx+1 == len(data) {
// return NaN
// }
// return (data[idx-1] + data[idx+1]) / 2.0
// }
// Return all known values from `from` to `to`. Gaps of information are represented as NaN.
// Simple linear interpolation is done between the two neighboring cells if possible.
// If values at the start or end are missing, instead of NaN values, the second and third
// return values contain the actual `from`/`to`.
// This function walks back the buffer chain if `from` is older than the current buffer's start.
// The loaded values are added to `data` and `data` is returned, possibly with a shorter length.
// If `data` is not long enough to hold all values, this function will panic!
func (b *buffer) read(from, to int64, data []schema.Float) ([]schema.Float, int64, int64, error) {
if from < b.firstWrite() {
if b.prev != nil {
return b.prev.read(from, to, data)
}
from = b.firstWrite()
}
i := 0
t := from
for ; t < to; t += b.frequency {
idx := int((t - b.start) / b.frequency)
if idx >= cap(b.data) {
if b.next == nil {
break
}
b = b.next
idx = 0
}
if idx >= len(b.data) {
if b.next == nil || to <= b.next.start {
break
}
data[i] += schema.NaN
} else if t < b.start {
data[i] += schema.NaN
// } else if b.data[idx].IsNaN() {
// data[i] += interpolate(idx, b.data)
} else {
data[i] += b.data[idx]
}
i++
}
return data[:i], from, t, nil
}
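// Reading sketch: for a request with from=100 and to=400 against a chain whose earliest
// write is at 160 and whose newest sample lies before 400, the returned slice only covers
// the data that exists, and the second and third return values report the effective start
// and end instead of padding the edges with NaN.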
// Returns true if this buffer needs to be freed.
func (b *buffer) free(t int64) (delme bool, n int) {
if b.prev != nil {
delme, m := b.prev.free(t)
n += m
if delme {
b.prev.next = nil
if cap(b.prev.data) == BUFFER_CAP {
bufferPool.Put(b.prev)
}
b.prev = nil
}
}
end := b.end()
if end < t {
return true, n + 1
}
return false, n
}
// Call `callback` on every buffer that contains data in the range from `from` to `to`.
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
if b == nil {
return nil
}
if err := b.prev.iterFromTo(from, to, callback); err != nil {
return err
}
if from <= b.end() && b.start <= to {
return callback(b)
}
return nil
}
func (b *buffer) count() int64 {
res := int64(len(b.data))
if b.prev != nil {
res += b.prev.count()
}
return res
}

View File

@@ -0,0 +1,765 @@
package memorystore
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/ClusterCockpit/cc-backend/internal/avro"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/linkedin/goavro/v2"
)
// Whenever changed, update MarshalJSON as well!
type CheckpointMetrics struct {
Data []schema.Float `json:"data"`
Frequency int64 `json:"frequency"`
Start int64 `json:"start"`
}
type CheckpointFile struct {
Metrics map[string]*CheckpointMetrics `json:"metrics"`
Children map[string]*CheckpointFile `json:"children"`
From int64 `json:"from"`
To int64 `json:"to"`
}
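// A sketch of the JSON produced for one host-level checkpoint file (metric and
// child names, timestamps and values are placeholders):
//
//	{
//	  "from": 1700000000,
//	  "to": 1700003600,
//	  "metrics": {
//	    "cpu_load": { "frequency": 60, "start": 1700000000, "data": [0.5, null, 0.7] }
//	  },
//	  "children": {
//	    "cpu0": {
//	      "from": 1700000000,
//	      "to": 1700003600,
//	      "metrics": { "cpu_load": { "frequency": 60, "start": 1700000000, "data": [0.5] } },
//	      "children": {}
//	    }
//	  }
//	}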
var lastCheckpoint time.Time
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
lastCheckpoint = time.Now()
if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
ms := GetMemoryStore()
go func() {
defer wg.Done()
d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Interval)
if err != nil {
log.Fatal(err)
}
if d <= 0 {
return
}
ticks := func() <-chan time.Time {
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
log.Printf("[METRICSTORE]> start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339))
now := time.Now()
n, err := ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir,
lastCheckpoint.Unix(), now.Unix())
if err != nil {
log.Printf("[METRICSTORE]> checkpointing failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> done: %d checkpoint files created\n", n)
lastCheckpoint = now
}
}
}
}()
} else {
go func() {
defer wg.Done()
d, _ := time.ParseDuration("1m")
select {
case <-ctx.Done():
return
case <-time.After(time.Duration(avro.CheckpointBufferMinutes) * time.Minute):
// This is the first tick; wait until data has been collected for the configured number of minutes.
avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false)
// log.Printf("Checkpointing %d avro files", count)
}
ticks := func() <-chan time.Time {
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
// Regular ticks of 1 minute to write data.
avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, false)
// log.Printf("Checkpointing %d avro files", count)
}
}
}()
}
}
// As `Float` implements a custom MarshalJSON() function,
// serializing an array of such types has more overhead
// than one would assume (because of extra allocations, interfaces and so on).
func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
buf := make([]byte, 0, 128+len(cm.Data)*8)
buf = append(buf, `{"frequency":`...)
buf = strconv.AppendInt(buf, cm.Frequency, 10)
buf = append(buf, `,"start":`...)
buf = strconv.AppendInt(buf, cm.Start, 10)
buf = append(buf, `,"data":[`...)
for i, x := range cm.Data {
if i != 0 {
buf = append(buf, ',')
}
if x.IsNaN() {
buf = append(buf, `null`...)
} else {
buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32)
}
}
buf = append(buf, `]}`...)
return buf, nil
}
// Metrics stored at the lowest two levels (root and cluster) are not stored away!
// A new JSON file is created per host; whether this scales well is untested.
// The good thing: only one host is locked at a time, so this function can run
// in parallel to writes/reads.
func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
levels := make([]*Level, 0)
selectors := make([][]string, 0)
m.root.lock.RLock()
for sel1, l1 := range m.root.children {
l1.lock.RLock()
for sel2, l2 := range l1.children {
levels = append(levels, l2)
selectors = append(selectors, []string{sel1, sel2})
}
l1.lock.RUnlock()
}
m.root.lock.RUnlock()
type workItem struct {
level *Level
dir string
selector []string
}
n, errs := int32(0), int32(0)
var wg sync.WaitGroup
wg.Add(NumWorkers)
work := make(chan workItem, NumWorkers*2)
for worker := 0; worker < NumWorkers; worker++ {
go func() {
defer wg.Done()
for workItem := range work {
if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil {
if err == ErrNoNewData {
continue
}
log.Printf("[METRICSTORE]> error while checkpointing %#v: %s", workItem.selector, err.Error())
atomic.AddInt32(&errs, 1)
} else {
atomic.AddInt32(&n, 1)
}
}
}()
}
for i := 0; i < len(levels); i++ {
dir := path.Join(dir, path.Join(selectors[i]...))
work <- workItem{
level: levels[i],
dir: dir,
selector: selectors[i],
}
}
close(work)
wg.Wait()
if errs > 0 {
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
l.lock.RLock()
defer l.lock.RUnlock()
retval := &CheckpointFile{
From: from,
To: to,
Metrics: make(map[string]*CheckpointMetrics),
Children: make(map[string]*CheckpointFile),
}
for metric, minfo := range m.Metrics {
b := l.metrics[minfo.Offset]
if b == nil {
continue
}
allArchived := true
b.iterFromTo(from, to, func(b *buffer) error {
if !b.archived {
allArchived = false
}
return nil
})
if allArchived {
continue
}
data := make([]schema.Float, (to-from)/b.frequency+1)
data, start, end, err := b.read(from, to, data)
if err != nil {
return nil, err
}
for i := int((end - start) / b.frequency); i < len(data); i++ {
data[i] = schema.NaN
}
retval.Metrics[metric] = &CheckpointMetrics{
Frequency: b.frequency,
Start: start,
Data: data,
}
}
for name, child := range l.children {
val, err := child.toCheckpointFile(from, to, m)
if err != nil {
return nil, err
}
if val != nil {
retval.Children[name] = val
}
}
if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
return nil, nil
}
return retval, nil
}
func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
cf, err := l.toCheckpointFile(from, to, m)
if err != nil {
return err
}
if cf == nil {
return ErrNoNewData
}
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
if err != nil && os.IsNotExist(err) {
err = os.MkdirAll(dir, 0o755)
if err == nil {
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644)
}
}
if err != nil {
return err
}
defer f.Close()
bw := bufio.NewWriter(f)
if err = json.NewEncoder(bw).Encode(cf); err != nil {
return err
}
return bw.Flush()
}
func (m *MemoryStore) FromCheckpoint(dir string, from int64, extension string) (int, error) {
var wg sync.WaitGroup
work := make(chan [2]string, NumWorkers)
n, errs := int32(0), int32(0)
wg.Add(NumWorkers)
for worker := 0; worker < NumWorkers; worker++ {
go func() {
defer wg.Done()
for host := range work {
lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics))
nn, err := lvl.fromCheckpoint(m, filepath.Join(dir, host[0], host[1]), from, extension)
if err != nil {
log.Fatalf("[METRICSTORE]> error while loading checkpoints: %s", err.Error())
atomic.AddInt32(&errs, 1)
}
atomic.AddInt32(&n, int32(nn))
}
}()
}
i := 0
clustersDir, err := os.ReadDir(dir)
for _, clusterDir := range clustersDir {
if !clusterDir.IsDir() {
err = errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
goto done
}
hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
if e != nil {
err = e
goto done
}
for _, hostDir := range hostsDir {
if !hostDir.IsDir() {
err = errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
goto done
}
i++
if i%NumWorkers == 0 && i > 100 {
// Forcing garbage collection runs here regularly during the loading of checkpoints
// will decrease the total heap size after loading everything back to memory is done.
// While loading data, the heap will grow fast, so the GC target size will double
// almost always. By forcing GCs here, we can keep it growing more slowly so that
// at the end, less memory is wasted.
runtime.GC()
}
work <- [2]string{clusterDir.Name(), hostDir.Name()}
}
}
done:
close(work)
wg.Wait()
if err != nil {
return int(n), err
}
if errs > 0 {
return int(n), fmt.Errorf("[METRICSTORE]> %d errors happend while creating checkpoints (%d successes)", errs, n)
}
return int(n), nil
}
// Metrics stored at the lowest 2 levels are not loaded (root and cluster)!
// This function can only be called once and before the very first write or read.
// Different host's data is loaded to memory in parallel.
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
if _, err := os.Stat(dir); os.IsNotExist(err) {
// The directory does not exist, so create it using os.MkdirAll()
err := os.MkdirAll(dir, 0755) // 0755 sets the permissions for the directory
if err != nil {
log.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
}
log.Printf("[METRICSTORE]> %#v Directory created successfully.\n", dir)
}
// Read the checkpoint file format from the configuration.
fileFormat := config.MetricStoreKeys.Checkpoints.FileFormat
if fileFormat == "" {
fileFormat = "avro"
}
// Map to easily get the fallback format
oppositeFormat := map[string]string{
"json": "avro",
"avro": "json",
}
// First, attempt to load the specified format
if found, err := checkFilesWithExtension(dir, fileFormat); err != nil {
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
} else if found {
log.Printf("[METRICSTORE]> Loading %s files because fileformat is %s\n", fileFormat, fileFormat)
return m.FromCheckpoint(dir, from, fileFormat)
}
// If not found, attempt the opposite format
altFormat := oppositeFormat[fileFormat]
if found, err := checkFilesWithExtension(dir, altFormat); err != nil {
return 0, fmt.Errorf("[METRICSTORE]> error checking files with extension: %v", err)
} else if found {
log.Printf("[METRICSTORE]> Loading %s files but fileformat is %s\n", altFormat, fileFormat)
return m.FromCheckpoint(dir, from, altFormat)
}
log.Println("[METRICSTORE]> No valid checkpoint files found in the directory.")
return 0, nil
}
func checkFilesWithExtension(dir string, extension string) (bool, error) {
found := false
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("[METRICSTORE]> error accessing path %s: %v", path, err)
}
if !info.IsDir() && filepath.Ext(info.Name()) == "."+extension {
found = true
return nil
}
return nil
})
if err != nil {
return false, fmt.Errorf("[METRICSTORE]> error walking through directories: %s", err)
}
return found, nil
}
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
br := bufio.NewReader(f)
fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err)
}
from_timestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err)
}
// Shift by half a resolution step, matching the buffer start logic used on the line-protocol write path.
from_timestamp -= (resolution / 2)
// fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)
var recordCounter int64 = 0
// Create a new OCF reader from the buffered reader
ocfReader, err := goavro.NewOCFReader(br)
if err != nil {
panic(err)
}
metricsData := make(map[string]schema.FloatArray)
for ocfReader.Scan() {
datum, err := ocfReader.Read()
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err)
}
record, ok := datum.(map[string]interface{})
if !ok {
panic("[METRICSTORE]> failed to assert datum as map[string]interface{}")
}
for key, value := range record {
metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64)))
}
recordCounter += 1
}
to := (from_timestamp + (recordCounter / (60 / resolution) * 60))
if to < from {
return nil
}
for key, floatArray := range metricsData {
metricName := avro.ReplaceKey(key)
if strings.Contains(metricName, avro.Delimiter) {
subString := strings.Split(metricName, avro.Delimiter)
lvl := l
for i := 0; i < len(subString)-1; i++ {
sel := subString[i]
if lvl.children == nil {
lvl.children = make(map[string]*Level)
}
child, ok := lvl.children[sel]
if !ok {
child = &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: nil,
}
lvl.children[sel] = child
}
lvl = child
}
leafMetricName := subString[len(subString)-1]
err = lvl.createBuffer(m, leafMetricName, floatArray, from_timestamp, resolution)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
}
} else {
err = l.createBuffer(m, metricName, floatArray, from_timestamp, resolution)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
}
}
}
return nil
}
func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
n := len(floatArray)
b := &buffer{
frequency: resolution,
start: from,
data: floatArray[0:n:n],
prev: nil,
next: nil,
archived: true,
}
b.close()
minfo, ok := m.Metrics[metricName]
if !ok {
return nil
// return errors.New("Unkown metric: " + name)
}
prev := l.metrics[minfo.Offset]
if prev == nil {
l.metrics[minfo.Offset] = b
} else {
if prev.start > b.start {
return errors.New("wooops")
}
b.prev = prev
prev.next = b
missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
if missingCount > 0 {
missingCount /= int(b.frequency)
for range missingCount {
prev.data = append(prev.data, schema.NaN)
}
prev.data = prev.data[0:len(prev.data):len(prev.data)]
}
}
l.metrics[minfo.Offset] = b
return nil
}
func (l *Level) loadJsonFile(m *MemoryStore, f *os.File, from int64) error {
br := bufio.NewReader(f)
cf := &CheckpointFile{}
if err := json.NewDecoder(br).Decode(cf); err != nil {
return err
}
if cf.To != 0 && cf.To < from {
return nil
}
if err := l.loadFile(cf, m); err != nil {
return err
}
return nil
}
func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
for name, metric := range cf.Metrics {
n := len(metric.Data)
b := &buffer{
frequency: metric.Frequency,
start: metric.Start,
data: metric.Data[0:n:n], // Space is wasted here :(
prev: nil,
next: nil,
archived: true,
}
b.close()
minfo, ok := m.Metrics[name]
if !ok {
continue
// return errors.New("Unkown metric: " + name)
}
prev := l.metrics[minfo.Offset]
if prev == nil {
l.metrics[minfo.Offset] = b
} else {
if prev.start > b.start {
return errors.New("wooops")
}
b.prev = prev
prev.next = b
}
l.metrics[minfo.Offset] = b
}
if len(cf.Children) > 0 && l.children == nil {
l.children = make(map[string]*Level)
}
for sel, childCf := range cf.Children {
child, ok := l.children[sel]
if !ok {
child = &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: nil,
}
l.children[sel] = child
}
if err := child.loadFile(childCf, m); err != nil {
return err
}
}
return nil
}
func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64, extension string) (int, error) {
direntries, err := os.ReadDir(dir)
if err != nil {
if os.IsNotExist(err) {
return 0, nil
}
return 0, err
}
allFiles := make([]fs.DirEntry, 0)
filesLoaded := 0
for _, e := range direntries {
if e.IsDir() {
child := &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: make(map[string]*Level),
}
files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from, extension)
filesLoaded += files
if err != nil {
return filesLoaded, err
}
l.children[e.Name()] = child
} else if strings.HasSuffix(e.Name(), "."+extension) {
allFiles = append(allFiles, e)
} else {
continue
}
}
files, err := findFiles(allFiles, from, extension, true)
if err != nil {
return filesLoaded, err
}
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
"json": l.loadJsonFile,
"avro": l.loadAvroFile,
}
loader := loaders[extension]
for _, filename := range files {
f, err := os.Open(path.Join(dir, filename))
if err != nil {
return filesLoaded, err
}
defer f.Close()
if err = loader(m, f, from); err != nil {
return filesLoaded, err
}
filesLoaded += 1
}
return filesLoaded, nil
}
// This will probably get very slow over time!
// A solution could be some sort of an index file in which all other files
// and the timespan they contain is listed.
func findFiles(direntries []fs.DirEntry, t int64, extension string, findMoreRecentFiles bool) ([]string, error) {
nums := map[string]int64{}
for _, e := range direntries {
if !strings.HasSuffix(e.Name(), "."+extension) {
continue
}
ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64)
if err != nil {
return nil, err
}
nums[e.Name()] = ts
}
sort.Slice(direntries, func(i, j int) bool {
a, b := direntries[i], direntries[j]
return nums[a.Name()] < nums[b.Name()]
})
filenames := make([]string, 0)
for i := 0; i < len(direntries); i++ {
e := direntries[i]
ts1 := nums[e.Name()]
if findMoreRecentFiles && t <= ts1 {
filenames = append(filenames, e.Name())
}
if i == len(direntries)-1 {
continue
}
enext := direntries[i+1]
ts2 := nums[enext.Name()]
if findMoreRecentFiles {
if ts1 < t && t < ts2 {
filenames = append(filenames, e.Name())
}
} else {
if ts2 < t {
filenames = append(filenames, e.Name())
}
}
}
return filenames, nil
}

View File

@@ -0,0 +1,107 @@
package memorystore
import (
"bufio"
"fmt"
"strconv"
)
func (b *buffer) debugDump(buf []byte) []byte {
if b.prev != nil {
buf = b.prev.debugDump(buf)
}
start, len, end := b.start, len(b.data), b.start+b.frequency*int64(len(b.data))
buf = append(buf, `{"start":`...)
buf = strconv.AppendInt(buf, start, 10)
buf = append(buf, `,"len":`...)
buf = strconv.AppendInt(buf, int64(len), 10)
buf = append(buf, `,"end":`...)
buf = strconv.AppendInt(buf, end, 10)
if b.archived {
buf = append(buf, `,"saved":true`...)
}
if b.next != nil {
buf = append(buf, `},`...)
} else {
buf = append(buf, `}`...)
}
return buf
}
func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) {
l.lock.RLock()
defer l.lock.RUnlock()
for i := 0; i < depth; i++ {
buf = append(buf, '\t')
}
buf = append(buf, '"')
buf = append(buf, lvlname...)
buf = append(buf, "\":{\n"...)
depth += 1
objitems := 0
for name, mc := range m.Metrics {
if b := l.metrics[mc.Offset]; b != nil {
for i := 0; i < depth; i++ {
buf = append(buf, '\t')
}
buf = append(buf, '"')
buf = append(buf, name...)
buf = append(buf, `":[`...)
buf = b.debugDump(buf)
buf = append(buf, "],\n"...)
objitems++
}
}
for name, lvl := range l.children {
_, err := w.Write(buf)
if err != nil {
return nil, err
}
buf = buf[0:0]
buf, err = lvl.debugDump(m, w, name, buf, depth)
if err != nil {
return nil, err
}
buf = append(buf, ',', '\n')
objitems++
}
// remove final `,`:
if objitems > 0 {
buf = append(buf[0:len(buf)-1], '\n')
}
depth -= 1
for i := 0; i < depth; i++ {
buf = append(buf, '\t')
}
buf = append(buf, '}')
return buf, nil
}
func (m *MemoryStore) DebugDump(w *bufio.Writer, selector []string) error {
lvl := m.root.findLevel(selector)
if lvl == nil {
return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
}
buf := make([]byte, 0, 2048)
buf = append(buf, "{"...)
buf, err := lvl.debugDump(m, w, "data", buf, 0)
if err != nil {
return err
}
buf = append(buf, "}\n"...)
if _, err = w.Write(buf); err != nil {
return err
}
return w.Flush()
}

View File

@@ -0,0 +1,88 @@
package memorystore
import (
"bufio"
"fmt"
"time"
)
// This is a threshold that allows a node to be considered healthy even though a certain number of
// data points are missing. If a metric on a node misses no more than its last 5 data points, the
// healthCheck endpoint still treats that metric as healthy; anything beyond that marks it as unhealthy.
const MaxMissingDataPoints int64 = 5
// This is a threshold which allows up to a certain number of metrics per node to be unhealthy.
// Works together with MaxMissingDataPoints: if MaxUnhealthyMetrics or more metrics (including
// submetrics) are missing their last MaxMissingDataPoints data points, the node is deemed unhealthy.
const MaxUnhealthyMetrics int64 = 5
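// Worked example (a sketch, assuming a metric with a 60 second frequency): if the end of its
// buffer lies more than 5*60 = 300 seconds in the past, healthCheck() counts that metric as
// unhealthy; once 5 or more metrics on a node are in that state, HealthCheck reports the node
// as "Unhealthy".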
func (b *buffer) healthCheck() int64 {
// Check if the buffer is empty
if b.data == nil {
return 1
}
buffer_end := b.start + b.frequency*int64(len(b.data))
t := time.Now().Unix()
// Check if the buffer is too old
if t-buffer_end > MaxMissingDataPoints*b.frequency {
return 1
}
return 0
}
func (l *Level) healthCheck(m *MemoryStore, count int64) (int64, error) {
l.lock.RLock()
defer l.lock.RUnlock()
for _, mc := range m.Metrics {
if b := l.metrics[mc.Offset]; b != nil {
count += b.healthCheck()
}
}
for _, lvl := range l.children {
c, err := lvl.healthCheck(m, 0)
if err != nil {
return 0, err
}
count += c
}
return count, nil
}
func (m *MemoryStore) HealthCheck(w *bufio.Writer, selector []string) error {
lvl := m.root.findLevel(selector)
if lvl == nil {
return fmt.Errorf("[METRICSTORE]> not found: %#v", selector)
}
buf := make([]byte, 0, 25)
// buf = append(buf, "{"...)
var count int64 = 0
unhealthyMetricsCount, err := lvl.healthCheck(m, count)
if err != nil {
return err
}
if unhealthyMetricsCount < MaxUnhealthyMetrics {
buf = append(buf, "Healthy"...)
} else {
buf = append(buf, "Unhealthy"...)
}
// buf = append(buf, "}\n"...)
if _, err = w.Write(buf); err != nil {
return err
}
return w.Flush()
}

View File

@@ -0,0 +1,187 @@
package memorystore
import (
"sync"
"unsafe"
"github.com/ClusterCockpit/cc-lib/util"
)
// Could also be called "node" as this forms a node in a tree structure.
// Called Level because "node" might be confusing here.
// Can be either a leaf or an inner node. In this tree structure, inner nodes can
// also hold data (in `metrics`).
type Level struct {
children map[string]*Level
metrics []*buffer
lock sync.RWMutex
}
// Find the correct level for the given selector, creating it if
// it does not exist. Example selector in the context of the
// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }.
// This function would probably benefit a lot from `level.children` being a `sync.Map`.
func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level {
if len(selector) == 0 {
return l
}
// Allow concurrent reads:
l.lock.RLock()
var child *Level
var ok bool
if l.children == nil {
// Children map needs to be created...
l.lock.RUnlock()
} else {
child, ok := l.children[selector[0]]
l.lock.RUnlock()
if ok {
return child.findLevelOrCreate(selector[1:], nMetrics)
}
}
// The level does not exist, take the write lock for unique access:
l.lock.Lock()
// While this thread waited for the write lock, another thread
// could have created the child node.
if l.children != nil {
child, ok = l.children[selector[0]]
if ok {
l.lock.Unlock()
return child.findLevelOrCreate(selector[1:], nMetrics)
}
}
child = &Level{
metrics: make([]*buffer, nMetrics),
children: nil,
}
if l.children != nil {
l.children[selector[0]] = child
} else {
l.children = map[string]*Level{selector[0]: child}
}
l.lock.Unlock()
return child.findLevelOrCreate(selector[1:], nMetrics)
}
func (l *Level) free(t int64) (int, error) {
l.lock.Lock()
defer l.lock.Unlock()
n := 0
for i, b := range l.metrics {
if b != nil {
delme, m := b.free(t)
n += m
if delme {
if cap(b.data) == BUFFER_CAP {
bufferPool.Put(b)
}
l.metrics[i] = nil
}
}
}
for _, l := range l.children {
m, err := l.free(t)
n += m
if err != nil {
return n, err
}
}
return n, nil
}
func (l *Level) sizeInBytes() int64 {
l.lock.RLock()
defer l.lock.RUnlock()
size := int64(0)
for _, b := range l.metrics {
if b != nil {
size += b.count() * int64(unsafe.Sizeof(util.Float(0)))
}
}
for _, child := range l.children {
size += child.sizeInBytes()
}
return size
}
func (l *Level) findLevel(selector []string) *Level {
if len(selector) == 0 {
return l
}
l.lock.RLock()
defer l.lock.RUnlock()
lvl := l.children[selector[0]]
if lvl == nil {
return nil
}
return lvl.findLevel(selector[1:])
}
func (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error {
l.lock.RLock()
defer l.lock.RUnlock()
if len(selector) == 0 {
b := l.metrics[offset]
if b != nil {
return f(b)
}
for _, lvl := range l.children {
err := lvl.findBuffers(nil, offset, f)
if err != nil {
return err
}
}
return nil
}
sel := selector[0]
if len(sel.String) != 0 && l.children != nil {
lvl, ok := l.children[sel.String]
if ok {
err := lvl.findBuffers(selector[1:], offset, f)
if err != nil {
return err
}
}
return nil
}
if sel.Group != nil && l.children != nil {
for _, key := range sel.Group {
lvl, ok := l.children[key]
if ok {
err := lvl.findBuffers(selector[1:], offset, f)
if err != nil {
return err
}
}
}
return nil
}
if sel.Any && l.children != nil {
for _, lvl := range l.children {
if err := lvl.findBuffers(selector[1:], offset, f); err != nil {
return err
}
}
return nil
}
return nil
}

View File

@@ -0,0 +1,347 @@
package memorystore
import (
"context"
"fmt"
"log"
"sync"
"time"
"github.com/ClusterCockpit/cc-backend/internal/avro"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/influxdata/line-protocol/v2/lineprotocol"
"github.com/nats-io/nats.go"
)
// Each connection is handled in its own goroutine. This is a blocking function.
// func ReceiveRaw(ctx context.Context,
// listener net.Listener,
// handleLine func(*lineprotocol.Decoder, string) error,
// ) error {
// var wg sync.WaitGroup
// wg.Add(1)
// go func() {
// defer wg.Done()
// <-ctx.Done()
// if err := listener.Close(); err != nil {
// log.Printf("listener.Close(): %s", err.Error())
// }
// }()
// for {
// conn, err := listener.Accept()
// if err != nil {
// if errors.Is(err, net.ErrClosed) {
// break
// }
// log.Printf("listener.Accept(): %s", err.Error())
// }
// wg.Add(2)
// go func() {
// defer wg.Done()
// defer conn.Close()
// dec := lineprotocol.NewDecoder(conn)
// connctx, cancel := context.WithCancel(context.Background())
// defer cancel()
// go func() {
// defer wg.Done()
// select {
// case <-connctx.Done():
// conn.Close()
// case <-ctx.Done():
// conn.Close()
// }
// }()
// if err := handleLine(dec, "default"); err != nil {
// if errors.Is(err, net.ErrClosed) {
// return
// }
// log.Printf("%s: %s", conn.RemoteAddr().String(), err.Error())
// errmsg := make([]byte, 128)
// errmsg = append(errmsg, `error: `...)
// errmsg = append(errmsg, err.Error()...)
// errmsg = append(errmsg, '\n')
// conn.Write(errmsg)
// }
// }()
// }
// wg.Wait()
// return nil
// }
// Connect to a nats server and subscribe to "updates". This is a blocking
// function. handleLine will be called for each line recieved via nats.
// Send `true` through the done channel for gracefull termination.
func ReceiveNats(conf *(config.NatsConfig),
ms *MemoryStore,
workers int,
ctx context.Context,
) error {
var opts []nats.Option
if conf.Username != "" && conf.Password != "" {
opts = append(opts, nats.UserInfo(conf.Username, conf.Password))
}
if conf.Credsfilepath != "" {
opts = append(opts, nats.UserCredentials(conf.Credsfilepath))
}
nc, err := nats.Connect(conf.Address, opts...)
if err != nil {
return err
}
defer nc.Close()
var wg sync.WaitGroup
var subs []*nats.Subscription
msgs := make(chan *nats.Msg, workers*2)
for _, sc := range conf.Subscriptions {
clusterTag := sc.ClusterTag
var sub *nats.Subscription
if workers > 1 {
wg.Add(workers)
for range workers {
go func() {
for m := range msgs {
dec := lineprotocol.NewDecoderWithBytes(m.Data)
if err := decodeLine(dec, ms, clusterTag); err != nil {
log.Printf("error: %s\n", err.Error())
}
}
wg.Done()
}()
}
sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
msgs <- m
})
} else {
sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) {
dec := lineprotocol.NewDecoderWithBytes(m.Data)
if err := decodeLine(dec, ms, clusterTag); err != nil {
log.Printf("error: %s\n", err.Error())
}
})
}
if err != nil {
return err
}
log.Printf("NATS subscription to '%s' on '%s' established\n", sc.SubscribeTo, conf.Address)
subs = append(subs, sub)
}
<-ctx.Done()
for _, sub := range subs {
err = sub.Unsubscribe()
if err != nil {
log.Printf("NATS unsubscribe failed: %s", err.Error())
}
}
close(msgs)
wg.Wait()
nc.Close()
log.Println("NATS connection closed")
return nil
}
// Place `prefix` in front of `buf` but, if possible,
// do that in place in `buf`.
func reorder(buf, prefix []byte) []byte {
n := len(prefix)
m := len(buf)
if cap(buf) < m+n {
return append(prefix[:n:n], buf...)
} else {
buf = buf[:n+m]
for i := m - 1; i >= 0; i-- {
buf[i+n] = buf[i]
}
for i := 0; i < n; i++ {
buf[i] = prefix[i]
}
return buf
}
}
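// For example (a sketch), reorder([]byte("01"), []byte("cpu")) yields "cpu01";
// if buf already has enough spare capacity, the bytes are shifted in place and
// no new allocation happens.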
// Decode lines using dec and make write calls to the MemoryStore.
// If a line is missing its cluster tag, use clusterDefault as default.
func decodeLine(dec *lineprotocol.Decoder,
ms *MemoryStore,
clusterDefault string,
) error {
// Reduce allocations in loop:
t := time.Now()
metric, metricBuf := Metric{}, make([]byte, 0, 16)
selector := make([]string, 0, 4)
typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)
// Optimize for the case where all lines in a "batch" are about the same
// cluster and host. By using `WriteToLevel` (level = host), we do not need
// to take the root- and cluster-level lock as often.
var lvl *Level = nil
prevCluster, prevHost := "", ""
var ok bool
for dec.Next() {
rawmeasurement, err := dec.Measurement()
if err != nil {
return err
}
// Needs to be copied because another call to dec.* would
// invalidate the returned slice.
metricBuf = append(metricBuf[:0], rawmeasurement...)
// The go compiler optimizes map[string(byteslice)] lookups:
metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
if !ok {
continue
}
typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0]
cluster, host := clusterDefault, ""
for {
key, val, err := dec.NextTag()
if err != nil {
return err
}
if key == nil {
break
}
// The go compiler optimizes string([]byte{...}) == "...":
switch string(key) {
case "cluster":
if string(val) == prevCluster {
cluster = prevCluster
} else {
cluster = string(val)
lvl = nil
}
case "hostname", "host":
if string(val) == prevHost {
host = prevHost
} else {
host = string(val)
lvl = nil
}
case "type":
if string(val) == "node" {
break
}
// We cannot be sure that the "type" tag comes before the "type-id" tag:
if len(typeBuf) == 0 {
typeBuf = append(typeBuf, val...)
} else {
typeBuf = reorder(typeBuf, val)
}
case "type-id":
typeBuf = append(typeBuf, val...)
case "subtype":
// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
if len(subTypeBuf) == 0 {
subTypeBuf = append(subTypeBuf, val...)
} else {
subTypeBuf = reorder(subTypeBuf, val)
// subTypeBuf = reorder(typeBuf, val)
}
case "stype-id":
subTypeBuf = append(subTypeBuf, val...)
default:
// Ignore unknown tags (cc-metric-collector might, for example, send us a unit that we do not need)
// return fmt.Errorf("unknown tag: '%s' (value: '%s')", string(key), string(val))
}
}
// If the cluster or host changed, the lvl was set to nil
if lvl == nil {
selector = selector[:2]
selector[0], selector[1] = cluster, host
lvl = ms.GetLevel(selector)
prevCluster, prevHost = cluster, host
}
// subtypes:
selector = selector[:0]
if len(typeBuf) > 0 {
selector = append(selector, string(typeBuf)) // <- Allocation :(
if len(subTypeBuf) > 0 {
selector = append(selector, string(subTypeBuf))
}
}
for {
key, val, err := dec.NextField()
if err != nil {
return err
}
if key == nil {
break
}
if string(key) != "value" {
return fmt.Errorf("host %s: unknown field: '%s' (value: %#v)", host, string(key), val)
}
if val.Kind() == lineprotocol.Float {
metric.Value = schema.Float(val.FloatV())
} else if val.Kind() == lineprotocol.Int {
metric.Value = schema.Float(val.IntV())
} else if val.Kind() == lineprotocol.Uint {
metric.Value = schema.Float(val.UintV())
} else {
return fmt.Errorf("host %s: unsupported value type in message: %s", host, val.Kind().String())
}
}
if t, err = dec.Time(lineprotocol.Second, t); err != nil {
t = time.Now()
if t, err = dec.Time(lineprotocol.Millisecond, t); err != nil {
t = time.Now()
if t, err = dec.Time(lineprotocol.Microsecond, t); err != nil {
t = time.Now()
if t, err = dec.Time(lineprotocol.Nanosecond, t); err != nil {
return fmt.Errorf("host %s: timestamp : %#v with error : %#v", host, t, err.Error())
}
}
}
}
if err != nil {
return fmt.Errorf("host %s: timestamp : %#v with error : %#v", host, t, err.Error())
}
time := t.Unix()
if config.MetricStoreKeys.Checkpoints.FileFormat != "json" {
avro.LineProtocolMessages <- &avro.AvroStruct{
MetricName: string(metricBuf),
Cluster: cluster,
Node: host,
Selector: append([]string{}, selector...),
Value: metric.Value,
Timestamp: time}
}
if err := ms.WriteToLevel(lvl, selector, time, []Metric{metric}); err != nil {
return err
}
}
return nil
}

View File

@@ -0,0 +1,446 @@
package memorystore
import (
"context"
"errors"
"log"
"os"
"os/signal"
"runtime"
"sync"
"syscall"
"time"
"github.com/ClusterCockpit/cc-backend/internal/avro"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/resampler"
"github.com/ClusterCockpit/cc-lib/runtimeEnv"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/ClusterCockpit/cc-lib/util"
)
var (
singleton sync.Once
msInstance *MemoryStore
)
var Clusters = make([]string, 0)
var NumWorkers int = 4
func init() {
maxWorkers := 10
NumWorkers = runtime.NumCPU()/2 + 1
if NumWorkers > maxWorkers {
NumWorkers = maxWorkers
}
}
type Metric struct {
Name string
Value schema.Float
MetricConfig config.MetricConfig
}
type MemoryStore struct {
Metrics map[string]config.MetricConfig
root Level
}
func Init(wg *sync.WaitGroup) {
startupTime := time.Now()
//Pass the config.MetricStoreKeys
InitMetrics(config.Metrics)
ms := GetMemoryStore()
d, err := time.ParseDuration(config.MetricStoreKeys.Checkpoints.Restore)
if err != nil {
log.Fatal(err)
}
restoreFrom := startupTime.Add(-d)
log.Printf("[METRICSTORE]> Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
files, err := ms.FromCheckpointFiles(config.MetricStoreKeys.Checkpoints.RootDir, restoreFrom.Unix())
loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
if err != nil {
log.Fatalf("[METRICSTORE]> Loading checkpoints failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds())
}
// Try to use less memory by forcing a GC run here and then
// lowering the target percentage. The default of 100 means
// that only once the ratio of new allocations exceeds the
// previously active heap, a GC is triggered.
// Forcing a GC here will set the "previously active heap"
// to a minimum.
runtime.GC()
ctx, shutdown := context.WithCancel(context.Background())
wg.Add(4)
Retention(wg, ctx)
Checkpointing(wg, ctx)
Archiving(wg, ctx)
avro.DataStaging(wg, ctx)
wg.Add(1)
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
go func() {
defer wg.Done()
<-sigs
runtimeEnv.SystemdNotifiy(false, "[METRICSTORE]> Shutting down ...")
shutdown()
}()
if config.MetricStoreKeys.Nats != nil {
for _, natsConf := range config.MetricStoreKeys.Nats {
// TODO: When multiple nats configs share a URL, do a single connect.
wg.Add(1)
nc := natsConf
go func() {
// err := ReceiveNats(conf.Nats, decodeLine, runtime.NumCPU()-1, ctx)
err := ReceiveNats(nc, ms, 1, ctx)
if err != nil {
log.Fatal(err)
}
wg.Done()
}()
}
}
}
// Create a new, initialized instance of a MemoryStore.
// Will panic if values in the metric configurations are invalid.
func InitMetrics(metrics map[string]config.MetricConfig) {
singleton.Do(func() {
offset := 0
for key, cfg := range metrics {
if cfg.Frequency == 0 {
panic("[METRICSTORE]> invalid frequency")
}
metrics[key] = config.MetricConfig{
Frequency: cfg.Frequency,
Aggregation: cfg.Aggregation,
Offset: offset,
}
offset += 1
}
msInstance = &MemoryStore{
root: Level{
metrics: make([]*buffer, len(metrics)),
children: make(map[string]*Level),
},
Metrics: metrics,
}
})
}
func GetMemoryStore() *MemoryStore {
if msInstance == nil {
log.Fatalf("[METRICSTORE]> MemoryStore not initialized!")
}
return msInstance
}
func Shutdown() {
log.Printf("[METRICSTORE]> Writing to '%s'...\n", config.MetricStoreKeys.Checkpoints.RootDir)
var files int
var err error
ms := GetMemoryStore()
if config.MetricStoreKeys.Checkpoints.FileFormat == "json" {
files, err = ms.ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
} else {
files, err = avro.GetAvroStore().ToCheckpoint(config.MetricStoreKeys.Checkpoints.RootDir, true)
close(avro.LineProtocolMessages)
}
if err != nil {
log.Printf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
}
log.Printf("[METRICSTORE]> Done! (%d files written)\n", files)
// ms.PrintHeirarchy()
}
// func (m *MemoryStore) PrintHeirarchy() {
// m.root.lock.Lock()
// defer m.root.lock.Unlock()
// fmt.Printf("Root : \n")
// for lvl1, sel1 := range m.root.children {
// fmt.Printf("\t%s\n", lvl1)
// for lvl2, sel2 := range sel1.children {
// fmt.Printf("\t\t%s\n", lvl2)
// if lvl1 == "fritz" && lvl2 == "f0201" {
// for name, met := range m.Metrics {
// mt := sel2.metrics[met.Offset]
// fmt.Printf("\t\t\t\t%s\n", name)
// fmt.Printf("\t\t\t\t")
// for mt != nil {
// // if name == "cpu_load" {
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
// // }
// mt = mt.prev
// }
// fmt.Printf("\n")
// }
// }
// for lvl3, sel3 := range sel2.children {
// if lvl1 == "fritz" && lvl2 == "f0201" && lvl3 == "hwthread70" {
// fmt.Printf("\t\t\t\t\t%s\n", lvl3)
// for name, met := range m.Metrics {
// mt := sel3.metrics[met.Offset]
// fmt.Printf("\t\t\t\t\t\t%s\n", name)
// fmt.Printf("\t\t\t\t\t\t")
// for mt != nil {
// // if name == "clock" {
// fmt.Printf("%d(%d) -> %#v", mt.start, len(mt.data), mt.data)
// mt = mt.prev
// }
// fmt.Printf("\n")
// }
// // for i, _ := range sel3.metrics {
// // fmt.Printf("\t\t\t\t\t%s\n", getName(configmetrics, i))
// // }
// }
// }
// }
// }
// }
func getName(m *MemoryStore, i int) string {
for key, val := range m.Metrics {
if val.Offset == i {
return key
}
}
return ""
}
func Retention(wg *sync.WaitGroup, ctx context.Context) {
ms := GetMemoryStore()
go func() {
defer wg.Done()
d, err := time.ParseDuration(config.MetricStoreKeys.RetentionInMemory)
if err != nil {
log.Fatal(err)
}
if d <= 0 {
return
}
ticks := func() <-chan time.Time {
d := d / 2
if d <= 0 {
return nil
}
return time.NewTicker(d).C
}()
for {
select {
case <-ctx.Done():
return
case <-ticks:
t := time.Now().Add(-d)
log.Printf("[METRICSTORE]> start freeing buffers (older than %s)...\n", t.Format(time.RFC3339))
freed, err := ms.Free(nil, t.Unix())
if err != nil {
log.Printf("[METRICSTORE]> freeing up buffers failed: %s\n", err.Error())
} else {
log.Printf("[METRICSTORE]> done: %d buffers freed\n", freed)
}
}
}
}()
}
// Write all values in `metrics` to the level specified by `selector` for time `ts`.
// Look at `findLevelOrCreate` for how selectors work.
func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error {
var ok bool
for i, metric := range metrics {
if metric.MetricConfig.Frequency == 0 {
metric.MetricConfig, ok = m.Metrics[metric.Name]
if !ok {
metric.MetricConfig.Frequency = 0
}
metrics[i] = metric
}
}
return m.WriteToLevel(&m.root, selector, ts, metrics)
}
func (m *MemoryStore) GetLevel(selector []string) *Level {
return m.root.findLevelOrCreate(selector, len(m.Metrics))
}
// Assumes that `minfo` in `metrics` is filled in!
func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error {
l = l.findLevelOrCreate(selector, len(m.Metrics))
l.lock.Lock()
defer l.lock.Unlock()
for _, metric := range metrics {
if metric.MetricConfig.Frequency == 0 {
continue
}
b := l.metrics[metric.MetricConfig.Offset]
if b == nil {
// First write to this metric and level
b = newBuffer(ts, metric.MetricConfig.Frequency)
l.metrics[metric.MetricConfig.Offset] = b
}
nb, err := b.write(ts, metric.Value)
if err != nil {
return err
}
// Last write created a new buffer...
if b != nb {
l.metrics[metric.MetricConfig.Offset] = nb
}
}
return nil
}
// Returns all values for metric `metric` from `from` to `to` for the selected level(s).
// If the level does not hold the metric itself, the data will be aggregated recursively from the children.
// The second and third return value are the actual from/to for the data. Those can be different from
// the range asked for if no data was available.
func (m *MemoryStore) Read(selector util.Selector, metric string, from, to, resolution int64) ([]schema.Float, int64, int64, int64, error) {
if from > to {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid time range\n")
}
minfo, ok := m.Metrics[metric]
if !ok {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> unkown metric: \n" + metric)
}
n, data := 0, make([]schema.Float, (to-from)/minfo.Frequency+1)
err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error {
cdata, cfrom, cto, err := b.read(from, to, data)
if err != nil {
return err
}
if n == 0 {
from, to = cfrom, cto
} else if from != cfrom || to != cto || len(data) != len(cdata) {
missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency)
if missingfront != 0 {
return ErrDataDoesNotAlign
}
newlen := len(cdata) - missingback
if newlen < 1 {
return ErrDataDoesNotAlign
}
cdata = cdata[0:newlen]
if len(cdata) != len(data) {
return ErrDataDoesNotAlign
}
from, to = cfrom, cto
}
data = cdata
n += 1
return nil
})
if err != nil {
return nil, 0, 0, 0, err
} else if n == 0 {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> metric or host not found\n")
} else if n > 1 {
if minfo.Aggregation == config.AvgAggregation {
normalize := 1. / schema.Float(n)
for i := 0; i < len(data); i++ {
data[i] *= normalize
}
} else if minfo.Aggregation != config.SumAggregation {
return nil, 0, 0, 0, errors.New("[METRICSTORE]> invalid aggregation")
}
}
data, resolution, err = resampler.LargestTriangleThreeBucket(data, minfo.Frequency, resolution)
if err != nil {
return nil, 0, 0, 0, err
}
return data, from, to, resolution, nil
}
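// Aggregation sketch: if the selected level does not hold the metric itself and, say,
// two child buffers (e.g. two sockets) are read instead, their samples are summed
// element-wise; with AvgAggregation every point is then divided by n=2, while
// SumAggregation returns the summed values unchanged.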
// Release all buffers for the selected level and all its children that contain only
// values older than `t`.
func (m *MemoryStore) Free(selector []string, t int64) (int, error) {
return m.GetLevel(selector).free(t)
}
func (m *MemoryStore) FreeAll() error {
for k := range m.root.children {
delete(m.root.children, k)
}
return nil
}
func (m *MemoryStore) SizeInBytes() int64 {
return m.root.sizeInBytes()
}
// Given a selector, return a list of all children of the level selected.
func (m *MemoryStore) ListChildren(selector []string) []string {
lvl := &m.root
for lvl != nil && len(selector) != 0 {
lvl.lock.RLock()
next := lvl.children[selector[0]]
lvl.lock.RUnlock()
lvl = next
selector = selector[1:]
}
if lvl == nil {
return nil
}
lvl.lock.RLock()
defer lvl.lock.RUnlock()
children := make([]string, 0, len(lvl.children))
for child := range lvl.children {
children = append(children, child)
}
return children
}

View File

@@ -0,0 +1,120 @@
package memorystore
import (
"errors"
"math"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-lib/util"
)
type Stats struct {
Samples int
Avg util.Float
Min util.Float
Max util.Float
}
func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) {
if from < b.start {
if b.prev != nil {
return b.prev.stats(from, to)
}
from = b.start
}
// TODO: Check if b.closed and if so and the full buffer is queried,
// use b.statistics instead of iterating over the buffer.
samples := 0
sum, min, max := 0.0, math.MaxFloat32, -math.MaxFloat32
var t int64
for t = from; t < to; t += b.frequency {
idx := int((t - b.start) / b.frequency)
if idx >= cap(b.data) {
b = b.next
if b == nil {
break
}
idx = 0
}
if t < b.start || idx >= len(b.data) {
continue
}
xf := float64(b.data[idx])
if math.IsNaN(xf) {
continue
}
samples += 1
sum += xf
min = math.Min(min, xf)
max = math.Max(max, xf)
}
return Stats{
Samples: samples,
Avg: util.Float(sum) / util.Float(samples),
Min: util.Float(min),
Max: util.Float(max),
}, from, t, nil
}
// Returns statistics for the requested metric on the selected node/level.
// Data is aggregated to the selected level the same way as in `MemoryStore.Read`.
// If `Stats.Samples` is zero, the statistics should not be considered as valid.
func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) {
if from > to {
return nil, 0, 0, errors.New("invalid time range")
}
minfo, ok := m.Metrics[metric]
if !ok {
return nil, 0, 0, errors.New("unknown metric: " + metric)
}
n, samples := 0, 0
avg, min, max := util.Float(0), math.MaxFloat32, -math.MaxFloat32
err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error {
stats, cfrom, cto, err := b.stats(from, to)
if err != nil {
return err
}
if n == 0 {
from, to = cfrom, cto
} else if from != cfrom || to != cto {
return ErrDataDoesNotAlign
}
samples += stats.Samples
avg += stats.Avg
min = math.Min(min, float64(stats.Min))
max = math.Max(max, float64(stats.Max))
n += 1
return nil
})
if err != nil {
return nil, 0, 0, err
}
if n == 0 {
return nil, 0, 0, ErrNoData
}
if minfo.Aggregation == config.AvgAggregation {
avg /= util.Float(n)
} else if n > 1 && minfo.Aggregation != config.SumAggregation {
return nil, 0, 0, errors.New("invalid aggregation")
}
return &Stats{
Samples: samples,
Avg: avg,
Min: util.Float(min),
Max: util.Float(max),
}, from, to, nil
}
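To make the validity note above concrete, a minimal usage sketch; the caller name, metric, and time window are assumptions, and the selector is left abstract:
func logNodeStats(ms *MemoryStore, sel util.Selector) {
	// Hypothetical query: statistics for "flops_any" over the last hour.
	to := time.Now().Unix()
	stats, sfrom, sto, err := ms.Stats(sel, "flops_any", to-3600, to)
	if err != nil {
		cclog.Errorf("stats query failed: %v", err)
		return
	}
	if stats.Samples == 0 {
		// Per the contract above, Avg/Min/Max are not meaningful without samples.
		cclog.Debugf("no samples between %d and %d", sfrom, sto)
		return
	}
	cclog.Debugf("%d samples: avg=%f min=%f max=%f", stats.Samples, stats.Avg, stats.Min, stats.Max)
}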

View File

@@ -91,14 +91,14 @@ func LoadData(job *schema.Job,
// Pass the resolution from the frontend here.
for _, v := range jd {
for _, v_ := range v {
timestep := 0
timestep := int64(0)
for i := 0; i < len(v_.Series); i += 1 {
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, v_.Timestep, resolution)
v_.Series[i].Data, timestep, err = resampler.LargestTriangleThreeBucket(v_.Series[i].Data, int64(v_.Timestep), int64(resolution))
if err != nil {
return err, 0, 0
}
}
v_.Timestep = timestep
v_.Timestep = int(timestep)
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricdata
@@ -12,7 +12,6 @@ import (
"fmt"
"net/http"
"sort"
"strconv"
"strings"
"time"
@@ -270,14 +269,6 @@ func (ccms *CCMetricStore) LoadData(
return jobData, nil
}
var (
hwthreadString = string(schema.MetricScopeHWThread)
coreString = string(schema.MetricScopeCore)
memoryDomainString = string(schema.MetricScopeMemoryDomain)
socketString = string(schema.MetricScopeSocket)
acceleratorString = string(schema.MetricScopeAccelerator)
)
func (ccms *CCMetricStore) buildQueries(
job *schema.Job,
metrics []string,
@@ -306,7 +297,7 @@ func (ccms *CCMetricStore) buildQueries(
if len(mc.SubClusters) != 0 {
isRemoved := false
for _, scConfig := range mc.SubClusters {
if scConfig.Name == job.SubCluster && scConfig.Remove == true {
if scConfig.Name == job.SubCluster && scConfig.Remove {
isRemoved = true
break
}
@@ -570,6 +561,7 @@ func (ccms *CCMetricStore) LoadStats(
metrics []string,
ctx context.Context,
) (map[string]map[string]schema.MetricStatistics, error) {
queries, _, err := ccms.buildQueries(job, metrics, []schema.MetricScope{schema.MetricScopeNode}, 0) // #166 Add scope here for analysis view accelerator normalization?
if err != nil {
cclog.Errorf("Error while building queries for jobId %d, Metrics %v: %s", job.JobID, metrics, err.Error())
@@ -815,6 +807,7 @@ func (ccms *CCMetricStore) LoadNodeListData(
page *model.PageRequest,
ctx context.Context,
) (map[string]schema.JobData, int, bool, error) {
// 0) Init additional vars
var totalNodes int = 0
var hasNextPage bool = false
@@ -850,7 +843,7 @@ func (ccms *CCMetricStore) LoadNodeListData(
if len(nodes) > page.ItemsPerPage {
start := (page.Page - 1) * page.ItemsPerPage
end := start + page.ItemsPerPage
if end >= len(nodes) {
if end > len(nodes) {
end = len(nodes)
hasNextPage = false
} else {
@@ -973,6 +966,7 @@ func (ccms *CCMetricStore) buildNodeQueries(
scopes []schema.MetricScope,
resolution int,
) ([]ApiQuery, []schema.MetricScope, error) {
queries := make([]ApiQuery, 0, len(metrics)*len(scopes)*len(nodes))
assignedScope := []schema.MetricScope{}
@@ -1000,7 +994,7 @@ func (ccms *CCMetricStore) buildNodeQueries(
if mc.SubClusters != nil {
isRemoved := false
for _, scConfig := range mc.SubClusters {
if scConfig.Name == subCluster && scConfig.Remove == true {
if scConfig.Name == subCluster && scConfig.Remove {
isRemoved = true
break
}
@@ -1273,11 +1267,3 @@ func (ccms *CCMetricStore) buildNodeQueries(
return queries, assignedScope, nil
}
func intToStringSlice(is []int) []string {
ss := make([]string, len(is))
for i, x := range is {
ss[i] = strconv.Itoa(x)
}
return ss
}

View File

@@ -54,6 +54,9 @@ func Init() error {
switch kind.Kind {
case "cc-metric-store":
mdr = &CCMetricStore{}
case "cc-metric-store-internal":
mdr = &CCMetricStoreInternal{}
config.InternalCCMSFlag = true
case "prometheus":
mdr = &PrometheusDataRepository{}
case "test":

View File

@@ -74,9 +74,8 @@ func (tmdr *TestMetricDataRepository) LoadNodeListData(
}
func DeepCopy(jd_temp schema.JobData) schema.JobData {
var jd schema.JobData
jd = make(schema.JobData, len(jd_temp))
jd := make(schema.JobData, len(jd_temp))
for k, v := range jd_temp {
jd[k] = make(map[schema.MetricScope]*schema.JobMetric, len(jd_temp[k]))
for k_, v_ := range v {

View File

@@ -52,18 +52,18 @@ func GetJobRepository() *JobRepository {
}
var jobColumns []string = []string{
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.cluster", "job.subcluster",
"job.id", "job.job_id", "job.hpc_user", "job.project", "job.hpc_cluster", "job.subcluster",
"job.start_time", "job.cluster_partition", "job.array_job_id", "job.num_nodes",
"job.num_hwthreads", "job.num_acc", "job.exclusive", "job.monitoring_status",
"job.num_hwthreads", "job.num_acc", "job.shared", "job.monitoring_status",
"job.smt", "job.job_state", "job.duration", "job.walltime", "job.resources",
"job.footprint", "job.energy",
}
var jobCacheColumns []string = []string{
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.cluster",
"job_cache.id", "job_cache.job_id", "job_cache.hpc_user", "job_cache.project", "job_cache.hpc_cluster",
"job_cache.subcluster", "job_cache.start_time", "job_cache.cluster_partition",
"job_cache.array_job_id", "job_cache.num_nodes", "job_cache.num_hwthreads",
"job_cache.num_acc", "job_cache.exclusive", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.num_acc", "job_cache.shared", "job_cache.monitoring_status", "job_cache.smt",
"job_cache.job_state", "job_cache.duration", "job_cache.walltime", "job_cache.resources",
"job_cache.footprint", "job_cache.energy",
}
@@ -74,7 +74,7 @@ func scanJob(row interface{ Scan(...any) error }) (*schema.Job, error) {
if err := row.Scan(
&job.ID, &job.JobID, &job.User, &job.Project, &job.Cluster, &job.SubCluster,
&job.StartTime, &job.Partition, &job.ArrayJobId, &job.NumNodes, &job.NumHWThreads,
&job.NumAcc, &job.Exclusive, &job.MonitoringStatus, &job.SMT, &job.State,
&job.NumAcc, &job.Shared, &job.MonitoringStatus, &job.SMT, &job.State,
&job.Duration, &job.Walltime, &job.RawResources, &job.RawFootprint, &job.Energy); err != nil {
cclog.Warnf("Error while scanning rows (Job): %v", err)
return nil, err
@@ -337,10 +337,10 @@ func (r *JobRepository) FindColumnValue(user *schema.User, searchterm string, ta
// theSql, args, theErr := theQuery.ToSql()
// if theErr != nil {
// log.Warn("Error while converting query to sql")
// cclog.Warn("Error while converting query to sql")
// return "", err
// }
// log.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args)
// cclog.Debugf("SQL query (FindColumnValue): `%s`, args: %#v", theSql, args)
err := theQuery.RunWith(r.stmtCache).QueryRow().Scan(&result)
@@ -390,7 +390,7 @@ func (r *JobRepository) Partitions(cluster string) ([]string, error) {
start := time.Now()
partitions := r.cache.Get("partitions:"+cluster, func() (any, time.Duration, int) {
parts := []string{}
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.cluster = ?;`, cluster); err != nil {
if err = r.DB.Select(&parts, `SELECT DISTINCT job.cluster_partition FROM job WHERE job.hpc_cluster = ?;`, cluster); err != nil {
return nil, 0, 1000
}
@@ -410,7 +410,7 @@ func (r *JobRepository) AllocatedNodes(cluster string) (map[string]map[string]in
subclusters := make(map[string]map[string]int)
rows, err := sq.Select("resources", "subcluster").From("job").
Where("job.job_state = 'running'").
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
RunWith(r.stmtCache).Query()
if err != nil {
cclog.Error("Error while running query")
@@ -505,7 +505,7 @@ func (r *JobRepository) FindJobIdsByTag(tagId int64) ([]int64, error) {
// FIXME: Reconsider filtering short jobs with hardcoded threshold
func (r *JobRepository) FindRunningJobs(cluster string) ([]*schema.Job, error) {
query := sq.Select(jobColumns...).From("job").
Where(fmt.Sprintf("job.cluster = '%s'", cluster)).
Where(fmt.Sprintf("job.hpc_cluster = '%s'", cluster)).
Where("job.job_state = 'running'").
Where("job.duration > 600")

View File

@@ -14,19 +14,19 @@ import (
)
const NamedJobCacheInsert string = `INSERT INTO job_cache (
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
const NamedJobInsert string = `INSERT INTO job (
job_id, hpc_user, project, cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
exclusive, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
job_id, hpc_user, project, hpc_cluster, subcluster, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc,
shared, monitoring_status, smt, job_state, start_time, duration, walltime, footprint, energy, energy_footprint, resources, meta_data
) VALUES (
:job_id, :hpc_user, :project, :cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:exclusive, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
:job_id, :hpc_user, :project, :hpc_cluster, :subcluster, :cluster_partition, :array_job_id, :num_nodes, :num_hwthreads, :num_acc,
:shared, :monitoring_status, :smt, :job_state, :start_time, :duration, :walltime, :footprint, :energy, :energy_footprint, :resources, :meta_data
);`
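As a usage sketch only (not part of this diff): named statements like the ones above are typically executed through sqlx, binding the :placeholders against the struct's db tags. The helper below is hypothetical and assumes schema.Job carries matching tags:
func insertJobNamed(db *sqlx.DB, job *schema.Job) (int64, error) {
	// NamedExec resolves :job_id, :hpc_cluster, :shared, ... from the job struct.
	res, err := db.NamedExec(NamedJobInsert, job)
	if err != nil {
		return 0, err
	}
	return res.LastInsertId()
}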
func (r *JobRepository) InsertJob(job *schema.Job) (int64, error) {
@@ -70,7 +70,7 @@ func (r *JobRepository) SyncJobs() ([]*schema.Job, error) {
}
_, err = r.DB.Exec(
"INSERT INTO job (job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, exclusive, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
"INSERT INTO job (job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data) SELECT job_id, hpc_cluster, subcluster, start_time, hpc_user, project, cluster_partition, array_job_id, num_nodes, num_hwthreads, num_acc, shared, monitoring_status, smt, job_state, duration, walltime, footprint, energy, energy_footprint, resources, meta_data FROM job_cache")
if err != nil {
cclog.Warnf("Error while Job sync: %v", err)
return nil, err

View File

@@ -31,7 +31,7 @@ func (r *JobRepository) Find(
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
q = q.Where("job.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
@@ -52,7 +52,7 @@ func (r *JobRepository) FindCached(
Where("job_cache.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job_cache.cluster = ?", *cluster)
q = q.Where("job_cache.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job_cache.start_time = ?", *startTime)
@@ -78,7 +78,7 @@ func (r *JobRepository) FindAll(
Where("job.job_id = ?", *jobId)
if cluster != nil {
q = q.Where("job.cluster = ?", *cluster)
q = q.Where("job.hpc_cluster = ?", *cluster)
}
if startTime != nil {
q = q.Where("job.start_time = ?", *startTime)
@@ -183,7 +183,7 @@ func (r *JobRepository) FindByJobId(ctx context.Context, jobId int64, startTime
q := sq.Select(jobColumns...).
From("job").
Where("job.job_id = ?", jobId).
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime)
q, qerr := SecurityCheck(ctx, q)
@@ -203,7 +203,7 @@ func (r *JobRepository) IsJobOwner(jobId int64, startTime int64, user string, cl
From("job").
Where("job.job_id = ?", jobId).
Where("job.hpc_user = ?", user).
Where("job.cluster = ?", cluster).
Where("job.hpc_cluster = ?", cluster).
Where("job.start_time = ?", startTime)
_, err := scanJob(q.RunWith(r.stmtCache).QueryRow())

View File

@@ -12,6 +12,7 @@ import (
"strings"
"time"
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/graph/model"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
@@ -167,7 +168,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
query = buildMetaJsonCondition("jobName", filter.JobName, query)
}
if filter.Cluster != nil {
query = buildStringCondition("job.cluster", filter.Cluster, query)
query = buildStringCondition("job.hpc_cluster", filter.Cluster, query)
}
if filter.Partition != nil {
query = buildStringCondition("job.cluster_partition", filter.Partition, query)
@@ -182,8 +183,8 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
now := time.Now().Unix() // There does not seem to be a portable way to get the current Unix timestamp across different DBs.
query = query.Where("(job.job_state != 'running' OR (? - job.start_time) > ?)", now, *filter.MinRunningFor)
}
if filter.Exclusive != nil {
query = query.Where("job.exclusive = ?", *filter.Exclusive)
if filter.Shared != nil {
query = query.Where("job.shared = ?", *filter.Shared)
}
if filter.State != nil {
states := make([]string, len(filter.State))
@@ -216,7 +217,7 @@ func BuildWhereClause(filter *model.JobFilter, query sq.SelectBuilder) sq.Select
return query
}
func buildIntCondition(field string, cond *schema.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
func buildIntCondition(field string, cond *config.IntRange, query sq.SelectBuilder) sq.SelectBuilder {
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
@@ -224,7 +225,7 @@ func buildFloatCondition(field string, cond *model.FloatRange, query sq.SelectBu
return query.Where(field+" BETWEEN ? AND ?", cond.From, cond.To)
}
func buildTimeCondition(field string, cond *schema.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
func buildTimeCondition(field string, cond *config.TimeRange, query sq.SelectBuilder) sq.SelectBuilder {
if cond.From != nil && cond.To != nil {
return query.Where(field+" BETWEEN ? AND ?", cond.From.Unix(), cond.To.Unix())
} else if cond.From != nil {

View File

@@ -1,9 +1,10 @@
CREATE TABLE "job_cache" (
id INTEGER PRIMARY KEY,
job_id BIGINT NOT NULL,
cluster VARCHAR(255) NOT NULL,
hpc_cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
start_time BIGINT NOT NULL, -- Unix timestamp
submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
hpc_user VARCHAR(255) NOT NULL,
project VARCHAR(255) NOT NULL,
cluster_partition VARCHAR(255),
@@ -12,8 +13,9 @@ CREATE TABLE "job_cache" (
walltime INT NOT NULL,
job_state VARCHAR(255) NOT NULL
CHECK (job_state IN (
'running', 'completed', 'failed', 'cancelled',
'stopped', 'timeout', 'preempted', 'out_of_memory'
'boot_fail', 'cancelled', 'completed', 'deadline',
'failed', 'node_fail', 'out-of-memory', 'pending',
'preempted', 'running', 'suspended', 'timeout'
)),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
@@ -21,11 +23,75 @@ CREATE TABLE "job_cache" (
num_hwthreads INT,
num_acc INT,
smt TINYINT NOT NULL DEFAULT 1 CHECK (smt IN (0, 1)),
exclusive TINYINT NOT NULL DEFAULT 1 CHECK (exclusive IN (0, 1, 2)),
shared TEXT NOT NULL
CHECK (shared IN ('none', 'single_user', 'multi_user')),
monitoring_status TINYINT NOT NULL DEFAULT 1
CHECK (monitoring_status IN (0, 1, 2, 3)),
energy REAL NOT NULL DEFAULT 0.0,
energy_footprint TEXT DEFAULT NULL,
footprint TEXT DEFAULT NULL,
UNIQUE (job_id, cluster, start_time)
UNIQUE (job_id, hpc_cluster, start_time)
);
CREATE TABLE "job_new" (
id INTEGER PRIMARY KEY,
job_id BIGINT NOT NULL,
hpc_cluster TEXT NOT NULL,
subcluster TEXT NOT NULL,
submit_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
start_time BIGINT NOT NULL DEFAULT 0, -- Unix timestamp
hpc_user TEXT NOT NULL,
project TEXT NOT NULL,
cluster_partition TEXT,
array_job_id BIGINT,
duration INT NOT NULL,
walltime INT NOT NULL,
job_state TEXT NOT NULL
CHECK (job_state IN (
'boot_fail', 'cancelled', 'completed', 'deadline',
'failed', 'node_fail', 'out-of-memory', 'pending',
'preempted', 'running', 'suspended', 'timeout'
)),
meta_data TEXT, -- JSON
resources TEXT NOT NULL, -- JSON
num_nodes INT NOT NULL,
num_hwthreads INT,
num_acc INT,
smt INT NOT NULL DEFAULT 1,
shared TEXT NOT NULL
CHECK (shared IN ('none', 'single_user', 'multi_user')),
monitoring_status TINYINT NOT NULL DEFAULT 1
CHECK (monitoring_status IN (0, 1, 2, 3)),
energy REAL NOT NULL DEFAULT 0.0,
energy_footprint TEXT DEFAULT NULL,
footprint TEXT DEFAULT NULL,
UNIQUE (job_id, hpc_cluster, start_time)
);
ALTER TABLE job RENAME COLUMN cluster TO hpc_cluster;
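-- Temporary lookup table: translates the old integer 'exclusive' codes (0, 1, 2)
-- into the new textual 'shared' values for the copy into job_new below.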
CREATE TABLE IF NOT EXISTS lookup_exclusive (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE
);
INSERT INTO lookup_exclusive (id, name) VALUES
(0, 'multi_user'),
(1, 'none'),
(2, 'single_user');
INSERT INTO job_new (
id, job_id, hpc_cluster, subcluster, submit_time, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
num_nodes, num_hwthreads, num_acc, smt, shared, monitoring_status, energy,
energy_footprint, footprint
) SELECT
id, job_id, hpc_cluster, subcluster, 0, start_time, hpc_user, project,
cluster_partition, array_job_id, duration, walltime, job_state, meta_data, resources,
num_nodes, num_hwthreads, num_acc, smt, (SELECT name FROM lookup_exclusive WHERE id=job.exclusive), monitoring_status, energy,
energy_footprint, footprint
FROM job;
DROP TABLE lookup_exclusive;
DROP TABLE job;
ALTER TABLE job_new RENAME TO job;

View File

@@ -1,14 +1,16 @@
CREATE TABLE "node" (
id INTEGER PRIMARY KEY,
time_stamp INTEGER NOT NULL,
hostname VARCHAR(255) NOT NULL,
cluster VARCHAR(255) NOT NULL,
subcluster VARCHAR(255) NOT NULL,
cpus_allocated INTEGER NOT NULL,
cpus_total INTEGER NOT NULL,
memory_allocated INTEGER NOT NULL,
memory_total INTEGER NOT NULL,
gpus_allocated INTEGER NOT NULL,
gpus_total INTEGER NOT NULL,
jobs_running INTEGER DEFAULT 0 NOT NULL,
cpus_allocated INTEGER DEFAULT 0 NOT NULL,
cpus_total INTEGER DEFAULT 0 NOT NULL,
memory_allocated INTEGER DEFAULT 0 NOT NULL,
memory_total INTEGER DEFAULT 0 NOT NULL,
gpus_allocated INTEGER DEFAULT 0 NOT NULL,
gpus_total INTEGER DEFAULT 0 NOT NULL,
node_state VARCHAR(255) NOT NULL
CHECK (node_state IN (
'allocated', 'reserved', 'idle', 'mixed',
@@ -32,4 +34,4 @@ CREATE INDEX IF NOT EXISTS nodes_cluster_health ON node (cluster, health_state);
-- Add Indices For Increased Amounts of Tags
CREATE INDEX IF NOT EXISTS tags_jobid ON jobtag (job_id);
CREATE INDEX IF NOT EXISTS tags_tagid ON jobtag (tag_id);

View File

@@ -49,6 +49,12 @@ func GetNodeRepository() *NodeRepository {
return nodeRepoInstance
}
var nodeColumns []string = []string{
// "node.id,"
"node.hostname", "node.cluster", "node.subcluster",
"node.node_state", "node.health_state", // "node.meta_data",
}
func (r *NodeRepository) FetchMetadata(node *schema.Node) (map[string]string, error) {
start := time.Now()
cachekey := fmt.Sprintf("metadata:%d", node.ID)
@@ -218,9 +224,9 @@ func (r *NodeRepository) DeleteNode(id int64) error {
func (r *NodeRepository) QueryNodes(
ctx context.Context,
filters []*model.NodeFilter,
order *model.OrderByInput,
order *model.OrderByInput, // Currently unused!
) ([]*schema.Node, error) {
query, qerr := SecurityCheck(ctx, sq.Select(jobColumns...).From("node"))
query, qerr := AccessCheck(ctx, sq.Select(nodeColumns...).From("node"))
if qerr != nil {
return nil, qerr
}
@@ -232,6 +238,9 @@ func (r *NodeRepository) QueryNodes(
if f.Cluster != nil {
query = buildStringCondition("node.cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("node.subcluster", f.Subcluster, query)
}
if f.NodeState != nil {
query = query.Where("node.node_state = ?", f.NodeState)
}
@@ -287,3 +296,123 @@ func (r *NodeRepository) ListNodes(cluster string) ([]*schema.Node, error) {
return nodeList, nil
}
func (r *NodeRepository) CountNodeStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) {
query, qerr := AccessCheck(ctx, sq.Select("node_state AS state", "count(*) AS count").From("node"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
if f.Hostname != nil {
query = buildStringCondition("node.hostname", f.Hostname, query)
}
if f.Cluster != nil {
query = buildStringCondition("node.cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("node.subcluster", f.Subcluster, query)
}
if f.NodeState != nil {
query = query.Where("node.node_state = ?", f.NodeState)
}
if f.HealthState != nil {
query = query.Where("node.health_state = ?", f.HealthState)
}
}
// Add Group and Order
query = query.GroupBy("state").OrderBy("count DESC")
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
queryString, queryVars, _ := query.ToSql()
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
return nil, err
}
nodes := make([]*model.NodeStates, 0)
for rows.Next() {
node := model.NodeStates{}
if err := rows.Scan(&node.State, &node.Count); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (NodeStates)")
return nil, err
}
nodes = append(nodes, &node)
}
return nodes, nil
}
func (r *NodeRepository) CountHealthStates(ctx context.Context, filters []*model.NodeFilter) ([]*model.NodeStates, error) {
query, qerr := AccessCheck(ctx, sq.Select("health_state AS state", "count(*) AS count").From("node"))
if qerr != nil {
return nil, qerr
}
for _, f := range filters {
if f.Hostname != nil {
query = buildStringCondition("node.hostname", f.Hostname, query)
}
if f.Cluster != nil {
query = buildStringCondition("node.cluster", f.Cluster, query)
}
if f.Subcluster != nil {
query = buildStringCondition("node.subcluster", f.Subcluster, query)
}
if f.NodeState != nil {
query = query.Where("node.node_state = ?", f.NodeState)
}
if f.HealthState != nil {
query = query.Where("node.health_state = ?", f.HealthState)
}
}
// Add Group and Order
query = query.GroupBy("state").OrderBy("count DESC")
rows, err := query.RunWith(r.stmtCache).Query()
if err != nil {
queryString, queryVars, _ := query.ToSql()
cclog.Errorf("Error while running query '%s' %v: %v", queryString, queryVars, err)
return nil, err
}
nodes := make([]*model.NodeStates, 0)
for rows.Next() {
node := model.NodeStates{}
if err := rows.Scan(&node.State, &node.Count); err != nil {
rows.Close()
cclog.Warn("Error while scanning rows (NodeStates)")
return nil, err
}
nodes = append(nodes, &node)
}
return nodes, nil
}
func AccessCheck(ctx context.Context, query sq.SelectBuilder) (sq.SelectBuilder, error) {
user := GetUserFromContext(ctx)
return AccessCheckWithUser(user, query)
}
func AccessCheckWithUser(user *schema.User, query sq.SelectBuilder) (sq.SelectBuilder, error) {
if user == nil {
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user context is nil")
}
switch {
// case len(user.Roles) == 1 && user.HasRole(schema.RoleApi): // API-User : Access NodeInfos
// return query, nil
case user.HasAnyRole([]schema.Role{schema.RoleAdmin, schema.RoleSupport}): // Admin & Support : Access NodeInfos
return query, nil
default: // No known Role: No Access, return error
var qnil sq.SelectBuilder
return qnil, fmt.Errorf("user has no or unknown roles")
}
}
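A hedged example of how the check above could be reused by another repository query; the helper is hypothetical and only illustrates the admin/support gate:
func (r *NodeRepository) countAllNodes(ctx context.Context) (int, error) {
	// Same guard as QueryNodes: only admin and support roles may read node data.
	q, err := AccessCheck(ctx, sq.Select("count(*)").From("node"))
	if err != nil {
		return 0, err
	}
	var count int
	if err := q.RunWith(r.stmtCache).QueryRow().Scan(&count); err != nil {
		return 0, err
	}
	return count, nil
}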

View File

@@ -23,11 +23,12 @@ import (
var groupBy2column = map[model.Aggregate]string{
model.AggregateUser: "job.hpc_user",
model.AggregateProject: "job.project",
model.AggregateCluster: "job.cluster",
model.AggregateCluster: "job.hpc_cluster",
}
var sortBy2column = map[model.SortByAggregate]string{
model.SortByAggregateTotaljobs: "totalJobs",
model.SortByAggregateTotalusers: "totalUsers",
model.SortByAggregateTotalwalltime: "totalWalltime",
model.SortByAggregateTotalnodes: "totalNodes",
model.SortByAggregateTotalnodehours: "totalNodeHours",
@@ -76,8 +77,12 @@ func (r *JobRepository) buildStatsQuery(
// fmt.Sprintf(`CAST(ROUND((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) / 3600) as %s) as value`, time.Now().Unix(), castType)
if col != "" {
// Scan columns: id, totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(col, "COUNT(job.id) as totalJobs", "name",
// Scan columns: id, name, totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(
col,
"name",
"COUNT(job.id) as totalJobs",
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s) as totalWalltime`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s) as totalNodes`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s) as totalNodeHours`, time.Now().Unix(), castType),
@@ -87,8 +92,10 @@ func (r *JobRepository) buildStatsQuery(
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_acc) / 3600) as %s) as totalAccHours`, time.Now().Unix(), castType),
).From("job").LeftJoin("hpc_user ON hpc_user.username = job.hpc_user").GroupBy(col)
} else {
// Scan columns: totalJobs, name, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select("COUNT(job.id)",
// Scan columns: totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours
query = sq.Select(
"COUNT(job.id) as totalJobs",
"COUNT(DISTINCT job.hpc_user) AS totalUsers",
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END)) / 3600) as %s)`, time.Now().Unix(), castType),
fmt.Sprintf(`CAST(SUM(job.num_nodes) as %s)`, castType),
fmt.Sprintf(`CAST(ROUND(SUM((CASE WHEN job.job_state = "running" THEN %d - job.start_time ELSE job.duration END) * job.num_nodes) / 3600) as %s)`, time.Now().Unix(), castType),
@@ -167,14 +174,14 @@ func (r *JobRepository) JobsStatsGrouped(
for rows.Next() {
var id sql.NullString
var name sql.NullString
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := rows.Scan(&id, &jobs, &name, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := rows.Scan(&id, &name, &jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
cclog.Warn("Error while scanning rows")
return nil, err
}
if id.Valid {
var totalJobs, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
var totalJobs, totalUsers, totalWalltime, totalNodes, totalNodeHours, totalCores, totalCoreHours, totalAccs, totalAccHours int
var personName string
if name.Valid {
@@ -185,6 +192,10 @@ func (r *JobRepository) JobsStatsGrouped(
totalJobs = int(jobs.Int64)
}
if users.Valid {
totalUsers = int(users.Int64)
}
if walltime.Valid {
totalWalltime = int(walltime.Int64)
}
@@ -228,8 +239,9 @@ func (r *JobRepository) JobsStatsGrouped(
stats = append(stats,
&model.JobsStatistics{
ID: id.String,
TotalJobs: int(jobs.Int64),
TotalWalltime: int(walltime.Int64),
TotalJobs: totalJobs,
TotalUsers: totalUsers,
TotalWalltime: totalWalltime,
TotalNodes: totalNodes,
TotalNodeHours: totalNodeHours,
TotalCores: totalCores,
@@ -259,8 +271,8 @@ func (r *JobRepository) JobsStats(
row := query.RunWith(r.DB).QueryRow()
stats := make([]*model.JobsStatistics, 0, 1)
var jobs, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := row.Scan(&jobs, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
var jobs, users, walltime, nodes, nodeHours, cores, coreHours, accs, accHours sql.NullInt64
if err := row.Scan(&jobs, &users, &walltime, &nodes, &nodeHours, &cores, &coreHours, &accs, &accHours); err != nil {
cclog.Warn("Error while scanning rows")
return nil, err
}
@@ -280,6 +292,7 @@ func (r *JobRepository) JobsStats(
stats = append(stats,
&model.JobsStatistics{
TotalJobs: int(jobs.Int64),
TotalUsers: int(users.Int64),
TotalWalltime: int(walltime.Int64),
TotalNodeHours: totalNodeHours,
TotalCoreHours: totalCoreHours,

Binary file not shown.

View File

@@ -1 +1,2 @@
vasp
VASP

View File

@@ -240,13 +240,13 @@ func (t *JobClassTagger) Match(job *schema.Job) {
// Initialize environment
env["job"] = map[string]any{
"exclusive": job.Exclusive,
"duration": job.Duration,
"numCores": job.NumHWThreads,
"numNodes": job.NumNodes,
"jobState": job.State,
"numAcc": job.NumAcc,
"smt": job.SMT,
"shared": job.Shared,
"duration": job.Duration,
"numCores": job.NumHWThreads,
"numNodes": job.NumNodes,
"jobState": job.State,
"numAcc": job.NumAcc,
"smt": job.SMT,
}
// add metrics to env

View File

@@ -8,7 +8,7 @@
],
"metrics": ["cpu_load"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

View File

@@ -4,7 +4,7 @@
"parameters": ["job_min_duration_seconds"],
"metrics": ["flops_any", "mem_bw"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

View File

@@ -8,7 +8,7 @@
],
"metrics": ["cpu_load"],
"requirements": [
"job.exclusive == 1",
"job.shared == \"none\"",
"job.duration > job_min_duration_seconds"
],
"variables": [

View File

@@ -20,15 +20,15 @@ func RegisterCommitJobService() {
frequency = "2m"
}
d, _ := time.ParseDuration(frequency)
cclog.Infof("Register commitJob service with %s interval", frequency)
cclog.Infof("register commitJob service with %s interval", frequency)
s.NewJob(gocron.DurationJob(d),
gocron.NewTask(
func() {
start := time.Now()
cclog.Printf("Jobcache sync started at %s", start.Format(time.RFC3339))
cclog.Debugf("jobcache sync started at %s\n", start.Format(time.RFC3339))
jobs, _ := jobRepo.SyncJobs()
repository.CallJobStartHooks(jobs)
cclog.Printf("Jobcache sync and job callbacks are done and took %s", time.Since(start))
cclog.Debugf("jobcache sync and job callbacks are done and took %s\n", time.Since(start))
}))
}

View File

@@ -13,7 +13,6 @@ import (
"github.com/ClusterCockpit/cc-backend/internal/config"
"github.com/ClusterCockpit/cc-backend/internal/repository"
cclog "github.com/ClusterCockpit/cc-lib/ccLogger"
"github.com/ClusterCockpit/cc-lib/schema"
"github.com/go-co-op/gocron/v2"
)
@@ -69,12 +68,12 @@ func Start(cronCfg, archiveConfig json.RawMessage) {
dec := json.NewDecoder(bytes.NewReader(cronCfg))
dec.DisallowUnknownFields()
if err := dec.Decode(&Keys); err != nil {
cclog.Errorf("error while decoding ldap config: %v", err)
cclog.Errorf("error while decoding cron config: %v", err)
}
var cfg struct {
Retention schema.Retention `json:"retention"`
Compression int `json:"compression"`
Retention Retention `json:"retention"`
Compression int `json:"compression"`
}
cfg.Retention.IncludeDB = true

View File

@@ -25,8 +25,8 @@ func RegisterUpdateDurationWorker() {
gocron.NewTask(
func() {
start := time.Now()
cclog.Printf("Update duration started at %s", start.Format(time.RFC3339))
cclog.Printf("Update duration started at %s\n", start.Format(time.RFC3339))
jobRepo.UpdateDuration()
cclog.Printf("Update duration is done and took %s", time.Since(start))
cclog.Printf("Update duration is done and took %s\n", time.Since(start))
}))
}

View File

@@ -34,7 +34,7 @@ func RegisterFootprintWorker() {
c := 0
ce := 0
cl := 0
cclog.Printf("Update Footprints started at %s", s.Format(time.RFC3339))
cclog.Printf("Update Footprints started at %s\n", s.Format(time.RFC3339))
for _, cluster := range archive.Clusters {
s_cluster := time.Now()
@@ -134,8 +134,8 @@ func RegisterFootprintWorker() {
}
jobRepo.TransactionEnd(t)
}
cclog.Debugf("Finish Cluster %s, took %s", cluster.Name, time.Since(s_cluster))
cclog.Debugf("Finish Cluster %s, took %s\n", cluster.Name, time.Since(s_cluster))
}
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s", c, cl, ce, time.Since(s))
cclog.Printf("Updating %d (of %d; Skipped %d) Footprints is done and took %s\n", c, cl, ce, time.Since(s))
}))
}