mirror of
https://github.com/ClusterCockpit/cc-metric-store.git
synced 2024-11-10 05:07:25 +01:00
Do re-write buffers loaded from checkpoint; Add SIGUSR1 for debugging
This commit is contained in:
parent
d006e26fd4
commit
458383d152
7
TODO.md
7
TODO.md
@ -1,11 +1,12 @@
|
|||||||
# TODO
|
# TODO
|
||||||
|
|
||||||
- Delete this file and create more GitHub issues instead?
|
- Improve checkpoints/archives
|
||||||
|
- Store information in each buffer if already archived
|
||||||
|
- Do not create new checkpoint if all buffers already archived
|
||||||
- Missing Testcases:
|
- Missing Testcases:
|
||||||
- Port at least all blackbox tests from the "old" `MemoryStore` to the new implementation
|
- General tests
|
||||||
- Check for corner cases that should fail gracefully
|
- Check for corner cases that should fail gracefully
|
||||||
- Write a more realistic `ToArchive`/`FromArchive` tests
|
- Write a more realistic `ToArchive`/`FromArchive` tests
|
||||||
- Test edgecases for horizontal aggregations
|
|
||||||
- Optimization: Once a buffer is full, calculate min, max and avg
|
- Optimization: Once a buffer is full, calculate min, max and avg
|
||||||
- Calculate averages buffer-wise, average weighted by length of buffer
|
- Calculate averages buffer-wise, average weighted by length of buffer
|
||||||
- Only the head-buffer needs to be fully traversed
|
- Only the head-buffer needs to be fully traversed
|
||||||
|
32
api.go
32
api.go
@ -222,8 +222,7 @@ func handleWrite(rw http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
reader := bufio.NewReader(r.Body)
|
dec := lineprotocol.NewDecoder(bufio.NewReader(r.Body))
|
||||||
dec := lineprotocol.NewDecoder(reader)
|
|
||||||
// Unlike the name suggests, handleLine can handle multiple lines
|
// Unlike the name suggests, handleLine can handle multiple lines
|
||||||
if err := handleLine(dec); err != nil {
|
if err := handleLine(dec); err != nil {
|
||||||
http.Error(rw, err.Error(), http.StatusBadRequest)
|
http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
@ -305,6 +304,34 @@ func handleAllNodes(rw http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// func handleCheckpoint(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
// vars := mux.Vars(r)
|
||||||
|
// from, err := strconv.ParseInt(vars["from"], 10, 64)
|
||||||
|
// if err != nil {
|
||||||
|
// http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
// return
|
||||||
|
// }
|
||||||
|
// to, err := strconv.ParseInt(vars["to"], 10, 64)
|
||||||
|
// if err != nil {
|
||||||
|
// http.Error(rw, err.Error(), http.StatusBadRequest)
|
||||||
|
// return
|
||||||
|
// }
|
||||||
|
|
||||||
|
// log.Println("Checkpoint creation started...")
|
||||||
|
// n, err := memoryStore.ToCheckpoint(conf.Checkpoints.RootDir, from, to)
|
||||||
|
// if err != nil {
|
||||||
|
// log.Printf("Checkpoint creation failed: %s\n", err.Error())
|
||||||
|
// rw.WriteHeader(http.StatusInternalServerError)
|
||||||
|
// return
|
||||||
|
// } else {
|
||||||
|
// log.Printf("Checkpoint finished (%d files)\n", n)
|
||||||
|
// }
|
||||||
|
|
||||||
|
// memoryStore.FreeAll()
|
||||||
|
|
||||||
|
// rw.WriteHeader(http.StatusOK)
|
||||||
|
// }
|
||||||
|
|
||||||
func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler {
|
func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler {
|
||||||
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
|
||||||
authheader := r.Header.Get("Authorization")
|
authheader := r.Header.Get("Authorization")
|
||||||
@ -343,6 +370,7 @@ func StartApiServer(address string, ctx context.Context) error {
|
|||||||
r.HandleFunc("/api/{cluster}/peek", handlePeek)
|
r.HandleFunc("/api/{cluster}/peek", handlePeek)
|
||||||
r.HandleFunc("/api/{cluster}/{from:[0-9]+}/{to:[0-9]+}/all-nodes", handleAllNodes)
|
r.HandleFunc("/api/{cluster}/{from:[0-9]+}/{to:[0-9]+}/all-nodes", handleAllNodes)
|
||||||
r.HandleFunc("/api/write", handleWrite)
|
r.HandleFunc("/api/write", handleWrite)
|
||||||
|
// r.HandleFunc("/api/{from:[0-9]+}/{to:[0-9]+}/checkpoint", handleCheckpoint)
|
||||||
|
|
||||||
server := &http.Server{
|
server := &http.Server{
|
||||||
Handler: r,
|
Handler: r,
|
||||||
|
38
archive.go
38
archive.go
@ -28,6 +28,8 @@ type CheckpointFile struct {
|
|||||||
Children map[string]*CheckpointFile `json:"children"`
|
Children map[string]*CheckpointFile `json:"children"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var ErrNoNewData error = errors.New("all data already archived")
|
||||||
|
|
||||||
// Metrics stored at the lowest 2 levels are not stored away (root and cluster)!
|
// Metrics stored at the lowest 2 levels are not stored away (root and cluster)!
|
||||||
// On a per-host basis a new JSON file is created. I have no idea if this will scale.
|
// On a per-host basis a new JSON file is created. I have no idea if this will scale.
|
||||||
// The good thing: Only a host at a time is locked, so this function can run
|
// The good thing: Only a host at a time is locked, so this function can run
|
||||||
@ -46,15 +48,22 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
|
|||||||
}
|
}
|
||||||
m.root.lock.RUnlock()
|
m.root.lock.RUnlock()
|
||||||
|
|
||||||
|
n := 0
|
||||||
for i := 0; i < len(levels); i++ {
|
for i := 0; i < len(levels); i++ {
|
||||||
dir := path.Join(dir, path.Join(selectors[i]...))
|
dir := path.Join(dir, path.Join(selectors[i]...))
|
||||||
err := levels[i].toCheckpoint(dir, from, to, m)
|
err := levels[i].toCheckpoint(dir, from, to, m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return i, err
|
if err == ErrNoNewData {
|
||||||
}
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
return len(levels), nil
|
return i, err
|
||||||
|
}
|
||||||
|
|
||||||
|
n += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
|
func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
|
||||||
@ -73,6 +82,18 @@ func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
allArchived := true
|
||||||
|
b.iterFromTo(from, to, func(b *buffer) error {
|
||||||
|
if !b.archived {
|
||||||
|
allArchived = false
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
if allArchived {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
data := make([]Float, (to-from)/b.frequency+1)
|
data := make([]Float, (to-from)/b.frequency+1)
|
||||||
data, start, end, err := b.read(from, to, data)
|
data, start, end, err := b.read(from, to, data)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -96,8 +117,14 @@ func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if val != nil {
|
||||||
retval.Children[name] = val
|
retval.Children[name] = val
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
return retval, nil
|
return retval, nil
|
||||||
}
|
}
|
||||||
@ -108,6 +135,10 @@ func (l *level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if cf == nil {
|
||||||
|
return ErrNoNewData
|
||||||
|
}
|
||||||
|
|
||||||
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
|
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
|
||||||
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644)
|
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644)
|
||||||
if err != nil && os.IsNotExist(err) {
|
if err != nil && os.IsNotExist(err) {
|
||||||
@ -146,6 +177,7 @@ func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
|
|||||||
data: metric.Data[0:n:n], // Space is wasted here :(
|
data: metric.Data[0:n:n], // Space is wasted here :(
|
||||||
prev: nil,
|
prev: nil,
|
||||||
next: nil,
|
next: nil,
|
||||||
|
archived: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
minfo, ok := m.metrics[name]
|
minfo, ok := m.metrics[name]
|
||||||
|
4
debug.go
4
debug.go
@ -16,7 +16,7 @@ func (b *buffer) debugDump(w *bufio.Writer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
to := b.start + b.frequency*int64(len(b.data))
|
to := b.start + b.frequency*int64(len(b.data))
|
||||||
fmt.Fprintf(w, "buffer(from=%d, len=%d, to=%d)%s", b.start, len(b.data), to, end)
|
fmt.Fprintf(w, "buffer(from=%d, len=%d, to=%d, archived=%v)%s", b.start, len(b.data), to, b.archived, end)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *level) debugDump(w *bufio.Writer, m *MemoryStore, indent string) error {
|
func (l *level) debugDump(w *bufio.Writer, m *MemoryStore, indent string) error {
|
||||||
@ -32,7 +32,7 @@ func (l *level) debugDump(w *bufio.Writer, m *MemoryStore, indent string) error
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if l.children != nil {
|
if l.children != nil && len(l.children) > 0 {
|
||||||
fmt.Fprintf(w, "%schildren:\n", indent)
|
fmt.Fprintf(w, "%schildren:\n", indent)
|
||||||
for name, lvl := range l.children {
|
for name, lvl := range l.children {
|
||||||
fmt.Fprintf(w, "%s'%s':\n", indent, name)
|
fmt.Fprintf(w, "%s'%s':\n", indent, name)
|
||||||
|
27
memstore.go
27
memstore.go
@ -36,6 +36,7 @@ type buffer struct {
|
|||||||
start int64 // Timestamp of when `data[0]` was written.
|
start int64 // Timestamp of when `data[0]` was written.
|
||||||
data []Float // The slice should never reallocacte as `cap(data)` is respected.
|
data []Float // The slice should never reallocacte as `cap(data)` is respected.
|
||||||
prev, next *buffer // `prev` contains older data, `next` newer data.
|
prev, next *buffer // `prev` contains older data, `next` newer data.
|
||||||
|
archived bool // If true, this buffer is already archived
|
||||||
}
|
}
|
||||||
|
|
||||||
func newBuffer(ts, freq int64) *buffer {
|
func newBuffer(ts, freq int64) *buffer {
|
||||||
@ -155,6 +156,24 @@ func (b *buffer) free(t int64) (int, error) {
|
|||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Call `callback` on every buffer that contains data in the range from `from` to `to`.
|
||||||
|
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
|
||||||
|
if b == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := b.prev.iterFromTo(from, to, callback); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
end := b.start + int64(len(b.data))*b.frequency
|
||||||
|
if from <= b.start && end <= to {
|
||||||
|
return callback(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Could also be called "node" as this forms a node in a tree structure.
|
// Could also be called "node" as this forms a node in a tree structure.
|
||||||
// Called level because "node" might be confusing here.
|
// Called level because "node" might be confusing here.
|
||||||
// Can be both a leaf or a inner node. In this tree structue, inner nodes can
|
// Can be both a leaf or a inner node. In this tree structue, inner nodes can
|
||||||
@ -370,3 +389,11 @@ func (m *MemoryStore) Free(selector Selector, t int64) (int, error) {
|
|||||||
})
|
})
|
||||||
return n, err
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *MemoryStore) FreeAll() error {
|
||||||
|
for k := range m.root.children {
|
||||||
|
delete(m.root.children, k)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
@ -120,6 +121,7 @@ func handleLine(dec *lineprotocol.Decoder) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// log.Printf("write: %s (%v) -> %v\n", string(measurement), selector, value)
|
||||||
if err := memoryStore.Write(selector, t.Unix(), []Metric{
|
if err := memoryStore.Write(selector, t.Unix(), []Metric{
|
||||||
{Name: string(measurement), Value: value},
|
{Name: string(measurement), Value: value},
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
@ -217,18 +219,25 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("Loading checkpoints failed: %s\n", err.Error())
|
log.Fatalf("Loading checkpoints failed: %s\n", err.Error())
|
||||||
} else {
|
} else {
|
||||||
log.Printf("Checkpoints loaded (%d files)\n", files)
|
log.Printf("Checkpoints loaded (%d files, from %s on)\n", files, restoreFrom.Format(time.RFC3339))
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, shutdown := context.WithCancel(context.Background())
|
ctx, shutdown := context.WithCancel(context.Background())
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
sigs := make(chan os.Signal, 1)
|
sigs := make(chan os.Signal, 1)
|
||||||
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
|
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGUSR1)
|
||||||
go func() {
|
go func() {
|
||||||
<-sigs
|
for {
|
||||||
|
sig := <-sigs
|
||||||
|
if sig == syscall.SIGUSR1 {
|
||||||
|
memoryStore.DebugDump(bufio.NewWriter(os.Stdout))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
log.Println("Shuting down...")
|
log.Println("Shuting down...")
|
||||||
shutdown()
|
shutdown()
|
||||||
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
intervals(&wg, ctx)
|
intervals(&wg, ctx)
|
||||||
|
Loading…
Reference in New Issue
Block a user