Do not re-write buffers loaded from checkpoint; Add SIGUSR1 for debugging

This commit is contained in:
Lou Knauer 2021-11-22 17:04:09 +01:00
parent d006e26fd4
commit 458383d152
6 changed files with 111 additions and 14 deletions

View File

@ -1,11 +1,12 @@
# TODO # TODO
- Delete this file and create more GitHub issues instead? - Improve checkpoints/archives
- Store information in each buffer if already archived
- Do not create new checkpoint if all buffers already archived
- Missing Testcases: - Missing Testcases:
- Port at least all blackbox tests from the "old" `MemoryStore` to the new implementation - General tests
- Check for corner cases that should fail gracefully - Check for corner cases that should fail gracefully
- Write a more realistic `ToArchive`/`FromArchive` tests - Write a more realistic `ToArchive`/`FromArchive` tests
- Test edge cases for horizontal aggregations
- Optimization: Once a buffer is full, calculate min, max and avg - Optimization: Once a buffer is full, calculate min, max and avg
- Calculate averages buffer-wise, average weighted by length of buffer - Calculate averages buffer-wise, average weighted by length of buffer
- Only the head-buffer needs to be fully traversed - Only the head-buffer needs to be fully traversed

32
api.go
View File

@ -222,8 +222,7 @@ func handleWrite(rw http.ResponseWriter, r *http.Request) {
return return
} }
reader := bufio.NewReader(r.Body) dec := lineprotocol.NewDecoder(bufio.NewReader(r.Body))
dec := lineprotocol.NewDecoder(reader)
// Unlike the name suggests, handleLine can handle multiple lines // Unlike the name suggests, handleLine can handle multiple lines
if err := handleLine(dec); err != nil { if err := handleLine(dec); err != nil {
http.Error(rw, err.Error(), http.StatusBadRequest) http.Error(rw, err.Error(), http.StatusBadRequest)
@ -305,6 +304,34 @@ func handleAllNodes(rw http.ResponseWriter, r *http.Request) {
} }
} }
// func handleCheckpoint(rw http.ResponseWriter, r *http.Request) {
// vars := mux.Vars(r)
// from, err := strconv.ParseInt(vars["from"], 10, 64)
// if err != nil {
// http.Error(rw, err.Error(), http.StatusBadRequest)
// return
// }
// to, err := strconv.ParseInt(vars["to"], 10, 64)
// if err != nil {
// http.Error(rw, err.Error(), http.StatusBadRequest)
// return
// }
// log.Println("Checkpoint creation started...")
// n, err := memoryStore.ToCheckpoint(conf.Checkpoints.RootDir, from, to)
// if err != nil {
// log.Printf("Checkpoint creation failed: %s\n", err.Error())
// rw.WriteHeader(http.StatusInternalServerError)
// return
// } else {
// log.Printf("Checkpoint finished (%d files)\n", n)
// }
// memoryStore.FreeAll()
// rw.WriteHeader(http.StatusOK)
// }
func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler { func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
authheader := r.Header.Get("Authorization") authheader := r.Header.Get("Authorization")
@ -343,6 +370,7 @@ func StartApiServer(address string, ctx context.Context) error {
r.HandleFunc("/api/{cluster}/peek", handlePeek) r.HandleFunc("/api/{cluster}/peek", handlePeek)
r.HandleFunc("/api/{cluster}/{from:[0-9]+}/{to:[0-9]+}/all-nodes", handleAllNodes) r.HandleFunc("/api/{cluster}/{from:[0-9]+}/{to:[0-9]+}/all-nodes", handleAllNodes)
r.HandleFunc("/api/write", handleWrite) r.HandleFunc("/api/write", handleWrite)
// r.HandleFunc("/api/{from:[0-9]+}/{to:[0-9]+}/checkpoint", handleCheckpoint)
server := &http.Server{ server := &http.Server{
Handler: r, Handler: r,

View File

@ -28,6 +28,8 @@ type CheckpointFile struct {
Children map[string]*CheckpointFile `json:"children"` Children map[string]*CheckpointFile `json:"children"`
} }
// ErrNoNewData is the sentinel returned by `level.toCheckpoint` when there is
// nothing to write for a level. `ToCheckpoint` treats it as "skip this level",
// not as a failure.
var ErrNoNewData = errors.New("all data already archived")
// Metrics stored at the lowest 2 levels are not stored away (root and cluster)! // Metrics stored at the lowest 2 levels are not stored away (root and cluster)!
// On a per-host basis a new JSON file is created. I have no idea if this will scale. // On a per-host basis a new JSON file is created. I have no idea if this will scale.
// The good thing: Only a host at a time is locked, so this function can run // The good thing: Only a host at a time is locked, so this function can run
@ -46,15 +48,22 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
} }
m.root.lock.RUnlock() m.root.lock.RUnlock()
n := 0
for i := 0; i < len(levels); i++ { for i := 0; i < len(levels); i++ {
dir := path.Join(dir, path.Join(selectors[i]...)) dir := path.Join(dir, path.Join(selectors[i]...))
err := levels[i].toCheckpoint(dir, from, to, m) err := levels[i].toCheckpoint(dir, from, to, m)
if err != nil { if err != nil {
if err == ErrNoNewData {
continue
}
return i, err return i, err
} }
n += 1
} }
return len(levels), nil return 0, nil
} }
func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) { func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) {
@ -73,6 +82,18 @@ func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil
continue continue
} }
allArchived := true
b.iterFromTo(from, to, func(b *buffer) error {
if !b.archived {
allArchived = false
}
return nil
})
if allArchived {
continue
}
data := make([]Float, (to-from)/b.frequency+1) data := make([]Float, (to-from)/b.frequency+1)
data, start, end, err := b.read(from, to, data) data, start, end, err := b.read(from, to, data)
if err != nil { if err != nil {
@ -96,7 +117,13 @@ func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil
return nil, err return nil, err
} }
retval.Children[name] = val if val != nil {
retval.Children[name] = val
}
}
if len(retval.Children) == 0 && len(retval.Metrics) == 0 {
return nil, nil
} }
return retval, nil return retval, nil
@ -108,6 +135,10 @@ func (l *level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
return err return err
} }
if cf == nil {
return ErrNoNewData
}
filepath := path.Join(dir, fmt.Sprintf("%d.json", from)) filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644) f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644)
if err != nil && os.IsNotExist(err) { if err != nil && os.IsNotExist(err) {
@ -146,6 +177,7 @@ func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
data: metric.Data[0:n:n], // Space is wasted here :( data: metric.Data[0:n:n], // Space is wasted here :(
prev: nil, prev: nil,
next: nil, next: nil,
archived: true,
} }
minfo, ok := m.metrics[name] minfo, ok := m.metrics[name]

View File

@ -16,7 +16,7 @@ func (b *buffer) debugDump(w *bufio.Writer) {
} }
to := b.start + b.frequency*int64(len(b.data)) to := b.start + b.frequency*int64(len(b.data))
fmt.Fprintf(w, "buffer(from=%d, len=%d, to=%d)%s", b.start, len(b.data), to, end) fmt.Fprintf(w, "buffer(from=%d, len=%d, to=%d, archived=%v)%s", b.start, len(b.data), to, b.archived, end)
} }
func (l *level) debugDump(w *bufio.Writer, m *MemoryStore, indent string) error { func (l *level) debugDump(w *bufio.Writer, m *MemoryStore, indent string) error {
@ -32,7 +32,7 @@ func (l *level) debugDump(w *bufio.Writer, m *MemoryStore, indent string) error
} }
} }
if l.children != nil { if l.children != nil && len(l.children) > 0 {
fmt.Fprintf(w, "%schildren:\n", indent) fmt.Fprintf(w, "%schildren:\n", indent)
for name, lvl := range l.children { for name, lvl := range l.children {
fmt.Fprintf(w, "%s'%s':\n", indent, name) fmt.Fprintf(w, "%s'%s':\n", indent, name)

View File

@ -36,6 +36,7 @@ type buffer struct {
start int64 // Timestamp of when `data[0]` was written. start int64 // Timestamp of when `data[0]` was written.
data []Float // The slice should never reallocacte as `cap(data)` is respected. data []Float // The slice should never reallocacte as `cap(data)` is respected.
prev, next *buffer // `prev` contains older data, `next` newer data. prev, next *buffer // `prev` contains older data, `next` newer data.
archived bool // If true, this buffer is already archived
} }
func newBuffer(ts, freq int64) *buffer { func newBuffer(ts, freq int64) *buffer {
@ -155,6 +156,24 @@ func (b *buffer) free(t int64) (int, error) {
return 0, nil return 0, nil
} }
// iterFromTo calls `callback` on every buffer in the chain ending at `b`
// (following the `prev` links) whose time span overlaps the range from `from`
// to `to`. Buffers are visited oldest first.
//
// A buffer that only partially overlaps the range is visited as well: it still
// contains data in the range, and skipping it would hide e.g. a not yet
// archived buffer from the caller. The range check is inclusive on both ends,
// so a buffer ending exactly at `from` is visited too.
//
// Iteration stops at the first non-nil error returned by `callback`, and that
// error is returned. Calling this on a nil buffer does nothing and returns nil.
func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error {
	if b == nil {
		return nil
	}

	// Older buffers first; a nil `prev` ends the recursion via the check above.
	if err := b.prev.iterFromTo(from, to, callback); err != nil {
		return err
	}

	// `end` is the timestamp one step past the last value held by this buffer.
	end := b.start + int64(len(b.data))*b.frequency
	if from <= end && b.start <= to {
		return callback(b)
	}

	return nil
}
// Could also be called "node" as this forms a node in a tree structure. // Could also be called "node" as this forms a node in a tree structure.
// Called level because "node" might be confusing here. // Called level because "node" might be confusing here.
// Can be either a leaf or an inner node. In this tree structure, inner nodes can // Can be either a leaf or an inner node. In this tree structure, inner nodes can
@ -370,3 +389,11 @@ func (m *MemoryStore) Free(selector Selector, t int64) (int, error) {
}) })
return n, err return n, err
} }
// FreeAll removes every child of the root level from the store, dropping
// everything stored beneath them. It always returns nil.
//
// The root's write lock is held while the map is emptied, because other code
// accesses the root under `m.root.lock` and may run concurrently.
func (m *MemoryStore) FreeAll() error {
	m.root.lock.Lock()
	defer m.root.lock.Unlock()

	// Deleting entries while ranging over a map is safe in Go.
	for k := range m.root.children {
		delete(m.root.children, k)
	}

	return nil
}

View File

@ -1,6 +1,7 @@
package main package main
import ( import (
"bufio"
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
@ -120,6 +121,7 @@ func handleLine(dec *lineprotocol.Decoder) error {
return err return err
} }
// log.Printf("write: %s (%v) -> %v\n", string(measurement), selector, value)
if err := memoryStore.Write(selector, t.Unix(), []Metric{ if err := memoryStore.Write(selector, t.Unix(), []Metric{
{Name: string(measurement), Value: value}, {Name: string(measurement), Value: value},
}); err != nil { }); err != nil {
@ -217,18 +219,25 @@ func main() {
if err != nil { if err != nil {
log.Fatalf("Loading checkpoints failed: %s\n", err.Error()) log.Fatalf("Loading checkpoints failed: %s\n", err.Error())
} else { } else {
log.Printf("Checkpoints loaded (%d files)\n", files) log.Printf("Checkpoints loaded (%d files, from %s on)\n", files, restoreFrom.Format(time.RFC3339))
} }
ctx, shutdown := context.WithCancel(context.Background()) ctx, shutdown := context.WithCancel(context.Background())
var wg sync.WaitGroup var wg sync.WaitGroup
sigs := make(chan os.Signal, 1) sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM, syscall.SIGUSR1)
go func() { go func() {
<-sigs for {
log.Println("Shuting down...") sig := <-sigs
shutdown() if sig == syscall.SIGUSR1 {
memoryStore.DebugDump(bufio.NewWriter(os.Stdout))
continue
}
log.Println("Shuting down...")
shutdown()
}
}() }()
intervals(&wg, ctx) intervals(&wg, ctx)