parallelize zipping/archiving

Author: Lou Knauer
Date:   2022-02-21 09:53:40 +01:00
Commit: eb2f4edfe1
Parent: dea577424e


@@ -20,12 +20,34 @@ import (
     "sync/atomic"
 )
 
+// Whenever changed, update MarshalJSON as well!
 type CheckpointMetrics struct {
     Frequency int64   `json:"frequency"`
     Start     int64   `json:"start"`
     Data      []Float `json:"data"`
 }
 
+func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
+    buf := make([]byte, 0, 128+len(cm.Data)*8)
+    buf = append(buf, `{"frequency":`...)
+    buf = strconv.AppendInt(buf, cm.Frequency, 10)
+    buf = append(buf, `,"start":`...)
+    buf = strconv.AppendInt(buf, cm.Start, 10)
+    buf = append(buf, `,"data":[`...)
+    for i, x := range cm.Data {
+        if i != 0 {
+            buf = append(buf, ',')
+        }
+        if x.IsNaN() {
+            buf = append(buf, `null`...)
+        } else {
+            buf = strconv.AppendFloat(buf, float64(x), 'f', -1, 32)
+        }
+    }
+    buf = append(buf, `]}`...)
+    return buf, nil
+}
+
 type CheckpointFile struct {
     From int64 `json:"from"`
     To   int64 `json:"to"`
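
Note on the hand-written MarshalJSON added above: encoding/json picks this method up automatically whenever a CheckpointMetrics is marshalled, and it writes NaN samples as JSON null; the standard encoder instead fails on non-finite floats (the preallocated buffer and strconv appends presumably also keep serialization cheap by avoiding per-sample reflection). A minimal, repository-independent sketch of the failure mode the custom encoder works around:

package main

import (
    "encoding/json"
    "fmt"
    "math"
)

func main() {
    // The default encoder rejects NaN outright, which is what the custom
    // MarshalJSON above avoids by emitting null for NaN samples.
    _, err := json.Marshal([]float32{1.5, float32(math.NaN()), 3.0})
    fmt.Println(err) // json: unsupported value: NaN
}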
@@ -76,7 +98,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) {
                 continue
             }
 
-            log.Printf("checkpointing %#v failed: %s", workItem.selector, err.Error())
+            log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error())
             atomic.AddInt32(&errs, 1)
         } else {
             atomic.AddInt32(&n, 1)
@@ -216,8 +238,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
             nn, err := lvl.fromCheckpoint(filepath.Join(dir, host[0], host[1]), from, m)
             if err != nil {
                 log.Fatalf("error while loading checkpoints: %s", err.Error())
-                atomic.AddInt32(&errs, int32(nn))
-                continue
+                atomic.AddInt32(&errs, 1)
             }
             atomic.AddInt32(&n, int32(nn))
         }
@@ -436,26 +457,57 @@ func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64) (int, err
         return 0, err
     }
 
-    n := 0
-    for _, de1 := range entries1 {
-        entries2, err := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
-        if err != nil {
-            return n, err
+    type workItem struct {
+        cdir, adir    string
+        cluster, host string
+    }
+
+    var wg sync.WaitGroup
+    n, errs := int32(0), int32(0)
+    work := make(chan workItem, NumWorkers*2)
+
+    wg.Add(NumWorkers)
+    for worker := 0; worker < NumWorkers; worker++ {
+        go func() {
+            defer wg.Done()
+            for workItem := range work {
+                m, err := archiveCheckpoints(workItem.cdir, workItem.adir, from)
+                if err != nil {
+                    log.Printf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
+                    atomic.AddInt32(&errs, 1)
+                }
+
+                atomic.AddInt32(&n, int32(m))
+            }
+        }()
+    }
+
+    for _, de1 := range entries1 {
+        entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
+        if e != nil {
+            err = e
         }
 
         for _, de2 := range entries2 {
             cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
             adir := filepath.Join(archiveDir, de1.Name(), de2.Name())
-            m, err := archiveCheckpoints(cdir, adir, from)
-            n += m
-            if err != nil {
-                return n, err
+            work <- workItem{
+                adir: adir, cdir: cdir,
+                cluster: de1.Name(), host: de2.Name(),
             }
         }
     }
 
-    return n, nil
+    close(work)
+    wg.Wait()
+
+    if err != nil {
+        return int(n), err
+    }
+
+    if errs > 0 {
+        return int(n), fmt.Errorf("%d errors happend while archiving (%d successes)", errs, n)
+    }
+
+    return int(n), nil
 }
 
 // Helper function for `ArchiveCheckpoints`.
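
Note on the parallelized ArchiveCheckpoints above: it mirrors the worker/atomic-counter shape visible in the ToCheckpoint hunk earlier, i.e. a buffered channel of per-host work items, NumWorkers consumer goroutines joined through a sync.WaitGroup, and atomic counters for successes and errors, so a failing host is logged and counted instead of aborting the whole run. A condensed, self-contained sketch of that pattern (doWork and the hard-coded host list are placeholders; in the commit the per-item job is archiveCheckpoints on a cluster/host checkpoint directory):

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
)

const NumWorkers = 4 // the repository defines this elsewhere; the value here is arbitrary

type workItem struct{ cdir, adir string }

// doWork stands in for archiveCheckpoints(cdir, adir, from).
func doWork(it workItem) (int, error) { return 1, nil }

func main() {
    var wg sync.WaitGroup
    n, errs := int32(0), int32(0)
    work := make(chan workItem, NumWorkers*2)

    // Fan out: a fixed pool of workers drains the channel.
    wg.Add(NumWorkers)
    for i := 0; i < NumWorkers; i++ {
        go func() {
            defer wg.Done()
            for it := range work {
                m, err := doWork(it)
                if err != nil {
                    atomic.AddInt32(&errs, 1) // count the failure, keep going
                }
                atomic.AddInt32(&n, int32(m))
            }
        }()
    }

    // Producer: enqueue one item per cluster/host checkpoint directory.
    for _, host := range []string{"host1", "host2", "host3"} {
        work <- workItem{cdir: "checkpoints/cluster/" + host, adir: "archive/cluster/" + host}
    }
    close(work)
    wg.Wait()

    fmt.Printf("archived %d item(s), %d error(s)\n", n, errs)
}

Buffering the work channel at NumWorkers*2 lets the directory walk run slightly ahead of the workers without queueing the whole tree in memory.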