Support the new job archive directory structure

This commit is contained in:
Lou Knauer 2021-12-08 10:08:41 +01:00
parent c1f0d2ed40
commit 84c5cd47f6

View File

@ -5,6 +5,7 @@ import (
"database/sql" "database/sql"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log"
"os" "os"
"path/filepath" "path/filepath"
"strings" "strings"
@ -58,7 +59,7 @@ func initDB(db *sqlx.DB, archive string) error {
return err return err
} }
entries0, err := os.ReadDir(archive) clustersDir, err := os.ReadDir(archive)
if err != nil { if err != nil {
return err return err
} }
@ -70,50 +71,73 @@ func initDB(db *sqlx.DB, archive string) error {
return err return err
} }
var tx *sql.Tx = nil tx, err := db.Begin()
var i int = 0 if err != nil {
return err
}
i := 0
tags := make(map[string]int64) tags := make(map[string]int64)
for _, entry0 := range entries0 { handleDirectory := func(filename string) error {
entries1, err := os.ReadDir(filepath.Join(archive, entry0.Name())) // Bundle 100 inserts into one transaction for better performance:
if err != nil { if i%100 == 0 {
return err if tx != nil {
} if err := tx.Commit(); err != nil {
return err
for _, entry1 := range entries1 { }
if !entry1.IsDir() {
continue
} }
entries2, err := os.ReadDir(filepath.Join(archive, entry0.Name(), entry1.Name())) tx, err = db.Begin()
if err != nil { if err != nil {
return err return err
} }
for _, entry2 := range entries2 { insertstmt = tx.Stmt(insertstmt)
// Bundle 200 inserts into one transaction for better performance: fmt.Printf("%d jobs inserted...\r", i)
if i%200 == 0 { }
if tx != nil {
if err := tx.Commit(); err != nil { err := loadJob(tx, insertstmt, tags, filename)
return err if err == nil {
i += 1
}
return err
}
for _, clusterDir := range clustersDir {
lvl1Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name()))
if err != nil {
return err
}
for _, lvl1Dir := range lvl1Dirs {
if !lvl1Dir.IsDir() {
// Could be the cluster.json file
continue
}
lvl2Dirs, err := os.ReadDir(filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name()))
if err != nil {
return err
}
for _, lvl2Dir := range lvl2Dirs {
dirpath := filepath.Join(archive, clusterDir.Name(), lvl1Dir.Name(), lvl2Dir.Name())
startTimeDirs, err := os.ReadDir(dirpath)
if err != nil {
return err
}
for _, startTimeDir := range startTimeDirs {
if startTimeDir.Type().IsRegular() && startTimeDir.Name() == "meta.json" {
if err := handleDirectory(dirpath); err != nil {
log.Printf("in %s: %s\n", dirpath, err.Error())
}
} else if startTimeDir.IsDir() {
if err := handleDirectory(filepath.Join(dirpath, startTimeDir.Name())); err != nil {
log.Printf("in %s: %s\n", filepath.Join(dirpath, startTimeDir.Name()), err.Error())
} }
} }
tx, err = db.Begin()
if err != nil {
return err
}
insertstmt = tx.Stmt(insertstmt)
fmt.Printf("%d jobs inserted...\r", i)
} }
filename := filepath.Join(archive, entry0.Name(), entry1.Name(), entry2.Name())
if err = loadJob(tx, insertstmt, tags, filename); err != nil {
fmt.Printf("failed to load '%s': %s", filename, err.Error())
continue
}
i += 1
} }
} }
} }
@ -130,7 +154,7 @@ func initDB(db *sqlx.DB, archive string) error {
return err return err
} }
fmt.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds()) log.Printf("A total of %d jobs have been registered in %.3f seconds.\n", i, time.Since(starttime).Seconds())
return nil return nil
} }