Switch interval settings from integer seconds to Go duration strings.

config.json now lists flops_any, flops_dp, flops_sp, mem_bw and load_one at
frequency 15, and expresses the checkpoint, archive and retention settings as
duration strings ("12h", "48h", "168h"). metric-store.go changes the matching
Config fields to string, parses them with time.ParseDuration, and calls
log.Fatal when a value does not parse.

The checkpoint restore window is subtracted from the startup time
(startupTime.Add(-d)), as the replaced expression did, so that restoreFrom
lies in the past and checkpoints newer than it can be found.

diff --git a/config.json b/config.json
index 0631780..16fa894 100644
--- a/config.json
+++ b/config.json
@@ -1,41 +1,21 @@
 {
     "metrics": {
-        "load_one": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "load_five": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "load_fifteen": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "proc_run": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "proc_total": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_free": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_cached": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_total": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "swap_total": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_slab": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_buffers": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_sreclaimable": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_available": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "swap_free": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "mem_used": { "frequency": 3, "aggregation": null, "scope": "node" },
-        "cpu_user": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_nice": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_system": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_idle": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_iowait": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_irq": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_softirq": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_steal": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_guest": { "frequency": 3, "aggregation": "sum", "scope": "cpu" },
-        "cpu_guest_nice": { "frequency": 3, "aggregation": "sum", "scope": "cpu" }
+        "flops_any": { "frequency": 15, "aggregation": "sum", "scope": "cpu" },
+        "flops_dp": { "frequency": 15, "aggregation": "sum", "scope": "cpu" },
+        "flops_sp": { "frequency": 15, "aggregation": "sum", "scope": "cpu" },
+        "mem_bw": { "frequency": 15, "aggregation": "sum", "scope": "socket" },
+        "load_one": { "frequency": 15, "aggregation": null, "scope": "node" }
     },
     "checkpoints": {
-        "interval": 60,
+        "interval": "12h",
         "directory": "./var/checkpoints",
-        "restore": 120
+        "restore": "48h"
     },
     "archive": {
-        "interval": 180,
+        "interval": "168h",
         "directory": "./var/archive"
     },
-    "retention-in-memory": 120,
+    "retention-in-memory": "48h",
     "http-api-address": "0.0.0.0:8081",
     "nats": null,
     "jwt-public-key": "kzfYrYy+TzpanWZHJ5qSdMj5uKUWgq74BWhQG6copP0="
diff --git a/metric-store.go b/metric-store.go
index 1665dae..358ae5d 100644
--- a/metric-store.go
+++ b/metric-store.go
@@ -22,17 +22,17 @@ type MetricConfig struct {
 
 type Config struct {
 	Metrics           map[string]MetricConfig `json:"metrics"`
-	RetentionInMemory int                     `json:"retention-in-memory"`
+	RetentionInMemory string                  `json:"retention-in-memory"`
 	Nats              string                  `json:"nats"`
 	JwtPublicKey      string                  `json:"jwt-public-key"`
 	HttpApiAddress    string                  `json:"http-api-address"`
 	Checkpoints       struct {
-		Interval int    `json:"interval"`
+		Interval string `json:"interval"`
 		RootDir  string `json:"directory"`
-		Restore  int    `json:"restore"`
+		Restore  string `json:"restore"`
 	} `json:"checkpoints"`
 	Archive struct {
-		Interval int    `json:"interval"`
+		Interval string `json:"interval"`
 		RootDir  string `json:"directory"`
 	} `json:"archive"`
 }
@@ -57,10 +57,14 @@ func intervals(wg *sync.WaitGroup, ctx context.Context) {
 	wg.Add(3)
 	go func() {
 		defer wg.Done()
-		d := time.Duration(conf.RetentionInMemory) * time.Second
+		d, err := time.ParseDuration(conf.RetentionInMemory)
+		if err != nil {
+			log.Fatal(err)
+		}
 		if d <= 0 {
 			return
 		}
+
 		ticks := time.Tick(d / 2)
 		for {
 			select {
@@ -82,10 +86,14 @@ func intervals(wg *sync.WaitGroup, ctx context.Context) {
 	lastCheckpoint = time.Now()
 	go func() {
 		defer wg.Done()
-		d := time.Duration(conf.Checkpoints.Interval) * time.Second
+		d, err := time.ParseDuration(conf.Checkpoints.Interval)
+		if err != nil {
+			log.Fatal(err)
+		}
 		if d <= 0 {
 			return
 		}
+
 		ticks := time.Tick(d)
 		for {
 			select {
@@ -108,10 +116,14 @@ func intervals(wg *sync.WaitGroup, ctx context.Context) {
 
 	go func() {
 		defer wg.Done()
-		d := time.Duration(conf.Archive.Interval) * time.Second
+		d, err := time.ParseDuration(conf.Archive.Interval)
+		if err != nil {
+			log.Fatal(err)
+		}
 		if d <= 0 {
 			return
 		}
+
 		ticks := time.Tick(d)
 		for {
 			select {
@@ -140,7 +152,12 @@ func main() {
 	conf = loadConfiguration(configFile)
 	memoryStore = NewMemoryStore(conf.Metrics)
 
-	restoreFrom := startupTime.Add(-time.Duration(conf.Checkpoints.Restore) * time.Second)
+	d, err := time.ParseDuration(conf.Checkpoints.Restore)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	restoreFrom := startupTime.Add(-d)
 	log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
 	files, err := memoryStore.FromCheckpoint(conf.Checkpoints.RootDir, restoreFrom.Unix())
 	if err != nil {