From e1e6694656b296c361e939808e298169d672f4a0 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Fri, 3 May 2024 21:08:01 +0200 Subject: [PATCH 1/6] Intermediate state Not working yet --- .../cc-metric-store/main.go | 134 +---- go.mod | 14 +- go.sum | 17 +- api.go => internal/api/api.go | 12 +- .../api/lineprotocol.go | 20 +- internal/config/config.go | 112 ++++ archive.go => internal/memstore/archive.go | 2 +- memstore.go => internal/memstore/memstore.go | 38 +- selector.go => internal/memstore/selector.go | 2 +- float.go => internal/util/float.go | 9 +- stats.go => internal/util/stats.go | 3 +- lineprotocol_test.go | 144 ----- memoryStore_test.go.orig | 504 ----------------- memstore_test.go | 512 ------------------ 14 files changed, 193 insertions(+), 1330 deletions(-) rename cc-metric-store.go => cmd/cc-metric-store/main.go (61%) rename api.go => internal/api/api.go (98%) rename lineprotocol.go => internal/api/lineprotocol.go (92%) create mode 100644 internal/config/config.go rename archive.go => internal/memstore/archive.go (99%) rename memstore.go => internal/memstore/memstore.go (91%) rename selector.go => internal/memstore/selector.go (99%) rename float.go => internal/util/float.go (92%) rename stats.go => internal/util/stats.go (99%) delete mode 100644 lineprotocol_test.go delete mode 100644 memoryStore_test.go.orig delete mode 100644 memstore_test.go diff --git a/cc-metric-store.go b/cmd/cc-metric-store/main.go similarity index 61% rename from cc-metric-store.go rename to cmd/cc-metric-store/main.go index 8e69c95..5d0f1a3 100644 --- a/cc-metric-store.go +++ b/cmd/cc-metric-store/main.go @@ -3,9 +3,7 @@ package main import ( "bufio" "context" - "encoding/json" "flag" - "fmt" "io" "log" "os" @@ -16,119 +14,22 @@ import ( "syscall" "time" + "github.com/ClusterCockpit/cc-metric-store/internal/api" + "github.com/ClusterCockpit/cc-metric-store/internal/config" + "github.com/ClusterCockpit/cc-metric-store/internal/memstore" "github.com/google/gops/agent" ) -// 
For aggregation over multiple values at different cpus/sockets/..., not time! -type AggregationStrategy int - -const ( - NoAggregation AggregationStrategy = iota - SumAggregation - AvgAggregation +var ( + conf config.Config + memoryStore *memstore.MemoryStore = nil + lastCheckpoint time.Time ) -func (as *AggregationStrategy) UnmarshalJSON(data []byte) error { - var str string - if err := json.Unmarshal(data, &str); err != nil { - return err - } - - switch str { - case "": - *as = NoAggregation - case "sum": - *as = SumAggregation - case "avg": - *as = AvgAggregation - default: - return fmt.Errorf("invalid aggregation strategy: %#v", str) - } - return nil -} - -type MetricConfig struct { - // Interval in seconds at which measurements will arive. - Frequency int64 `json:"frequency"` - - // Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy. - Aggregation AggregationStrategy `json:"aggregation"` - - // Private, used internally... - offset int -} - -type HttpConfig struct { - // Address to bind to, for example "0.0.0.0:8081" - Address string `json:"address"` - - // If not the empty string, use https with this as the certificate file - CertFile string `json:"https-cert-file"` - - // If not the empty string, use https with this as the key file - KeyFile string `json:"https-key-file"` -} - -type NatsConfig struct { - // Address of the nats server - Address string `json:"address"` - - // Username/Password, optional - Username string `json:"username"` - Password string `json:"password"` - - Subscriptions []struct { - // Channel name - SubscribeTo string `json:"subscribe-to"` - - // Allow lines without a cluster tag, use this as default, optional - ClusterTag string `json:"cluster-tag"` - } `json:"subscriptions"` -} - -type Config struct { - Metrics map[string]MetricConfig `json:"metrics"` - RetentionInMemory string `json:"retention-in-memory"` - Nats []*NatsConfig `json:"nats"` - JwtPublicKey string 
`json:"jwt-public-key"` - HttpConfig *HttpConfig `json:"http-api"` - Checkpoints struct { - Interval string `json:"interval"` - RootDir string `json:"directory"` - Restore string `json:"restore"` - } `json:"checkpoints"` - Archive struct { - Interval string `json:"interval"` - RootDir string `json:"directory"` - DeleteInstead bool `json:"delete-instead"` - } `json:"archive"` - Debug struct { - EnableGops bool `json:"gops"` - DumpToFile string `json:"dump-to-file"` - } `json:"debug"` -} - -var conf Config -var memoryStore *MemoryStore = nil -var lastCheckpoint time.Time - -var debugDumpLock sync.Mutex -var debugDump io.Writer = io.Discard - -func loadConfiguration(file string) Config { - var config Config - configFile, err := os.Open(file) - if err != nil { - log.Fatal(err) - } - defer configFile.Close() - dec := json.NewDecoder(configFile) - dec.DisallowUnknownFields() - if err := dec.Decode(&config); err != nil { - log.Fatal(err) - } - return config -} +var ( + debugDumpLock sync.Mutex + debugDump io.Writer = io.Discard +) func intervals(wg *sync.WaitGroup, ctx context.Context) { wg.Add(3) @@ -222,7 +123,7 @@ func intervals(wg *sync.WaitGroup, ctx context.Context) { case <-ticks: t := time.Now().Add(-d) log.Printf("start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) - n, err := ArchiveCheckpoints(conf.Checkpoints.RootDir, conf.Archive.RootDir, t.Unix(), conf.Archive.DeleteInstead) + n, err := memstore.ArchiveCheckpoints(conf.Checkpoints.RootDir, conf.Archive.RootDir, t.Unix(), conf.Archive.DeleteInstead) if err != nil { log.Printf("archiving failed: %s\n", err.Error()) } else { @@ -241,8 +142,8 @@ func main() { flag.Parse() startupTime := time.Now() - conf = loadConfiguration(configFile) - memoryStore = NewMemoryStore(conf.Metrics) + conf = config.LoadConfiguration(configFile) + memoryStore = memstore.NewMemoryStore(conf.Metrics) if enableGopsAgent || conf.Debug.EnableGops { if err := agent.Listen(agent.Options{}); err != nil { @@ -298,7 
+199,7 @@ func main() { continue } - log.Println("Shuting down...") + log.Println("Shutting down...") shutdown() } }() @@ -308,7 +209,7 @@ func main() { wg.Add(1) go func() { - err := StartApiServer(ctx, conf.HttpConfig) + err := api.StartApiServer(ctx, conf.HttpConfig) if err != nil { log.Fatal(err) } @@ -322,8 +223,7 @@ func main() { nc := natsConf go func() { // err := ReceiveNats(conf.Nats, decodeLine, runtime.NumCPU()-1, ctx) - err := ReceiveNats(nc, decodeLine, 1, ctx) - + err := api.ReceiveNats(nc, decodeLine, 1, ctx) if err != nil { log.Fatal(err) } diff --git a/go.mod b/go.mod index 41b6df0..8bfedc2 100644 --- a/go.mod +++ b/go.mod @@ -1,13 +1,21 @@ module github.com/ClusterCockpit/cc-metric-store -go 1.16 +go 1.19 require ( github.com/golang-jwt/jwt/v4 v4.0.0 - github.com/golang/protobuf v1.5.2 // indirect github.com/google/gops v0.3.22 github.com/gorilla/mux v1.8.0 github.com/influxdata/line-protocol/v2 v2.2.0 - github.com/nats-io/nats-server/v2 v2.2.6 // indirect github.com/nats-io/nats.go v1.11.0 ) + +require ( + github.com/golang/protobuf v1.5.2 // indirect + github.com/nats-io/nats-server/v2 v2.2.6 // indirect + github.com/nats-io/nkeys v0.3.0 // indirect + github.com/nats-io/nuid v1.0.1 // indirect + golang.org/x/crypto v0.0.0-20210314154223-e6e6c4f2bb5b // indirect + golang.org/x/sys v0.0.0-20210902050250-f475640dd07b // indirect + google.golang.org/protobuf v1.26.0 // indirect +) diff --git a/go.sum b/go.sum index 479c8f2..17b80a9 100644 --- a/go.sum +++ b/go.sum @@ -1,15 +1,12 @@ -github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA= github.com/StackExchange/wmi v1.2.1/go.mod h1:rcmrprowKIVzvc+NUiLncP2uuArMWLCbu9SBzvHz7e8= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= github.com/frankban/quicktest 
v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s= +github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk= github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU= github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-ole/go-ole v1.2.6-0.20210915003542-8b1f7f90f6b1 h1:4dntyT+x6QTOSCIrgczbQ+ockAEha0cfxD5Wi0iCzjY= github.com/go-ole/go-ole v1.2.6-0.20210915003542-8b1f7f90f6b1/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= -github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/golang-jwt/jwt/v4 v4.0.0 h1:RAqyYixv1p7uEnocuy8P1nru5wprCh/MH2BIlW5z5/o= github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= @@ -25,24 +22,27 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/gops v0.3.22 h1:lyvhDxfPLHAOR2xIYwjPhN387qHxyU21Sk9sz/GhmhQ= github.com/google/gops v0.3.22/go.mod h1:7diIdLsqpCihPSX3fQagksT/Ku/y4RL9LHTlKyEUDl8= github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI= github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod 
h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98= +github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig= github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo= github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY= github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY= github.com/influxdata/line-protocol/v2 v2.2.0 h1:UPmAqE15Hw5zu9E10SYhoXVLWnEJkWnuCbaCiRsA3c0= github.com/influxdata/line-protocol/v2 v2.2.0/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM= -github.com/keybase/go-ps v0.0.0-20190827175125-91aafc93ba19 h1:WjT3fLi9n8YWh/Ih8Q1LHAPsTqGddPcHqscN+PJ3i68= github.com/keybase/go-ps v0.0.0-20190827175125-91aafc93ba19/go.mod h1:hY+WOq6m2FpbvyrI93sMaypsttvaIL5nhVR92dTMUcQ= github.com/klauspost/compress v1.11.12 h1:famVnQVu7QwryBN4jNseQdUKES71ZAOnB6UQQJPZvqk= github.com/klauspost/compress v1.11.12/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/minio/highwayhash v1.0.1 h1:dZ6IIu8Z14VlC0VpfKofAhCy74wu/Qb5gcn52yWoz/0= github.com/minio/highwayhash v1.0.1/go.mod h1:BQskDq+xkJ12lmlUUi7U0M5Swg3EWR+dLTk+kldvVxY= @@ -61,15 +61,11 @@ github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= 
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/shirou/gopsutil/v3 v3.21.9 h1:Vn4MUz2uXhqLSiCbGFRc0DILbMVLAY92DSkT8bsYrHg= github.com/shirou/gopsutil/v3 v3.21.9/go.mod h1:YWp/H8Qs5fVmf17v7JNZzA0mPJ+mS2e9JdiUF9LlKzQ= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/tklauser/go-sysconf v0.3.9 h1:JeUVdAOWhhxVcU6Eqr/ATFHgXk/mmiItdKeJPev3vTo= github.com/tklauser/go-sysconf v0.3.9/go.mod h1:11DU/5sG7UexIrp/O6g35hrWzu0JxlwQ3LSFUzyeuhs= -github.com/tklauser/numcpus v0.3.0 h1:ILuRUQBtssgnxw0XXIjKUC56fgnOrFoQQ/4+DeU2biQ= github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8= -github.com/xlab/treeprint v1.1.0 h1:G/1DjNkPpfZCFt9CSh6b5/nY4VimlbHF3Rh4obvtzDk= github.com/xlab/treeprint v1.1.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200323165209-0ec3e9974c59/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -81,7 +77,6 @@ golang.org/x/sys v0.0.0-20190130150945-aca44879d564/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68 h1:nxC68pudNYkKU6jWhgrqdreuFiOQWj1Fs7T3VrH4Pjw= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210902050250-f475640dd07b h1:S7hKs0Flbq0bbc9xgYt4stIEG1zNDFqyrPwAX2Wj/sE= @@ -92,6 +87,7 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1 h1:NusfzzA6yGQ+ua51ck7E3omNUX/JuqbFSaRGqU8CcLI= golang.org/x/time v0.0.0-20200416051211-89c76fbcd5d1/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= @@ -106,5 +102,4 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -rsc.io/goversion v1.2.0 h1:SPn+NLTiAG7w30IRK/DKp1BjvpWabYgxlLp/+kx5J8w= rsc.io/goversion v1.2.0/go.mod h1:Eih9y/uIBS3ulggl7KNJ09xGSLcuNaLgmvvqa07sgfo= diff --git a/api.go b/internal/api/api.go similarity index 98% rename from api.go rename to internal/api/api.go index dbe72cb..987f2c6 100644 --- a/api.go +++ b/internal/api/api.go @@ -1,4 +1,4 @@ -package main +package api import ( "bufio" @@ -262,15 +262,18 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { if query.SubType != nil { for _, subTypeId := range query.SubTypeIds { sels = 
append(sels, Selector{ - {String: req.Cluster}, {String: query.Hostname}, + {String: req.Cluster}, + {String: query.Hostname}, {String: *query.Type + typeId}, - {String: *query.SubType + subTypeId}}) + {String: *query.SubType + subTypeId}, + }) } } else { sels = append(sels, Selector{ {String: req.Cluster}, {String: query.Hostname}, - {String: *query.Type + typeId}}) + {String: *query.Type + typeId}, + }) } } } @@ -347,7 +350,6 @@ func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler return publicKey, nil }) - if err != nil { http.Error(rw, err.Error(), http.StatusUnauthorized) return diff --git a/lineprotocol.go b/internal/api/lineprotocol.go similarity index 92% rename from lineprotocol.go rename to internal/api/lineprotocol.go index b2e1692..9814463 100644 --- a/lineprotocol.go +++ b/internal/api/lineprotocol.go @@ -1,4 +1,4 @@ -package main +package api import ( "context" @@ -9,15 +9,17 @@ import ( "sync" "time" + "github.com/ClusterCockpit/cc-metric-store/internal/config" + "github.com/ClusterCockpit/cc-metric-store/internal/memstore" + "github.com/ClusterCockpit/cc-metric-store/internal/util" "github.com/influxdata/line-protocol/v2/lineprotocol" "github.com/nats-io/nats.go" ) type Metric struct { Name string - Value Float - - mc MetricConfig + Value util.Float + mc config.MetricConfig } // Currently unused, could be used to send messages via raw TCP. @@ -84,7 +86,7 @@ func ReceiveRaw(ctx context.Context, listener net.Listener, handleLine func(*lin // Connect to a nats server and subscribe to "updates". This is a blocking // function. handleLine will be called for each line recieved via nats. // Send `true` through the done channel for gracefull termination. 
-func ReceiveNats(conf *NatsConfig, handleLine func(*lineprotocol.Decoder, string) error, workers int, ctx context.Context) error { +func ReceiveNats(conf *config.NatsConfig, handleLine func(*lineprotocol.Decoder, string) error, workers int, ctx context.Context) error { var opts []nats.Option if conf.Username != "" && conf.Password != "" { opts = append(opts, nats.UserInfo(conf.Username, conf.Password)) @@ -175,7 +177,7 @@ func reorder(buf, prefix []byte) []byte { // Decode lines using dec and make write calls to the MemoryStore. // If a line is missing its cluster tag, use clusterDefault as default. -func decodeLine(dec *lineprotocol.Decoder, clusterDefault string) error { +func decodeLine(dec *lineprotocol.Decoder, memoryStore *memstore.MemoryStore, clusterDefault string) error { // Reduce allocations in loop: t := time.Now() metric, metricBuf := Metric{}, make([]byte, 0, 16) @@ -292,11 +294,11 @@ func decodeLine(dec *lineprotocol.Decoder, clusterDefault string) error { } if val.Kind() == lineprotocol.Float { - metric.Value = Float(val.FloatV()) + metric.Value = util.Float(val.FloatV()) } else if val.Kind() == lineprotocol.Int { - metric.Value = Float(val.IntV()) + metric.Value = util.Float(val.IntV()) } else if val.Kind() == lineprotocol.Uint { - metric.Value = Float(val.UintV()) + metric.Value = util.Float(val.UintV()) } else { return fmt.Errorf("unsupported value type in message: %s", val.Kind().String()) } diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..54b64e0 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,112 @@ +package config + +import ( + "encoding/json" + "fmt" + "log" + "os" +) + +// For aggregation over multiple values at different cpus/sockets/..., not time! 
+type AggregationStrategy int + +const ( + NoAggregation AggregationStrategy = iota + SumAggregation + AvgAggregation +) + +func (as *AggregationStrategy) UnmarshalJSON(data []byte) error { + var str string + if err := json.Unmarshal(data, &str); err != nil { + return err + } + + switch str { + case "": + *as = NoAggregation + case "sum": + *as = SumAggregation + case "avg": + *as = AvgAggregation + default: + return fmt.Errorf("invalid aggregation strategy: %#v", str) + } + return nil +} + +type MetricConfig struct { + // Interval in seconds at which measurements will arrive. + Frequency int64 `json:"frequency"` + + // Can be 'sum', 'avg' or null. Describes how to aggregate metrics from the same timestep over the hierarchy. + Aggregation AggregationStrategy `json:"aggregation"` + + // Private, used internally... + offset int +} + +type HttpConfig struct { + // Address to bind to, for example "0.0.0.0:8081" + Address string `json:"address"` + + // If not the empty string, use https with this as the certificate file + CertFile string `json:"https-cert-file"` + + // If not the empty string, use https with this as the key file + KeyFile string `json:"https-key-file"` +} + +type NatsConfig struct { + // Address of the nats server + Address string `json:"address"` + + // Username/Password, optional + Username string `json:"username"` + Password string `json:"password"` + + Subscriptions []struct { + // Channel name + SubscribeTo string `json:"subscribe-to"` + + // Allow lines without a cluster tag, use this as default, optional + ClusterTag string `json:"cluster-tag"` + } `json:"subscriptions"` +} + +type Config struct { + Metrics map[string]MetricConfig `json:"metrics"` + HttpConfig *HttpConfig `json:"http-api"` + Checkpoints struct { + Interval string `json:"interval"` + RootDir string `json:"directory"` + Restore string `json:"restore"` + } `json:"checkpoints"` + Debug struct { + DumpToFile string `json:"dump-to-file"` + EnableGops bool `json:"gops"` + }
`json:"debug"` + RetentionInMemory string `json:"retention-in-memory"` + JwtPublicKey string `json:"jwt-public-key"` + Archive struct { + Interval string `json:"interval"` + RootDir string `json:"directory"` + DeleteInstead bool `json:"delete-instead"` + } `json:"archive"` + Nats []*NatsConfig `json:"nats"` +} + +func LoadConfiguration(file string) Config { + var config Config + configFile, err := os.Open(file) + if err != nil { + log.Fatal(err) + } + defer configFile.Close() + dec := json.NewDecoder(configFile) + dec.DisallowUnknownFields() + if err := dec.Decode(&config); err != nil { + log.Fatal(err) + } + return config +} diff --git a/archive.go b/internal/memstore/archive.go similarity index 99% rename from archive.go rename to internal/memstore/archive.go index 302afb3..e1654f7 100644 --- a/archive.go +++ b/internal/memstore/archive.go @@ -1,4 +1,4 @@ -package main +package memstore import ( "archive/zip" diff --git a/memstore.go b/internal/memstore/memstore.go similarity index 91% rename from memstore.go rename to internal/memstore/memstore.go index 79b3f05..99b6295 100644 --- a/memstore.go +++ b/internal/memstore/memstore.go @@ -1,9 +1,13 @@ -package main +package memstore import ( "errors" "sync" "unsafe" + + "github.com/ClusterCockpit/cc-metric-store/internal/api" + "github.com/ClusterCockpit/cc-metric-store/internal/config" + "github.com/ClusterCockpit/cc-metric-store/internal/util" ) // Default buffer capacity. @@ -18,7 +22,7 @@ const ( var bufferPool sync.Pool = sync.Pool{ New: func() interface{} { return &buffer{ - data: make([]Float, 0, BUFFER_CAP), + data: make([]util.Float, 0, BUFFER_CAP), } }, } @@ -33,11 +37,11 @@ var ( // If `cap(data)` is reached, a new buffer is created and // becomes the new head of a buffer list. type buffer struct { - frequency int64 // Time between two "slots" - start int64 // Timestamp of when `data[0]` was written. - data []Float // The slice should never reallocacte as `cap(data)` is respected. 
- prev, next *buffer // `prev` contains older data, `next` newer data. - archived bool // If true, this buffer is already archived + frequency int64 // Time between two "slots" + start int64 // Timestamp of when `data[0]` was written. + data []util.Float // The slice should never reallocate as `cap(data)` is respected. + prev, next *buffer // `prev` contains older data, `next` newer data. + archived bool // If true, this buffer is already archived closed bool /* @@ -66,7 +70,7 @@ func newBuffer(ts, freq int64) *buffer { // Otherwise, the existing buffer is returnd. // Normaly, only "newer" data should be written, but if the value would // end up in the same buffer anyways it is allowed. -func (b *buffer) write(ts int64, value Float) (*buffer, error) { +func (b *buffer) write(ts int64, value util.Float) (*buffer, error) { if ts < b.start { return nil, errors.New("cannot write value to buffer from past") } @@ -90,7 +94,7 @@ func (b *buffer) write(ts int64, value Float) (*buffer, error) { // Fill up unwritten slots with NaN for i := len(b.data); i < idx; i++ { - b.data = append(b.data, NaN) + b.data = append(b.data, util.NaN) } b.data = append(b.data, value) @@ -154,7 +158,7 @@ func (b *buffer) close() { // This function goes back the buffer chain if `from` is older than the currents buffer start. // The loaded values are added to `data` and `data` is returned, possibly with a shorter length. // If `data` is not long enough to hold all values, this function will panic!
-func (b *buffer) read(from, to int64, data []Float) ([]Float, int64, int64, error) { +func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, int64, error) { if from < b.firstWrite() { if b.prev != nil { return b.prev.read(from, to, data) @@ -178,9 +182,9 @@ func (b *buffer) read(from, to int64, data []Float) ([]Float, int64, int64, erro if b.next == nil || to <= b.next.start { break } - data[i] += NaN + data[i] += util.NaN } else if t < b.start { - data[i] += NaN + data[i] += util.NaN // } else if b.data[idx].IsNaN() { // data[i] += interpolate(idx, b.data) } else { @@ -335,7 +339,7 @@ func (l *level) sizeInBytes() int64 { for _, b := range l.metrics { if b != nil { - size += b.count() * int64(unsafe.Sizeof(Float(0))) + size += b.count() * int64(unsafe.Sizeof(util.Float(0))) } } @@ -348,12 +352,12 @@ func (l *level) sizeInBytes() int64 { type MemoryStore struct { root level // root of the tree structure - metrics map[string]MetricConfig + metrics map[string]config.MetricConfig } // Return a new, initialized instance of a MemoryStore. // Will panic if values in the metric configurations are invalid. -func NewMemoryStore(metrics map[string]MetricConfig) *MemoryStore { +func NewMemoryStore(metrics map[string]config.MetricConfig) *MemoryStore { offset := 0 for key, config := range metrics { if config.Frequency == 0 { @@ -379,7 +383,7 @@ func NewMemoryStore(metrics map[string]MetricConfig) *MemoryStore { // Write all values in `metrics` to the level specified by `selector` for time `ts`. // Look at `findLevelOrCreate` for how selectors work. -func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error { +func (m *MemoryStore) Write(selector []string, ts int64, metrics []api.Metric) error { var ok bool for i, metric := range metrics { if metric.mc.Frequency == 0 { @@ -399,7 +403,7 @@ func (m *MemoryStore) GetLevel(selector []string) *level { } // Assumes that `minfo` in `metrics` is filled in! 
-func (m *MemoryStore) WriteToLevel(l *level, selector []string, ts int64, metrics []Metric) error { +func (m *MemoryStore) WriteToLevel(l *level, selector []string, ts int64, metrics []api.Metric) error { l = l.findLevelOrCreate(selector, len(m.metrics)) l.lock.Lock() defer l.lock.Unlock() diff --git a/selector.go b/internal/memstore/selector.go similarity index 99% rename from selector.go rename to internal/memstore/selector.go index 25fe209..7bc498a 100644 --- a/selector.go +++ b/internal/memstore/selector.go @@ -1,4 +1,4 @@ -package main +package memstore import ( "encoding/json" diff --git a/float.go b/internal/util/float.go similarity index 92% rename from float.go rename to internal/util/float.go index eae2d98..603bc91 100644 --- a/float.go +++ b/internal/util/float.go @@ -1,4 +1,4 @@ -package main +package util import ( "math" @@ -11,8 +11,10 @@ import ( // we have to use our own type which implements encoding/json.Marshaler itself. type Float float64 -var NaN Float = Float(math.NaN()) -var nullAsBytes []byte = []byte("null") +var ( + NaN Float = Float(math.NaN()) + nullAsBytes []byte = []byte("null") +) func (f Float) IsNaN() bool { return math.IsNaN(float64(f)) @@ -55,7 +57,6 @@ func (fa FloatArray) MarshalJSON() ([]byte, error) { buf = append(buf, `null`...) 
} else { buf = strconv.AppendFloat(buf, float64(fa[i]), 'f', 3, 64) - } } buf = append(buf, ']') diff --git a/stats.go b/internal/util/stats.go similarity index 99% rename from stats.go rename to internal/util/stats.go index 510891b..8e0f41f 100644 --- a/stats.go +++ b/internal/util/stats.go @@ -1,4 +1,4 @@ -package main +package util import ( "errors" @@ -94,7 +94,6 @@ func (m *MemoryStore) Stats(selector Selector, metric string, from, to int64) (* n += 1 return nil }) - if err != nil { return nil, 0, 0, err } diff --git a/lineprotocol_test.go b/lineprotocol_test.go deleted file mode 100644 index a6df786..0000000 --- a/lineprotocol_test.go +++ /dev/null @@ -1,144 +0,0 @@ -package main - -import ( - "bytes" - "log" - "strconv" - "testing" - - "github.com/influxdata/line-protocol/v2/lineprotocol" -) - -const TestDataClassicFormat string = ` -m1,cluster=ctest,hostname=htest1,type=node value=1 123456789 -m2,cluster=ctest,hostname=htest1,type=node value=2 123456789 -m3,hostname=htest2,type=node value=3 123456789 -m4,cluster=ctest,hostname=htest2,type=core,type-id=1 value=4 123456789 -m4,cluster=ctest,hostname=htest2,type-id=2,type=core value=5 123456789 -` - -const BenchmarkLineBatch string = ` -nm1,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm2,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm3,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm4,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm5,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm6,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm7,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm8,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -nm9,cluster=ctest,hostname=htest1,type=node value=123.0 123456789 -cm1,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm2,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 
-cm3,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm4,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm5,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm6,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm7,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm8,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm9,cluster=ctest,hostname=htest1,type=core,type-id=1 value=234.0 123456789 -cm1,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm2,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm3,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm4,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm5,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm6,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm7,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm8,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm9,cluster=ctest,hostname=htest1,type=core,type-id=2 value=345.0 123456789 -cm1,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm2,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm3,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm4,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm5,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm6,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm7,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm8,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm9,cluster=ctest,hostname=htest1,type=core,type-id=3 value=456.0 123456789 -cm1,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 
123456789 -cm2,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -cm3,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -cm4,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -cm5,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -cm6,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -cm7,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -cm8,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -cm9,cluster=ctest,hostname=htest1,type=core,type-id=4 value=567.0 123456789 -` - -func TestLineprotocolDecoder(t *testing.T) { - prevMemoryStore := memoryStore - t.Cleanup(func() { - memoryStore = prevMemoryStore - }) - - memoryStore = NewMemoryStore(map[string]MetricConfig{ - "m1": {Frequency: 1}, - "m2": {Frequency: 1}, - "m3": {Frequency: 1}, - "m4": {Frequency: 1}, - }) - - dec := lineprotocol.NewDecoderWithBytes([]byte(TestDataClassicFormat)) - if err := decodeLine(dec, "ctest"); err != nil { - log.Fatal(err) - } - - // memoryStore.DebugDump(bufio.NewWriter(os.Stderr)) - - h1 := memoryStore.GetLevel([]string{"ctest", "htest1"}) - h1b1 := h1.metrics[memoryStore.metrics["m1"].offset] - h1b2 := h1.metrics[memoryStore.metrics["m2"].offset] - if h1b1.data[0] != 1.0 || h1b2.data[0] != 2.0 { - log.Fatal() - } - - h2 := memoryStore.GetLevel([]string{"ctest", "htest2"}) - h2b3 := h2.metrics[memoryStore.metrics["m3"].offset] - if h2b3.data[0] != 3.0 { - log.Fatal() - } - - h2c1 := memoryStore.GetLevel([]string{"ctest", "htest2", "core1"}) - h2c1b4 := h2c1.metrics[memoryStore.metrics["m4"].offset] - h2c2 := memoryStore.GetLevel([]string{"ctest", "htest2", "core2"}) - h2c2b4 := h2c2.metrics[memoryStore.metrics["m4"].offset] - if h2c1b4.data[0] != 4.0 || h2c2b4.data[0] != 5.0 { - log.Fatal() - } -} - -func BenchmarkLineprotocolDecoder(b *testing.B) { - b.StopTimer() - memoryStore = NewMemoryStore(map[string]MetricConfig{ - "nm1": 
{Frequency: 1}, - "nm2": {Frequency: 1}, - "nm3": {Frequency: 1}, - "nm4": {Frequency: 1}, - "nm5": {Frequency: 1}, - "nm6": {Frequency: 1}, - "nm7": {Frequency: 1}, - "nm8": {Frequency: 1}, - "nm9": {Frequency: 1}, - "cm1": {Frequency: 1}, - "cm2": {Frequency: 1}, - "cm3": {Frequency: 1}, - "cm4": {Frequency: 1}, - "cm5": {Frequency: 1}, - "cm6": {Frequency: 1}, - "cm7": {Frequency: 1}, - "cm8": {Frequency: 1}, - "cm9": {Frequency: 1}, - }) - - for i := 0; i < b.N; i++ { - data := []byte(BenchmarkLineBatch) - data = bytes.ReplaceAll(data, []byte("123456789"), []byte(strconv.Itoa(i+123456789))) - dec := lineprotocol.NewDecoderWithBytes(data) - - b.StartTimer() - if err := decodeLine(dec, "ctest"); err != nil { - b.Fatal(err) - } - b.StopTimer() - } -} diff --git a/memoryStore_test.go.orig b/memoryStore_test.go.orig deleted file mode 100644 index b64c85b..0000000 --- a/memoryStore_test.go.orig +++ /dev/null @@ -1,504 +0,0 @@ -package main - -import ( - "fmt" - "log" - "math" - "testing" - - "github.com/ClusterCockpit/cc-metric-store/lineprotocol" -) - -var testMetrics [][]lineprotocol.Metric = [][]lineprotocol.Metric{ - {{"flops", 100.5}, {"mem_bw", 2088.67}}, - {{"flops", 180.5}, {"mem_bw", 4078.32}, {"mem_capacity", 1020}}, - {{"flops", 980.5}, {"mem_bw", 9078.32}, {"mem_capacity", 5010}}, - {{"flops", 940.5}, {"mem_bw", 9278.32}, {"mem_capacity", 6010}}, - {{"flops", 930.5}, {"mem_bw", 9378.32}, {"mem_capacity", 7010}}, - {{"flops", 980.5}, {"mem_bw", 9478.32}, {"mem_capacity", 8010}}, - {{"flops", 980.5}, {"mem_bw", 9478.32}, {"mem_capacity", 8010}}, - {{"flops", 980.5}, {"mem_bw", 9478.32}, {"mem_capacity", 8010}}, - {{"flops", 970.5}, {"mem_bw", 9178.32}, {"mem_capacity", 2010}}, - {{"flops", 970.5}, {"mem_bw", 9178.32}, {"mem_capacity", 2010}}} - -var testMetricsAlt [][]lineprotocol.Metric = [][]lineprotocol.Metric{ - {{"flops", 120.5}, {"mem_bw", 2080.67}}, - {{"flops", 130.5}, {"mem_bw", 4071.32}, {"mem_capacity", 1120}}, - {{"flops", 940.5}, {"mem_bw", 
9072.32}, {"mem_capacity", 5210}}, - {{"flops", 950.5}, {"mem_bw", 9273.32}, {"mem_capacity", 6310}}, - {{"flops", 960.5}, {"mem_bw", 9374.32}, {"mem_capacity", 7410}}, - {{"flops", 970.5}, {"mem_bw", 9475.32}, {"mem_capacity", 8510}}, - {{"flops", 990.5}, {"mem_bw", 9476.32}, {"mem_capacity", 8610}}, - {{"flops", 910.5}, {"mem_bw", 9477.32}, {"mem_capacity", 8710}}, - {{"flops", 920.5}, {"mem_bw", 9178.32}, {"mem_capacity", 2810}}, - {{"flops", 930.5}, {"mem_bw", 9179.32}, {"mem_capacity", 2910}}} - -func dumpStoreBuffer(s *storeBuffer) { - log.Printf("Start TS %d\n", s.start) - ctr := 0 - - for _, val := range s.store { - fmt.Printf("%f\t", val) - ctr++ - - if ctr == 10 { - fmt.Printf("\n") - ctr = 0 - } - } -} - -func printMemStore(m *MemoryStore) { - log.Println("########################") - log.Printf("Frequency %d, Metrics %d Slots %d\n", - m.frequency, m.numMetrics, m.numSlots) - log.Println("##Offsets") - for key, val := range m.offsets { - log.Printf("\t%s = %d\n", key, val) - } - log.Println("##Containers") - for key, c := range m.containers { - log.Printf("ID %s\n", key) - log.Println("###current") - dumpStoreBuffer(c.current) - log.Println("###next") - dumpStoreBuffer(c.next) - } - log.Println("########################") -} - -//############################ -//#### Whitebox tests ######## -//############################ -func TestAddMetricSimple(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - // printMemStore(m) - - m.AddMetrics(key, 1584022800, testMetrics[0]) - m.AddMetrics(key, 1584022890, testMetrics[1]) - - want := testMetrics[0][0].Value - got := m.containers[key].current.store[0] - if got != want { - t.Errorf("Want %f got %f\n", want, got) - } - - want = testMetrics[1][2].Value - got = m.containers[key].current.store[21] - if got != want { - t.Errorf("Want %f got %f\n", want, got) - } - // printMemStore(m) -} - -func TestAddMetricReplace(t *testing.T) { - key := "m1220" - m := 
newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - // printMemStore(m) - - m.AddMetrics(key, 1584022800, testMetrics[0]) - m.AddMetrics(key, 1584022800, testMetrics[1]) - - want := testMetrics[1][0].Value - got := m.containers[key].current.store[0] - if got != want { - t.Errorf("Want %f got %f\n", want, got) - } - - m.AddMetrics(key, 1584022850, testMetrics[0]) - want = testMetrics[0][0].Value - got = m.containers[key].current.store[0] - if got != want { - t.Errorf("Want %f got %f\n", want, got) - } - - m.AddMetrics(key, 1584022860, testMetrics[1]) - want = testMetrics[0][0].Value - got = m.containers[key].current.store[0] - if got != want { - t.Errorf("Want %f got %f\n", want, got) - } - // printMemStore(m) -} - -func TestAddMetricSwitch(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - // printMemStore(m) - - m.AddMetrics(key, 1584023000, testMetrics[0]) - m.AddMetrics(key, 1584023580, testMetrics[1]) - - want := testMetrics[1][2].Value - got := m.containers[key].current.store[29] - if got != want { - t.Errorf("Want %f got %f\n", want, got) - } - - m.AddMetrics(key, 1584023600, testMetrics[2]) - want = testMetrics[2][2].Value - got = m.containers[key].current.store[20] - if got != want { - t.Errorf("Want %f got %f\n", want, got) - } - - // printMemStore(m) -} - -//############################ -//#### Blackbox tests ######## -//############################ - -func TestAddMetricOutOfBounds(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 30, 60) - - err := m.AddMetrics(key, 1584023000, testMetrics[0]) - if err != nil { - t.Errorf("Got error 1584023000\n") - } - err = m.AddMetrics(key, 1584026600, testMetrics[0]) - if err == nil { - t.Errorf("Got no error 1584026600\n") - } - err = m.AddMetrics(key, 1584021580, testMetrics[1]) - if err == nil { - t.Errorf("Got no error 1584021580\n") - } - err = m.AddMetrics(key, 1584024580, 
testMetrics[1]) - if err != nil { - t.Errorf("Got error 1584024580\n") - } - err = m.AddMetrics(key, 1584091580, testMetrics[1]) - if err == nil { - t.Errorf("Got no error 1584091580\n") - } - err = m.AddMetrics(key, 1584024780, testMetrics[0]) - if err != nil { - t.Errorf("Got error 1584024780\n") - } -} - -func TestGetMetricPlainCurrent(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - - // printMemStore(m) - val, tsFrom, err := m.GetMetric(key, "flops", 1584023000, 1584023560) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023000 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 9 { - t.Errorf("Want 9. Got %d\n", len(val)) - } - if val[0] != 100.5 { - t.Errorf("Want 100.5 Got %f\n", val[0]) - } - if val[8] != 970.5 { - t.Errorf("Want 970.5 Got %f\n", val[9]) - } -} - -func TestGetMetricPlainNext(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - val, tsFrom, err := m.GetMetric(key, "flops", 1584023000, 1584023560) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023000 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 9 { - t.Errorf("Want 9. 
Got %d\n", len(val)) - } - if val[0] != 100.5 { - t.Errorf("Want 100.5 Got %f\n", val[0]) - } - if val[8] != 970.5 { - t.Errorf("Want 970.5 Got %f\n", val[9]) - } -} - -func TestGetMetricGap(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*120), testMetrics[i]) - } - - val, tsFrom, err := m.GetMetric(key, "flops", 1584023000, 1584023600) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023000 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 10 { - t.Errorf("Want 10. Got %d\n", len(val)) - } - if val[0] != 100.5 { - t.Errorf("Want 100.5 Got %f\n", val[0]) - } - if !math.IsNaN(float64(val[1])) { - t.Errorf("Want NaN Got %f\n", val[1]) - } - if val[0] != 100.5 { - t.Errorf("Want 100.5 Got %f\n", val[0]) - } - - // fmt.Println(val) -} - -func TestGetMetricSplit(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - val, tsFrom, err := m.GetMetric(key, "flops", 1584023200, 1584023860) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023200 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 11 { - t.Errorf("Want 11. 
Got %d\n", len(val)) - } - if val[0] != 940.5 { - t.Errorf("Want 940.5 Got %f\n", val[0]) - } - if val[10] != 950.5 { - t.Errorf("Want 950.5 Got %f\n", val[0]) - } -} - -func TestGetMetricExceedNext(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - val, tsFrom, err := m.GetMetric(key, "flops", 1584022800, 1584023400) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023000 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 6 { - t.Errorf("Want 6. Got %d\n", len(val)) - } - if val[0] != 100.5 { - t.Errorf("Want 100.5 Got %f\n", val[0]) - } - if val[5] != 980.5 { - t.Errorf("Want 980.5 Got %f\n", val[5]) - } -} - -func TestGetMetricExceedNextSplit(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - val, tsFrom, err := m.GetMetric(key, "flops", 1584022800, 1584023900) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023000 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 15 { - t.Errorf("Want 14. 
Got %d\n", len(val)) - } - if val[0] != 100.5 { - t.Errorf("Want 100.5 Got %f\n", val[0]) - } - if val[14] != 960.5 { - t.Errorf("Want 960.5 Got %f\n", val[13]) - } -} - -func TestGetMetricExceedCurrent(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - val, tsFrom, err := m.GetMetric(key, "flops", 1584023800, 1584027900) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023800 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 7 { - t.Errorf("Want 6. Got %d\n", len(val)) - } - if val[0] != 950.5 { - t.Errorf("Want 950.5 Got %f\n", val[0]) - } - if val[6] != 930.5 { - t.Errorf("Want 930.5 Got %f\n", val[5]) - } -} - -func TestGetMetricExceedCurrentSplit(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - val, tsFrom, err := m.GetMetric(key, "flops", 1584023120, 1584027900) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023120 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 18 { - t.Errorf("Want 18. 
Got %d\n", len(val)) - } - if val[0] != 980.5 { - t.Errorf("Want 950.5 Got %f\n", val[0]) - } - if val[17] != 930.5 { - t.Errorf("Want 930.5 Got %f\n", val[17]) - } -} - -func TestGetMetricExceedBoth(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - val, tsFrom, err := m.GetMetric(key, "flops", 1584022800, 1584027900) - - if err != nil { - t.Errorf("Got error\n") - } - if tsFrom != 1584023000 { - t.Errorf("Start ts differs: %d\n", tsFrom) - } - if len(val) != 20 { - t.Errorf("Want 20. Got %d\n", len(val)) - } - if val[0] != 100.5 { - t.Errorf("Want 950.5 Got %f\n", val[0]) - } - if val[19] != 930.5 { - t.Errorf("Want 930.5 Got %f\n", val[17]) - } -} - -func TestGetMetricOutUpper(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - _, _, err := m.GetMetric(key, "flops", 1584032800, 1584037900) - - if err == nil { - t.Errorf("Got no error\n") - } -} - -func TestGetMetricOutLower(t *testing.T) { - key := "m1220" - m := newMemoryStore([]string{"flops", "mem_bw", "mem_capacity"}, 10, 60) - - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023000+i*60), testMetrics[i]) - } - for i := 0; i < len(testMetrics); i++ { - m.AddMetrics(key, int64(1584023600+i*60), testMetricsAlt[i]) - } - - // printMemStore(m) - - _, _, err := m.GetMetric(key, "flops", 1584002800, 1584007900) - - if err == nil { - t.Errorf("Got no error\n") - } -} diff --git a/memstore_test.go 
b/memstore_test.go deleted file mode 100644 index 8821e7a..0000000 --- a/memstore_test.go +++ /dev/null @@ -1,512 +0,0 @@ -package main - -import ( - "fmt" - "math" - "math/rand" - "sync" - "testing" -) - -func TestMemoryStoreBasics(t *testing.T) { - frequency := int64(10) - start, count := int64(100), int64(5000) - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: frequency}, - "b": {Frequency: frequency * 2}, - }) - - for i := int64(0); i < count; i++ { - err := store.Write([]string{"testhost"}, start+i*frequency, []Metric{ - {Name: "a", Value: Float(i)}, - {Name: "b", Value: Float(i / 2)}, - }) - if err != nil { - t.Error(err) - return - } - } - - sel := Selector{{String: "testhost"}} - adata, from, to, err := store.Read(sel, "a", start, start+count*frequency) - if err != nil || from != start || to != start+count*frequency { - t.Error(err) - return - } - bdata, _, _, err := store.Read(sel, "b", start, start+count*frequency) - if err != nil { - t.Error(err) - return - } - - if len(adata) != int(count) || len(bdata) != int(count/2) { - t.Error("unexpected count of returned values") - return - } - - for i := 0; i < int(count); i++ { - if adata[i] != Float(i) { - t.Errorf("incorrect value for metric a (%f vs. 
%f)", adata[i], Float(i)) - return - } - } - - for i := 0; i < int(count/2); i++ { - if bdata[i] != Float(i) && bdata[i] != Float(i-1) { - t.Errorf("incorrect value for metric b (%f) at index %d", bdata[i], i) - return - } - } - -} - -func TestMemoryStoreTooMuchWrites(t *testing.T) { - frequency := int64(10) - count := BUFFER_CAP*3 + 10 - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: frequency}, - "b": {Frequency: frequency * 2}, - "c": {Frequency: frequency / 2}, - "d": {Frequency: frequency * 3}, - }) - - start := int64(100) - for i := 0; i < count; i++ { - if err := store.Write([]string{"test"}, start+int64(i)*frequency, []Metric{ - {Name: "a", Value: Float(i)}, - {Name: "b", Value: Float(i / 2)}, - {Name: "c", Value: Float(i * 2)}, - {Name: "d", Value: Float(i / 3)}, - }); err != nil { - t.Fatal(err) - } - } - - end := start + int64(count)*frequency - data, from, to, err := store.Read(Selector{{String: "test"}}, "a", start, end) - if len(data) != count || from != start || to != end || err != nil { - t.Fatalf("a: err=%#v, from=%d, to=%d, data=%#v\n", err, from, to, data) - } - - data, from, to, err = store.Read(Selector{{String: "test"}}, "b", start, end) - if len(data) != count/2 || from != start || to != end || err != nil { - t.Fatalf("b: err=%#v, from=%d, to=%d, data=%#v\n", err, from, to, data) - } - - data, from, to, err = store.Read(Selector{{String: "test"}}, "c", start, end) - if len(data) != count*2-1 || from != start || to != end-frequency/2 || err != nil { - t.Fatalf("c: err=%#v, from=%d, to=%d, data=%#v\n", err, from, to, data) - } - - data, from, to, err = store.Read(Selector{{String: "test"}}, "d", start, end) - if len(data) != count/3+1 || from != start || to != end+frequency*2 || err != nil { - t.Errorf("expected: err=nil, from=%d, to=%d, len(data)=%d\n", start, end+frequency*2, count/3) - t.Fatalf("d: err=%#v, from=%d, to=%d, data=%#v\n", err, from, to, data) - } -} - -func TestMemoryStoreOutOfBounds(t *testing.T) { - count 
:= 2000 - toffset := 1000 - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: 60}, - }) - - for i := 0; i < count; i++ { - if err := store.Write([]string{"cluster", "host", "cpu"}, int64(toffset+i*60), []Metric{ - {Name: "a", Value: Float(i)}, - }); err != nil { - t.Fatal(err) - } - } - - sel := Selector{{String: "cluster"}, {String: "host"}, {String: "cpu"}} - data, from, to, err := store.Read(sel, "a", 500, int64(toffset+count*60+500)) - if err != nil { - t.Fatal(err) - } - - if from/60 != int64(toffset)/60 || to/60 != int64(toffset+count*60)/60 { - t.Fatalf("Got %d-%d, expected %d-%d", from, to, toffset, toffset+count*60) - } - - if len(data) != count || data[0] != 0 || data[len(data)-1] != Float((count-1)) { - t.Fatalf("Wrong data (got: %d, %f, %f, expected: %d, %f, %f)", - len(data), data[0], data[len(data)-1], count, 0., Float(count-1)) - } - - testfrom, testlen := int64(100000000), int64(10000) - data, from, to, err = store.Read(sel, "a", testfrom, testfrom+testlen) - if len(data) != 0 || from != testfrom || to != testfrom || err != nil { - t.Fatal("Unexpected data returned when reading range after valid data") - } - - testfrom, testlen = 0, 10 - data, from, to, err = store.Read(sel, "a", testfrom, testfrom+testlen) - if len(data) != 0 || from/60 != int64(toffset)/60 || to/60 != int64(toffset)/60 || err != nil { - t.Fatal("Unexpected data returned when reading range before valid data") - } -} - -func TestMemoryStoreMissingDatapoints(t *testing.T) { - count := 3000 - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: 1}, - }) - - for i := 0; i < count; i++ { - if i%3 != 0 { - continue - } - - err := store.Write([]string{"testhost"}, int64(i), []Metric{ - {Name: "a", Value: Float(i)}, - }) - if err != nil { - t.Error(err) - return - } - } - - sel := Selector{{String: "testhost"}} - adata, _, _, err := store.Read(sel, "a", 0, int64(count)) - if err != nil { - t.Error(err) - return - } - - if len(adata) != count-2 { - 
t.Error("unexpected len") - return - } - - for i := 0; i < count-2; i++ { - if i%3 == 0 { - if adata[i] != Float(i) { - t.Error("unexpected value") - return - } - } else { - if !math.IsNaN(float64(adata[i])) { - t.Errorf("NaN expected (i = %d, value = %f)\n", i, adata[i]) - return - } - } - } -} - -func TestMemoryStoreAggregation(t *testing.T) { - count := 3000 - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: 1, Aggregation: SumAggregation}, - }) - - for i := 0; i < count; i++ { - err := store.Write([]string{"host0", "cpu0"}, int64(i), []Metric{ - {Name: "a", Value: Float(i) / 2.}, - }) - if err != nil { - t.Error(err) - return - } - - err = store.Write([]string{"host0", "cpu1"}, int64(i), []Metric{ - {Name: "a", Value: Float(i) * 2.}, - }) - if err != nil { - t.Error(err) - return - } - } - - adata, from, to, err := store.Read(Selector{{String: "host0"}}, "a", int64(0), int64(count)) - if err != nil { - t.Error(err) - return - } - - if len(adata) != count || from != 0 || to != int64(count) { - t.Error("unexpected length or time range of returned data") - return - } - - for i := 0; i < count; i++ { - expected := Float(i)/2. + Float(i)*2. 
- if adata[i] != expected { - t.Errorf("expected: %f, got: %f", expected, adata[i]) - return - } - } -} - -func TestMemoryStoreStats(t *testing.T) { - count := 3000 - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: 1}, - "b": {Frequency: 1, Aggregation: AvgAggregation}, - }) - - sel1 := []string{"cluster", "host1"} - sel2 := []string{"cluster", "host2", "left"} - sel3 := []string{"cluster", "host2", "right"} - - samples := 0 - asum, amin, amax := 0., math.MaxFloat32, -math.MaxFloat32 - bsum, bmin, bmax := 0., math.MaxFloat32, -math.MaxFloat32 - - for i := 0; i < count; i++ { - if i%5 == 0 { - // Skip some writes, test if samples is calculated correctly - continue - } - - samples += 1 - a := float64(rand.Int()%100 - 50) - asum += a - amin = math.Min(amin, a) - amax = math.Max(amax, a) - b := float64(rand.Int()%100 - 50) - bsum += b * 2 - bmin = math.Min(bmin, b) - bmax = math.Max(bmax, b) - - store.Write(sel1, int64(i), []Metric{ - {Name: "a", Value: Float(a)}, - }) - store.Write(sel2, int64(i), []Metric{ - {Name: "b", Value: Float(b)}, - }) - store.Write(sel3, int64(i), []Metric{ - {Name: "b", Value: Float(b)}, - }) - } - - stats, from, to, err := store.Stats(Selector{{String: "cluster"}, {String: "host1"}}, "a", 0, int64(count)) - if err != nil { - t.Fatal(err) - } - - if from != 1 || to != int64(count) || stats.Samples != samples { - t.Fatalf("unexpected: from=%d, to=%d, stats.Samples=%d (expected samples=%d)\n", from, to, stats.Samples, samples) - } - - if stats.Avg != Float(asum/float64(samples)) || stats.Min != Float(amin) || stats.Max != Float(amax) { - t.Fatalf("wrong stats: %#v\n", stats) - } - - stats, from, to, err = store.Stats(Selector{{String: "cluster"}, {String: "host2"}}, "b", 0, int64(count)) - if err != nil { - t.Fatal(err) - } - - if from != 1 || to != int64(count) || stats.Samples != samples*2 { - t.Fatalf("unexpected: from=%d, to=%d, stats.Samples=%d (expected samples=%d)\n", from, to, stats.Samples, samples*2) - } - - if 
stats.Avg != Float(bsum/float64(samples*2)) || stats.Min != Float(bmin) || stats.Max != Float(bmax) { - t.Fatalf("wrong stats: %#v (expected: avg=%f, min=%f, max=%f)\n", stats, bsum/float64(samples*2), bmin, bmax) - } -} - -func TestMemoryStoreArchive(t *testing.T) { - store1 := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: 1}, - "b": {Frequency: 1}, - }) - - count := 2000 - for i := 0; i < count; i++ { - err := store1.Write([]string{"cluster", "host", "cpu0"}, 100+int64(i), []Metric{ - {Name: "a", Value: Float(i)}, - {Name: "b", Value: Float(i * 2)}, - }) - if err != nil { - t.Error(err) - return - } - } - - // store1.DebugDump(bufio.NewWriter(os.Stdout)) - - archiveRoot := t.TempDir() - _, err := store1.ToCheckpoint(archiveRoot, 100, 100+int64(count/2)) - if err != nil { - t.Error(err) - return - } - - _, err = store1.ToCheckpoint(archiveRoot, 100+int64(count/2), 100+int64(count)) - if err != nil { - t.Error(err) - return - } - - store2 := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: 1}, - "b": {Frequency: 1}, - }) - n, err := store2.FromCheckpoint(archiveRoot, 100) - if err != nil { - t.Error(err) - return - } - - sel := Selector{{String: "cluster"}, {String: "host"}, {String: "cpu0"}} - adata, from, to, err := store2.Read(sel, "a", 100, int64(100+count)) - if err != nil { - t.Error(err) - return - } - - if n != 2 || len(adata) != count || from != 100 || to != int64(100+count) { - t.Errorf("unexpected: n=%d, len=%d, from=%d, to=%d\n", n, len(adata), from, to) - return - } - - for i := 0; i < count; i++ { - expected := Float(i) - if adata[i] != expected { - t.Errorf("expected: %f, got: %f", expected, adata[i]) - } - } -} - -func TestMemoryStoreFree(t *testing.T) { - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: 1}, - "b": {Frequency: 2}, - }) - - count := 3000 - sel := []string{"cluster", "host", "1"} - for i := 0; i < count; i++ { - err := store.Write(sel, int64(i), []Metric{ - {Name: "a", Value: Float(i)}, - 
{Name: "b", Value: Float(i)}, - }) - if err != nil { - t.Fatal(err) - } - } - - n, err := store.Free([]string{"cluster", "host"}, int64(BUFFER_CAP*2)+100) - if err != nil { - t.Fatal(err) - } - - if n != 3 { - t.Fatal("two buffers expected to be released") - } - - adata, from, to, err := store.Read(Selector{{String: "cluster"}, {String: "host"}, {String: "1"}}, "a", 0, int64(count)) - if err != nil { - t.Fatal(err) - } - - if from != int64(BUFFER_CAP*2) || to != int64(count) || len(adata) != count-2*BUFFER_CAP { - t.Fatalf("unexpected values from call to `Read`: from=%d, to=%d, len=%d", from, to, len(adata)) - } - - // bdata, from, to, err := store.Read(Selector{{String: "cluster"}, {String: "host"}, {String: "1"}}, "b", 0, int64(count)) - // if err != nil { - // t.Fatal(err) - // } - - // if from != int64(BUFFER_CAP*2) || to != int64(count) || len(bdata) != (count-2*BUFFER_CAP)/2 { - // t.Fatalf("unexpected values from call to `Read`: from=%d (expected: %d), to=%d (expected: %d), len=%d (expected: %d)", - // from, BUFFER_CAP*2, to, count, len(bdata), (count-2*BUFFER_CAP)/2) - // } - - if adata[0] != Float(BUFFER_CAP*2) || adata[len(adata)-1] != Float(count-1) { - t.Fatal("wrong values") - } -} - -func BenchmarkMemoryStoreConcurrentWrites(b *testing.B) { - frequency := int64(5) - count := b.N - goroutines := 4 - store := NewMemoryStore(map[string]MetricConfig{ - "a": {Frequency: frequency}, - }) - - var wg sync.WaitGroup - wg.Add(goroutines) - - for g := 0; g < goroutines; g++ { - go func(g int) { - host := fmt.Sprintf("host%d", g) - for i := 0; i < count; i++ { - store.Write([]string{"cluster", host, "cpu0"}, int64(i)*frequency, []Metric{ - {Name: "a", Value: Float(i)}, - }) - } - wg.Done() - }(g) - } - - wg.Wait() - b.StopTimer() - - for g := 0; g < goroutines; g++ { - host := fmt.Sprintf("host%d", g) - sel := Selector{{String: "cluster"}, {String: host}, {String: "cpu0"}} - adata, _, _, err := store.Read(sel, "a", 0, int64(count)*frequency) - if err != nil { - 
b.Error(err) - return - } - - if len(adata) != count { - b.Error("unexpected count") - return - } - - for i := 0; i < count; i++ { - expected := Float(i) - if adata[i] != expected { - b.Error("incorrect value for metric a") - return - } - } - } -} - -func BenchmarkMemoryStoreAggregation(b *testing.B) { - b.StopTimer() - count := 2000 - store := NewMemoryStore(map[string]MetricConfig{ - "flops_any": {Frequency: 1, Aggregation: AvgAggregation}, - }) - - sel := []string{"testcluster", "host123", "cpu0"} - for i := 0; i < count; i++ { - sel[2] = "cpu0" - err := store.Write(sel, int64(i), []Metric{ - {Name: "flops_any", Value: Float(i)}, - }) - if err != nil { - b.Fatal(err) - } - - sel[2] = "cpu1" - err = store.Write(sel, int64(i), []Metric{ - {Name: "flops_any", Value: Float(i)}, - }) - if err != nil { - b.Fatal(err) - } - } - - b.StartTimer() - for n := 0; n < b.N; n++ { - data, from, to, err := store.Read(Selector{{String: "testcluster"}, {String: "host123"}}, "flops_any", 0, int64(count)) - if err != nil { - b.Fatal(err) - } - - if len(data) != count || from != 0 || to != int64(count) { - b.Fatal() - } - } -} From b2528f958cf8185f9031535a9596f8fa49ee27f8 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 6 May 2024 09:27:28 +0200 Subject: [PATCH 2/6] Continue restructuring. Intermediate state. 
--- cmd/cc-metric-store/main.go | 23 +- internal/api/api.go | 37 +- internal/api/lineprotocol.go | 39 +- internal/config/config.go | 2 +- internal/{memstore => memorystore}/archive.go | 44 +- internal/memorystore/buffer.go | 241 ++++++++ debug.go => internal/memorystore/debug.go | 6 +- internal/memorystore/level.go | 183 ++++++ internal/memorystore/memorystore.go | 222 +++++++ internal/memorystore/selector.go | 51 ++ internal/{util => memorystore}/stats.go | 33 +- internal/memstore/memstore.go | 542 ------------------ internal/memstore/selector.go | 123 ---- 13 files changed, 795 insertions(+), 751 deletions(-) rename internal/{memstore => memorystore}/archive.go (92%) create mode 100644 internal/memorystore/buffer.go rename debug.go => internal/memorystore/debug.go (94%) create mode 100644 internal/memorystore/level.go create mode 100644 internal/memorystore/memorystore.go create mode 100644 internal/memorystore/selector.go rename internal/{util => memorystore}/stats.go (75%) delete mode 100644 internal/memstore/memstore.go delete mode 100644 internal/memstore/selector.go diff --git a/cmd/cc-metric-store/main.go b/cmd/cc-metric-store/main.go index 5d0f1a3..601dfe2 100644 --- a/cmd/cc-metric-store/main.go +++ b/cmd/cc-metric-store/main.go @@ -16,13 +16,13 @@ import ( "github.com/ClusterCockpit/cc-metric-store/internal/api" "github.com/ClusterCockpit/cc-metric-store/internal/config" - "github.com/ClusterCockpit/cc-metric-store/internal/memstore" + "github.com/ClusterCockpit/cc-metric-store/internal/memorystore" "github.com/google/gops/agent" ) var ( conf config.Config - memoryStore *memstore.MemoryStore = nil + ms *memorystore.MemoryStore = nil lastCheckpoint time.Time ) @@ -64,7 +64,7 @@ func intervals(wg *sync.WaitGroup, ctx context.Context) { case <-ticks: t := time.Now().Add(-d) log.Printf("start freeing buffers (older than %s)...\n", t.Format(time.RFC3339)) - freed, err := memoryStore.Free(nil, t.Unix()) + freed, err := ms.Free(nil, t.Unix()) if err != nil { 
log.Printf("freeing up buffers failed: %s\n", err.Error()) } else { @@ -93,7 +93,7 @@ func intervals(wg *sync.WaitGroup, ctx context.Context) { case <-ticks: log.Printf("start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339)) now := time.Now() - n, err := memoryStore.ToCheckpoint(conf.Checkpoints.RootDir, + n, err := ms.ToCheckpoint(conf.Checkpoints.RootDir, lastCheckpoint.Unix(), now.Unix()) if err != nil { log.Printf("checkpointing failed: %s\n", err.Error()) @@ -123,7 +123,7 @@ func intervals(wg *sync.WaitGroup, ctx context.Context) { case <-ticks: t := time.Now().Add(-d) log.Printf("start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) - n, err := memstore.ArchiveCheckpoints(conf.Checkpoints.RootDir, conf.Archive.RootDir, t.Unix(), conf.Archive.DeleteInstead) + n, err := memorystore.ArchiveCheckpoints(conf.Checkpoints.RootDir, conf.Archive.RootDir, t.Unix(), conf.Archive.DeleteInstead) if err != nil { log.Printf("archiving failed: %s\n", err.Error()) } else { @@ -143,7 +143,8 @@ func main() { startupTime := time.Now() conf = config.LoadConfiguration(configFile) - memoryStore = memstore.NewMemoryStore(conf.Metrics) + memorystore.Init(conf.Metrics) + ms = memorystore.GetMemoryStore() if enableGopsAgent || conf.Debug.EnableGops { if err := agent.Listen(agent.Options{}); err != nil { @@ -167,8 +168,8 @@ func main() { restoreFrom := startupTime.Add(-d) log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339)) - files, err := memoryStore.FromCheckpoint(conf.Checkpoints.RootDir, restoreFrom.Unix()) - loadedData := memoryStore.SizeInBytes() / 1024 / 1024 // In MB + files, err := ms.FromCheckpoint(conf.Checkpoints.RootDir, restoreFrom.Unix()) + loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB if err != nil { log.Fatalf("Loading checkpoints failed: %s\n", err.Error()) } else { @@ -195,7 +196,7 @@ func main() { for { sig := <-sigs if sig == syscall.SIGUSR1 { - 
memoryStore.DebugDump(bufio.NewWriter(os.Stdout), nil) + ms.DebugDump(bufio.NewWriter(os.Stdout), nil) continue } @@ -223,7 +224,7 @@ func main() { nc := natsConf go func() { // err := ReceiveNats(conf.Nats, decodeLine, runtime.NumCPU()-1, ctx) - err := api.ReceiveNats(nc, decodeLine, 1, ctx) + err := api.ReceiveNats(nc, ms, 1, ctx) if err != nil { log.Fatal(err) } @@ -235,7 +236,7 @@ func main() { wg.Wait() log.Printf("Writing to '%s'...\n", conf.Checkpoints.RootDir) - files, err = memoryStore.ToCheckpoint(conf.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) + files, err = ms.ToCheckpoint(conf.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) if err != nil { log.Printf("Writing checkpoint failed: %s\n", err.Error()) } diff --git a/internal/api/api.go b/internal/api/api.go index 987f2c6..397390d 100644 --- a/internal/api/api.go +++ b/internal/api/api.go @@ -17,19 +17,22 @@ import ( "sync" "time" + "github.com/ClusterCockpit/cc-metric-store/internal/config" + "github.com/ClusterCockpit/cc-metric-store/internal/memorystore" + "github.com/ClusterCockpit/cc-metric-store/internal/util" "github.com/golang-jwt/jwt/v4" "github.com/gorilla/mux" "github.com/influxdata/line-protocol/v2/lineprotocol" ) type ApiMetricData struct { - Error *string `json:"error,omitempty"` - From int64 `json:"from"` - To int64 `json:"to"` - Data FloatArray `json:"data,omitempty"` - Avg Float `json:"avg"` - Min Float `json:"min"` - Max Float `json:"max"` + Error *string `json:"error,omitempty"` + From int64 `json:"from"` + To int64 `json:"to"` + Data util.FloatArray `json:"data,omitempty"` + Avg util.Float `json:"avg"` + Min util.Float `json:"min"` + Max util.Float `json:"max"` } // TODO: Optimize this, just like the stats endpoint! 
@@ -49,15 +52,15 @@ func (data *ApiMetricData) AddStats() { if n > 0 { avg := sum / float64(n) - data.Avg = Float(avg) - data.Min = Float(min) - data.Max = Float(max) + data.Avg = util.Float(avg) + data.Min = util.Float(min) + data.Max = util.Float(max) } else { - data.Avg, data.Min, data.Max = NaN, NaN, NaN + data.Avg, data.Min, data.Max = util.NaN, util.NaN, util.NaN } } -func (data *ApiMetricData) ScaleBy(f Float) { +func (data *ApiMetricData) ScaleBy(f util.Float) { if f == 0 || f == 1 { return } @@ -78,9 +81,9 @@ func (data *ApiMetricData) PadDataWithNull(from, to int64, metric string) { if (data.From / minfo.Frequency) > (from / minfo.Frequency) { padfront := int((data.From / minfo.Frequency) - (from / minfo.Frequency)) - ndata := make([]Float, 0, padfront+len(data.Data)) + ndata := make([]util.Float, 0, padfront+len(data.Data)) for i := 0; i < padfront; i++ { - ndata = append(ndata, NaN) + ndata = append(ndata, util.NaN) } for j := 0; j < len(data.Data); j++ { ndata = append(ndata, data.Data[j]) @@ -212,11 +215,13 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { return } + ms := memorystore.GetMemoryStore() + response := ApiQueryResponse{ Results: make([][]ApiMetricData, 0, len(req.Queries)), } if req.ForAllNodes != nil { - nodes := memoryStore.ListChildren([]string{req.Cluster}) + nodes := ms.ListChildren([]string{req.Cluster}) for _, node := range nodes { for _, metric := range req.ForAllNodes { q := ApiQuery{ @@ -364,7 +369,7 @@ func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler }) } -func StartApiServer(ctx context.Context, httpConfig *HttpConfig) error { +func StartApiServer(ctx context.Context, httpConfig *config.HttpConfig) error { r := mux.NewRouter() r.HandleFunc("/api/free", handleFree) diff --git a/internal/api/lineprotocol.go b/internal/api/lineprotocol.go index 9814463..f48f7c3 100644 --- a/internal/api/lineprotocol.go +++ b/internal/api/lineprotocol.go @@ -10,21 +10,17 @@ import ( "time" 
"github.com/ClusterCockpit/cc-metric-store/internal/config" - "github.com/ClusterCockpit/cc-metric-store/internal/memstore" + "github.com/ClusterCockpit/cc-metric-store/internal/memorystore" "github.com/ClusterCockpit/cc-metric-store/internal/util" "github.com/influxdata/line-protocol/v2/lineprotocol" "github.com/nats-io/nats.go" ) -type Metric struct { - Name string - Value util.Float - mc config.MetricConfig -} - -// Currently unused, could be used to send messages via raw TCP. // Each connection is handled in it's own goroutine. This is a blocking function. -func ReceiveRaw(ctx context.Context, listener net.Listener, handleLine func(*lineprotocol.Decoder, string) error) error { +func ReceiveRaw(ctx context.Context, + listener net.Listener, + handleLine func(*lineprotocol.Decoder, string) error, +) error { var wg sync.WaitGroup wg.Add(1) @@ -86,7 +82,11 @@ func ReceiveRaw(ctx context.Context, listener net.Listener, handleLine func(*lin // Connect to a nats server and subscribe to "updates". This is a blocking // function. handleLine will be called for each line recieved via nats. // Send `true` through the done channel for gracefull termination. 
-func ReceiveNats(conf *config.NatsConfig, handleLine func(*lineprotocol.Decoder, string) error, workers int, ctx context.Context) error { +func ReceiveNats(conf *config.NatsConfig, + ms *memorystore.MemoryStore, + workers int, + ctx context.Context, +) error { var opts []nats.Option if conf.Username != "" && conf.Password != "" { opts = append(opts, nats.UserInfo(conf.Username, conf.Password)) @@ -113,7 +113,7 @@ func ReceiveNats(conf *config.NatsConfig, handleLine func(*lineprotocol.Decoder, go func() { for m := range msgs { dec := lineprotocol.NewDecoderWithBytes(m.Data) - if err := handleLine(dec, clusterTag); err != nil { + if err := decodeLine(dec, ms, clusterTag); err != nil { log.Printf("error: %s\n", err.Error()) } } @@ -128,7 +128,7 @@ func ReceiveNats(conf *config.NatsConfig, handleLine func(*lineprotocol.Decoder, } else { sub, err = nc.Subscribe(sc.SubscribeTo, func(m *nats.Msg) { dec := lineprotocol.NewDecoderWithBytes(m.Data) - if err := handleLine(dec, clusterTag); err != nil { + if err := decodeLine(dec, ms, clusterTag); err != nil { log.Printf("error: %s\n", err.Error()) } }) @@ -177,17 +177,20 @@ func reorder(buf, prefix []byte) []byte { // Decode lines using dec and make write calls to the MemoryStore. // If a line is missing its cluster tag, use clusterDefault as default. -func decodeLine(dec *lineprotocol.Decoder, memoryStore *memstore.MemoryStore, clusterDefault string) error { +func decodeLine(dec *lineprotocol.Decoder, + ms *memorystore.MemoryStore, + clusterDefault string, +) error { // Reduce allocations in loop: t := time.Now() - metric, metricBuf := Metric{}, make([]byte, 0, 16) + metric, metricBuf := memorystore.Metric{}, make([]byte, 0, 16) selector := make([]string, 0, 4) typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0) // Optimize for the case where all lines in a "batch" are about the same // cluster and host. By using `WriteToLevel` (level = host), we do not need // to take the root- and cluster-level lock as often. 
- var lvl *level = nil + var lvl *memorystore.Level = nil var prevCluster, prevHost string = "", "" var ok bool @@ -202,7 +205,7 @@ func decodeLine(dec *lineprotocol.Decoder, memoryStore *memstore.MemoryStore, cl metricBuf = append(metricBuf[:0], rawmeasurement...) // The go compiler optimizes map[string(byteslice)] lookups: - metric.mc, ok = memoryStore.metrics[string(rawmeasurement)] + metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)] if !ok { continue } @@ -266,7 +269,7 @@ func decodeLine(dec *lineprotocol.Decoder, memoryStore *memstore.MemoryStore, cl if lvl == nil { selector = selector[:2] selector[0], selector[1] = cluster, host - lvl = memoryStore.GetLevel(selector) + lvl = ms.GetLevel(selector) prevCluster, prevHost = cluster, host } @@ -308,7 +311,7 @@ func decodeLine(dec *lineprotocol.Decoder, memoryStore *memstore.MemoryStore, cl return err } - if err := memoryStore.WriteToLevel(lvl, selector, t.Unix(), []Metric{metric}); err != nil { + if err := ms.WriteToLevel(lvl, selector, t.Unix(), []memorystore.Metric{metric}); err != nil { return err } } diff --git a/internal/config/config.go b/internal/config/config.go index 54b64e0..0719d1f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -43,7 +43,7 @@ type MetricConfig struct { Aggregation AggregationStrategy `json:"aggregation"` // Private, used internally... - offset int + Offset int } type HttpConfig struct { diff --git a/internal/memstore/archive.go b/internal/memorystore/archive.go similarity index 92% rename from internal/memstore/archive.go rename to internal/memorystore/archive.go index e1654f7..a6fe5dc 100644 --- a/internal/memstore/archive.go +++ b/internal/memorystore/archive.go @@ -1,4 +1,4 @@ -package memstore +package memorystore import ( "archive/zip" @@ -75,7 +75,7 @@ func init() { // The good thing: Only a host at a time is locked, so this function can run // in parallel to writes/reads. 
func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { - levels := make([]*level, 0) + levels := make([]*Level, 0) selectors := make([][]string, 0) m.root.lock.RLock() for sel1, l1 := range m.root.children { @@ -89,7 +89,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { m.root.lock.RUnlock() type workItem struct { - level *level + level *Level dir string selector []string } @@ -136,7 +136,7 @@ func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { return int(n), nil } -func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) { +func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) { l.lock.RLock() defer l.lock.RUnlock() @@ -147,7 +147,7 @@ func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil Children: make(map[string]*CheckpointFile), } - for metric, minfo := range m.metrics { + for metric, minfo := range m.Metrics { b := l.metrics[minfo.offset] if b == nil { continue @@ -200,7 +200,7 @@ func (l *level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil return retval, nil } -func (l *level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error { +func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error { cf, err := l.toCheckpointFile(from, to, m) if err != nil { return err @@ -211,11 +211,11 @@ func (l *level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error { } filepath := path.Join(dir, fmt.Sprintf("%d.json", from)) - f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644) + f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) if err != nil && os.IsNotExist(err) { - err = os.MkdirAll(dir, 0755) + err = os.MkdirAll(dir, 0o755) if err == nil { - f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0644) + f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) } } if err != nil { @@ -244,7 +244,7 @@ func (m 
*MemoryStore) FromCheckpoint(dir string, from int64) (int, error) { go func() { defer wg.Done() for host := range work { - lvl := m.root.findLevelOrCreate(host[:], len(m.metrics)) + lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics)) nn, err := lvl.fromCheckpoint(filepath.Join(dir, host[0], host[1]), from, m) if err != nil { log.Fatalf("error while loading checkpoints: %s", err.Error()) @@ -302,7 +302,7 @@ done: return int(n), nil } -func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error { +func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error { for name, metric := range cf.Metrics { n := len(metric.Data) b := &buffer{ @@ -315,7 +315,7 @@ func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error { } b.close() - minfo, ok := m.metrics[name] + minfo, ok := m.Metrics[name] if !ok { continue // return errors.New("Unkown metric: " + name) @@ -336,14 +336,14 @@ func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error { } if len(cf.Children) > 0 && l.children == nil { - l.children = make(map[string]*level) + l.children = make(map[string]*Level) } for sel, childCf := range cf.Children { child, ok := l.children[sel] if !ok { - child = &level{ - metrics: make([]*buffer, len(m.metrics)), + child = &Level{ + metrics: make([]*buffer, len(m.Metrics)), children: nil, } l.children[sel] = child @@ -357,7 +357,7 @@ func (l *level) loadFile(cf *CheckpointFile, m *MemoryStore) error { return nil } -func (l *level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, error) { +func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, error) { direntries, err := os.ReadDir(dir) if err != nil { if os.IsNotExist(err) { @@ -371,9 +371,9 @@ func (l *level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, err filesLoaded := 0 for _, e := range direntries { if e.IsDir() { - child := &level{ - metrics: make([]*buffer, len(m.metrics)), - children: make(map[string]*level), + child := &Level{ + metrics: 
make([]*buffer, len(m.Metrics)), + children: make(map[string]*Level), } files, err := child.fromCheckpoint(path.Join(dir, e.Name()), from, m) @@ -553,11 +553,11 @@ func archiveCheckpoints(dir string, archiveDir string, from int64, deleteInstead } filename := filepath.Join(archiveDir, fmt.Sprintf("%d.zip", from)) - f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644) + f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) if err != nil && os.IsNotExist(err) { - err = os.MkdirAll(archiveDir, 0755) + err = os.MkdirAll(archiveDir, 0o755) if err == nil { - f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0644) + f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, 0o644) } } if err != nil { diff --git a/internal/memorystore/buffer.go b/internal/memorystore/buffer.go new file mode 100644 index 0000000..397be97 --- /dev/null +++ b/internal/memorystore/buffer.go @@ -0,0 +1,241 @@ +package memorystore + +import ( + "errors" + "sync" + + "github.com/ClusterCockpit/cc-metric-store/internal/util" +) + +// Default buffer capacity. +// `buffer.data` will only ever grow up to it's capacity and a new link +// in the buffer chain will be created if needed so that no copying +// of data or reallocation needs to happen on writes. +const ( + BUFFER_CAP int = 512 +) + +// So that we can reuse allocations +var bufferPool sync.Pool = sync.Pool{ + New: func() interface{} { + return &buffer{ + data: make([]util.Float, 0, BUFFER_CAP), + } + }, +} + +var ( + ErrNoData error = errors.New("no data for this metric/level") + ErrDataDoesNotAlign error = errors.New("data from lower granularities does not align") +) + +// Each metric on each level has it's own buffer. +// This is where the actual values go. +// If `cap(data)` is reached, a new buffer is created and +// becomes the new head of a buffer list. +type buffer struct { + frequency int64 // Time between two "slots" + start int64 // Timestamp of when `data[0]` was written. 
+ data []util.Float // The slice should never reallocacte as `cap(data)` is respected. + prev, next *buffer // `prev` contains older data, `next` newer data. + archived bool // If true, this buffer is already archived + + closed bool + /* + statisticts struct { + samples int + min Float + max Float + avg Float + } + */ +} + +func newBuffer(ts, freq int64) *buffer { + b := bufferPool.Get().(*buffer) + b.frequency = freq + b.start = ts - (freq / 2) + b.prev = nil + b.next = nil + b.archived = false + b.closed = false + b.data = b.data[:0] + return b +} + +// If a new buffer was created, the new head is returnd. +// Otherwise, the existing buffer is returnd. +// Normaly, only "newer" data should be written, but if the value would +// end up in the same buffer anyways it is allowed. +func (b *buffer) write(ts int64, value util.Float) (*buffer, error) { + if ts < b.start { + return nil, errors.New("cannot write value to buffer from past") + } + + // idx := int((ts - b.start + (b.frequency / 3)) / b.frequency) + idx := int((ts - b.start) / b.frequency) + if idx >= cap(b.data) { + newbuf := newBuffer(ts, b.frequency) + newbuf.prev = b + b.next = newbuf + b.close() + b = newbuf + idx = 0 + } + + // Overwriting value or writing value from past + if idx < len(b.data) { + b.data[idx] = value + return b, nil + } + + // Fill up unwritten slots with NaN + for i := len(b.data); i < idx; i++ { + b.data = append(b.data, util.NaN) + } + + b.data = append(b.data, value) + return b, nil +} + +func (b *buffer) end() int64 { + return b.firstWrite() + int64(len(b.data))*b.frequency +} + +func (b *buffer) firstWrite() int64 { + return b.start + (b.frequency / 2) +} + +func (b *buffer) close() {} + +/* +func (b *buffer) close() { + if b.closed { + return + } + + b.closed = true + n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64 + for _, x := range b.data { + if x.IsNaN() { + continue + } + + n += 1 + f := float64(x) + sum += f + min = math.Min(min, f) + max = math.Max(max, f) + 
} + + b.statisticts.samples = n + if n > 0 { + b.statisticts.avg = Float(sum / float64(n)) + b.statisticts.min = Float(min) + b.statisticts.max = Float(max) + } else { + b.statisticts.avg = NaN + b.statisticts.min = NaN + b.statisticts.max = NaN + } +} +*/ + +// func interpolate(idx int, data []Float) Float { +// if idx == 0 || idx+1 == len(data) { +// return NaN +// } +// return (data[idx-1] + data[idx+1]) / 2.0 +// } + +// Return all known values from `from` to `to`. Gaps of information are represented as NaN. +// Simple linear interpolation is done between the two neighboring cells if possible. +// If values at the start or end are missing, instead of NaN values, the second and thrid +// return values contain the actual `from`/`to`. +// This function goes back the buffer chain if `from` is older than the currents buffer start. +// The loaded values are added to `data` and `data` is returned, possibly with a shorter length. +// If `data` is not long enough to hold all values, this function will panic! +func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, int64, error) { + if from < b.firstWrite() { + if b.prev != nil { + return b.prev.read(from, to, data) + } + from = b.firstWrite() + } + + var i int = 0 + var t int64 = from + for ; t < to; t += b.frequency { + idx := int((t - b.start) / b.frequency) + if idx >= cap(b.data) { + if b.next == nil { + break + } + b = b.next + idx = 0 + } + + if idx >= len(b.data) { + if b.next == nil || to <= b.next.start { + break + } + data[i] += util.NaN + } else if t < b.start { + data[i] += util.NaN + // } else if b.data[idx].IsNaN() { + // data[i] += interpolate(idx, b.data) + } else { + data[i] += b.data[idx] + } + i++ + } + + return data[:i], from, t, nil +} + +// Returns true if this buffer needs to be freed. 
+func (b *buffer) free(t int64) (delme bool, n int) { + if b.prev != nil { + delme, m := b.prev.free(t) + n += m + if delme { + b.prev.next = nil + if cap(b.prev.data) == BUFFER_CAP { + bufferPool.Put(b.prev) + } + b.prev = nil + } + } + + end := b.end() + if end < t { + return true, n + 1 + } + + return false, n +} + +// Call `callback` on every buffer that contains data in the range from `from` to `to`. +func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error { + if b == nil { + return nil + } + + if err := b.prev.iterFromTo(from, to, callback); err != nil { + return err + } + + if from <= b.end() && b.start <= to { + return callback(b) + } + + return nil +} + +func (b *buffer) count() int64 { + res := int64(len(b.data)) + if b.prev != nil { + res += b.prev.count() + } + return res +} diff --git a/debug.go b/internal/memorystore/debug.go similarity index 94% rename from debug.go rename to internal/memorystore/debug.go index 88af59f..59a978b 100644 --- a/debug.go +++ b/internal/memorystore/debug.go @@ -1,4 +1,4 @@ -package main +package memorystore import ( "bufio" @@ -29,7 +29,7 @@ func (b *buffer) debugDump(buf []byte) []byte { return buf } -func (l *level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) { +func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf []byte, depth int) ([]byte, error) { l.lock.RLock() defer l.lock.RUnlock() for i := 0; i < depth; i++ { @@ -40,7 +40,7 @@ func (l *level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf [ buf = append(buf, "\":{\n"...) 
depth += 1 objitems := 0 - for name, mc := range m.metrics { + for name, mc := range m.Metrics { if b := l.metrics[mc.offset]; b != nil { for i := 0; i < depth; i++ { buf = append(buf, '\t') diff --git a/internal/memorystore/level.go b/internal/memorystore/level.go new file mode 100644 index 0000000..34a58a2 --- /dev/null +++ b/internal/memorystore/level.go @@ -0,0 +1,183 @@ +package memorystore + +import ( + "sync" + "unsafe" + + "github.com/ClusterCockpit/cc-metric-store/internal/util" +) + +// Could also be called "node" as this forms a node in a tree structure. +// Called Level because "node" might be confusing here. +// Can be both a leaf or a inner node. In this tree structue, inner nodes can +// also hold data (in `metrics`). +type Level struct { + lock sync.RWMutex + metrics []*buffer // Every level can store metrics. + children map[string]*Level // Lower levels. +} + +// Find the correct level for the given selector, creating it if +// it does not exist. Example selector in the context of the +// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }. +// This function would probably benefit a lot from `level.children` beeing a `sync.Map`? +func (l *Level) findLevelOrCreate(selector []string, nMetrics int) *Level { + if len(selector) == 0 { + return l + } + + // Allow concurrent reads: + l.lock.RLock() + var child *Level + var ok bool + if l.children == nil { + // Children map needs to be created... + l.lock.RUnlock() + } else { + child, ok := l.children[selector[0]] + l.lock.RUnlock() + if ok { + return child.findLevelOrCreate(selector[1:], nMetrics) + } + } + + // The level does not exist, take write lock for unqiue access: + l.lock.Lock() + // While this thread waited for the write lock, another thread + // could have created the child node. 
+ if l.children != nil { + child, ok = l.children[selector[0]] + if ok { + l.lock.Unlock() + return child.findLevelOrCreate(selector[1:], nMetrics) + } + } + + child = &Level{ + metrics: make([]*buffer, nMetrics), + children: nil, + } + + if l.children != nil { + l.children[selector[0]] = child + } else { + l.children = map[string]*Level{selector[0]: child} + } + l.lock.Unlock() + return child.findLevelOrCreate(selector[1:], nMetrics) +} + +func (l *Level) free(t int64) (int, error) { + l.lock.Lock() + defer l.lock.Unlock() + + n := 0 + for i, b := range l.metrics { + if b != nil { + delme, m := b.free(t) + n += m + if delme { + if cap(b.data) == BUFFER_CAP { + bufferPool.Put(b) + } + l.metrics[i] = nil + } + } + } + + for _, l := range l.children { + m, err := l.free(t) + n += m + if err != nil { + return n, err + } + } + + return n, nil +} + +func (l *Level) sizeInBytes() int64 { + l.lock.RLock() + defer l.lock.RUnlock() + size := int64(0) + + for _, b := range l.metrics { + if b != nil { + size += b.count() * int64(unsafe.Sizeof(util.Float(0))) + } + } + + return size +} + +func (l *Level) findLevel(selector []string) *Level { + if len(selector) == 0 { + return l + } + + l.lock.RLock() + defer l.lock.RUnlock() + + lvl := l.children[selector[0]] + if lvl == nil { + return nil + } + + return lvl.findLevel(selector[1:]) +} + +func (l *Level) findBuffers(selector Selector, offset int, f func(b *buffer) error) error { + l.lock.RLock() + defer l.lock.RUnlock() + + if len(selector) == 0 { + b := l.metrics[offset] + if b != nil { + return f(b) + } + + for _, lvl := range l.children { + err := lvl.findBuffers(nil, offset, f) + if err != nil { + return err + } + } + return nil + } + + sel := selector[0] + if len(sel.String) != 0 && l.children != nil { + lvl, ok := l.children[sel.String] + if ok { + err := lvl.findBuffers(selector[1:], offset, f) + if err != nil { + return err + } + } + return nil + } + + if sel.Group != nil && l.children != nil { + for _, key := range 
sel.Group { + lvl, ok := l.children[key] + if ok { + err := lvl.findBuffers(selector[1:], offset, f) + if err != nil { + return err + } + } + } + return nil + } + + if sel.Any && l.children != nil { + for _, lvl := range l.children { + if err := lvl.findBuffers(selector[1:], offset, f); err != nil { + return err + } + } + return nil + } + + return nil +} diff --git a/internal/memorystore/memorystore.go b/internal/memorystore/memorystore.go new file mode 100644 index 0000000..305ebdd --- /dev/null +++ b/internal/memorystore/memorystore.go @@ -0,0 +1,222 @@ +package memorystore + +import ( + "errors" + "log" + "sync" + + "github.com/ClusterCockpit/cc-metric-store/internal/config" + "github.com/ClusterCockpit/cc-metric-store/internal/util" +) + +var ( + singleton sync.Once + msInstance *MemoryStore +) + +type Metric struct { + Name string + Value util.Float + MetricConfig config.MetricConfig +} + +type MemoryStore struct { + root Level // root of the tree structure + Metrics map[string]config.MetricConfig +} + +// Create a new, initialized instance of a MemoryStore. +// Will panic if values in the metric configurations are invalid. +func Init(metrics map[string]config.MetricConfig) { + singleton.Do(func() { + offset := 0 + for key, cfg := range metrics { + if cfg.Frequency == 0 { + panic("invalid frequency") + } + + metrics[key] = config.MetricConfig{ + Frequency: cfg.Frequency, + Aggregation: cfg.Aggregation, + Offset: offset, + } + offset += 1 + } + + msInstance = &MemoryStore{ + root: Level{ + metrics: make([]*buffer, len(metrics)), + children: make(map[string]*Level), + }, + Metrics: metrics, + } + }) +} + +func GetMemoryStore() *MemoryStore { + if msInstance == nil { + log.Fatalf("MemoryStore not initialized!") + } + + return msInstance +} + +// Write all values in `metrics` to the level specified by `selector` for time `ts`. +// Look at `findLevelOrCreate` for how selectors work. 
+func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error { + var ok bool + for i, metric := range metrics { + if metric.MetricConfig.Frequency == 0 { + metric.MetricConfig, ok = m.Metrics[metric.Name] + if !ok { + metric.MetricConfig.Frequency = 0 + } + metrics[i] = metric + } + } + + return m.WriteToLevel(&m.root, selector, ts, metrics) +} + +func (m *MemoryStore) GetLevel(selector []string) *Level { + return m.root.findLevelOrCreate(selector, len(m.Metrics)) +} + +// Assumes that `minfo` in `metrics` is filled in! +func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metrics []Metric) error { + l = l.findLevelOrCreate(selector, len(m.Metrics)) + l.lock.Lock() + defer l.lock.Unlock() + + for _, metric := range metrics { + if metric.MetricConfig.Frequency == 0 { + continue + } + + b := l.metrics[metric.MetricConfig.Offset] + if b == nil { + // First write to this metric and level + b = newBuffer(ts, metric.MetricConfig.Frequency) + l.metrics[metric.MetricConfig.Offset] = b + } + + nb, err := b.write(ts, metric.Value) + if err != nil { + return err + } + + // Last write created a new buffer... + if b != nb { + l.metrics[metric.MetricConfig.Offset] = nb + } + } + return nil +} + +// Returns all values for metric `metric` from `from` to `to` for the selected level(s). +// If the level does not hold the metric itself, the data will be aggregated recursively from the children. +// The second and third return value are the actual from/to for the data. Those can be different from +// the range asked for if no data was available. 
+func (m *MemoryStore) Read(selector Selector, metric string, from, to int64) ([]util.Float, int64, int64, error) { + if from > to { + return nil, 0, 0, errors.New("invalid time range") + } + + minfo, ok := m.Metrics[metric] + if !ok { + return nil, 0, 0, errors.New("unkown metric: " + metric) + } + + n, data := 0, make([]util.Float, (to-from)/minfo.Frequency+1) + err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error { + cdata, cfrom, cto, err := b.read(from, to, data) + if err != nil { + return err + } + + if n == 0 { + from, to = cfrom, cto + } else if from != cfrom || to != cto || len(data) != len(cdata) { + missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency) + if missingfront != 0 { + return ErrDataDoesNotAlign + } + + newlen := len(cdata) - missingback + if newlen < 1 { + return ErrDataDoesNotAlign + } + cdata = cdata[0:newlen] + if len(cdata) != len(data) { + return ErrDataDoesNotAlign + } + + from, to = cfrom, cto + } + + data = cdata + n += 1 + return nil + }) + + if err != nil { + return nil, 0, 0, err + } else if n == 0 { + return nil, 0, 0, errors.New("metric or host not found") + } else if n > 1 { + if minfo.Aggregation == config.AvgAggregation { + normalize := 1. / util.Float(n) + for i := 0; i < len(data); i++ { + data[i] *= normalize + } + } else if minfo.Aggregation != config.SumAggregation { + return nil, 0, 0, errors.New("invalid aggregation") + } + } + + return data, from, to, nil +} + +// Release all buffers for the selected level and all its children that contain only +// values older than `t`. +func (m *MemoryStore) Free(selector []string, t int64) (int, error) { + return m.GetLevel(selector).free(t) +} + +func (m *MemoryStore) FreeAll() error { + for k := range m.root.children { + delete(m.root.children, k) + } + + return nil +} + +func (m *MemoryStore) SizeInBytes() int64 { + return m.root.sizeInBytes() +} + +// Given a selector, return a list of all children of the level selected. 
+func (m *MemoryStore) ListChildren(selector []string) []string { + lvl := &m.root + for lvl != nil && len(selector) != 0 { + lvl.lock.RLock() + next := lvl.children[selector[0]] + lvl.lock.RUnlock() + lvl = next + selector = selector[1:] + } + + if lvl == nil { + return nil + } + + lvl.lock.RLock() + defer lvl.lock.RUnlock() + + children := make([]string, 0, len(lvl.children)) + for child := range lvl.children { + children = append(children, child) + } + + return children +} diff --git a/internal/memorystore/selector.go b/internal/memorystore/selector.go new file mode 100644 index 0000000..0b24300 --- /dev/null +++ b/internal/memorystore/selector.go @@ -0,0 +1,51 @@ +package memorystore + +import ( + "encoding/json" + "errors" +) + +type SelectorElement struct { + Any bool + String string + Group []string +} + +func (se *SelectorElement) UnmarshalJSON(input []byte) error { + if input[0] == '"' { + if err := json.Unmarshal(input, &se.String); err != nil { + return err + } + + if se.String == "*" { + se.Any = true + se.String = "" + } + + return nil + } + + if input[0] == '[' { + return json.Unmarshal(input, &se.Group) + } + + return errors.New("the Go SelectorElement type can only be a string or an array of strings") +} + +func (se *SelectorElement) MarshalJSON() ([]byte, error) { + if se.Any { + return []byte("\"*\""), nil + } + + if se.String != "" { + return json.Marshal(se.String) + } + + if se.Group != nil { + return json.Marshal(se.Group) + } + + return nil, errors.New("a Go Selector must be a non-empty string or a non-empty slice of strings") +} + +type Selector []SelectorElement diff --git a/internal/util/stats.go b/internal/memorystore/stats.go similarity index 75% rename from internal/util/stats.go rename to internal/memorystore/stats.go index 8e0f41f..3240d02 100644 --- a/internal/util/stats.go +++ b/internal/memorystore/stats.go @@ -1,15 +1,18 @@ -package util +package memorystore import ( "errors" "math" + + 
"github.com/ClusterCockpit/cc-metric-store/internal/config" + "github.com/ClusterCockpit/cc-metric-store/internal/util" ) type Stats struct { Samples int - Avg Float - Min Float - Max Float + Avg util.Float + Min util.Float + Max util.Float } func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) { @@ -54,9 +57,9 @@ func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) { return Stats{ Samples: samples, - Avg: Float(sum) / Float(samples), - Min: Float(min), - Max: Float(max), + Avg: util.Float(sum) / util.Float(samples), + Min: util.Float(min), + Max: util.Float(max), }, from, t, nil } @@ -68,14 +71,14 @@ func (m *MemoryStore) Stats(selector Selector, metric string, from, to int64) (* return nil, 0, 0, errors.New("invalid time range") } - minfo, ok := m.metrics[metric] + minfo, ok := m.Metrics[metric] if !ok { return nil, 0, 0, errors.New("unkown metric: " + metric) } n, samples := 0, 0 - avg, min, max := Float(0), math.MaxFloat32, -math.MaxFloat32 - err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error { + avg, min, max := util.Float(0), math.MaxFloat32, -math.MaxFloat32 + err := m.root.findBuffers(selector, minfo.Offset, func(b *buffer) error { stats, cfrom, cto, err := b.stats(from, to) if err != nil { return err @@ -102,16 +105,16 @@ func (m *MemoryStore) Stats(selector Selector, metric string, from, to int64) (* return nil, 0, 0, ErrNoData } - if minfo.Aggregation == AvgAggregation { - avg /= Float(n) - } else if n > 1 && minfo.Aggregation != SumAggregation { + if minfo.Aggregation == config.AvgAggregation { + avg /= util.Float(n) + } else if n > 1 && minfo.Aggregation != config.SumAggregation { return nil, 0, 0, errors.New("invalid aggregation") } return &Stats{ Samples: samples, Avg: avg, - Min: Float(min), - Max: Float(max), + Min: util.Float(min), + Max: util.Float(max), }, from, to, nil } diff --git a/internal/memstore/memstore.go b/internal/memstore/memstore.go deleted file mode 100644 index 99b6295..0000000 
--- a/internal/memstore/memstore.go +++ /dev/null @@ -1,542 +0,0 @@ -package memstore - -import ( - "errors" - "sync" - "unsafe" - - "github.com/ClusterCockpit/cc-metric-store/internal/api" - "github.com/ClusterCockpit/cc-metric-store/internal/config" - "github.com/ClusterCockpit/cc-metric-store/internal/util" -) - -// Default buffer capacity. -// `buffer.data` will only ever grow up to it's capacity and a new link -// in the buffer chain will be created if needed so that no copying -// of data or reallocation needs to happen on writes. -const ( - BUFFER_CAP int = 512 -) - -// So that we can reuse allocations -var bufferPool sync.Pool = sync.Pool{ - New: func() interface{} { - return &buffer{ - data: make([]util.Float, 0, BUFFER_CAP), - } - }, -} - -var ( - ErrNoData error = errors.New("no data for this metric/level") - ErrDataDoesNotAlign error = errors.New("data from lower granularities does not align") -) - -// Each metric on each level has it's own buffer. -// This is where the actual values go. -// If `cap(data)` is reached, a new buffer is created and -// becomes the new head of a buffer list. -type buffer struct { - frequency int64 // Time between two "slots" - start int64 // Timestamp of when `data[0]` was written. - data []util.Float // The slice should never reallocacte as `cap(data)` is respected. - prev, next *buffer // `prev` contains older data, `next` newer data. - archived bool // If true, this buffer is already archived - - closed bool - /* - statisticts struct { - samples int - min Float - max Float - avg Float - } - */ -} - -func newBuffer(ts, freq int64) *buffer { - b := bufferPool.Get().(*buffer) - b.frequency = freq - b.start = ts - (freq / 2) - b.prev = nil - b.next = nil - b.archived = false - b.closed = false - b.data = b.data[:0] - return b -} - -// If a new buffer was created, the new head is returnd. -// Otherwise, the existing buffer is returnd. 
-// Normaly, only "newer" data should be written, but if the value would -// end up in the same buffer anyways it is allowed. -func (b *buffer) write(ts int64, value util.Float) (*buffer, error) { - if ts < b.start { - return nil, errors.New("cannot write value to buffer from past") - } - - // idx := int((ts - b.start + (b.frequency / 3)) / b.frequency) - idx := int((ts - b.start) / b.frequency) - if idx >= cap(b.data) { - newbuf := newBuffer(ts, b.frequency) - newbuf.prev = b - b.next = newbuf - b.close() - b = newbuf - idx = 0 - } - - // Overwriting value or writing value from past - if idx < len(b.data) { - b.data[idx] = value - return b, nil - } - - // Fill up unwritten slots with NaN - for i := len(b.data); i < idx; i++ { - b.data = append(b.data, util.NaN) - } - - b.data = append(b.data, value) - return b, nil -} - -func (b *buffer) end() int64 { - return b.firstWrite() + int64(len(b.data))*b.frequency -} - -func (b *buffer) firstWrite() int64 { - return b.start + (b.frequency / 2) -} - -func (b *buffer) close() {} - -/* -func (b *buffer) close() { - if b.closed { - return - } - - b.closed = true - n, sum, min, max := 0, 0., math.MaxFloat64, -math.MaxFloat64 - for _, x := range b.data { - if x.IsNaN() { - continue - } - - n += 1 - f := float64(x) - sum += f - min = math.Min(min, f) - max = math.Max(max, f) - } - - b.statisticts.samples = n - if n > 0 { - b.statisticts.avg = Float(sum / float64(n)) - b.statisticts.min = Float(min) - b.statisticts.max = Float(max) - } else { - b.statisticts.avg = NaN - b.statisticts.min = NaN - b.statisticts.max = NaN - } -} -*/ - -// func interpolate(idx int, data []Float) Float { -// if idx == 0 || idx+1 == len(data) { -// return NaN -// } -// return (data[idx-1] + data[idx+1]) / 2.0 -// } - -// Return all known values from `from` to `to`. Gaps of information are represented as NaN. -// Simple linear interpolation is done between the two neighboring cells if possible. 
-// If values at the start or end are missing, instead of NaN values, the second and thrid -// return values contain the actual `from`/`to`. -// This function goes back the buffer chain if `from` is older than the currents buffer start. -// The loaded values are added to `data` and `data` is returned, possibly with a shorter length. -// If `data` is not long enough to hold all values, this function will panic! -func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, int64, error) { - if from < b.firstWrite() { - if b.prev != nil { - return b.prev.read(from, to, data) - } - from = b.firstWrite() - } - - var i int = 0 - var t int64 = from - for ; t < to; t += b.frequency { - idx := int((t - b.start) / b.frequency) - if idx >= cap(b.data) { - if b.next == nil { - break - } - b = b.next - idx = 0 - } - - if idx >= len(b.data) { - if b.next == nil || to <= b.next.start { - break - } - data[i] += util.NaN - } else if t < b.start { - data[i] += util.NaN - // } else if b.data[idx].IsNaN() { - // data[i] += interpolate(idx, b.data) - } else { - data[i] += b.data[idx] - } - i++ - } - - return data[:i], from, t, nil -} - -// Returns true if this buffer needs to be freed. -func (b *buffer) free(t int64) (delme bool, n int) { - if b.prev != nil { - delme, m := b.prev.free(t) - n += m - if delme { - b.prev.next = nil - if cap(b.prev.data) == BUFFER_CAP { - bufferPool.Put(b.prev) - } - b.prev = nil - } - } - - end := b.end() - if end < t { - return true, n + 1 - } - - return false, n -} - -// Call `callback` on every buffer that contains data in the range from `from` to `to`. 
-func (b *buffer) iterFromTo(from, to int64, callback func(b *buffer) error) error { - if b == nil { - return nil - } - - if err := b.prev.iterFromTo(from, to, callback); err != nil { - return err - } - - if from <= b.end() && b.start <= to { - return callback(b) - } - - return nil -} - -func (b *buffer) count() int64 { - res := int64(len(b.data)) - if b.prev != nil { - res += b.prev.count() - } - return res -} - -// Could also be called "node" as this forms a node in a tree structure. -// Called level because "node" might be confusing here. -// Can be both a leaf or a inner node. In this tree structue, inner nodes can -// also hold data (in `metrics`). -type level struct { - lock sync.RWMutex - metrics []*buffer // Every level can store metrics. - children map[string]*level // Lower levels. -} - -// Find the correct level for the given selector, creating it if -// it does not exist. Example selector in the context of the -// ClusterCockpit could be: []string{ "emmy", "host123", "cpu0" }. -// This function would probably benefit a lot from `level.children` beeing a `sync.Map`? -func (l *level) findLevelOrCreate(selector []string, nMetrics int) *level { - if len(selector) == 0 { - return l - } - - // Allow concurrent reads: - l.lock.RLock() - var child *level - var ok bool - if l.children == nil { - // Children map needs to be created... - l.lock.RUnlock() - } else { - child, ok := l.children[selector[0]] - l.lock.RUnlock() - if ok { - return child.findLevelOrCreate(selector[1:], nMetrics) - } - } - - // The level does not exist, take write lock for unqiue access: - l.lock.Lock() - // While this thread waited for the write lock, another thread - // could have created the child node. 
- if l.children != nil { - child, ok = l.children[selector[0]] - if ok { - l.lock.Unlock() - return child.findLevelOrCreate(selector[1:], nMetrics) - } - } - - child = &level{ - metrics: make([]*buffer, nMetrics), - children: nil, - } - - if l.children != nil { - l.children[selector[0]] = child - } else { - l.children = map[string]*level{selector[0]: child} - } - l.lock.Unlock() - return child.findLevelOrCreate(selector[1:], nMetrics) -} - -func (l *level) free(t int64) (int, error) { - l.lock.Lock() - defer l.lock.Unlock() - - n := 0 - for i, b := range l.metrics { - if b != nil { - delme, m := b.free(t) - n += m - if delme { - if cap(b.data) == BUFFER_CAP { - bufferPool.Put(b) - } - l.metrics[i] = nil - } - } - } - - for _, l := range l.children { - m, err := l.free(t) - n += m - if err != nil { - return n, err - } - } - - return n, nil -} - -func (l *level) sizeInBytes() int64 { - l.lock.RLock() - defer l.lock.RUnlock() - size := int64(0) - - for _, b := range l.metrics { - if b != nil { - size += b.count() * int64(unsafe.Sizeof(util.Float(0))) - } - } - - for _, child := range l.children { - size += child.sizeInBytes() - } - - return size -} - -type MemoryStore struct { - root level // root of the tree structure - metrics map[string]config.MetricConfig -} - -// Return a new, initialized instance of a MemoryStore. -// Will panic if values in the metric configurations are invalid. -func NewMemoryStore(metrics map[string]config.MetricConfig) *MemoryStore { - offset := 0 - for key, config := range metrics { - if config.Frequency == 0 { - panic("invalid frequency") - } - - metrics[key] = MetricConfig{ - Frequency: config.Frequency, - Aggregation: config.Aggregation, - offset: offset, - } - offset += 1 - } - - return &MemoryStore{ - root: level{ - metrics: make([]*buffer, len(metrics)), - children: make(map[string]*level), - }, - metrics: metrics, - } -} - -// Write all values in `metrics` to the level specified by `selector` for time `ts`. 
-// Look at `findLevelOrCreate` for how selectors work. -func (m *MemoryStore) Write(selector []string, ts int64, metrics []api.Metric) error { - var ok bool - for i, metric := range metrics { - if metric.mc.Frequency == 0 { - metric.mc, ok = m.metrics[metric.Name] - if !ok { - metric.mc.Frequency = 0 - } - metrics[i] = metric - } - } - - return m.WriteToLevel(&m.root, selector, ts, metrics) -} - -func (m *MemoryStore) GetLevel(selector []string) *level { - return m.root.findLevelOrCreate(selector, len(m.metrics)) -} - -// Assumes that `minfo` in `metrics` is filled in! -func (m *MemoryStore) WriteToLevel(l *level, selector []string, ts int64, metrics []api.Metric) error { - l = l.findLevelOrCreate(selector, len(m.metrics)) - l.lock.Lock() - defer l.lock.Unlock() - - for _, metric := range metrics { - if metric.mc.Frequency == 0 { - continue - } - - b := l.metrics[metric.mc.offset] - if b == nil { - // First write to this metric and level - b = newBuffer(ts, metric.mc.Frequency) - l.metrics[metric.mc.offset] = b - } - - nb, err := b.write(ts, metric.Value) - if err != nil { - return err - } - - // Last write created a new buffer... - if b != nb { - l.metrics[metric.mc.offset] = nb - } - } - return nil -} - -// Returns all values for metric `metric` from `from` to `to` for the selected level(s). -// If the level does not hold the metric itself, the data will be aggregated recursively from the children. -// The second and third return value are the actual from/to for the data. Those can be different from -// the range asked for if no data was available. 
-func (m *MemoryStore) Read(selector Selector, metric string, from, to int64) ([]Float, int64, int64, error) { - if from > to { - return nil, 0, 0, errors.New("invalid time range") - } - - minfo, ok := m.metrics[metric] - if !ok { - return nil, 0, 0, errors.New("unkown metric: " + metric) - } - - n, data := 0, make([]Float, (to-from)/minfo.Frequency+1) - err := m.root.findBuffers(selector, minfo.offset, func(b *buffer) error { - cdata, cfrom, cto, err := b.read(from, to, data) - if err != nil { - return err - } - - if n == 0 { - from, to = cfrom, cto - } else if from != cfrom || to != cto || len(data) != len(cdata) { - missingfront, missingback := int((from-cfrom)/minfo.Frequency), int((to-cto)/minfo.Frequency) - if missingfront != 0 { - return ErrDataDoesNotAlign - } - - newlen := len(cdata) - missingback - if newlen < 1 { - return ErrDataDoesNotAlign - } - cdata = cdata[0:newlen] - if len(cdata) != len(data) { - return ErrDataDoesNotAlign - } - - from, to = cfrom, cto - } - - data = cdata - n += 1 - return nil - }) - - if err != nil { - return nil, 0, 0, err - } else if n == 0 { - return nil, 0, 0, errors.New("metric or host not found") - } else if n > 1 { - if minfo.Aggregation == AvgAggregation { - normalize := 1. / Float(n) - for i := 0; i < len(data); i++ { - data[i] *= normalize - } - } else if minfo.Aggregation != SumAggregation { - return nil, 0, 0, errors.New("invalid aggregation") - } - } - - return data, from, to, nil -} - -// Release all buffers for the selected level and all its children that contain only -// values older than `t`. -func (m *MemoryStore) Free(selector []string, t int64) (int, error) { - return m.GetLevel(selector).free(t) -} - -func (m *MemoryStore) FreeAll() error { - for k := range m.root.children { - delete(m.root.children, k) - } - - return nil -} - -func (m *MemoryStore) SizeInBytes() int64 { - return m.root.sizeInBytes() -} - -// Given a selector, return a list of all children of the level selected. 
-func (m *MemoryStore) ListChildren(selector []string) []string { - lvl := &m.root - for lvl != nil && len(selector) != 0 { - lvl.lock.RLock() - next := lvl.children[selector[0]] - lvl.lock.RUnlock() - lvl = next - selector = selector[1:] - } - - if lvl == nil { - return nil - } - - lvl.lock.RLock() - defer lvl.lock.RUnlock() - - children := make([]string, 0, len(lvl.children)) - for child := range lvl.children { - children = append(children, child) - } - - return children -} diff --git a/internal/memstore/selector.go b/internal/memstore/selector.go deleted file mode 100644 index 7bc498a..0000000 --- a/internal/memstore/selector.go +++ /dev/null @@ -1,123 +0,0 @@ -package memstore - -import ( - "encoding/json" - "errors" -) - -type SelectorElement struct { - Any bool - String string - Group []string -} - -func (se *SelectorElement) UnmarshalJSON(input []byte) error { - if input[0] == '"' { - if err := json.Unmarshal(input, &se.String); err != nil { - return err - } - - if se.String == "*" { - se.Any = true - se.String = "" - } - - return nil - } - - if input[0] == '[' { - return json.Unmarshal(input, &se.Group) - } - - return errors.New("the Go SelectorElement type can only be a string or an array of strings") -} - -func (se *SelectorElement) MarshalJSON() ([]byte, error) { - if se.Any { - return []byte("\"*\""), nil - } - - if se.String != "" { - return json.Marshal(se.String) - } - - if se.Group != nil { - return json.Marshal(se.Group) - } - - return nil, errors.New("a Go Selector must be a non-empty string or a non-empty slice of strings") -} - -type Selector []SelectorElement - -func (l *level) findLevel(selector []string) *level { - if len(selector) == 0 { - return l - } - - l.lock.RLock() - defer l.lock.RUnlock() - - lvl := l.children[selector[0]] - if lvl == nil { - return nil - } - - return lvl.findLevel(selector[1:]) -} - -func (l *level) findBuffers(selector Selector, offset int, f func(b *buffer) error) error { - l.lock.RLock() - defer l.lock.RUnlock() - 
- if len(selector) == 0 { - b := l.metrics[offset] - if b != nil { - return f(b) - } - - for _, lvl := range l.children { - err := lvl.findBuffers(nil, offset, f) - if err != nil { - return err - } - } - return nil - } - - sel := selector[0] - if len(sel.String) != 0 && l.children != nil { - lvl, ok := l.children[sel.String] - if ok { - err := lvl.findBuffers(selector[1:], offset, f) - if err != nil { - return err - } - } - return nil - } - - if sel.Group != nil && l.children != nil { - for _, key := range sel.Group { - lvl, ok := l.children[key] - if ok { - err := lvl.findBuffers(selector[1:], offset, f) - if err != nil { - return err - } - } - } - return nil - } - - if sel.Any && l.children != nil { - for _, lvl := range l.children { - if err := lvl.findBuffers(selector[1:], offset, f); err != nil { - return err - } - } - return nil - } - - return nil -} From fcc8eac2d56284989736d2630f746c80f175b528 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 6 May 2024 14:20:43 +0200 Subject: [PATCH 3/6] Restructure and Cleanup Compiles --- cmd/cc-metric-store/main.go | 162 +------ internal/api/api.go | 117 +++-- internal/api/lineprotocol.go | 2 +- internal/config/config.go | 8 +- internal/memorystore/archive.go | 481 ++------------------ internal/memorystore/buffer.go | 26 +- internal/memorystore/checkpoint.go | 501 +++++++++++++++++++++ internal/memorystore/debug.go | 2 +- internal/memorystore/level.go | 6 +- internal/memorystore/memorystore.go | 65 ++- internal/memorystore/stats.go | 2 +- internal/{memorystore => util}/selector.go | 4 +- 12 files changed, 686 insertions(+), 690 deletions(-) create mode 100644 internal/memorystore/checkpoint.go rename internal/{memorystore => util}/selector.go (97%) diff --git a/cmd/cc-metric-store/main.go b/cmd/cc-metric-store/main.go index 601dfe2..c7f3c03 100644 --- a/cmd/cc-metric-store/main.go +++ b/cmd/cc-metric-store/main.go @@ -4,7 +4,6 @@ import ( "bufio" "context" "flag" - "io" "log" "os" "os/signal" @@ -20,120 +19,6 @@ 
import ( "github.com/google/gops/agent" ) -var ( - conf config.Config - ms *memorystore.MemoryStore = nil - lastCheckpoint time.Time -) - -var ( - debugDumpLock sync.Mutex - debugDump io.Writer = io.Discard -) - -func intervals(wg *sync.WaitGroup, ctx context.Context) { - wg.Add(3) - // go func() { - // defer wg.Done() - // ticks := time.Tick(30 * time.Minute) - // for { - // select { - // case <-ctx.Done(): - // return - // case <-ticks: - // runtime.GC() - // } - // } - // }() - - go func() { - defer wg.Done() - d, err := time.ParseDuration(conf.RetentionInMemory) - if err != nil { - log.Fatal(err) - } - if d <= 0 { - return - } - - ticks := time.Tick(d / 2) - for { - select { - case <-ctx.Done(): - return - case <-ticks: - t := time.Now().Add(-d) - log.Printf("start freeing buffers (older than %s)...\n", t.Format(time.RFC3339)) - freed, err := ms.Free(nil, t.Unix()) - if err != nil { - log.Printf("freeing up buffers failed: %s\n", err.Error()) - } else { - log.Printf("done: %d buffers freed\n", freed) - } - } - } - }() - - lastCheckpoint = time.Now() - go func() { - defer wg.Done() - d, err := time.ParseDuration(conf.Checkpoints.Interval) - if err != nil { - log.Fatal(err) - } - if d <= 0 { - return - } - - ticks := time.Tick(d) - for { - select { - case <-ctx.Done(): - return - case <-ticks: - log.Printf("start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339)) - now := time.Now() - n, err := ms.ToCheckpoint(conf.Checkpoints.RootDir, - lastCheckpoint.Unix(), now.Unix()) - if err != nil { - log.Printf("checkpointing failed: %s\n", err.Error()) - } else { - log.Printf("done: %d checkpoint files created\n", n) - lastCheckpoint = now - } - } - } - }() - - go func() { - defer wg.Done() - d, err := time.ParseDuration(conf.Archive.Interval) - if err != nil { - log.Fatal(err) - } - if d <= 0 { - return - } - - ticks := time.Tick(d) - for { - select { - case <-ctx.Done(): - return - case <-ticks: - t := time.Now().Add(-d) - log.Printf("start 
archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) - n, err := memorystore.ArchiveCheckpoints(conf.Checkpoints.RootDir, conf.Archive.RootDir, t.Unix(), conf.Archive.DeleteInstead) - if err != nil { - log.Printf("archiving failed: %s\n", err.Error()) - } else { - log.Printf("done: %d files zipped and moved to archive\n", n) - } - } - } - }() -} - func main() { var configFile string var enableGopsAgent bool @@ -142,33 +27,24 @@ func main() { flag.Parse() startupTime := time.Now() - conf = config.LoadConfiguration(configFile) - memorystore.Init(conf.Metrics) - ms = memorystore.GetMemoryStore() + config.Init(configFile) + memorystore.Init(config.Keys.Metrics) + ms := memorystore.GetMemoryStore() - if enableGopsAgent || conf.Debug.EnableGops { + if enableGopsAgent || config.Keys.Debug.EnableGops { if err := agent.Listen(agent.Options{}); err != nil { log.Fatal(err) } } - if conf.Debug.DumpToFile != "" { - f, err := os.Create(conf.Debug.DumpToFile) - if err != nil { - log.Fatal(err) - } - - debugDump = f - } - - d, err := time.ParseDuration(conf.Checkpoints.Restore) + d, err := time.ParseDuration(config.Keys.Checkpoints.Restore) if err != nil { log.Fatal(err) } restoreFrom := startupTime.Add(-d) log.Printf("Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339)) - files, err := ms.FromCheckpoint(conf.Checkpoints.RootDir, restoreFrom.Unix()) + files, err := ms.FromCheckpoint(config.Keys.Checkpoints.RootDir, restoreFrom.Unix()) loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB if err != nil { log.Fatalf("Loading checkpoints failed: %s\n", err.Error()) @@ -205,20 +81,24 @@ func main() { } }() - intervals(&wg, ctx) + wg.Add(3) + + memorystore.Retention(&wg, ctx) + memorystore.Checkpointing(&wg, ctx) + memorystore.Archiving(&wg, ctx) wg.Add(1) go func() { - err := api.StartApiServer(ctx, conf.HttpConfig) + err := api.StartApiServer(ctx, config.Keys.HttpConfig) if err != nil { log.Fatal(err) } wg.Done() }() - if conf.Nats != nil { - for _, 
natsConf := range conf.Nats { + if config.Keys.Nats != nil { + for _, natsConf := range config.Keys.Nats { // TODO: When multiple nats configs share a URL, do a single connect. wg.Add(1) nc := natsConf @@ -234,17 +114,5 @@ func main() { } wg.Wait() - - log.Printf("Writing to '%s'...\n", conf.Checkpoints.RootDir) - files, err = ms.ToCheckpoint(conf.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) - if err != nil { - log.Printf("Writing checkpoint failed: %s\n", err.Error()) - } - log.Printf("Done! (%d files written)\n", files) - - if closer, ok := debugDump.(io.Closer); ok { - if err := closer.Close(); err != nil { - log.Printf("error: %s", err.Error()) - } - } + memorystore.Shutdown() } diff --git a/internal/api/api.go b/internal/api/api.go index 397390d..73b513d 100644 --- a/internal/api/api.go +++ b/internal/api/api.go @@ -27,9 +27,9 @@ import ( type ApiMetricData struct { Error *string `json:"error,omitempty"` + Data util.FloatArray `json:"data,omitempty"` From int64 `json:"from"` To int64 `json:"to"` - Data util.FloatArray `json:"data,omitempty"` Avg util.Float `json:"avg"` Min util.Float `json:"min"` Max util.Float `json:"max"` @@ -73,8 +73,8 @@ func (data *ApiMetricData) ScaleBy(f util.Float) { } } -func (data *ApiMetricData) PadDataWithNull(from, to int64, metric string) { - minfo, ok := memoryStore.metrics[metric] +func (data *ApiMetricData) PadDataWithNull(ms *memorystore.MemoryStore, from, to int64, metric string) { + minfo, ok := ms.Metrics[metric] if !ok { return } @@ -105,12 +105,12 @@ func handleFree(rw http.ResponseWriter, r *http.Request) { return } - // TODO: lastCheckpoint might be modified by different go-routines. - // Load it using the sync/atomic package? - freeUpTo := lastCheckpoint.Unix() - if to < freeUpTo { - freeUpTo = to - } + // // TODO: lastCheckpoint might be modified by different go-routines. + // // Load it using the sync/atomic package? 
+ // freeUpTo := lastCheckpoint.Unix() + // if to < freeUpTo { + // freeUpTo = to + // } if r.Method != http.MethodPost { http.Error(rw, "Method Not Allowed", http.StatusMethodNotAllowed) @@ -125,9 +125,10 @@ func handleFree(rw http.ResponseWriter, r *http.Request) { return } + ms := memorystore.GetMemoryStore() n := 0 for _, sel := range selectors { - bn, err := memoryStore.Free(sel, freeUpTo) + bn, err := ms.Free(sel, to) if err != nil { http.Error(rw, err.Error(), http.StatusInternalServerError) return @@ -137,7 +138,7 @@ func handleFree(rw http.ResponseWriter, r *http.Request) { } rw.WriteHeader(http.StatusOK) - rw.Write([]byte(fmt.Sprintf("buffers freed: %d\n", n))) + fmt.Fprintf(rw, "buffers freed: %d\n", n) } func handleWrite(rw http.ResponseWriter, r *http.Request) { @@ -153,26 +154,9 @@ func handleWrite(rw http.ResponseWriter, r *http.Request) { return } - if debugDump != io.Discard { - now := time.Now() - msg := make([]byte, 0, 512) - msg = append(msg, "\n--- local unix time: "...) - msg = strconv.AppendInt(msg, now.Unix(), 10) - msg = append(msg, " ---\n"...) 
- - debugDumpLock.Lock() - defer debugDumpLock.Unlock() - if _, err := debugDump.Write(msg); err != nil { - log.Printf("error while writing to debug dump: %s", err.Error()) - } - if _, err := debugDump.Write(bytes); err != nil { - log.Printf("error while writing to debug dump: %s", err.Error()) - } - return - } - + ms := memorystore.GetMemoryStore() dec := lineprotocol.NewDecoderWithBytes(bytes) - if err := decodeLine(dec, r.URL.Query().Get("cluster")); err != nil { + if err := decodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil { log.Printf("/api/write error: %s", err.Error()) http.Error(rw, err.Error(), http.StatusBadRequest) return @@ -182,13 +166,13 @@ func handleWrite(rw http.ResponseWriter, r *http.Request) { type ApiQueryRequest struct { Cluster string `json:"cluster"` + Queries []ApiQuery `json:"queries"` + ForAllNodes []string `json:"for-all-nodes"` From int64 `json:"from"` To int64 `json:"to"` WithStats bool `json:"with-stats"` WithData bool `json:"with-data"` WithPadding bool `json:"with-padding"` - Queries []ApiQuery `json:"queries"` - ForAllNodes []string `json:"for-all-nodes"` } type ApiQueryResponse struct { @@ -197,19 +181,19 @@ type ApiQueryResponse struct { } type ApiQuery struct { - Metric string `json:"metric"` - Hostname string `json:"host"` - Aggregate bool `json:"aggreg"` - ScaleFactor Float `json:"scale-by,omitempty"` - Type *string `json:"type,omitempty"` - TypeIds []string `json:"type-ids,omitempty"` - SubType *string `json:"subtype,omitempty"` - SubTypeIds []string `json:"subtype-ids,omitempty"` + Type *string `json:"type,omitempty"` + SubType *string `json:"subtype,omitempty"` + Metric string `json:"metric"` + Hostname string `json:"host"` + TypeIds []string `json:"type-ids,omitempty"` + SubTypeIds []string `json:"subtype-ids,omitempty"` + ScaleFactor util.Float `json:"scale-by,omitempty"` + Aggregate bool `json:"aggreg"` } func handleQuery(rw http.ResponseWriter, r *http.Request) { var err error - var req ApiQueryRequest = 
ApiQueryRequest{WithStats: true, WithData: true, WithPadding: true} + req := ApiQueryRequest{WithStats: true, WithData: true, WithPadding: true} if err := json.NewDecoder(r.Body).Decode(&req); err != nil { http.Error(rw, err.Error(), http.StatusBadRequest) return @@ -235,29 +219,29 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { } for _, query := range req.Queries { - sels := make([]Selector, 0, 1) + sels := make([]util.Selector, 0, 1) if query.Aggregate || query.Type == nil { - sel := Selector{{String: req.Cluster}, {String: query.Hostname}} + sel := util.Selector{{String: req.Cluster}, {String: query.Hostname}} if query.Type != nil { if len(query.TypeIds) == 1 { - sel = append(sel, SelectorElement{String: *query.Type + query.TypeIds[0]}) + sel = append(sel, util.SelectorElement{String: *query.Type + query.TypeIds[0]}) } else { ids := make([]string, len(query.TypeIds)) for i, id := range query.TypeIds { ids[i] = *query.Type + id } - sel = append(sel, SelectorElement{Group: ids}) + sel = append(sel, util.SelectorElement{Group: ids}) } if query.SubType != nil { if len(query.SubTypeIds) == 1 { - sel = append(sel, SelectorElement{String: *query.SubType + query.SubTypeIds[0]}) + sel = append(sel, util.SelectorElement{String: *query.SubType + query.SubTypeIds[0]}) } else { ids := make([]string, len(query.SubTypeIds)) for i, id := range query.SubTypeIds { ids[i] = *query.SubType + id } - sel = append(sel, SelectorElement{Group: ids}) + sel = append(sel, util.SelectorElement{Group: ids}) } } } @@ -266,7 +250,7 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { for _, typeId := range query.TypeIds { if query.SubType != nil { for _, subTypeId := range query.SubTypeIds { - sels = append(sels, Selector{ + sels = append(sels, util.Selector{ {String: req.Cluster}, {String: query.Hostname}, {String: *query.Type + typeId}, @@ -274,7 +258,7 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { }) } } else { - sels = append(sels, Selector{ + sels 
= append(sels, util.Selector{ {String: req.Cluster}, {String: query.Hostname}, {String: *query.Type + typeId}, @@ -289,7 +273,7 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { res := make([]ApiMetricData, 0, len(sels)) for _, sel := range sels { data := ApiMetricData{} - data.Data, data.From, data.To, err = memoryStore.Read(sel, query.Metric, req.From, req.To) + data.Data, data.From, data.To, err = ms.Read(sel, query.Metric, req.From, req.To) // log.Printf("data: %#v, %#v, %#v, %#v", data.Data, data.From, data.To, err) if err != nil { msg := err.Error() @@ -305,7 +289,7 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { data.ScaleBy(query.ScaleFactor) } if req.WithPadding { - data.PadDataWithNull(req.From, req.To, query.Metric) + data.PadDataWithNull(ms, req.From, req.To, query.Metric) } if !req.WithData { data.Data = nil @@ -324,6 +308,20 @@ func handleQuery(rw http.ResponseWriter, r *http.Request) { } } +func handleDebug(rw http.ResponseWriter, r *http.Request) { + raw := r.URL.Query().Get("selector") + selector := []string{} + if len(raw) != 0 { + selector = strings.Split(raw, ":") + } + + ms := memorystore.GetMemoryStore() + if err := ms.DebugDump(bufio.NewWriter(rw), selector); err != nil { + rw.WriteHeader(http.StatusBadRequest) + rw.Write([]byte(err.Error())) + } +} + func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler { cacheLock := sync.RWMutex{} cache := map[string]*jwt.Token{} @@ -375,18 +373,7 @@ func StartApiServer(ctx context.Context, httpConfig *config.HttpConfig) error { r.HandleFunc("/api/free", handleFree) r.HandleFunc("/api/write", handleWrite) r.HandleFunc("/api/query", handleQuery) - r.HandleFunc("/api/debug", func(rw http.ResponseWriter, r *http.Request) { - raw := r.URL.Query().Get("selector") - selector := []string{} - if len(raw) != 0 { - selector = strings.Split(raw, ":") - } - - if err := memoryStore.DebugDump(bufio.NewWriter(rw), selector); err != nil { - 
rw.WriteHeader(http.StatusBadRequest) - rw.Write([]byte(err.Error())) - } - }) + r.HandleFunc("/api/debug", handleDebug) server := &http.Server{ Handler: r, @@ -395,8 +382,8 @@ func StartApiServer(ctx context.Context, httpConfig *config.HttpConfig) error { ReadTimeout: 30 * time.Second, } - if len(conf.JwtPublicKey) > 0 { - buf, err := base64.StdEncoding.DecodeString(conf.JwtPublicKey) + if len(config.Keys.JwtPublicKey) > 0 { + buf, err := base64.StdEncoding.DecodeString(config.Keys.JwtPublicKey) if err != nil { return err } diff --git a/internal/api/lineprotocol.go b/internal/api/lineprotocol.go index f48f7c3..9081638 100644 --- a/internal/api/lineprotocol.go +++ b/internal/api/lineprotocol.go @@ -191,7 +191,7 @@ func decodeLine(dec *lineprotocol.Decoder, // cluster and host. By using `WriteToLevel` (level = host), we do not need // to take the root- and cluster-level lock as often. var lvl *memorystore.Level = nil - var prevCluster, prevHost string = "", "" + prevCluster, prevHost := "", "" var ok bool for dec.Next() { diff --git a/internal/config/config.go b/internal/config/config.go index 0719d1f..b829b5d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -96,8 +96,9 @@ type Config struct { Nats []*NatsConfig `json:"nats"` } -func LoadConfiguration(file string) Config { - var config Config +var Keys Config + +func Init(file string) { configFile, err := os.Open(file) if err != nil { log.Fatal(err) @@ -105,8 +106,7 @@ func LoadConfiguration(file string) Config { defer configFile.Close() dec := json.NewDecoder(configFile) dec.DisallowUnknownFields() - if err := dec.Decode(&config); err != nil { + if err := dec.Decode(&Keys); err != nil { log.Fatal(err) } - return config } diff --git a/internal/memorystore/archive.go b/internal/memorystore/archive.go index a6fe5dc..fab457c 100644 --- a/internal/memorystore/archive.go +++ b/internal/memorystore/archive.go @@ -3,470 +3,57 @@ package memorystore import ( "archive/zip" "bufio" - "encoding/json" + 
"context" "errors" "fmt" "io" - "io/fs" "log" "os" - "path" "path/filepath" - "runtime" - "sort" - "strconv" - "strings" "sync" "sync/atomic" + "time" + + "github.com/ClusterCockpit/cc-metric-store/internal/config" ) -// Whenever changed, update MarshalJSON as well! -type CheckpointMetrics struct { - Frequency int64 `json:"frequency"` - Start int64 `json:"start"` - Data []Float `json:"data"` -} - -// As `Float` implements a custom MarshalJSON() function, -// serializing an array of such types has more overhead -// than one would assume (because of extra allocations, interfaces and so on). -func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) { - buf := make([]byte, 0, 128+len(cm.Data)*8) - buf = append(buf, `{"frequency":`...) - buf = strconv.AppendInt(buf, cm.Frequency, 10) - buf = append(buf, `,"start":`...) - buf = strconv.AppendInt(buf, cm.Start, 10) - buf = append(buf, `,"data":[`...) - for i, x := range cm.Data { - if i != 0 { - buf = append(buf, ',') +func Archiving(wg *sync.WaitGroup, ctx context.Context) { + go func() { + defer wg.Done() + d, err := time.ParseDuration(config.Keys.Archive.Interval) + if err != nil { + log.Fatal(err) } - if x.IsNaN() { - buf = append(buf, `null`...) - } else { - buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32) + if d <= 0 { + return } - } - buf = append(buf, `]}`...) 
- return buf, nil -} -type CheckpointFile struct { - From int64 `json:"from"` - To int64 `json:"to"` - Metrics map[string]*CheckpointMetrics `json:"metrics"` - Children map[string]*CheckpointFile `json:"children"` + ticks := func() <-chan time.Time { + if d <= 0 { + return nil + } + return time.NewTicker(d).C + }() + for { + select { + case <-ctx.Done(): + return + case <-ticks: + t := time.Now().Add(-d) + log.Printf("start archiving checkpoints (older than %s)...\n", t.Format(time.RFC3339)) + n, err := ArchiveCheckpoints(config.Keys.Checkpoints.RootDir, config.Keys.Archive.RootDir, t.Unix(), config.Keys.Archive.DeleteInstead) + if err != nil { + log.Printf("archiving failed: %s\n", err.Error()) + } else { + log.Printf("done: %d files zipped and moved to archive\n", n) + } + } + } + }() } var ErrNoNewData error = errors.New("all data already archived") -var NumWorkers int = 4 - -func init() { - maxWorkers := 10 - NumWorkers = runtime.NumCPU()/2 + 1 - if NumWorkers > maxWorkers { - NumWorkers = maxWorkers - } -} - -// Metrics stored at the lowest 2 levels are not stored away (root and cluster)! -// On a per-host basis a new JSON file is created. I have no idea if this will scale. -// The good thing: Only a host at a time is locked, so this function can run -// in parallel to writes/reads. 
-func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { - levels := make([]*Level, 0) - selectors := make([][]string, 0) - m.root.lock.RLock() - for sel1, l1 := range m.root.children { - l1.lock.RLock() - for sel2, l2 := range l1.children { - levels = append(levels, l2) - selectors = append(selectors, []string{sel1, sel2}) - } - l1.lock.RUnlock() - } - m.root.lock.RUnlock() - - type workItem struct { - level *Level - dir string - selector []string - } - - n, errs := int32(0), int32(0) - - var wg sync.WaitGroup - wg.Add(NumWorkers) - work := make(chan workItem, NumWorkers*2) - for worker := 0; worker < NumWorkers; worker++ { - go func() { - defer wg.Done() - - for workItem := range work { - if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil { - if err == ErrNoNewData { - continue - } - - log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error()) - atomic.AddInt32(&errs, 1) - } else { - atomic.AddInt32(&n, 1) - } - } - }() - } - - for i := 0; i < len(levels); i++ { - dir := path.Join(dir, path.Join(selectors[i]...)) - work <- workItem{ - level: levels[i], - dir: dir, - selector: selectors[i], - } - } - - close(work) - wg.Wait() - - if errs > 0 { - return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) - } - return int(n), nil -} - -func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) { - l.lock.RLock() - defer l.lock.RUnlock() - - retval := &CheckpointFile{ - From: from, - To: to, - Metrics: make(map[string]*CheckpointMetrics), - Children: make(map[string]*CheckpointFile), - } - - for metric, minfo := range m.Metrics { - b := l.metrics[minfo.offset] - if b == nil { - continue - } - - allArchived := true - b.iterFromTo(from, to, func(b *buffer) error { - if !b.archived { - allArchived = false - } - return nil - }) - - if allArchived { - continue - } - - data := make([]Float, (to-from)/b.frequency+1) - data, start, end, 
err := b.read(from, to, data) - if err != nil { - return nil, err - } - - for i := int((end - start) / b.frequency); i < len(data); i++ { - data[i] = NaN - } - - retval.Metrics[metric] = &CheckpointMetrics{ - Frequency: b.frequency, - Start: start, - Data: data, - } - } - - for name, child := range l.children { - val, err := child.toCheckpointFile(from, to, m) - if err != nil { - return nil, err - } - - if val != nil { - retval.Children[name] = val - } - } - - if len(retval.Children) == 0 && len(retval.Metrics) == 0 { - return nil, nil - } - - return retval, nil -} - -func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error { - cf, err := l.toCheckpointFile(from, to, m) - if err != nil { - return err - } - - if cf == nil { - return ErrNoNewData - } - - filepath := path.Join(dir, fmt.Sprintf("%d.json", from)) - f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) - if err != nil && os.IsNotExist(err) { - err = os.MkdirAll(dir, 0o755) - if err == nil { - f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) - } - } - if err != nil { - return err - } - defer f.Close() - - bw := bufio.NewWriter(f) - if err = json.NewEncoder(bw).Encode(cf); err != nil { - return err - } - - return bw.Flush() -} - -// Metrics stored at the lowest 2 levels are not loaded (root and cluster)! -// This function can only be called once and before the very first write or read. -// Different host's data is loaded to memory in parallel. 
-func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) { - var wg sync.WaitGroup - work := make(chan [2]string, NumWorkers) - n, errs := int32(0), int32(0) - - wg.Add(NumWorkers) - for worker := 0; worker < NumWorkers; worker++ { - go func() { - defer wg.Done() - for host := range work { - lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics)) - nn, err := lvl.fromCheckpoint(filepath.Join(dir, host[0], host[1]), from, m) - if err != nil { - log.Fatalf("error while loading checkpoints: %s", err.Error()) - atomic.AddInt32(&errs, 1) - } - atomic.AddInt32(&n, int32(nn)) - } - }() - } - - i := 0 - clustersDir, err := os.ReadDir(dir) - for _, clusterDir := range clustersDir { - if !clusterDir.IsDir() { - err = errors.New("expected only directories at first level of checkpoints/ directory") - goto done - } - - hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name())) - if e != nil { - err = e - goto done - } - - for _, hostDir := range hostsDir { - if !hostDir.IsDir() { - err = errors.New("expected only directories at second level of checkpoints/ directory") - goto done - } - - i++ - if i%NumWorkers == 0 && i > 100 { - // Forcing garbage collection runs here regulary during the loading of checkpoints - // will decrease the total heap size after loading everything back to memory is done. - // While loading data, the heap will grow fast, so the GC target size will double - // almost always. By forcing GCs here, we can keep it growing more slowly so that - // at the end, less memory is wasted. 
- runtime.GC() - } - - work <- [2]string{clusterDir.Name(), hostDir.Name()} - } - } -done: - close(work) - wg.Wait() - - if err != nil { - return int(n), err - } - - if errs > 0 { - return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) - } - return int(n), nil -} - -func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error { - for name, metric := range cf.Metrics { - n := len(metric.Data) - b := &buffer{ - frequency: metric.Frequency, - start: metric.Start, - data: metric.Data[0:n:n], // Space is wasted here :( - prev: nil, - next: nil, - archived: true, - } - b.close() - - minfo, ok := m.Metrics[name] - if !ok { - continue - // return errors.New("Unkown metric: " + name) - } - - prev := l.metrics[minfo.offset] - if prev == nil { - l.metrics[minfo.offset] = b - } else { - if prev.start > b.start { - return errors.New("wooops") - } - - b.prev = prev - prev.next = b - } - l.metrics[minfo.offset] = b - } - - if len(cf.Children) > 0 && l.children == nil { - l.children = make(map[string]*Level) - } - - for sel, childCf := range cf.Children { - child, ok := l.children[sel] - if !ok { - child = &Level{ - metrics: make([]*buffer, len(m.Metrics)), - children: nil, - } - l.children[sel] = child - } - - if err := child.loadFile(childCf, m); err != nil { - return err - } - } - - return nil -} - -func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, error) { - direntries, err := os.ReadDir(dir) - if err != nil { - if os.IsNotExist(err) { - return 0, nil - } - - return 0, err - } - - jsonFiles := make([]fs.DirEntry, 0) - filesLoaded := 0 - for _, e := range direntries { - if e.IsDir() { - child := &Level{ - metrics: make([]*buffer, len(m.Metrics)), - children: make(map[string]*Level), - } - - files, err := child.fromCheckpoint(path.Join(dir, e.Name()), from, m) - filesLoaded += files - if err != nil { - return filesLoaded, err - } - - l.children[e.Name()] = child - } else if strings.HasSuffix(e.Name(), 
".json") { - jsonFiles = append(jsonFiles, e) - } else { - return filesLoaded, errors.New("unexpected file: " + dir + "/" + e.Name()) - } - } - - files, err := findFiles(jsonFiles, from, true) - if err != nil { - return filesLoaded, err - } - - for _, filename := range files { - f, err := os.Open(path.Join(dir, filename)) - if err != nil { - return filesLoaded, err - } - defer f.Close() - - br := bufio.NewReader(f) - cf := &CheckpointFile{} - if err = json.NewDecoder(br).Decode(cf); err != nil { - return filesLoaded, err - } - - if cf.To != 0 && cf.To < from { - continue - } - - if err = l.loadFile(cf, m); err != nil { - return filesLoaded, err - } - - filesLoaded += 1 - } - - return filesLoaded, nil -} - -// This will probably get very slow over time! -// A solution could be some sort of an index file in which all other files -// and the timespan they contain is listed. -func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) { - nums := map[string]int64{} - for _, e := range direntries { - ts, err := strconv.ParseInt(strings.TrimSuffix(e.Name(), ".json"), 10, 64) - if err != nil { - return nil, err - } - nums[e.Name()] = ts - } - - sort.Slice(direntries, func(i, j int) bool { - a, b := direntries[i], direntries[j] - return nums[a.Name()] < nums[b.Name()] - }) - - filenames := make([]string, 0) - for i := 0; i < len(direntries); i++ { - e := direntries[i] - ts1 := nums[e.Name()] - - if findMoreRecentFiles && t <= ts1 || i == len(direntries)-1 { - filenames = append(filenames, e.Name()) - continue - } - - enext := direntries[i+1] - ts2 := nums[enext.Name()] - - if findMoreRecentFiles { - if ts1 < t && t < ts2 { - filenames = append(filenames, e.Name()) - } - } else { - if ts2 < t { - filenames = append(filenames, e.Name()) - } - } - } - - return filenames, nil -} - // ZIP all checkpoint files older than `from` together and write them to the `archiveDir`, // deleting them from the `checkpointsDir`. 
func ArchiveCheckpoints(checkpointsDir, archiveDir string, from int64, deleteInstead bool) (int, error) { diff --git a/internal/memorystore/buffer.go b/internal/memorystore/buffer.go index 397be97..34fee5d 100644 --- a/internal/memorystore/buffer.go +++ b/internal/memorystore/buffer.go @@ -34,21 +34,13 @@ var ( // If `cap(data)` is reached, a new buffer is created and // becomes the new head of a buffer list. type buffer struct { - frequency int64 // Time between two "slots" - start int64 // Timestamp of when `data[0]` was written. - data []util.Float // The slice should never reallocacte as `cap(data)` is respected. - prev, next *buffer // `prev` contains older data, `next` newer data. - archived bool // If true, this buffer is already archived - - closed bool - /* - statisticts struct { - samples int - min Float - max Float - avg Float - } - */ + prev *buffer + next *buffer + data []util.Float + frequency int64 + start int64 + archived bool + closed bool } func newBuffer(ts, freq int64) *buffer { @@ -163,8 +155,8 @@ func (b *buffer) read(from, to int64, data []util.Float) ([]util.Float, int64, i from = b.firstWrite() } - var i int = 0 - var t int64 = from + i := 0 + t := from for ; t < to; t += b.frequency { idx := int((t - b.start) / b.frequency) if idx >= cap(b.data) { diff --git a/internal/memorystore/checkpoint.go b/internal/memorystore/checkpoint.go new file mode 100644 index 0000000..9b036d5 --- /dev/null +++ b/internal/memorystore/checkpoint.go @@ -0,0 +1,501 @@ +package memorystore + +import ( + "bufio" + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "log" + "os" + "path" + "path/filepath" + "runtime" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/ClusterCockpit/cc-metric-store/internal/config" + "github.com/ClusterCockpit/cc-metric-store/internal/util" +) + +// Whenever changed, update MarshalJSON as well! 
+type CheckpointMetrics struct { + Data []util.Float `json:"data"` + Frequency int64 `json:"frequency"` + Start int64 `json:"start"` +} + +type CheckpointFile struct { + Metrics map[string]*CheckpointMetrics `json:"metrics"` + Children map[string]*CheckpointFile `json:"children"` + From int64 `json:"from"` + To int64 `json:"to"` +} + +var lastCheckpoint time.Time + +func Checkpointing(wg *sync.WaitGroup, ctx context.Context) { + lastCheckpoint = time.Now() + ms := GetMemoryStore() + + go func() { + defer wg.Done() + d, err := time.ParseDuration(config.Keys.Checkpoints.Interval) + if err != nil { + log.Fatal(err) + } + if d <= 0 { + return + } + + ticks := func() <-chan time.Time { + if d <= 0 { + return nil + } + return time.NewTicker(d).C + }() + for { + select { + case <-ctx.Done(): + return + case <-ticks: + log.Printf("start checkpointing (starting at %s)...\n", lastCheckpoint.Format(time.RFC3339)) + now := time.Now() + n, err := ms.ToCheckpoint(config.Keys.Checkpoints.RootDir, + lastCheckpoint.Unix(), now.Unix()) + if err != nil { + log.Printf("checkpointing failed: %s\n", err.Error()) + } else { + log.Printf("done: %d checkpoint files created\n", n) + lastCheckpoint = now + } + } + } + }() +} + +// As `Float` implements a custom MarshalJSON() function, +// serializing an array of such types has more overhead +// than one would assume (because of extra allocations, interfaces and so on). +func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) { + buf := make([]byte, 0, 128+len(cm.Data)*8) + buf = append(buf, `{"frequency":`...) + buf = strconv.AppendInt(buf, cm.Frequency, 10) + buf = append(buf, `,"start":`...) + buf = strconv.AppendInt(buf, cm.Start, 10) + buf = append(buf, `,"data":[`...) + for i, x := range cm.Data { + if i != 0 { + buf = append(buf, ',') + } + if x.IsNaN() { + buf = append(buf, `null`...) + } else { + buf = strconv.AppendFloat(buf, float64(x), 'f', 1, 32) + } + } + buf = append(buf, `]}`...) 
+ return buf, nil +} + +// Metrics stored at the lowest 2 levels are not stored away (root and cluster)! +// On a per-host basis a new JSON file is created. I have no idea if this will scale. +// The good thing: Only a host at a time is locked, so this function can run +// in parallel to writes/reads. +func (m *MemoryStore) ToCheckpoint(dir string, from, to int64) (int, error) { + levels := make([]*Level, 0) + selectors := make([][]string, 0) + m.root.lock.RLock() + for sel1, l1 := range m.root.children { + l1.lock.RLock() + for sel2, l2 := range l1.children { + levels = append(levels, l2) + selectors = append(selectors, []string{sel1, sel2}) + } + l1.lock.RUnlock() + } + m.root.lock.RUnlock() + + type workItem struct { + level *Level + dir string + selector []string + } + + n, errs := int32(0), int32(0) + + var wg sync.WaitGroup + wg.Add(NumWorkers) + work := make(chan workItem, NumWorkers*2) + for worker := 0; worker < NumWorkers; worker++ { + go func() { + defer wg.Done() + + for workItem := range work { + if err := workItem.level.toCheckpoint(workItem.dir, from, to, m); err != nil { + if err == ErrNoNewData { + continue + } + + log.Printf("error while checkpointing %#v: %s", workItem.selector, err.Error()) + atomic.AddInt32(&errs, 1) + } else { + atomic.AddInt32(&n, 1) + } + } + }() + } + + for i := 0; i < len(levels); i++ { + dir := path.Join(dir, path.Join(selectors[i]...)) + work <- workItem{ + level: levels[i], + dir: dir, + selector: selectors[i], + } + } + + close(work) + wg.Wait() + + if errs > 0 { + return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) + } + return int(n), nil +} + +func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFile, error) { + l.lock.RLock() + defer l.lock.RUnlock() + + retval := &CheckpointFile{ + From: from, + To: to, + Metrics: make(map[string]*CheckpointMetrics), + Children: make(map[string]*CheckpointFile), + } + + for metric, minfo := range m.Metrics { + b 
:= l.metrics[minfo.Offset] + if b == nil { + continue + } + + allArchived := true + b.iterFromTo(from, to, func(b *buffer) error { + if !b.archived { + allArchived = false + } + return nil + }) + + if allArchived { + continue + } + + data := make([]util.Float, (to-from)/b.frequency+1) + data, start, end, err := b.read(from, to, data) + if err != nil { + return nil, err + } + + for i := int((end - start) / b.frequency); i < len(data); i++ { + data[i] = util.NaN + } + + retval.Metrics[metric] = &CheckpointMetrics{ + Frequency: b.frequency, + Start: start, + Data: data, + } + } + + for name, child := range l.children { + val, err := child.toCheckpointFile(from, to, m) + if err != nil { + return nil, err + } + + if val != nil { + retval.Children[name] = val + } + } + + if len(retval.Children) == 0 && len(retval.Metrics) == 0 { + return nil, nil + } + + return retval, nil +} + +func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error { + cf, err := l.toCheckpointFile(from, to, m) + if err != nil { + return err + } + + if cf == nil { + return ErrNoNewData + } + + filepath := path.Join(dir, fmt.Sprintf("%d.json", from)) + f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) + if err != nil && os.IsNotExist(err) { + err = os.MkdirAll(dir, 0o755) + if err == nil { + f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY, 0o644) + } + } + if err != nil { + return err + } + defer f.Close() + + bw := bufio.NewWriter(f) + if err = json.NewEncoder(bw).Encode(cf); err != nil { + return err + } + + return bw.Flush() +} + +// Metrics stored at the lowest 2 levels are not loaded (root and cluster)! +// This function can only be called once and before the very first write or read. +// Different host's data is loaded to memory in parallel. 
+func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) { + var wg sync.WaitGroup + work := make(chan [2]string, NumWorkers) + n, errs := int32(0), int32(0) + + wg.Add(NumWorkers) + for worker := 0; worker < NumWorkers; worker++ { + go func() { + defer wg.Done() + for host := range work { + lvl := m.root.findLevelOrCreate(host[:], len(m.Metrics)) + nn, err := lvl.fromCheckpoint(filepath.Join(dir, host[0], host[1]), from, m) + if err != nil { + log.Fatalf("error while loading checkpoints: %s", err.Error()) + atomic.AddInt32(&errs, 1) + } + atomic.AddInt32(&n, int32(nn)) + } + }() + } + + i := 0 + clustersDir, err := os.ReadDir(dir) + for _, clusterDir := range clustersDir { + if !clusterDir.IsDir() { + err = errors.New("expected only directories at first level of checkpoints/ directory") + goto done + } + + hostsDir, e := os.ReadDir(filepath.Join(dir, clusterDir.Name())) + if e != nil { + err = e + goto done + } + + for _, hostDir := range hostsDir { + if !hostDir.IsDir() { + err = errors.New("expected only directories at second level of checkpoints/ directory") + goto done + } + + i++ + if i%NumWorkers == 0 && i > 100 { + // Forcing garbage collection runs here regulary during the loading of checkpoints + // will decrease the total heap size after loading everything back to memory is done. + // While loading data, the heap will grow fast, so the GC target size will double + // almost always. By forcing GCs here, we can keep it growing more slowly so that + // at the end, less memory is wasted. 
+ runtime.GC() + } + + work <- [2]string{clusterDir.Name(), hostDir.Name()} + } + } +done: + close(work) + wg.Wait() + + if err != nil { + return int(n), err + } + + if errs > 0 { + return int(n), fmt.Errorf("%d errors happend while creating checkpoints (%d successes)", errs, n) + } + return int(n), nil +} + +func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error { + for name, metric := range cf.Metrics { + n := len(metric.Data) + b := &buffer{ + frequency: metric.Frequency, + start: metric.Start, + data: metric.Data[0:n:n], // Space is wasted here :( + prev: nil, + next: nil, + archived: true, + } + b.close() + + minfo, ok := m.Metrics[name] + if !ok { + continue + // return errors.New("Unkown metric: " + name) + } + + prev := l.metrics[minfo.Offset] + if prev == nil { + l.metrics[minfo.Offset] = b + } else { + if prev.start > b.start { + return errors.New("wooops") + } + + b.prev = prev + prev.next = b + } + l.metrics[minfo.Offset] = b + } + + if len(cf.Children) > 0 && l.children == nil { + l.children = make(map[string]*Level) + } + + for sel, childCf := range cf.Children { + child, ok := l.children[sel] + if !ok { + child = &Level{ + metrics: make([]*buffer, len(m.Metrics)), + children: nil, + } + l.children[sel] = child + } + + if err := child.loadFile(childCf, m); err != nil { + return err + } + } + + return nil +} + +func (l *Level) fromCheckpoint(dir string, from int64, m *MemoryStore) (int, error) { + direntries, err := os.ReadDir(dir) + if err != nil { + if os.IsNotExist(err) { + return 0, nil + } + + return 0, err + } + + jsonFiles := make([]fs.DirEntry, 0) + filesLoaded := 0 + for _, e := range direntries { + if e.IsDir() { + child := &Level{ + metrics: make([]*buffer, len(m.Metrics)), + children: make(map[string]*Level), + } + + files, err := child.fromCheckpoint(path.Join(dir, e.Name()), from, m) + filesLoaded += files + if err != nil { + return filesLoaded, err + } + + l.children[e.Name()] = child + } else if strings.HasSuffix(e.Name(), 
".json") { + jsonFiles = append(jsonFiles, e) + } else { + return filesLoaded, errors.New("unexpected file: " + dir + "/" + e.Name()) + } + } + + files, err := findFiles(jsonFiles, from, true) + if err != nil { + return filesLoaded, err + } + + for _, filename := range files { + f, err := os.Open(path.Join(dir, filename)) + if err != nil { + return filesLoaded, err + } + defer f.Close() + + br := bufio.NewReader(f) + cf := &CheckpointFile{} + if err = json.NewDecoder(br).Decode(cf); err != nil { + return filesLoaded, err + } + + if cf.To != 0 && cf.To < from { + continue + } + + if err = l.loadFile(cf, m); err != nil { + return filesLoaded, err + } + + filesLoaded += 1 + } + + return filesLoaded, nil +} + +// This will probably get very slow over time! +// A solution could be some sort of an index file in which all other files +// and the timespan they contain is listed. +func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) { + nums := map[string]int64{} + for _, e := range direntries { + ts, err := strconv.ParseInt(strings.TrimSuffix(e.Name(), ".json"), 10, 64) + if err != nil { + return nil, err + } + nums[e.Name()] = ts + } + + sort.Slice(direntries, func(i, j int) bool { + a, b := direntries[i], direntries[j] + return nums[a.Name()] < nums[b.Name()] + }) + + filenames := make([]string, 0) + for i := 0; i < len(direntries); i++ { + e := direntries[i] + ts1 := nums[e.Name()] + + if findMoreRecentFiles && t <= ts1 || i == len(direntries)-1 { + filenames = append(filenames, e.Name()) + continue + } + + enext := direntries[i+1] + ts2 := nums[enext.Name()] + + if findMoreRecentFiles { + if ts1 < t && t < ts2 { + filenames = append(filenames, e.Name()) + } + } else { + if ts2 < t { + filenames = append(filenames, e.Name()) + } + } + } + + return filenames, nil +} diff --git a/internal/memorystore/debug.go b/internal/memorystore/debug.go index 59a978b..2743a45 100644 --- a/internal/memorystore/debug.go +++ 
b/internal/memorystore/debug.go @@ -41,7 +41,7 @@ func (l *Level) debugDump(m *MemoryStore, w *bufio.Writer, lvlname string, buf [ depth += 1 objitems := 0 for name, mc := range m.Metrics { - if b := l.metrics[mc.offset]; b != nil { + if b := l.metrics[mc.Offset]; b != nil { for i := 0; i < depth; i++ { buf = append(buf, '\t') } diff --git a/internal/memorystore/level.go b/internal/memorystore/level.go index 34a58a2..4bbfe7c 100644 --- a/internal/memorystore/level.go +++ b/internal/memorystore/level.go @@ -12,9 +12,9 @@ import ( // Can be both a leaf or a inner node. In this tree structue, inner nodes can // also hold data (in `metrics`). type Level struct { + children map[string]*Level + metrics []*buffer lock sync.RWMutex - metrics []*buffer // Every level can store metrics. - children map[string]*Level // Lower levels. } // Find the correct level for the given selector, creating it if @@ -126,7 +126,7 @@ func (l *Level) findLevel(selector []string) *Level { return lvl.findLevel(selector[1:]) } -func (l *Level) findBuffers(selector Selector, offset int, f func(b *buffer) error) error { +func (l *Level) findBuffers(selector util.Selector, offset int, f func(b *buffer) error) error { l.lock.RLock() defer l.lock.RUnlock() diff --git a/internal/memorystore/memorystore.go b/internal/memorystore/memorystore.go index 305ebdd..4868a85 100644 --- a/internal/memorystore/memorystore.go +++ b/internal/memorystore/memorystore.go @@ -1,9 +1,12 @@ package memorystore import ( + "context" "errors" "log" + "runtime" "sync" + "time" "github.com/ClusterCockpit/cc-metric-store/internal/config" "github.com/ClusterCockpit/cc-metric-store/internal/util" @@ -14,6 +17,16 @@ var ( msInstance *MemoryStore ) +var NumWorkers int = 4 + +func init() { + maxWorkers := 10 + NumWorkers = runtime.NumCPU()/2 + 1 + if NumWorkers > maxWorkers { + NumWorkers = maxWorkers + } +} + type Metric struct { Name string Value util.Float @@ -21,8 +34,8 @@ type Metric struct { } type MemoryStore struct { - root 
Level // root of the tree structure Metrics map[string]config.MetricConfig + root Level } // Create a new, initialized instance of a MemoryStore. @@ -61,6 +74,54 @@ func GetMemoryStore() *MemoryStore { return msInstance } +func Shutdown() { + ms := GetMemoryStore() + log.Printf("Writing to '%s'...\n", config.Keys.Checkpoints.RootDir) + files, err := ms.ToCheckpoint(config.Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix()) + if err != nil { + log.Printf("Writing checkpoint failed: %s\n", err.Error()) + } + log.Printf("Done! (%d files written)\n", files) +} + +func Retention(wg *sync.WaitGroup, ctx context.Context) { + ms := GetMemoryStore() + + go func() { + defer wg.Done() + d, err := time.ParseDuration(config.Keys.RetentionInMemory) + if err != nil { + log.Fatal(err) + } + if d <= 0 { + return + } + + ticks := func() <-chan time.Time { + d := d / 2 + if d <= 0 { + return nil + } + return time.NewTicker(d).C + }() + for { + select { + case <-ctx.Done(): + return + case <-ticks: + t := time.Now().Add(-d) + log.Printf("start freeing buffers (older than %s)...\n", t.Format(time.RFC3339)) + freed, err := ms.Free(nil, t.Unix()) + if err != nil { + log.Printf("freeing up buffers failed: %s\n", err.Error()) + } else { + log.Printf("done: %d buffers freed\n", freed) + } + } + } + }() +} + // Write all values in `metrics` to the level specified by `selector` for time `ts`. // Look at `findLevelOrCreate` for how selectors work. func (m *MemoryStore) Write(selector []string, ts int64, metrics []Metric) error { @@ -117,7 +178,7 @@ func (m *MemoryStore) WriteToLevel(l *Level, selector []string, ts int64, metric // If the level does not hold the metric itself, the data will be aggregated recursively from the children. // The second and third return value are the actual from/to for the data. Those can be different from // the range asked for if no data was available. 
-func (m *MemoryStore) Read(selector Selector, metric string, from, to int64) ([]util.Float, int64, int64, error) { +func (m *MemoryStore) Read(selector util.Selector, metric string, from, to int64) ([]util.Float, int64, int64, error) { if from > to { return nil, 0, 0, errors.New("invalid time range") } diff --git a/internal/memorystore/stats.go b/internal/memorystore/stats.go index 3240d02..5ddecfc 100644 --- a/internal/memorystore/stats.go +++ b/internal/memorystore/stats.go @@ -66,7 +66,7 @@ func (b *buffer) stats(from, to int64) (Stats, int64, int64, error) { // Returns statistics for the requested metric on the selected node/level. // Data is aggregated to the selected level the same way as in `MemoryStore.Read`. // If `Stats.Samples` is zero, the statistics should not be considered as valid. -func (m *MemoryStore) Stats(selector Selector, metric string, from, to int64) (*Stats, int64, int64, error) { +func (m *MemoryStore) Stats(selector util.Selector, metric string, from, to int64) (*Stats, int64, int64, error) { if from > to { return nil, 0, 0, errors.New("invalid time range") } diff --git a/internal/memorystore/selector.go b/internal/util/selector.go similarity index 97% rename from internal/memorystore/selector.go rename to internal/util/selector.go index 0b24300..27557ef 100644 --- a/internal/memorystore/selector.go +++ b/internal/util/selector.go @@ -1,4 +1,4 @@ -package memorystore +package util import ( "encoding/json" @@ -6,9 +6,9 @@ import ( ) type SelectorElement struct { - Any bool String string Group []string + Any bool } func (se *SelectorElement) UnmarshalJSON(input []byte) error { From 8c9761fe2a8f5924fcbf7adba0ac3ad766fb5d5c Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 6 May 2024 15:10:45 +0200 Subject: [PATCH 4/6] Cleanup Makefile. Add version flag. 
--- Makefile | 128 +++++++----------------------------- cmd/cc-metric-store/main.go | 17 ++++- 2 files changed, 38 insertions(+), 107 deletions(-) diff --git a/Makefile b/Makefile index 85f5151..17a90bf 100644 --- a/Makefile +++ b/Makefile @@ -1,113 +1,29 @@ +TARGET = ./cc-metric-store +VERSION = 1.3.0 +GIT_HASH := $(shell git rev-parse --short HEAD || echo 'development') +CURRENT_TIME = $(shell date +"%Y-%m-%d:T%H:%M:%S") +LD_FLAGS = '-s -X main.date=${CURRENT_TIME} -X main.version=${VERSION} -X main.commit=${GIT_HASH}' -APP = cc-metric-store -GOSRC_APP := cc-metric-store.go -GOSRC_FILES := api.go \ - memstore.go \ - archive.go \ - debug.go \ - float.go \ - lineprotocol.go \ - selector.go \ - stats.go +.PHONY: clean test tags $(TARGET) +.NOTPARALLEL: +$(TARGET): + $(info ===> BUILD cc-backend) + @go build -ldflags=${LD_FLAGS} ./cmd/cc-metric-store -BINDIR ?= bin - - -.PHONY: all -all: $(APP) - -$(APP): $(GOSRC) - go get - go build -o $(APP) $(GOSRC_APP) $(GOSRC_FILES) - -install: $(APP) - @WORKSPACE=$(PREFIX) - @if [ -z "$${WORKSPACE}" ]; then exit 1; fi - @mkdir --parents --verbose $${WORKSPACE}/usr/$(BINDIR) - @install -Dpm 755 $(APP) $${WORKSPACE}/usr/$(BINDIR)/$(APP) - @install -Dpm 600 config.json $${WORKSPACE}/etc/$(APP)/$(APP).json - -.PHONY: clean -.ONESHELL: clean: - rm -f $(APP) + $(info ===> CLEAN) + @go clean + @rm -f $(TARGET) -.PHONY: fmt -fmt: - go fmt $(GOSRC_APP) +test: + $(info ===> TESTING) + @go clean -testcache + @go build ./... + @go vet ./... + @go test ./... -# Examine Go source code and reports suspicious constructs -.PHONY: vet -vet: - go vet ./... - -# Run linter for the Go programming language. -# Using static analysis, it finds bugs and performance issues, offers simplifications, and enforces style rules -.PHONY: staticcheck -staticcheck: - go install honnef.co/go/tools/cmd/staticcheck@latest - $$(go env GOPATH)/bin/staticcheck ./... 
- -.ONESHELL: -.PHONY: RPM -RPM: scripts/cc-metric-store.spec - @WORKSPACE="$${PWD}" - @SPECFILE="$${WORKSPACE}/scripts/cc-metric-store.spec" - # Setup RPM build tree - @eval $$(rpm --eval "ARCH='%{_arch}' RPMDIR='%{_rpmdir}' SOURCEDIR='%{_sourcedir}' SPECDIR='%{_specdir}' SRPMDIR='%{_srcrpmdir}' BUILDDIR='%{_builddir}'") - @mkdir --parents --verbose "$${RPMDIR}" "$${SOURCEDIR}" "$${SPECDIR}" "$${SRPMDIR}" "$${BUILDDIR}" - # Create source tarball - @COMMITISH="HEAD" - @VERS=$$(git describe --tags $${COMMITISH}) - @VERS=$${VERS#v} - @VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g) - @if [ "$${VERS}" = "" ]; then VERS="0.0.1"; fi - @eval $$(rpmspec --query --queryformat "NAME='%{name}' VERSION='%{version}' RELEASE='%{release}' NVR='%{NVR}' NVRA='%{NVRA}'" --define="VERS $${VERS}" "$${SPECFILE}") - @PREFIX="$${NAME}-$${VERSION}" - @FORMAT="tar.gz" - @SRCFILE="$${SOURCEDIR}/$${PREFIX}.$${FORMAT}" - @git archive --verbose --format "$${FORMAT}" --prefix="$${PREFIX}/" --output="$${SRCFILE}" $${COMMITISH} - # Build RPM and SRPM - @rpmbuild -ba --define="VERS $${VERS}" --rmsource --clean "$${SPECFILE}" - # Report RPMs and SRPMs when in GitHub Workflow - @if [[ "$${GITHUB_ACTIONS}" == true ]]; then - @ RPMFILE="$${RPMDIR}/$${ARCH}/$${NVRA}.rpm" - @ SRPMFILE="$${SRPMDIR}/$${NVR}.src.rpm" - @ echo "RPM: $${RPMFILE}" - @ echo "SRPM: $${SRPMFILE}" - @ echo "::set-output name=SRPM::$${SRPMFILE}" - @ echo "::set-output name=RPM::$${RPMFILE}" - @fi - -.ONESHELL: -.PHONY: DEB -DEB: scripts/cc-metric-store.deb.control $(APP) - @BASEDIR=$${PWD} - @WORKSPACE=$${PWD}/.dpkgbuild - @DEBIANDIR=$${WORKSPACE}/debian - @DEBIANBINDIR=$${WORKSPACE}/DEBIAN - @mkdir --parents --verbose $$WORKSPACE $$DEBIANBINDIR - #@mkdir --parents --verbose $$DEBIANDIR - @CONTROLFILE="$${BASEDIR}/scripts/cc-metric-store.deb.control" - @COMMITISH="HEAD" - @VERS=$$(git describe --tags --abbrev=0 $${COMMITISH}) - @VERS=$${VERS#v} - @VERS=$$(echo $$VERS | sed -e s+'-'+'_'+g) - @if [ "$${VERS}" = "" ]; then VERS="0.0.1"; 
fi - @ARCH=$$(uname -m) - @ARCH=$$(echo $$ARCH | sed -e s+'_'+'-'+g) - @if [ "$${ARCH}" = "x86-64" ]; then ARCH=amd64; fi - @PREFIX="$${NAME}-$${VERSION}_$${ARCH}" - @SIZE_BYTES=$$(du -bcs --exclude=.dpkgbuild "$$WORKSPACE"/ | awk '{print $$1}' | head -1 | sed -e 's/^0\+//') - @SIZE="$$(awk -v size="$$SIZE_BYTES" 'BEGIN {print (size/1024)+1}' | awk '{print int($$0)}')" - #@sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANDIR}/control - @sed -e s+"{VERSION}"+"$$VERS"+g -e s+"{INSTALLED_SIZE}"+"$$SIZE"+g -e s+"{ARCH}"+"$$ARCH"+g $$CONTROLFILE > $${DEBIANBINDIR}/control - @make PREFIX=$${WORKSPACE} install - @DEB_FILE="cc-metric-store_$${VERS}_$${ARCH}.deb" - @dpkg-deb -b $${WORKSPACE} "$$DEB_FILE" - @rm -r "$${WORKSPACE}" - @if [ "$${GITHUB_ACTIONS}" = "true" ]; then - @ echo "::set-output name=DEB::$${DEB_FILE}" - @fi +tags: + $(info ===> TAGS) + @ctags -R diff --git a/cmd/cc-metric-store/main.go b/cmd/cc-metric-store/main.go index c7f3c03..523b7c6 100644 --- a/cmd/cc-metric-store/main.go +++ b/cmd/cc-metric-store/main.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "flag" + "fmt" "log" "os" "os/signal" @@ -19,13 +20,27 @@ import ( "github.com/google/gops/agent" ) +var ( + date string + commit string + version string +) + func main() { var configFile string - var enableGopsAgent bool + var enableGopsAgent, flagVersion bool flag.StringVar(&configFile, "config", "./config.json", "configuration file") flag.BoolVar(&enableGopsAgent, "gops", false, "Listen via github.com/google/gops/agent") + flag.BoolVar(&flagVersion, "version", false, "Show version information and exit") flag.Parse() + if flagVersion { + fmt.Printf("Version:\t%s\n", version) + fmt.Printf("Git hash:\t%s\n", commit) + fmt.Printf("Build time:\t%s\n", date) + os.Exit(0) + } + startupTime := time.Now() config.Init(configFile) memorystore.Init(config.Keys.Metrics) From 5ca66aef87f0ce1ff283baf82025a466e0b1dae4 Mon Sep 17 00:00:00 2001 From: Jan 
Eitzinger Date: Mon, 6 May 2024 15:40:22 +0200 Subject: [PATCH 5/6] Put token authentication in separate file --- internal/api/api.go | 48 ----------------------------- internal/api/authentication.go | 56 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 48 deletions(-) create mode 100644 internal/api/authentication.go diff --git a/internal/api/api.go b/internal/api/api.go index 73b513d..737f4d2 100644 --- a/internal/api/api.go +++ b/internal/api/api.go @@ -6,7 +6,6 @@ import ( "crypto/ed25519" "encoding/base64" "encoding/json" - "errors" "fmt" "io" "log" @@ -14,13 +13,11 @@ import ( "net/http" "strconv" "strings" - "sync" "time" "github.com/ClusterCockpit/cc-metric-store/internal/config" "github.com/ClusterCockpit/cc-metric-store/internal/memorystore" "github.com/ClusterCockpit/cc-metric-store/internal/util" - "github.com/golang-jwt/jwt/v4" "github.com/gorilla/mux" "github.com/influxdata/line-protocol/v2/lineprotocol" ) @@ -322,51 +319,6 @@ func handleDebug(rw http.ResponseWriter, r *http.Request) { } } -func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler { - cacheLock := sync.RWMutex{} - cache := map[string]*jwt.Token{} - - return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - authheader := r.Header.Get("Authorization") - if authheader == "" || !strings.HasPrefix(authheader, "Bearer ") { - http.Error(rw, "Use JWT Authentication", http.StatusUnauthorized) - return - } - - rawtoken := authheader[len("Bearer "):] - cacheLock.RLock() - token, ok := cache[rawtoken] - cacheLock.RUnlock() - if ok && token.Claims.Valid() == nil { - next.ServeHTTP(rw, r) - return - } - - // The actual token is ignored for now. - // In case expiration and so on are specified, the Parse function - // already returns an error for expired tokens. 
- var err error - token, err = jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) { - if t.Method != jwt.SigningMethodEdDSA { - return nil, errors.New("only Ed25519/EdDSA supported") - } - - return publicKey, nil - }) - if err != nil { - http.Error(rw, err.Error(), http.StatusUnauthorized) - return - } - - cacheLock.Lock() - cache[rawtoken] = token - cacheLock.Unlock() - - // Let request through... - next.ServeHTTP(rw, r) - }) -} - func StartApiServer(ctx context.Context, httpConfig *config.HttpConfig) error { r := mux.NewRouter() diff --git a/internal/api/authentication.go b/internal/api/authentication.go new file mode 100644 index 0000000..015810a --- /dev/null +++ b/internal/api/authentication.go @@ -0,0 +1,56 @@ +package api + +import ( + "crypto/ed25519" + "errors" + "net/http" + "strings" + "sync" + + "github.com/golang-jwt/jwt/v4" +) + +func authentication(next http.Handler, publicKey ed25519.PublicKey) http.Handler { + cacheLock := sync.RWMutex{} + cache := map[string]*jwt.Token{} + + return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + authheader := r.Header.Get("Authorization") + if authheader == "" || !strings.HasPrefix(authheader, "Bearer ") { + http.Error(rw, "Use JWT Authentication", http.StatusUnauthorized) + return + } + + rawtoken := authheader[len("Bearer "):] + cacheLock.RLock() + token, ok := cache[rawtoken] + cacheLock.RUnlock() + if ok && token.Claims.Valid() == nil { + next.ServeHTTP(rw, r) + return + } + + // The actual token is ignored for now. + // In case expiration and so on are specified, the Parse function + // already returns an error for expired tokens. 
+ var err error + token, err = jwt.Parse(rawtoken, func(t *jwt.Token) (interface{}, error) { + if t.Method != jwt.SigningMethodEdDSA { + return nil, errors.New("only Ed25519/EdDSA supported") + } + + return publicKey, nil + }) + if err != nil { + http.Error(rw, err.Error(), http.StatusUnauthorized) + return + } + + cacheLock.Lock() + cache[rawtoken] = token + cacheLock.Unlock() + + // Let request through... + next.ServeHTTP(rw, r) + }) +} From 52704cabb598ac94fe5bf37ac28b446f7fd6c6a6 Mon Sep 17 00:00:00 2001 From: Jan Eitzinger Date: Mon, 6 May 2024 17:51:50 +0200 Subject: [PATCH 6/6] Further cleanup --- .github/workflows/Release.yml | 314 ---------------------- {scripts => init}/cc-metric-store.service | 0 scripts/cc-metric-store.config | 17 -- scripts/cc-metric-store.deb.control | 12 - scripts/cc-metric-store.init | 141 ---------- scripts/cc-metric-store.spec | 62 ----- scripts/cc-metric-store.sysusers | 2 - scripts/send-fake-data.go | 105 -------- 8 files changed, 653 deletions(-) delete mode 100644 .github/workflows/Release.yml rename {scripts => init}/cc-metric-store.service (100%) delete mode 100644 scripts/cc-metric-store.config delete mode 100644 scripts/cc-metric-store.deb.control delete mode 100644 scripts/cc-metric-store.init delete mode 100644 scripts/cc-metric-store.spec delete mode 100644 scripts/cc-metric-store.sysusers delete mode 100644 scripts/send-fake-data.go diff --git a/.github/workflows/Release.yml b/.github/workflows/Release.yml deleted file mode 100644 index d3fbe9a..0000000 --- a/.github/workflows/Release.yml +++ /dev/null @@ -1,314 +0,0 @@ -# See: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions - -# Workflow name -name: Release - -# Run on tag push -on: - push: - tags: - - '**' - -jobs: - - # - # Build on AlmaLinux 8.5 using golang-1.18.2 - # - AlmaLinux-RPM-build: - runs-on: ubuntu-latest - # See: https://hub.docker.com/_/almalinux - container: almalinux:8.5 - # The job outputs link to the outputs of 
the 'rpmrename' step - # Only job outputs can be used in child jobs - outputs: - rpm : ${{steps.rpmrename.outputs.RPM}} - srpm : ${{steps.rpmrename.outputs.SRPM}} - steps: - - # Use dnf to install development packages - - name: Install development packages - run: | - dnf --assumeyes group install "Development Tools" "RPM Development Tools" - dnf --assumeyes install wget openssl-devel diffutils delve which - - # Checkout git repository and submodules - # fetch-depth must be 0 to use git describe - # See: https://github.com/marketplace/actions/checkout - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - fetch-depth: 0 - - # Use dnf to install build dependencies - - name: Install build dependencies - run: | - wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \ - http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \ - http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \ - http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm - rpm -i go*.rpm - - - name: RPM build MetricStore - id: rpmbuild - run: make RPM - - # AlmaLinux 8.5 is a derivate of RedHat Enterprise Linux 8 (UBI8), - # so the created RPM both contain the substring 'el8' in the RPM file names - # This step replaces the substring 'el8' to 'alma85'. It uses the move operation - # because it is unclear whether the default AlmaLinux 8.5 container contains the - # 'rename' command. This way we also get the new names for output. 
- - name: Rename RPMs (s/el8/alma85/) - id: rpmrename - run: | - OLD_RPM="${{steps.rpmbuild.outputs.RPM}}" - OLD_SRPM="${{steps.rpmbuild.outputs.SRPM}}" - NEW_RPM="${OLD_RPM/el8/alma85}" - NEW_SRPM=${OLD_SRPM/el8/alma85} - mv "${OLD_RPM}" "${NEW_RPM}" - mv "${OLD_SRPM}" "${NEW_SRPM}" - echo "::set-output name=SRPM::${NEW_SRPM}" - echo "::set-output name=RPM::${NEW_RPM}" - - # See: https://github.com/actions/upload-artifact - - name: Save RPM as artifact - uses: actions/upload-artifact@v2 - with: - name: cc-metric-store RPM for AlmaLinux 8.5 - path: ${{ steps.rpmrename.outputs.RPM }} - - name: Save SRPM as artifact - uses: actions/upload-artifact@v2 - with: - name: cc-metric-store SRPM for AlmaLinux 8.5 - path: ${{ steps.rpmrename.outputs.SRPM }} - - # - # Build on UBI 8 using golang-1.18.2 - # - UBI-8-RPM-build: - runs-on: ubuntu-latest - # See: https://catalog.redhat.com/software/containers/ubi8/ubi/5c359854d70cc534b3a3784e?container-tabs=gti - container: registry.access.redhat.com/ubi8/ubi:8.5-226.1645809065 - # The job outputs link to the outputs of the 'rpmbuild' step - outputs: - rpm : ${{steps.rpmbuild.outputs.RPM}} - srpm : ${{steps.rpmbuild.outputs.SRPM}} - steps: - - # Use dnf to install development packages - - name: Install development packages - run: dnf --assumeyes --disableplugin=subscription-manager install rpm-build go-srpm-macros rpm-build-libs rpm-libs gcc make python38 git wget openssl-devel diffutils delve which - - # Checkout git repository and submodules - # fetch-depth must be 0 to use git describe - # See: https://github.com/marketplace/actions/checkout - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - fetch-depth: 0 - - # Use dnf to install build dependencies - - name: Install build dependencies - run: | - wget -q http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \ - 
http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-bin-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm \ - http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/golang-src-1.18.2-1.module_el8.7.0+1173+5d37c0fd.noarch.rpm \ - http://mirror.centos.org/centos/8-stream/AppStream/x86_64/os/Packages/go-toolset-1.18.2-1.module_el8.7.0+1173+5d37c0fd.x86_64.rpm - rpm -i go*.rpm - - - name: RPM build MetricStore - id: rpmbuild - run: make RPM - - # See: https://github.com/actions/upload-artifact - - name: Save RPM as artifact - uses: actions/upload-artifact@v2 - with: - name: cc-metric-store RPM for UBI 8 - path: ${{ steps.rpmbuild.outputs.RPM }} - - name: Save SRPM as artifact - uses: actions/upload-artifact@v2 - with: - name: cc-metric-store SRPM for UBI 8 - path: ${{ steps.rpmbuild.outputs.SRPM }} - - # - # Build on Ubuntu 20.04 using official go 1.19.1 package - # - Ubuntu-focal-build: - runs-on: ubuntu-latest - container: ubuntu:20.04 - # The job outputs link to the outputs of the 'debrename' step - # Only job outputs can be used in child jobs - outputs: - deb : ${{steps.debrename.outputs.DEB}} - steps: - # Use apt to install development packages - - name: Install development packages - run: | - apt update && apt --assume-yes upgrade - apt --assume-yes install build-essential sed git wget bash - # Checkout git repository and submodules - # fetch-depth must be 0 to use git describe - # See: https://github.com/marketplace/actions/checkout - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - fetch-depth: 0 - # Use official golang package - - name: Install Golang - run: | - wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz - tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz - export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH - go version - - name: DEB build MetricStore - id: dpkg-build - run: | - export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH - make DEB - 
- name: Rename DEB (add '_ubuntu20.04') - id: debrename - run: | - OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' -f 2- | rev) - NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu20.04.deb" - mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}" - echo "::set-output name=DEB::${NEW_DEB_FILE}" - # See: https://github.com/actions/upload-artifact - - name: Save DEB as artifact - uses: actions/upload-artifact@v2 - with: - name: cc-metric-store DEB for Ubuntu 20.04 - path: ${{ steps.debrename.outputs.DEB }} - - # - # Build on Ubuntu 20.04 using official go 1.19.1 package - # - Ubuntu-jammy-build: - runs-on: ubuntu-latest - container: ubuntu:22.04 - # The job outputs link to the outputs of the 'debrename' step - # Only job outputs can be used in child jobs - outputs: - deb : ${{steps.debrename.outputs.DEB}} - steps: - # Use apt to install development packages - - name: Install development packages - run: | - apt update && apt --assume-yes upgrade - apt --assume-yes install build-essential sed git wget bash - # Checkout git repository and submodules - # fetch-depth must be 0 to use git describe - # See: https://github.com/marketplace/actions/checkout - - name: Checkout - uses: actions/checkout@v2 - with: - submodules: recursive - fetch-depth: 0 - # Use official golang package - - name: Install Golang - run: | - wget -q https://go.dev/dl/go1.19.1.linux-amd64.tar.gz - tar -C /usr/local -xzf go1.19.1.linux-amd64.tar.gz - export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH - go version - - name: DEB build MetricStore - id: dpkg-build - run: | - export PATH=/usr/local/go/bin:/usr/local/go/pkg/tool/linux_amd64:$PATH - make DEB - - name: Rename DEB (add '_ubuntu22.04') - id: debrename - run: | - OLD_DEB_NAME=$(echo "${{steps.dpkg-build.outputs.DEB}}" | rev | cut -d '.' 
-f 2- | rev) - NEW_DEB_FILE="${OLD_DEB_NAME}_ubuntu22.04.deb" - mv "${{steps.dpkg-build.outputs.DEB}}" "${NEW_DEB_FILE}" - echo "::set-output name=DEB::${NEW_DEB_FILE}" - # See: https://github.com/actions/upload-artifact - - name: Save DEB as artifact - uses: actions/upload-artifact@v2 - with: - name: cc-metric-store DEB for Ubuntu 22.04 - path: ${{ steps.debrename.outputs.DEB }} - - # - # Create release with fresh RPMs - # - Release: - runs-on: ubuntu-latest - # We need the RPMs, so add dependency - needs: [AlmaLinux-RPM-build, UBI-8-RPM-build, Ubuntu-focal-build, Ubuntu-jammy-build] - - steps: - # See: https://github.com/actions/download-artifact - - name: Download AlmaLinux 8.5 RPM - uses: actions/download-artifact@v2 - with: - name: cc-metric-store RPM for AlmaLinux 8.5 - - name: Download AlmaLinux 8.5 SRPM - uses: actions/download-artifact@v2 - with: - name: cc-metric-store SRPM for AlmaLinux 8.5 - - - name: Download UBI 8 RPM - uses: actions/download-artifact@v2 - with: - name: cc-metric-store RPM for UBI 8 - - name: Download UBI 8 SRPM - uses: actions/download-artifact@v2 - with: - name: cc-metric-store SRPM for UBI 8 - - - name: Download Ubuntu 20.04 DEB - uses: actions/download-artifact@v2 - with: - name: cc-metric-store DEB for Ubuntu 20.04 - - - name: Download Ubuntu 22.04 DEB - uses: actions/download-artifact@v2 - with: - name: cc-metric-store DEB for Ubuntu 22.04 - - # The download actions do not publish the name of the downloaded file, - # so we re-use the job outputs of the parent jobs. The files are all - # downloaded to the current folder. - # The gh-release action afterwards does not accept file lists but all - # files have to be listed at 'files'. 
The step creates one output per - # RPM package (2 per distro) - - name: Set RPM variables - id: files - run: | - ALMA_85_RPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.rpm}}") - ALMA_85_SRPM=$(basename "${{ needs.AlmaLinux-RPM-build.outputs.srpm}}") - UBI_8_RPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.rpm}}") - UBI_8_SRPM=$(basename "${{ needs.UBI-8-RPM-build.outputs.srpm}}") - U_2004_DEB=$(basename "${{ needs.Ubuntu-focal-build.outputs.deb}}") - U_2204_DEB=$(basename "${{ needs.Ubuntu-jammy-build.outputs.deb}}") - echo "ALMA_85_RPM::${ALMA_85_RPM}" - echo "ALMA_85_SRPM::${ALMA_85_SRPM}" - echo "UBI_8_RPM::${UBI_8_RPM}" - echo "UBI_8_SRPM::${UBI_8_SRPM}" - echo "U_2004_DEB::${U_2004_DEB}" - echo "U_2204_DEB::${U_2204_DEB}" - echo "::set-output name=ALMA_85_RPM::${ALMA_85_RPM}" - echo "::set-output name=ALMA_85_SRPM::${ALMA_85_SRPM}" - echo "::set-output name=UBI_8_RPM::${UBI_8_RPM}" - echo "::set-output name=UBI_8_SRPM::${UBI_8_SRPM}" - echo "::set-output name=U_2004_DEB::${U_2004_DEB}" - echo "::set-output name=U_2204_DEB::${U_2204_DEB}" - - # See: https://github.com/softprops/action-gh-release - - name: Release - uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') - with: - name: cc-metric-store-${{github.ref_name}} - files: | - ${{ steps.files.outputs.ALMA_85_RPM }} - ${{ steps.files.outputs.ALMA_85_SRPM }} - ${{ steps.files.outputs.UBI_8_RPM }} - ${{ steps.files.outputs.UBI_8_SRPM }} - ${{ steps.files.outputs.U_2004_DEB }} - ${{ steps.files.outputs.U_2204_DEB }} diff --git a/scripts/cc-metric-store.service b/init/cc-metric-store.service similarity index 100% rename from scripts/cc-metric-store.service rename to init/cc-metric-store.service diff --git a/scripts/cc-metric-store.config b/scripts/cc-metric-store.config deleted file mode 100644 index feb0740..0000000 --- a/scripts/cc-metric-store.config +++ /dev/null @@ -1,17 +0,0 @@ -CC_USER=clustercockpit - -CC_GROUP=clustercockpit - -CC_HOME=/tmp - -LOG_DIR=/var/log - 
-DATA_DIR=/var/run/cc-metric-store - -MAX_OPEN_FILES=10000 - -CONF_DIR=/etc/cc-metric-store - -CONF_FILE=/etc/cc-metric-store/cc-metric-store.json - -RESTART_ON_UPGRADE=true diff --git a/scripts/cc-metric-store.deb.control b/scripts/cc-metric-store.deb.control deleted file mode 100644 index 6978971..0000000 --- a/scripts/cc-metric-store.deb.control +++ /dev/null @@ -1,12 +0,0 @@ -Package: cc-metric-store -Version: {VERSION} -Installed-Size: {INSTALLED_SIZE} -Architecture: {ARCH} -Maintainer: thomas.gruber@fau.de -Depends: libc6 (>= 2.2.1) -Build-Depends: debhelper-compat (= 13), git, golang-go -Description: In-memory metric store daemon from the ClusterCockpit suite -Homepage: https://github.com/ClusterCockpit/cc-metric-store -Source: cc-metric-store -Rules-Requires-Root: no - diff --git a/scripts/cc-metric-store.init b/scripts/cc-metric-store.init deleted file mode 100644 index 80cab6f..0000000 --- a/scripts/cc-metric-store.init +++ /dev/null @@ -1,141 +0,0 @@ -#! /usr/bin/env bash - -# chkconfig: 2345 80 05 -# description: ClusterCockpit metric store -# processname: cc-metric-store -# config: /etc/default/cc-metric-store -# pidfile: /var/run/cc-metric-store.pid - -### BEGIN INIT INFO -# Provides: cc-metric-store -# Required-Start: $all -# Required-Stop: $remote_fs $syslog -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Short-Description: Start ClusterCockpit metric store at boot time -### END INIT INFO - - -PATH=/bin:/usr/bin:/sbin:/usr/sbin -NAME=cc-metric-store -DESC="ClusterCockpit metric store" -DEFAULT=/etc/default/${NAME}.json - -CC_USER=clustercockpit -CC_GROUP=clustercockpit -CONF_DIR=/etc/cc-metric-store -PID_FILE=/var/run/$NAME.pid -DAEMON=/usr/sbin/$NAME -CONF_FILE=${CONF_DIR}/cc-metric-store.json - -umask 0027 - -if [ ! -x $DAEMON ]; then - echo "Program not installed or not executable" - exit 5 -fi - -. /lib/lsb/init-functions - -if [ -r /etc/default/rcS ]; then - . 
/etc/default/rcS -fi - -# overwrite settings from default file -if [ -f "$DEFAULT" ]; then - . "$DEFAULT" -fi - -CC_OPTS="--config=${CONF_FILE}" - -function checkUser() { - if [ `id -u` -ne 0 ]; then - echo "You need root privileges to run this script" - exit 4 - fi -} - -case "$1" in - start) - checkUser - log_daemon_msg "Starting $DESC" - - pid=`pidofproc -p $PID_FILE $NAME` - if [ -n "$pid" ] ; then - log_begin_msg "Already running." - log_end_msg 0 - exit 0 - fi - - # Prepare environment - touch "$PID_FILE" && chown "$CC_USER":"$CC_GROUP" "$PID_FILE" - - if [ -n "$MAX_OPEN_FILES" ]; then - ulimit -n $MAX_OPEN_FILES - fi - - # Start Daemon - start-stop-daemon --start -b --chdir "$WORK_DIR" --user "$CC_USER" -c "$CC_USER" --pidfile "$PID_FILE" --exec $DAEMON -- $DAEMON_OPTS - return=$? - if [ $return -eq 0 ] - then - sleep 1 - - # check if pid file has been written to - if ! [[ -s $PID_FILE ]]; then - log_end_msg 1 - exit 1 - fi - - i=0 - timeout=10 - # Wait for the process to be properly started before exiting - until { cat "$PID_FILE" | xargs kill -0; } >/dev/null 2>&1 - do - sleep 1 - i=$(($i + 1)) - if [ $i -gt $timeout ]; then - log_end_msg 1 - exit 1 - fi - done - fi - log_end_msg $return - ;; - stop) - checkUser - log_daemon_msg "Stopping $DESC" - - if [ -f "$PID_FILE" ]; then - start-stop-daemon --stop --pidfile "$PID_FILE" \ - --user "$CC_USER" \ - --retry=TERM/20/KILL/5 >/dev/null - if [ $? -eq 1 ]; then - log_progress_msg "$DESC is not running but pid file exists, cleaning up" - elif [ $? -eq 3 ]; then - PID="`cat $PID_FILE`" - log_failure_msg "Failed to stop $DESC (pid $PID)" - exit 1 - fi - rm -f "$PID_FILE" - else - log_progress_msg "(not running)" - fi - log_end_msg 0 - ;; - status) - status_of_proc -p $PID_FILE $NAME $NAME && exit 0 || exit $? 
- ;; - restart|force-reload) - if [ -f "$PID_FILE" ]; then - $0 stop - sleep 1 - fi - $0 start - ;; - *) - log_success_msg "Usage: $0 {start|stop|restart|force-reload|status}" - exit 3 - ;; -esac - diff --git a/scripts/cc-metric-store.spec b/scripts/cc-metric-store.spec deleted file mode 100644 index ae88883..0000000 --- a/scripts/cc-metric-store.spec +++ /dev/null @@ -1,62 +0,0 @@ -Name: cc-metric-store -Version: %{VERS} -Release: 1%{?dist} -Summary: In-memory metric database from the ClusterCockpit suite - -License: MIT -Source0: %{name}-%{version}.tar.gz - -BuildRequires: go-toolset -BuildRequires: systemd-rpm-macros - -Provides: %{name} = %{version} - -%description -In-memory metric database from the ClusterCockpit suite - -%global debug_package %{nil} - -%prep -%autosetup - - -%build -make - - -%install -# Install cc-metric-store -make PREFIX=%{buildroot} install -# Integrate into system -install -Dpm 0644 scripts/%{name}.service %{buildroot}%{_unitdir}/%{name}.service -install -Dpm 0600 scripts/%{name}.config %{buildroot}%{_sysconfdir}/default/%{name} -install -Dpm 0644 scripts/%{name}.sysusers %{buildroot}%{_sysusersdir}/%{name}.conf - - -%check -# go test should be here... 
:) - -%pre -%sysusers_create_package scripts/%{name}.sysusers - -%post -%systemd_post %{name}.service - -%preun -%systemd_preun %{name}.service - -%files -# Binary -%attr(-,clustercockpit,clustercockpit) %{_bindir}/%{name} -# Config -%dir %{_sysconfdir}/%{name} -%attr(0600,clustercockpit,clustercockpit) %config(noreplace) %{_sysconfdir}/%{name}/%{name}.json -# Systemd -%{_unitdir}/%{name}.service -%{_sysconfdir}/default/%{name} -%{_sysusersdir}/%{name}.conf - -%changelog -* Mon Mar 07 2022 Thomas Gruber - 0.1 -- Initial metric store implementation - diff --git a/scripts/cc-metric-store.sysusers b/scripts/cc-metric-store.sysusers deleted file mode 100644 index 6ce3700..0000000 --- a/scripts/cc-metric-store.sysusers +++ /dev/null @@ -1,2 +0,0 @@ -#Type Name ID GECOS Home directory Shell -u clustercockpit - "User for ClusterCockpit" /run/cc-metric-collector /sbin/nologin diff --git a/scripts/send-fake-data.go b/scripts/send-fake-data.go deleted file mode 100644 index 2fdc479..0000000 --- a/scripts/send-fake-data.go +++ /dev/null @@ -1,105 +0,0 @@ -package main - -import ( - "bytes" - "fmt" - "io" - "log" - "math" - "math/rand" - "net/http" - "time" -) - -const token = "eyJ0eXAiOiJKV1QiLCJhbGciOiJFZERTQSJ9.eyJ1c2VyIjoiYWRtaW4iLCJyb2xlcyI6WyJST0xFX0FETUlOIiwiUk9MRV9BTkFMWVNUIiwiUk9MRV9VU0VSIl19.d-3_3FZTsadPjDEdsWrrQ7nS0edMAR4zjl-eK7rJU3HziNBfI9PDHDIpJVHTNN5E5SlLGLFXctWyKAkwhXL-Dw" -const ccmsurl = "http://localhost:8081/api/write" -const cluster = "fakedev" -const sockets = 2 -const cpus = 8 -const freq = 15 * time.Second - -var hosts = []string{"fake001", "fake002", "fake003", "fake004", "fake005"} -var metrics = []struct { - Name string - Type string - AvgValue float64 -}{ - {"flops_any", "cpu", 10.0}, - {"mem_bw", "socket", 50.0}, - {"ipc", "cpu", 1.25}, - {"cpu_load", "node", 4}, - {"mem_used", "node", 20}, -} - -var states = make([]float64, 0) - -func send(client *http.Client, t int64) { - msg := &bytes.Buffer{} - - i := 0 - for _, host := range hosts { - for _, 
metric := range metrics { - n := 1 - if metric.Type == "socket" { - n = sockets - } else if metric.Type == "cpu" { - n = cpus - } - - for j := 0; j < n; j++ { - fmt.Fprintf(msg, "%s,cluster=%s,host=%s,type=%s", metric.Name, cluster, host, metric.Type) - if metric.Type == "socket" { - fmt.Fprintf(msg, ",type-id=%d", j) - } else if metric.Type == "cpu" { - fmt.Fprintf(msg, ",type-id=%d", j) - } - - x := metric.AvgValue + math.Sin(states[i])*(metric.AvgValue/10.) - states[i] += 0.1 - fmt.Fprintf(msg, " value=%f ", x) - - fmt.Fprintf(msg, "%d\n", t) - i++ - } - } - } - - req, _ := http.NewRequest(http.MethodPost, ccmsurl, msg) - req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) - res, err := client.Do(req) - if err != nil { - log.Print(err) - return - } - if res.StatusCode != http.StatusOK { - body, _ := io.ReadAll(res.Body) - log.Printf("%s: %s", res.Status, string(body)) - } -} - -func main() { - for range hosts { - for _, m := range metrics { - n := 1 - if m.Type == "socket" { - n = sockets - } else if m.Type == "cpu" { - n = cpus - } - - for i := 0; i < n; i++ { - states = append(states, rand.Float64()*100) - } - } - } - - client := &http.Client{} - - i := 0 - for t := range time.Tick(freq) { - log.Printf("tick... (#%d)", i) - i++ - - send(client, t.Unix()) - } -}