mirror of
https://github.com/ClusterCockpit/cc-backend
synced 2026-02-28 13:27:30 +01:00
Compare commits
8 Commits
optimize-c
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a6e23dd52e | ||
|
|
8bacffbd3e | ||
|
|
248b923980 | ||
|
|
ac5ee1564a | ||
|
|
2b56e02a3e | ||
|
|
2b788f14ec | ||
|
|
5ee3bbdbf5 | ||
|
|
39c919bb0c |
16
go.mod
16
go.mod
@@ -9,8 +9,7 @@ tool (
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/99designs/gqlgen v0.17.86
|
github.com/99designs/gqlgen v0.17.86
|
||||||
github.com/ClusterCockpit/cc-lib/v2 v2.7.0
|
github.com/ClusterCockpit/cc-lib/v2 v2.6.0
|
||||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0
|
|
||||||
github.com/Masterminds/squirrel v1.5.4
|
github.com/Masterminds/squirrel v1.5.4
|
||||||
github.com/aws/aws-sdk-go-v2 v1.41.1
|
github.com/aws/aws-sdk-go-v2 v1.41.1
|
||||||
github.com/aws/aws-sdk-go-v2/config v1.32.8
|
github.com/aws/aws-sdk-go-v2/config v1.32.8
|
||||||
@@ -26,8 +25,10 @@ require (
|
|||||||
github.com/golang-migrate/migrate/v4 v4.19.1
|
github.com/golang-migrate/migrate/v4 v4.19.1
|
||||||
github.com/google/gops v0.3.29
|
github.com/google/gops v0.3.29
|
||||||
github.com/gorilla/sessions v1.4.0
|
github.com/gorilla/sessions v1.4.0
|
||||||
|
github.com/influxdata/line-protocol/v2 v2.2.1
|
||||||
github.com/jmoiron/sqlx v1.4.0
|
github.com/jmoiron/sqlx v1.4.0
|
||||||
github.com/joho/godotenv v1.5.1
|
github.com/joho/godotenv v1.5.1
|
||||||
|
github.com/linkedin/goavro/v2 v2.15.0
|
||||||
github.com/mattn/go-sqlite3 v1.14.34
|
github.com/mattn/go-sqlite3 v1.14.34
|
||||||
github.com/parquet-go/parquet-go v0.27.0
|
github.com/parquet-go/parquet-go v0.27.0
|
||||||
github.com/qustavo/sqlhooks/v2 v2.1.0
|
github.com/qustavo/sqlhooks/v2 v2.1.0
|
||||||
@@ -79,6 +80,7 @@ require (
|
|||||||
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
|
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
|
||||||
github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
|
github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
|
||||||
github.com/goccy/go-yaml v1.19.2 // indirect
|
github.com/goccy/go-yaml v1.19.2 // indirect
|
||||||
|
github.com/golang/snappy v1.0.0 // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
github.com/gorilla/securecookie v1.1.2 // indirect
|
github.com/gorilla/securecookie v1.1.2 // indirect
|
||||||
github.com/gorilla/websocket v1.5.3 // indirect
|
github.com/gorilla/websocket v1.5.3 // indirect
|
||||||
@@ -90,10 +92,10 @@ require (
|
|||||||
github.com/kr/pretty v0.3.1 // indirect
|
github.com/kr/pretty v0.3.1 // indirect
|
||||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
|
||||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
|
||||||
github.com/nats-io/nats.go v1.49.0 // indirect
|
github.com/nats-io/nats.go v1.48.0 // indirect
|
||||||
github.com/nats-io/nkeys v0.4.15 // indirect
|
github.com/nats-io/nkeys v0.4.15 // indirect
|
||||||
github.com/nats-io/nuid v1.0.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
github.com/oapi-codegen/runtime v1.2.0 // indirect
|
github.com/oapi-codegen/runtime v1.1.2 // indirect
|
||||||
github.com/parquet-go/bitpack v1.0.0 // indirect
|
github.com/parquet-go/bitpack v1.0.0 // indirect
|
||||||
github.com/parquet-go/jsonlite v1.4.0 // indirect
|
github.com/parquet-go/jsonlite v1.4.0 // indirect
|
||||||
github.com/pierrec/lz4/v4 v4.1.25 // indirect
|
github.com/pierrec/lz4/v4 v4.1.25 // indirect
|
||||||
@@ -102,7 +104,7 @@ require (
|
|||||||
github.com/rogpeppe/go-internal v1.10.0 // indirect
|
github.com/rogpeppe/go-internal v1.10.0 // indirect
|
||||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||||
github.com/sosodev/duration v1.3.1 // indirect
|
github.com/sosodev/duration v1.3.1 // indirect
|
||||||
github.com/stmcginnis/gofish v0.21.3 // indirect
|
github.com/stmcginnis/gofish v0.21.1 // indirect
|
||||||
github.com/stretchr/objx v0.5.2 // indirect
|
github.com/stretchr/objx v0.5.2 // indirect
|
||||||
github.com/swaggo/files v1.0.1 // indirect
|
github.com/swaggo/files v1.0.1 // indirect
|
||||||
github.com/twpayne/go-geom v1.6.1 // indirect
|
github.com/twpayne/go-geom v1.6.1 // indirect
|
||||||
@@ -111,9 +113,9 @@ require (
|
|||||||
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
|
||||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||||
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
go.yaml.in/yaml/v3 v3.0.4 // indirect
|
||||||
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa // indirect
|
golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a // indirect
|
||||||
golang.org/x/mod v0.33.0 // indirect
|
golang.org/x/mod v0.33.0 // indirect
|
||||||
golang.org/x/net v0.51.0 // indirect
|
golang.org/x/net v0.50.0 // indirect
|
||||||
golang.org/x/sync v0.19.0 // indirect
|
golang.org/x/sync v0.19.0 // indirect
|
||||||
golang.org/x/sys v0.41.0 // indirect
|
golang.org/x/sys v0.41.0 // indirect
|
||||||
golang.org/x/text v0.34.0 // indirect
|
golang.org/x/text v0.34.0 // indirect
|
||||||
|
|||||||
54
go.sum
54
go.sum
@@ -4,10 +4,10 @@ github.com/99designs/gqlgen v0.17.86 h1:C8N3UTa5heXX6twl+b0AJyGkTwYL6dNmFrgZNLRc
|
|||||||
github.com/99designs/gqlgen v0.17.86/go.mod h1:KTrPl+vHA1IUzNlh4EYkl7+tcErL3MgKnhHrBcV74Fw=
|
github.com/99designs/gqlgen v0.17.86/go.mod h1:KTrPl+vHA1IUzNlh4EYkl7+tcErL3MgKnhHrBcV74Fw=
|
||||||
github.com/Azure/go-ntlmssp v0.1.0 h1:DjFo6YtWzNqNvQdrwEyr/e4nhU3vRiwenz5QX7sFz+A=
|
github.com/Azure/go-ntlmssp v0.1.0 h1:DjFo6YtWzNqNvQdrwEyr/e4nhU3vRiwenz5QX7sFz+A=
|
||||||
github.com/Azure/go-ntlmssp v0.1.0/go.mod h1:NYqdhxd/8aAct/s4qSYZEerdPuH1liG2/X9DiVTbhpk=
|
github.com/Azure/go-ntlmssp v0.1.0/go.mod h1:NYqdhxd/8aAct/s4qSYZEerdPuH1liG2/X9DiVTbhpk=
|
||||||
github.com/ClusterCockpit/cc-lib/v2 v2.7.0 h1:EMTShk6rMTR1wlfmQ8SVCawH1OdltUbD3kVQmaW+5pE=
|
github.com/ClusterCockpit/cc-lib/v2 v2.5.1 h1:s6M9tyPDty+4zTdQGJYKpGJM9Nz7N6ITMdjPvNSLX5g=
|
||||||
github.com/ClusterCockpit/cc-lib/v2 v2.7.0/go.mod h1:0Etx8WMs0lYZ4tiOQizY18CQop+2i3WROvU9rMUxHA4=
|
github.com/ClusterCockpit/cc-lib/v2 v2.5.1/go.mod h1:DZ8OIHPUZJpWqErLITt0B8P6/Q7CBW2IQSQ5YiFFaG0=
|
||||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q=
|
github.com/ClusterCockpit/cc-lib/v2 v2.6.0 h1:Q7zvRAVhfYA9PDB18pfY9A/6Ws4oWpnv8+P9MBRUDzg=
|
||||||
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY=
|
github.com/ClusterCockpit/cc-lib/v2 v2.6.0/go.mod h1:DZ8OIHPUZJpWqErLITt0B8P6/Q7CBW2IQSQ5YiFFaG0=
|
||||||
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
|
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
|
||||||
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
|
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
|
||||||
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
|
||||||
@@ -95,6 +95,8 @@ github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7c
|
|||||||
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
|
||||||
github.com/expr-lang/expr v1.17.8 h1:W1loDTT+0PQf5YteHSTpju2qfUfNoBt4yw9+wOEU9VM=
|
github.com/expr-lang/expr v1.17.8 h1:W1loDTT+0PQf5YteHSTpju2qfUfNoBt4yw9+wOEU9VM=
|
||||||
github.com/expr-lang/expr v1.17.8/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
github.com/expr-lang/expr v1.17.8/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
|
||||||
|
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||||
|
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
|
||||||
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
|
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
|
||||||
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
|
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
|
||||||
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
|
||||||
@@ -149,6 +151,9 @@ github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63Y
|
|||||||
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
|
||||||
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
|
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
|
||||||
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
|
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
|
||||||
|
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||||
|
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
|
||||||
|
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
|
||||||
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
@@ -179,8 +184,13 @@ github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjw
|
|||||||
github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
|
github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
|
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
|
||||||
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
|
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
|
||||||
|
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
|
||||||
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
|
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
|
||||||
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
|
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
|
||||||
|
github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
|
||||||
|
github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
|
||||||
|
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
|
||||||
|
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
|
||||||
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
|
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
|
||||||
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
|
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
|
||||||
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
|
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
|
||||||
@@ -202,8 +212,11 @@ github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7X
|
|||||||
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
|
||||||
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
|
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
|
||||||
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
|
||||||
|
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
|
||||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||||
|
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||||
|
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
|
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
|
||||||
@@ -213,6 +226,8 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm
|
|||||||
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
|
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
|
||||||
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||||
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||||
|
github.com/linkedin/goavro/v2 v2.15.0 h1:pDj1UrjUOO62iXhgBiE7jQkpNIc5/tA5eZsgolMjgVI=
|
||||||
|
github.com/linkedin/goavro/v2 v2.15.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
|
||||||
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
|
||||||
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||||
github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk=
|
github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk=
|
||||||
@@ -225,14 +240,15 @@ github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g=
|
|||||||
github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
|
github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
|
||||||
github.com/nats-io/nats-server/v2 v2.12.3 h1:KRv+1n7lddMVgkJPQer+pt36TcO0ENxjilBmeWdjcHs=
|
github.com/nats-io/nats-server/v2 v2.12.3 h1:KRv+1n7lddMVgkJPQer+pt36TcO0ENxjilBmeWdjcHs=
|
||||||
github.com/nats-io/nats-server/v2 v2.12.3/go.mod h1:MQXjG9WjyXKz9koWzUc3jYUMKD8x3CLmTNy91IQQz3Y=
|
github.com/nats-io/nats-server/v2 v2.12.3/go.mod h1:MQXjG9WjyXKz9koWzUc3jYUMKD8x3CLmTNy91IQQz3Y=
|
||||||
github.com/nats-io/nats.go v1.49.0 h1:yh/WvY59gXqYpgl33ZI+XoVPKyut/IcEaqtsiuTJpoE=
|
github.com/nats-io/nats.go v1.48.0 h1:pSFyXApG+yWU/TgbKCjmm5K4wrHu86231/w84qRVR+U=
|
||||||
github.com/nats-io/nats.go v1.49.0/go.mod h1:fDCn3mN5cY8HooHwE2ukiLb4p4G4ImmzvXyJt+tGwdw=
|
github.com/nats-io/nats.go v1.48.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
|
||||||
github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4=
|
github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4=
|
||||||
github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs=
|
github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs=
|
||||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||||
github.com/oapi-codegen/runtime v1.2.0 h1:RvKc1CVS1QeKSNzO97FBQbSMZyQ8s6rZd+LpmzwHMP4=
|
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
|
||||||
github.com/oapi-codegen/runtime v1.2.0/go.mod h1:Y7ZhmmlE8ikZOmuHRRndiIm7nf3xcVv+YMweKgG1DT0=
|
github.com/oapi-codegen/runtime v1.1.2 h1:P2+CubHq8fO4Q6fV1tqDBZHCwpVpvPg7oKiYzQgXIyI=
|
||||||
|
github.com/oapi-codegen/runtime v1.1.2/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
|
||||||
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
|
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
|
||||||
github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA=
|
github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA=
|
||||||
github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs=
|
github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs=
|
||||||
@@ -252,8 +268,8 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
|
|||||||
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||||
github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4=
|
github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4=
|
||||||
github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw=
|
github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw=
|
||||||
github.com/prometheus/procfs v0.20.0 h1:AA7aCvjxwAquZAlonN7888f2u4IN8WVeFgBi4k82M4Q=
|
github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws=
|
||||||
github.com/prometheus/procfs v0.20.0/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
|
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
|
||||||
github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0=
|
github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0=
|
||||||
github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU=
|
github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU=
|
||||||
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
|
||||||
@@ -270,14 +286,17 @@ github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NF
|
|||||||
github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
|
github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
|
||||||
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
|
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
|
||||||
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
|
||||||
github.com/stmcginnis/gofish v0.21.3 h1:EBLCHfORnbx7MPw7lplOOVe9QAD1T3XRVz6+a1Z4z5Q=
|
github.com/stmcginnis/gofish v0.21.1 h1:sutDvBhmLh4RDOZ1DN8GUyYRu7f1ggvKMMnSaiqhwn4=
|
||||||
github.com/stmcginnis/gofish v0.21.3/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
|
github.com/stmcginnis/gofish v0.21.1/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
|
||||||
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
|
||||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||||
|
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||||
|
github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
|
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
|
||||||
@@ -309,8 +328,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
|
|||||||
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
|
||||||
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
|
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
|
||||||
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
|
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
|
||||||
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa h1:Zt3DZoOFFYkKhDT3v7Lm9FDMEV06GpzjG2jrqW+QTE0=
|
golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a h1:ovFr6Z0MNmU7nH8VaX5xqw+05ST2uO1exVfZPVqRC5o=
|
||||||
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA=
|
golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA=
|
||||||
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
|
||||||
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
|
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
|
||||||
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
|
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
|
||||||
@@ -318,8 +337,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
|
|||||||
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
|
||||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
|
||||||
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
|
golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
|
||||||
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
|
golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=
|
||||||
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
|
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
|
||||||
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
|
||||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
@@ -351,13 +370,16 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
|
|||||||
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
|
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
|
||||||
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
|
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
|
||||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
|
||||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
|
||||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
|
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -17,7 +18,7 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
|
"github.com/influxdata/line-protocol/v2/lineprotocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
// handleFree godoc
|
// handleFree godoc
|
||||||
@@ -89,17 +90,16 @@ func freeMetrics(rw http.ResponseWriter, r *http.Request) {
|
|||||||
// @security ApiKeyAuth
|
// @security ApiKeyAuth
|
||||||
// @router /write/ [post]
|
// @router /write/ [post]
|
||||||
func writeMetrics(rw http.ResponseWriter, r *http.Request) {
|
func writeMetrics(rw http.ResponseWriter, r *http.Request) {
|
||||||
|
bytes, err := io.ReadAll(r.Body)
|
||||||
rw.Header().Add("Content-Type", "application/json")
|
rw.Header().Add("Content-Type", "application/json")
|
||||||
|
if err != nil {
|
||||||
|
handleError(err, http.StatusInternalServerError, rw)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Extract the "cluster" query parameter without allocating a url.Values map.
|
|
||||||
cluster := queryParam(r.URL.RawQuery, "cluster")
|
|
||||||
|
|
||||||
// Stream directly from the request body instead of copying it into a
|
|
||||||
// temporary buffer via io.ReadAll. The line-protocol decoder supports
|
|
||||||
// io.Reader natively, so this avoids the largest heap allocation.
|
|
||||||
ms := metricstore.GetMemoryStore()
|
ms := metricstore.GetMemoryStore()
|
||||||
dec := lineprotocol.NewDecoder(r.Body)
|
dec := lineprotocol.NewDecoderWithBytes(bytes)
|
||||||
if err := metricstore.DecodeLine(dec, ms, cluster); err != nil {
|
if err := metricstore.DecodeLine(dec, ms, r.URL.Query().Get("cluster")); err != nil {
|
||||||
cclog.Errorf("/api/write error: %s", err.Error())
|
cclog.Errorf("/api/write error: %s", err.Error())
|
||||||
handleError(err, http.StatusBadRequest, rw)
|
handleError(err, http.StatusBadRequest, rw)
|
||||||
return
|
return
|
||||||
@@ -107,20 +107,6 @@ func writeMetrics(rw http.ResponseWriter, r *http.Request) {
|
|||||||
rw.WriteHeader(http.StatusOK)
|
rw.WriteHeader(http.StatusOK)
|
||||||
}
|
}
|
||||||
|
|
||||||
// queryParam scans a raw URL query string ("a=1&b=2") and returns the value
// of the first pair whose name equals key. Nothing is percent-decoded: both
// the name comparison and the returned value use the raw bytes. A pair
// without "=" has an empty value. If no pair matches, "" is returned.
// No url.Values map is built.
func queryParam(raw, key string) string {
	rest := raw
	for len(rest) > 0 {
		// Split off the next "&"-delimited pair.
		pair := rest
		if amp := strings.IndexByte(rest, '&'); amp >= 0 {
			pair, rest = rest[:amp], rest[amp+1:]
		} else {
			rest = ""
		}

		// Split the pair at the first "=".
		name, value := pair, ""
		if eq := strings.IndexByte(pair, '='); eq >= 0 {
			name, value = pair[:eq], pair[eq+1:]
		}

		if name == key {
			return value
		}
	}
	return ""
}
|
|
||||||
|
|
||||||
// handleDebug godoc
|
// handleDebug godoc
|
||||||
// @summary Debug endpoint
|
// @summary Debug endpoint
|
||||||
// @tags debug
|
// @tags debug
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import (
|
|||||||
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/receivers"
|
"github.com/ClusterCockpit/cc-lib/v2/receivers"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
influx "github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
|
influx "github.com/influxdata/line-protocol/v2/lineprotocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NatsAPI provides NATS subscription-based handlers for Job and Node operations.
|
// NatsAPI provides NATS subscription-based handlers for Job and Node operations.
|
||||||
|
|||||||
@@ -302,7 +302,7 @@ func (api *RestAPI) runTagger(rw http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
rw.Header().Set("Content-Type", "text/plain")
|
rw.Header().Set("Content-Type", "text/plain")
|
||||||
rw.WriteHeader(http.StatusOK)
|
rw.WriteHeader(http.StatusOK)
|
||||||
if _, err := rw.Write(fmt.Appendf(nil, "Tagger %s started", name)); err != nil {
|
if _, err := rw.Write([]byte(fmt.Sprintf("Tagger %s started", name))); err != nil {
|
||||||
cclog.Errorf("Failed to write response: %v", err)
|
cclog.Errorf("Failed to write response: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -263,7 +263,7 @@ func GetAuthInstance() *Authentication {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// handleUserSync syncs or updates a user in the database based on configuration.
|
// handleUserSync syncs or updates a user in the database based on configuration.
|
||||||
// This is used for LDAP, JWT and OIDC authentications when syncUserOnLogin or updateUserOnLogin is enabled.
|
// This is used for both JWT and OIDC authentication when syncUserOnLogin or updateUserOnLogin is enabled.
|
||||||
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
|
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
|
||||||
r := repository.GetUserRepository()
|
r := repository.GetUserRepository()
|
||||||
dbUser, err := r.GetUser(user.Username)
|
dbUser, err := r.GetUser(user.Username)
|
||||||
|
|||||||
@@ -501,7 +501,9 @@ func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
|||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
for range numWorkers {
|
for range numWorkers {
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
for jobPath := range jobPaths {
|
for jobPath := range jobPaths {
|
||||||
job, err := loadJobMeta(filepath.Join(jobPath, "meta.json"))
|
job, err := loadJobMeta(filepath.Join(jobPath, "meta.json"))
|
||||||
if err != nil && !errors.Is(err, &jsonschema.ValidationError{}) {
|
if err != nil && !errors.Is(err, &jsonschema.ValidationError{}) {
|
||||||
@@ -527,7 +529,7 @@ func (fsa *FsArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
|||||||
ch <- JobContainer{Meta: job, Data: nil}
|
ch <- JobContainer{Meta: job, Data: nil}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
clustersDir, err := os.ReadDir(fsa.path)
|
clustersDir, err := os.ReadDir(fsa.path)
|
||||||
|
|||||||
@@ -821,7 +821,9 @@ func (s3a *S3Archive) Iter(loadMetricData bool) <-chan JobContainer {
|
|||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
for range numWorkers {
|
for range numWorkers {
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
for metaKey := range metaKeys {
|
for metaKey := range metaKeys {
|
||||||
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
result, err := s3a.client.GetObject(ctx, &s3.GetObjectInput{
|
||||||
Bucket: aws.String(s3a.bucket),
|
Bucket: aws.String(s3a.bucket),
|
||||||
@@ -857,7 +859,7 @@ func (s3a *S3Archive) Iter(loadMetricData bool) <-chan JobContainer {
|
|||||||
ch <- JobContainer{Meta: job, Data: nil}
|
ch <- JobContainer{Meta: job, Data: nil}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, cluster := range s3a.clusters {
|
for _, cluster := range s3a.clusters {
|
||||||
|
|||||||
@@ -576,7 +576,9 @@ func (sa *SqliteArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
|||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
for range numWorkers {
|
for range numWorkers {
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
for row := range jobRows {
|
for row := range jobRows {
|
||||||
job, err := DecodeJobMeta(bytes.NewReader(row.metaBlob))
|
job, err := DecodeJobMeta(bytes.NewReader(row.metaBlob))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -615,7 +617,7 @@ func (sa *SqliteArchive) Iter(loadMetricData bool) <-chan JobContainer {
|
|||||||
ch <- JobContainer{Meta: job, Data: nil}
|
ch <- JobContainer{Meta: job, Data: nil}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
|||||||
@@ -6,9 +6,12 @@
|
|||||||
package metricstore
|
package metricstore
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"archive/zip"
|
||||||
|
"bufio"
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -44,9 +47,11 @@ func CleanUp(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// cleanUpWorker takes simple values to configure what it does
|
// runWorker takes simple values to configure what it does
|
||||||
func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {
|
func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mode string, cleanupDir string, delete bool) {
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
d, err := time.ParseDuration(interval)
|
d, err := time.ParseDuration(interval)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -72,38 +77,29 @@ func cleanUpWorker(wg *sync.WaitGroup, ctx context.Context, interval string, mod
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("[METRICSTORE]> %s failed: %s", mode, err.Error())
|
cclog.Errorf("[METRICSTORE]> %s failed: %s", mode, err.Error())
|
||||||
} else {
|
} else {
|
||||||
if delete {
|
if delete && cleanupDir == "" {
|
||||||
cclog.Infof("[METRICSTORE]> done: %d checkpoints deleted", n)
|
cclog.Infof("[METRICSTORE]> done: %d checkpoints deleted", n)
|
||||||
} else {
|
} else {
|
||||||
cclog.Infof("[METRICSTORE]> done: %d checkpoint files archived to parquet", n)
|
cclog.Infof("[METRICSTORE]> done: %d files zipped and moved to archive", n)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
var ErrNoNewArchiveData error = errors.New("all data already archived")
|
var ErrNoNewArchiveData error = errors.New("all data already archived")
|
||||||
|
|
||||||
// CleanupCheckpoints deletes or archives all checkpoint files older than `from`.
|
// Delete or ZIP all checkpoint files older than `from` together and write them to the `cleanupDir`,
|
||||||
// When archiving, consolidates all hosts per cluster into a single Parquet file.
|
// deleting/moving them from the `checkpointsDir`.
|
||||||
func CleanupCheckpoints(checkpointsDir, cleanupDir string, from int64, deleteInstead bool) (int, error) {
|
func CleanupCheckpoints(checkpointsDir, cleanupDir string, from int64, deleteInstead bool) (int, error) {
|
||||||
if deleteInstead {
|
|
||||||
return deleteCheckpoints(checkpointsDir, from)
|
|
||||||
}
|
|
||||||
|
|
||||||
return archiveCheckpoints(checkpointsDir, cleanupDir, from)
|
|
||||||
}
|
|
||||||
|
|
||||||
// deleteCheckpoints removes checkpoint files older than `from` across all clusters/hosts.
|
|
||||||
func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
|
||||||
entries1, err := os.ReadDir(checkpointsDir)
|
entries1, err := os.ReadDir(checkpointsDir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
type workItem struct {
|
type workItem struct {
|
||||||
dir string
|
cdir, adir string
|
||||||
cluster, host string
|
cluster, host string
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -115,29 +111,13 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
|||||||
for worker := 0; worker < Keys.NumWorkers; worker++ {
|
for worker := 0; worker < Keys.NumWorkers; worker++ {
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
for item := range work {
|
for workItem := range work {
|
||||||
entries, err := os.ReadDir(item.dir)
|
m, err := cleanupCheckpoints(workItem.cdir, workItem.adir, from, deleteInstead)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("error reading %s/%s: %s", item.cluster, item.host, err.Error())
|
cclog.Errorf("error while archiving %s/%s: %s", workItem.cluster, workItem.host, err.Error())
|
||||||
atomic.AddInt32(&errs, 1)
|
atomic.AddInt32(&errs, 1)
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
files, err := findFiles(entries, from, false)
|
|
||||||
if err != nil {
|
|
||||||
cclog.Errorf("error finding files in %s/%s: %s", item.cluster, item.host, err.Error())
|
|
||||||
atomic.AddInt32(&errs, 1)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, checkpoint := range files {
|
|
||||||
if err := os.Remove(filepath.Join(item.dir, checkpoint)); err != nil {
|
|
||||||
cclog.Errorf("error deleting %s/%s/%s: %s", item.cluster, item.host, checkpoint, err.Error())
|
|
||||||
atomic.AddInt32(&errs, 1)
|
|
||||||
} else {
|
|
||||||
atomic.AddInt32(&n, 1)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
atomic.AddInt32(&n, int32(m))
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
@@ -146,14 +126,14 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
|||||||
entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
|
entries2, e := os.ReadDir(filepath.Join(checkpointsDir, de1.Name()))
|
||||||
if e != nil {
|
if e != nil {
|
||||||
err = e
|
err = e
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, de2 := range entries2 {
|
for _, de2 := range entries2 {
|
||||||
|
cdir := filepath.Join(checkpointsDir, de1.Name(), de2.Name())
|
||||||
|
adir := filepath.Join(cleanupDir, de1.Name(), de2.Name())
|
||||||
work <- workItem{
|
work <- workItem{
|
||||||
dir: filepath.Join(checkpointsDir, de1.Name(), de2.Name()),
|
adir: adir, cdir: cdir,
|
||||||
cluster: de1.Name(),
|
cluster: de1.Name(), host: de2.Name(),
|
||||||
host: de2.Name(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -164,118 +144,85 @@ func deleteCheckpoints(checkpointsDir string, from int64) (int, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return int(n), err
|
return int(n), err
|
||||||
}
|
}
|
||||||
|
|
||||||
if errs > 0 {
|
if errs > 0 {
|
||||||
return int(n), fmt.Errorf("%d errors happened while deleting (%d successes)", errs, n)
|
return int(n), fmt.Errorf("%d errors happened while archiving (%d successes)", errs, n)
|
||||||
}
|
}
|
||||||
return int(n), nil
|
return int(n), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// archiveCheckpoints archives checkpoint files to Parquet format.
|
// Helper function for `CleanupCheckpoints`.
|
||||||
// Produces one Parquet file per cluster: <cleanupDir>/<cluster>/<timestamp>.parquet
|
func cleanupCheckpoints(dir string, cleanupDir string, from int64, deleteInstead bool) (int, error) {
|
||||||
func archiveCheckpoints(checkpointsDir, cleanupDir string, from int64) (int, error) {
|
entries, err := os.ReadDir(dir)
|
||||||
clusterEntries, err := os.ReadDir(checkpointsDir)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
totalFiles := 0
|
files, err := findFiles(entries, from, false)
|
||||||
|
|
||||||
for _, clusterEntry := range clusterEntries {
|
|
||||||
if !clusterEntry.IsDir() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
cluster := clusterEntry.Name()
|
|
||||||
hostEntries, err := os.ReadDir(filepath.Join(checkpointsDir, cluster))
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return totalFiles, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect rows from all hosts in this cluster using worker pool
|
if deleteInstead {
|
||||||
type hostResult struct {
|
n := 0
|
||||||
rows []ParquetMetricRow
|
for _, checkpoint := range files {
|
||||||
files []string // checkpoint filenames to delete after successful write
|
filename := filepath.Join(dir, checkpoint)
|
||||||
dir string // checkpoint directory for this host
|
if err = os.Remove(filename); err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
n += 1
|
||||||
|
}
|
||||||
|
return n, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
results := make(chan hostResult, len(hostEntries))
|
filename := filepath.Join(cleanupDir, fmt.Sprintf("%d.zip", from))
|
||||||
work := make(chan struct {
|
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||||
dir, host string
|
if err != nil && os.IsNotExist(err) {
|
||||||
}, Keys.NumWorkers)
|
err = os.MkdirAll(cleanupDir, CheckpointDirPerms)
|
||||||
|
if err == nil {
|
||||||
var wg sync.WaitGroup
|
f, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY, CheckpointFilePerms)
|
||||||
errs := int32(0)
|
}
|
||||||
|
}
|
||||||
wg.Add(Keys.NumWorkers)
|
|
||||||
for w := 0; w < Keys.NumWorkers; w++ {
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
for item := range work {
|
|
||||||
rows, files, err := archiveCheckpointsToParquet(item.dir, cluster, item.host, from)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("[METRICSTORE]> error reading checkpoints for %s/%s: %s", cluster, item.host, err.Error())
|
return 0, err
|
||||||
atomic.AddInt32(&errs, 1)
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
if len(rows) > 0 {
|
defer f.Close()
|
||||||
results <- hostResult{rows: rows, files: files, dir: item.dir}
|
bw := bufio.NewWriter(f)
|
||||||
|
defer bw.Flush()
|
||||||
|
zw := zip.NewWriter(bw)
|
||||||
|
defer zw.Close()
|
||||||
|
|
||||||
|
n := 0
|
||||||
|
for _, checkpoint := range files {
|
||||||
|
// Use closure to ensure file is closed immediately after use,
|
||||||
|
// avoiding file descriptor leak from defer in loop
|
||||||
|
err := func() error {
|
||||||
|
filename := filepath.Join(dir, checkpoint)
|
||||||
|
r, err := os.Open(filename)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
defer r.Close()
|
||||||
|
|
||||||
|
w, err := zw.Create(checkpoint)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _, err = io.Copy(w, r); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = os.Remove(filename); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}()
|
}()
|
||||||
|
if err != nil {
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
n += 1
|
||||||
}
|
}
|
||||||
|
|
||||||
go func() {
|
return n, nil
|
||||||
for _, hostEntry := range hostEntries {
|
|
||||||
if !hostEntry.IsDir() {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
dir := filepath.Join(checkpointsDir, cluster, hostEntry.Name())
|
|
||||||
work <- struct {
|
|
||||||
dir, host string
|
|
||||||
}{dir: dir, host: hostEntry.Name()}
|
|
||||||
}
|
|
||||||
close(work)
|
|
||||||
wg.Wait()
|
|
||||||
close(results)
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Collect all rows and file info
|
|
||||||
var allRows []ParquetMetricRow
|
|
||||||
var allResults []hostResult
|
|
||||||
for r := range results {
|
|
||||||
allRows = append(allRows, r.rows...)
|
|
||||||
allResults = append(allResults, r)
|
|
||||||
}
|
|
||||||
|
|
||||||
if errs > 0 {
|
|
||||||
return totalFiles, fmt.Errorf("%d errors reading checkpoints for cluster %s", errs, cluster)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(allRows) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write one Parquet file per cluster
|
|
||||||
parquetFile := filepath.Join(cleanupDir, cluster, fmt.Sprintf("%d.parquet", from))
|
|
||||||
if err := writeParquetArchive(parquetFile, allRows); err != nil {
|
|
||||||
return totalFiles, fmt.Errorf("writing parquet archive for cluster %s: %w", cluster, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete archived checkpoint files
|
|
||||||
for _, result := range allResults {
|
|
||||||
for _, file := range result.files {
|
|
||||||
filename := filepath.Join(result.dir, file)
|
|
||||||
if err := os.Remove(filename); err != nil {
|
|
||||||
cclog.Warnf("[METRICSTORE]> could not remove archived checkpoint %s: %v", filename, err)
|
|
||||||
} else {
|
|
||||||
totalFiles++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
cclog.Infof("[METRICSTORE]> archived %d rows from %d files for cluster %s to %s",
|
|
||||||
len(allRows), totalFiles, cluster, parquetFile)
|
|
||||||
}
|
|
||||||
|
|
||||||
return totalFiles, nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
481
pkg/metricstore/avroCheckpoint.go
Normal file
481
pkg/metricstore/avroCheckpoint.go
Normal file
@@ -0,0 +1,481 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package metricstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
"sort"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
|
"github.com/linkedin/goavro/v2"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
NumAvroWorkers int = DefaultAvroWorkers
|
||||||
|
startUp bool = true
|
||||||
|
)
|
||||||
|
|
||||||
|
func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
|
||||||
|
levels := make([]*AvroLevel, 0)
|
||||||
|
selectors := make([][]string, 0)
|
||||||
|
as.root.lock.RLock()
|
||||||
|
// Cluster
|
||||||
|
for sel1, l1 := range as.root.children {
|
||||||
|
l1.lock.RLock()
|
||||||
|
// Node
|
||||||
|
for sel2, l2 := range l1.children {
|
||||||
|
l2.lock.RLock()
|
||||||
|
// Frequency
|
||||||
|
for sel3, l3 := range l2.children {
|
||||||
|
levels = append(levels, l3)
|
||||||
|
selectors = append(selectors, []string{sel1, sel2, sel3})
|
||||||
|
}
|
||||||
|
l2.lock.RUnlock()
|
||||||
|
}
|
||||||
|
l1.lock.RUnlock()
|
||||||
|
}
|
||||||
|
as.root.lock.RUnlock()
|
||||||
|
|
||||||
|
type workItem struct {
|
||||||
|
level *AvroLevel
|
||||||
|
dir string
|
||||||
|
selector []string
|
||||||
|
}
|
||||||
|
|
||||||
|
n, errs := int32(0), int32(0)
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
wg.Add(NumAvroWorkers)
|
||||||
|
work := make(chan workItem, NumAvroWorkers*2)
|
||||||
|
for range NumAvroWorkers {
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
for workItem := range work {
|
||||||
|
from := getTimestamp(workItem.dir)
|
||||||
|
|
||||||
|
if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
|
||||||
|
if err == ErrNoNewArchiveData {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
cclog.Errorf("error while checkpointing %#v: %s", workItem.selector, err.Error())
|
||||||
|
atomic.AddInt32(&errs, 1)
|
||||||
|
} else {
|
||||||
|
atomic.AddInt32(&n, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
for i := range len(levels) {
|
||||||
|
dir := path.Join(dir, path.Join(selectors[i]...))
|
||||||
|
work <- workItem{
|
||||||
|
level: levels[i],
|
||||||
|
dir: dir,
|
||||||
|
selector: selectors[i],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(work)
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
if errs > 0 {
|
||||||
|
return int(n), fmt.Errorf("%d errors happend while creating avro checkpoints (%d successes)", errs, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
startUp = false
|
||||||
|
|
||||||
|
return int(n), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getTimestamp returns the timestamp from the directory name
|
||||||
|
func getTimestamp(dir string) int64 {
|
||||||
|
// Extract the resolution and timestamp from the directory name
|
||||||
|
// The existing avro file will be in epoch timestamp format
|
||||||
|
// iterate over all the files in the directory and find the maximum timestamp
|
||||||
|
// and return it
|
||||||
|
|
||||||
|
resolution := path.Base(dir)
|
||||||
|
dir = path.Dir(dir)
|
||||||
|
|
||||||
|
files, err := os.ReadDir(dir)
|
||||||
|
if err != nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
var maxTS int64 = 0
|
||||||
|
|
||||||
|
if len(files) == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range files {
|
||||||
|
if file.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
name := file.Name()
|
||||||
|
|
||||||
|
if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("error while parsing timestamp: %s\n", err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if ts > maxTS {
|
||||||
|
maxTS = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interval, _ := time.ParseDuration(Keys.Checkpoints.Interval)
|
||||||
|
updateTime := time.Unix(maxTS, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()
|
||||||
|
|
||||||
|
if startUp {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if updateTime < time.Now().Unix() {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
return maxTS
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
|
||||||
|
l.lock.Lock()
|
||||||
|
defer l.lock.Unlock()
|
||||||
|
|
||||||
|
// fmt.Printf("Checkpointing directory: %s\n", dir)
|
||||||
|
// filepath contains the resolution
|
||||||
|
intRes, _ := strconv.Atoi(path.Base(dir))
|
||||||
|
|
||||||
|
// find smallest overall timestamp in l.data map and delete it from l.data
|
||||||
|
minTS := int64(1<<63 - 1)
|
||||||
|
for ts, dat := range l.data {
|
||||||
|
if ts < minTS && len(dat) != 0 {
|
||||||
|
minTS = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if from == 0 && minTS != int64(1<<63-1) {
|
||||||
|
from = minTS
|
||||||
|
}
|
||||||
|
|
||||||
|
if from == 0 {
|
||||||
|
return ErrNoNewArchiveData
|
||||||
|
}
|
||||||
|
|
||||||
|
var schema string
|
||||||
|
var codec *goavro.Codec
|
||||||
|
recordList := make([]map[string]any, 0)
|
||||||
|
|
||||||
|
var f *os.File
|
||||||
|
|
||||||
|
filePath := dir + fmt.Sprintf("_%d.avro", from)
|
||||||
|
|
||||||
|
var err error
|
||||||
|
|
||||||
|
fp_, err_ := os.Stat(filePath)
|
||||||
|
if errors.Is(err_, os.ErrNotExist) {
|
||||||
|
err = os.MkdirAll(path.Dir(dir), 0o755)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create directory: %v", err)
|
||||||
|
}
|
||||||
|
} else if fp_.Size() != 0 {
|
||||||
|
f, err = os.Open(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open existing avro file: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
br := bufio.NewReader(f)
|
||||||
|
|
||||||
|
reader, err := goavro.NewOCFReader(br)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create OCF reader: %v", err)
|
||||||
|
}
|
||||||
|
codec = reader.Codec()
|
||||||
|
schema = codec.Schema()
|
||||||
|
}
|
||||||
|
|
||||||
|
timeRef := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()
|
||||||
|
|
||||||
|
if dumpAll {
|
||||||
|
timeRef = time.Now().Unix()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty values
|
||||||
|
if len(l.data) == 0 {
|
||||||
|
// we checkpoint avro files every 60 seconds
|
||||||
|
repeat := 60 / intRes
|
||||||
|
|
||||||
|
for range repeat {
|
||||||
|
recordList = append(recordList, make(map[string]any))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
readFlag := true
|
||||||
|
|
||||||
|
for ts := range l.data {
|
||||||
|
flag := false
|
||||||
|
if ts < timeRef {
|
||||||
|
data := l.data[ts]
|
||||||
|
|
||||||
|
schemaGen, err := generateSchema(data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
flag, schema, err = compareSchema(schema, schemaGen)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to compare read and generated schema: %v", err)
|
||||||
|
}
|
||||||
|
if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {
|
||||||
|
// Use closure to ensure file is closed even on error
|
||||||
|
err := func() error {
|
||||||
|
f2, err := os.Open(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open Avro file: %v", err)
|
||||||
|
}
|
||||||
|
defer f2.Close()
|
||||||
|
|
||||||
|
br := bufio.NewReader(f2)
|
||||||
|
|
||||||
|
ocfReader, err := goavro.NewOCFReader(br)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for ocfReader.Scan() {
|
||||||
|
record, err := ocfReader.Read()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read record: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
recordList = append(recordList, record.(map[string]any))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = os.Remove(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to delete file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
readFlag = false
|
||||||
|
}
|
||||||
|
codec, err = goavro.NewCodec(schema)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create codec after merged schema: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
recordList = append(recordList, generateRecord(data))
|
||||||
|
delete(l.data, ts)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(recordList) == 0 {
|
||||||
|
return ErrNoNewArchiveData
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to append new avro file: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
// fmt.Printf("Codec : %#v\n", codec)
|
||||||
|
|
||||||
|
writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
|
||||||
|
W: f,
|
||||||
|
Codec: codec,
|
||||||
|
CompressionName: goavro.CompressionDeflateLabel,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create OCF writer: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append the new record
|
||||||
|
if err := writer.Append(recordList); err != nil {
|
||||||
|
return fmt.Errorf("failed to append record: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
|
||||||
|
var genSchema, readSchema AvroSchema
|
||||||
|
|
||||||
|
if schemaRead == "" {
|
||||||
|
return false, schemaGen, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unmarshal the schema strings into AvroSchema structs
|
||||||
|
if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
|
||||||
|
return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
|
||||||
|
return false, "", fmt.Errorf("failed to parse read schema: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(genSchema.Fields, func(i, j int) bool {
|
||||||
|
return genSchema.Fields[i].Name < genSchema.Fields[j].Name
|
||||||
|
})
|
||||||
|
|
||||||
|
sort.Slice(readSchema.Fields, func(i, j int) bool {
|
||||||
|
return readSchema.Fields[i].Name < readSchema.Fields[j].Name
|
||||||
|
})
|
||||||
|
|
||||||
|
// Check if schemas are identical
|
||||||
|
schemasEqual := true
|
||||||
|
if len(genSchema.Fields) <= len(readSchema.Fields) {
|
||||||
|
|
||||||
|
for i := range genSchema.Fields {
|
||||||
|
if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
|
||||||
|
schemasEqual = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If schemas are identical, return the read schema
|
||||||
|
if schemasEqual {
|
||||||
|
return false, schemaRead, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a map to hold unique fields from both schemas
|
||||||
|
fieldMap := make(map[string]AvroField)
|
||||||
|
|
||||||
|
// Add fields from the read schema
|
||||||
|
for _, field := range readSchema.Fields {
|
||||||
|
fieldMap[field.Name] = field
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add or update fields from the generated schema
|
||||||
|
for _, field := range genSchema.Fields {
|
||||||
|
fieldMap[field.Name] = field
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a union schema by collecting fields from the map
|
||||||
|
var mergedFields []AvroField
|
||||||
|
for _, field := range fieldMap {
|
||||||
|
mergedFields = append(mergedFields, field)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort fields by name for consistency
|
||||||
|
sort.Slice(mergedFields, func(i, j int) bool {
|
||||||
|
return mergedFields[i].Name < mergedFields[j].Name
|
||||||
|
})
|
||||||
|
|
||||||
|
// Create the merged schema
|
||||||
|
mergedSchema := AvroSchema{
|
||||||
|
Type: "record",
|
||||||
|
Name: genSchema.Name,
|
||||||
|
Fields: mergedFields,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if schemas are identical
|
||||||
|
schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
|
||||||
|
if schemasEqual {
|
||||||
|
for i := range mergedSchema.Fields {
|
||||||
|
if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
|
||||||
|
schemasEqual = false
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if schemasEqual {
|
||||||
|
return false, schemaRead, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Marshal the merged schema back to JSON
|
||||||
|
mergedSchemaJSON, err := json.Marshal(mergedSchema)
|
||||||
|
if err != nil {
|
||||||
|
return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, string(mergedSchemaJSON), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateSchema(data map[string]schema.Float) (string, error) {
|
||||||
|
// Define the Avro schema structure
|
||||||
|
schema := map[string]any{
|
||||||
|
"type": "record",
|
||||||
|
"name": "DataRecord",
|
||||||
|
"fields": []map[string]any{},
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldTracker := make(map[string]struct{})
|
||||||
|
|
||||||
|
for key := range data {
|
||||||
|
if _, exists := fieldTracker[key]; !exists {
|
||||||
|
key = correctKey(key)
|
||||||
|
|
||||||
|
field := map[string]any{
|
||||||
|
"name": key,
|
||||||
|
"type": "double",
|
||||||
|
"default": -1.0,
|
||||||
|
}
|
||||||
|
schema["fields"] = append(schema["fields"].([]map[string]any), field)
|
||||||
|
fieldTracker[key] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
schemaString, err := json.Marshal(schema)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to marshal schema: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return string(schemaString), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateRecord(data map[string]schema.Float) map[string]any {
|
||||||
|
record := make(map[string]any)
|
||||||
|
|
||||||
|
// Iterate through each map in data
|
||||||
|
for key, value := range data {
|
||||||
|
key = correctKey(key)
|
||||||
|
|
||||||
|
// Set the value in the record
|
||||||
|
// avro only accepts basic types
|
||||||
|
record[key] = value.Double()
|
||||||
|
}
|
||||||
|
|
||||||
|
return record
|
||||||
|
}
|
||||||
|
|
||||||
|
// correctKey escapes the characters '_', ':' and '.' in key by replacing them
// with the tokens "_0x5F_", "_0x3A_" and "_0x2E_" respectively. All other
// bytes are copied unchanged. ReplaceKey performs the reverse mapping.
func correctKey(key string) string {
	var out strings.Builder

	// Walk the key byte by byte: the three escaped characters are ASCII, so
	// they can never occur inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(key); i++ {
		switch c := key[i]; c {
		case '_':
			out.WriteString("_0x5F_")
		case ':':
			out.WriteString("_0x3A_")
		case '.':
			out.WriteString("_0x2E_")
		default:
			out.WriteByte(c)
		}
	}

	return out.String()
}
|
||||||
|
|
||||||
|
// avroKeyUnescaper reverses the escaping applied by correctKey in a single
// left-to-right pass. A strings.Replacer is safe for concurrent use.
var avroKeyUnescaper = strings.NewReplacer(
	"_0x2E_", ".",
	"_0x3A_", ":",
	"_0x5F_", "_",
)

// ReplaceKey restores a key that was escaped by correctKey: the tokens
// "_0x2E_", "_0x3A_" and "_0x5F_" become '.', ':' and '_' again.
//
// The tokens are decoded in one non-overlapping pass over the input. Decoding
// them one after another with separate replacements is not a true inverse:
// the trailing '_' of one token and the leading '_' of the next can be
// mistaken for a third token (e.g. "a_0x5F_0x2E_0x5F_b", the escaped form of
// "a_0x2E_b", would come back as "a_0x5F.0x5F_b").
func ReplaceKey(key string) string {
	return avroKeyUnescaper.Replace(key)
}
|
||||||
130
pkg/metricstore/avroHelper.go
Normal file
130
pkg/metricstore/avroHelper.go
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package metricstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"slices"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
|
)
|
||||||
|
|
||||||
|
func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
if Keys.Checkpoints.FileFormat == "json" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
ms := GetMemoryStore()
|
||||||
|
var avroLevel *AvroLevel
|
||||||
|
oldSelector := make([]string, 0)
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
// Drain any remaining messages in channel before exiting
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case val, ok := <-LineProtocolMessages:
|
||||||
|
if !ok {
|
||||||
|
// Channel closed
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Process remaining message
|
||||||
|
freq, err := ms.GetMetricFrequency(val.MetricName)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var metricName strings.Builder
|
||||||
|
for _, selectorName := range val.Selector {
|
||||||
|
metricName.WriteString(selectorName + SelectorDelimiter)
|
||||||
|
}
|
||||||
|
metricName.WriteString(val.MetricName)
|
||||||
|
|
||||||
|
var selector []string
|
||||||
|
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
|
||||||
|
|
||||||
|
if !stringSlicesEqual(oldSelector, selector) {
|
||||||
|
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
|
||||||
|
if avroLevel == nil {
|
||||||
|
cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
|
||||||
|
}
|
||||||
|
oldSelector = slices.Clone(selector)
|
||||||
|
}
|
||||||
|
|
||||||
|
if avroLevel != nil {
|
||||||
|
avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq))
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// No more messages, exit
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case val, ok := <-LineProtocolMessages:
|
||||||
|
if !ok {
|
||||||
|
// Channel closed, exit gracefully
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the frequency of the metric from the global configuration
|
||||||
|
freq, err := ms.GetMetricFrequency(val.MetricName)
|
||||||
|
if err != nil {
|
||||||
|
cclog.Errorf("Error fetching metric frequency: %s\n", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var metricName strings.Builder
|
||||||
|
|
||||||
|
for _, selectorName := range val.Selector {
|
||||||
|
metricName.WriteString(selectorName + SelectorDelimiter)
|
||||||
|
}
|
||||||
|
|
||||||
|
metricName.WriteString(val.MetricName)
|
||||||
|
|
||||||
|
// Create a new selector for the Avro level
|
||||||
|
// The selector is a slice of strings that represents the path to the
|
||||||
|
// Avro level. It is created by appending the cluster, node, and metric
|
||||||
|
// name to the selector.
|
||||||
|
var selector []string
|
||||||
|
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
|
||||||
|
|
||||||
|
if !stringSlicesEqual(oldSelector, selector) {
|
||||||
|
// Get the Avro level for the metric
|
||||||
|
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
|
||||||
|
|
||||||
|
// If the Avro level is nil, create a new one
|
||||||
|
if avroLevel == nil {
|
||||||
|
cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
|
||||||
|
}
|
||||||
|
oldSelector = slices.Clone(selector)
|
||||||
|
}
|
||||||
|
|
||||||
|
if avroLevel != nil {
|
||||||
|
avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// stringSlicesEqual reports whether a and b have the same length and hold
// equal strings at every index. A nil slice and an empty slice compare equal.
func stringSlicesEqual(a, b []string) bool {
	return slices.Equal(a, b)
}
|
||||||
167
pkg/metricstore/avroStruct.go
Normal file
167
pkg/metricstore/avroStruct.go
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
||||||
|
// All rights reserved. This file is part of cc-backend.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package metricstore
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
	// LineProtocolMessages is the unbuffered channel through which metric
	// samples are handed to the DataStaging goroutine.
	LineProtocolMessages = make(chan *AvroStruct)
	// SelectorDelimiter separates hierarchical selector components in metric names for Avro encoding
	SelectorDelimiter = "_SEL_"
)

// CheckpointBufferMinutes is the length, in minutes, of the window for which
// addMetric pre-creates timestamp buckets. It is also used as the initial
// delay before the first Avro checkpoint is written.
var CheckpointBufferMinutes = DefaultCheckpointBufferMin

// AvroStruct is one metric sample sent over LineProtocolMessages.
type AvroStruct struct {
	// MetricName is the metric's name; it is used to look up the frequency.
	MetricName string
	// Cluster and Node select the staging level the sample belongs to.
	Cluster string
	Node    string
	// Selector holds the components that are prefixed to the metric name,
	// each followed by SelectorDelimiter.
	Selector []string
	// Value is the sampled value.
	Value schema.Float
	// Timestamp is the sample time (presumably Unix epoch seconds; confirm
	// against the sender).
	Timestamp int64
}

// AvroStore is the staging tree for samples awaiting an Avro checkpoint.
type AvroStore struct {
	root AvroLevel
}

// avroStore is the package-wide staging tree returned by GetAvroStore.
var avroStore AvroStore

// AvroLevel is one node of the staging tree.
type AvroLevel struct {
	// children maps the next selector component to its sub-level.
	children map[string]*AvroLevel
	// data maps a bucket timestamp to the metric-name -> value pairs stored
	// for that bucket.
	data map[int64]map[string]schema.Float
	// lock guards children and data.
	lock sync.RWMutex
}

// AvroField is the JSON representation of a single field of an Avro schema.
type AvroField struct {
	Name    string `json:"name"`
	Type    any    `json:"type"`
	Default any    `json:"default,omitempty"`
}

// AvroSchema is the JSON representation of an Avro schema with its fields.
type AvroSchema struct {
	Type   string      `json:"type"`
	Name   string      `json:"name"`
	Fields []AvroField `json:"fields"`
}
|
||||||
|
|
||||||
|
func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
|
||||||
|
if len(selector) == 0 {
|
||||||
|
return l
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allow concurrent reads:
|
||||||
|
l.lock.RLock()
|
||||||
|
var child *AvroLevel
|
||||||
|
var ok bool
|
||||||
|
if l.children == nil {
|
||||||
|
// Children map needs to be created...
|
||||||
|
l.lock.RUnlock()
|
||||||
|
} else {
|
||||||
|
child, ok := l.children[selector[0]]
|
||||||
|
l.lock.RUnlock()
|
||||||
|
if ok {
|
||||||
|
return child.findAvroLevelOrCreate(selector[1:])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The level does not exist, take write lock for unique access:
|
||||||
|
l.lock.Lock()
|
||||||
|
// While this thread waited for the write lock, another thread
|
||||||
|
// could have created the child node.
|
||||||
|
if l.children != nil {
|
||||||
|
child, ok = l.children[selector[0]]
|
||||||
|
if ok {
|
||||||
|
l.lock.Unlock()
|
||||||
|
return child.findAvroLevelOrCreate(selector[1:])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
child = &AvroLevel{
|
||||||
|
data: make(map[int64]map[string]schema.Float, 0),
|
||||||
|
children: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
if l.children != nil {
|
||||||
|
l.children[selector[0]] = child
|
||||||
|
} else {
|
||||||
|
l.children = map[string]*AvroLevel{selector[0]: child}
|
||||||
|
}
|
||||||
|
l.lock.Unlock()
|
||||||
|
return child.findAvroLevelOrCreate(selector[1:])
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
|
||||||
|
l.lock.Lock()
|
||||||
|
defer l.lock.Unlock()
|
||||||
|
|
||||||
|
KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)
|
||||||
|
|
||||||
|
// Create keys in advance for the given amount of time
|
||||||
|
if len(l.data) != KeyCounter {
|
||||||
|
if len(l.data) == 0 {
|
||||||
|
for i := range KeyCounter {
|
||||||
|
l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Get the last timestamp
|
||||||
|
var lastTS int64
|
||||||
|
for ts := range l.data {
|
||||||
|
if ts > lastTS {
|
||||||
|
lastTS = ts
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Create keys for the next KeyCounter timestamps
|
||||||
|
l.data[lastTS+int64(Freq)] = make(map[string]schema.Float, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
closestTS := int64(0)
|
||||||
|
minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
|
||||||
|
found := false
|
||||||
|
|
||||||
|
// Iterate over timestamps and choose the one which is within range.
|
||||||
|
// Since its epoch time, we check if the difference is less than 60 seconds.
|
||||||
|
for ts, dat := range l.data {
|
||||||
|
// Check if timestamp is within range
|
||||||
|
diff := timestamp - ts
|
||||||
|
if diff < -int64(Freq) || diff > int64(Freq) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Metric already present at this timestamp — skip
|
||||||
|
if _, ok := dat[metricName]; ok {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is the closest timestamp so far
|
||||||
|
if Abs(diff) < minDiff {
|
||||||
|
minDiff = Abs(diff)
|
||||||
|
closestTS = ts
|
||||||
|
found = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if found {
|
||||||
|
l.data[closestTS][metricName] = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAvroStore returns a pointer to the package-wide Avro staging store.
func GetAvroStore() *AvroStore {
	return &avroStore
}
|
||||||
|
|
||||||
|
// Abs returns the magnitude of x, i.e. x itself when it is non-negative and
// its negation otherwise.
func Abs(x int64) int64 {
	if x >= 0 {
		return x
	}
	return -x
}
|
||||||
@@ -43,7 +43,6 @@ package metricstore
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
)
|
)
|
||||||
@@ -54,102 +53,12 @@ import (
|
|||||||
// of data or reallocation needs to happen on writes.
|
// of data or reallocation needs to happen on writes.
|
||||||
const BufferCap int = DefaultBufferCapacity
|
const BufferCap int = DefaultBufferCapacity
|
||||||
|
|
||||||
// BufferPool is the global instance.
|
var bufferPool sync.Pool = sync.Pool{
|
||||||
// It is initialized immediately when the package loads.
|
New: func() any {
|
||||||
var bufferPool = NewPersistentBufferPool()
|
|
||||||
|
|
||||||
type PersistentBufferPool struct {
|
|
||||||
pool []*buffer
|
|
||||||
mu sync.Mutex
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewPersistentBufferPool creates a dynamic pool for buffers.
|
|
||||||
func NewPersistentBufferPool() *PersistentBufferPool {
|
|
||||||
return &PersistentBufferPool{
|
|
||||||
pool: make([]*buffer, 0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *PersistentBufferPool) Get() *buffer {
|
|
||||||
p.mu.Lock()
|
|
||||||
defer p.mu.Unlock()
|
|
||||||
|
|
||||||
n := len(p.pool)
|
|
||||||
if n == 0 {
|
|
||||||
// Pool is empty, allocate a new one
|
|
||||||
return &buffer{
|
return &buffer{
|
||||||
data: make([]schema.Float, 0, BufferCap),
|
data: make([]schema.Float, 0, BufferCap),
|
||||||
}
|
}
|
||||||
}
|
},
|
||||||
|
|
||||||
// Reuse existing buffer from the pool
|
|
||||||
b := p.pool[n-1]
|
|
||||||
p.pool[n-1] = nil // Avoid memory leak
|
|
||||||
p.pool = p.pool[:n-1]
|
|
||||||
return b
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *PersistentBufferPool) Put(b *buffer) {
|
|
||||||
// Reset the buffer before putting it back
|
|
||||||
b.data = b.data[:0]
|
|
||||||
|
|
||||||
p.mu.Lock()
|
|
||||||
defer p.mu.Unlock()
|
|
||||||
p.pool = append(p.pool, b)
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetSize returns the exact number of buffers currently sitting in the pool.
|
|
||||||
func (p *PersistentBufferPool) GetSize() int {
|
|
||||||
p.mu.Lock()
|
|
||||||
defer p.mu.Unlock()
|
|
||||||
return len(p.pool)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clear drains all buffers currently in the pool, allowing the GC to collect them.
|
|
||||||
func (p *PersistentBufferPool) Clear() {
|
|
||||||
p.mu.Lock()
|
|
||||||
defer p.mu.Unlock()
|
|
||||||
for i := range p.pool {
|
|
||||||
p.pool[i] = nil
|
|
||||||
}
|
|
||||||
p.pool = p.pool[:0]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean removes buffers from the pool that haven't been used in the given duration.
|
|
||||||
// It uses a simple LRU approach based on the lastUsed timestamp.
|
|
||||||
func (p *PersistentBufferPool) Clean(threshold int64) {
|
|
||||||
p.mu.Lock()
|
|
||||||
defer p.mu.Unlock()
|
|
||||||
|
|
||||||
// Filter in place
|
|
||||||
active := p.pool[:0]
|
|
||||||
for _, b := range p.pool {
|
|
||||||
if b.lastUsed >= threshold {
|
|
||||||
active = append(active, b)
|
|
||||||
} else {
|
|
||||||
// Buffer is older than the threshold, let it be collected by GC
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Nullify the rest to prevent memory leaks
|
|
||||||
for i := len(active); i < len(p.pool); i++ {
|
|
||||||
p.pool[i] = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
p.pool = active
|
|
||||||
}
|
|
||||||
|
|
||||||
// CleanAll removes all buffers from the pool.
|
|
||||||
func (p *PersistentBufferPool) CleanAll() {
|
|
||||||
p.mu.Lock()
|
|
||||||
defer p.mu.Unlock()
|
|
||||||
|
|
||||||
// Nullify all buffers to prevent memory leaks
|
|
||||||
for i := range p.pool {
|
|
||||||
p.pool[i] = nil
|
|
||||||
}
|
|
||||||
|
|
||||||
p.pool = p.pool[:0]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -185,11 +94,10 @@ type buffer struct {
|
|||||||
start int64
|
start int64
|
||||||
archived bool
|
archived bool
|
||||||
closed bool
|
closed bool
|
||||||
lastUsed int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func newBuffer(ts, freq int64) *buffer {
|
func newBuffer(ts, freq int64) *buffer {
|
||||||
b := bufferPool.Get()
|
b := bufferPool.Get().(*buffer)
|
||||||
b.frequency = freq
|
b.frequency = freq
|
||||||
b.start = ts - (freq / 2)
|
b.start = ts - (freq / 2)
|
||||||
b.prev = nil
|
b.prev = nil
|
||||||
@@ -332,7 +240,6 @@ func (b *buffer) free(t int64) (delme bool, n int) {
|
|||||||
if cap(b.prev.data) != BufferCap {
|
if cap(b.prev.data) != BufferCap {
|
||||||
b.prev.data = make([]schema.Float, 0, BufferCap)
|
b.prev.data = make([]schema.Float, 0, BufferCap)
|
||||||
}
|
}
|
||||||
b.prev.lastUsed = time.Now().Unix()
|
|
||||||
bufferPool.Put(b.prev)
|
bufferPool.Put(b.prev)
|
||||||
b.prev = nil
|
b.prev = nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,15 +6,15 @@
|
|||||||
// This file implements checkpoint persistence for the in-memory metric store.
|
// This file implements checkpoint persistence for the in-memory metric store.
|
||||||
//
|
//
|
||||||
// Checkpoints enable graceful restarts by periodically saving in-memory metric
|
// Checkpoints enable graceful restarts by periodically saving in-memory metric
|
||||||
// data to disk in JSON or binary format. The checkpoint system:
|
// data to disk in either JSON or Avro format. The checkpoint system:
|
||||||
//
|
//
|
||||||
// Key Features:
|
// Key Features:
|
||||||
// - Periodic background checkpointing via the Checkpointing() worker
|
// - Periodic background checkpointing via the Checkpointing() worker
|
||||||
// - Two format families: JSON (human-readable) and WAL+binary (compact, crash-safe)
|
// - Two formats: JSON (human-readable) and Avro (compact, efficient)
|
||||||
// - Parallel checkpoint creation and loading using worker pools
|
// - Parallel checkpoint creation and loading using worker pools
|
||||||
// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{json|bin}
|
// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{json|avro}
|
||||||
// - WAL file: checkpoint_dir/cluster/host/current.wal (append-only, per-entry)
|
|
||||||
// - Only saves unarchived data (archived data is already persisted elsewhere)
|
// - Only saves unarchived data (archived data is already persisted elsewhere)
|
||||||
|
// - Automatic format detection and fallback during loading
|
||||||
// - GC optimization during loading to prevent excessive heap growth
|
// - GC optimization during loading to prevent excessive heap growth
|
||||||
//
|
//
|
||||||
// Checkpoint Workflow:
|
// Checkpoint Workflow:
|
||||||
@@ -27,9 +27,8 @@
|
|||||||
// checkpoints/
|
// checkpoints/
|
||||||
// cluster1/
|
// cluster1/
|
||||||
// host001/
|
// host001/
|
||||||
// 1234567890.json (JSON format: full subtree snapshot)
|
// 1234567890.json (timestamp = checkpoint start time)
|
||||||
// 1234567890.bin (binary format: full subtree snapshot)
|
// 1234567950.json
|
||||||
// current.wal (WAL format: append-only per-entry log)
|
|
||||||
// host002/
|
// host002/
|
||||||
// ...
|
// ...
|
||||||
package metricstore
|
package metricstore
|
||||||
@@ -53,6 +52,7 @@ import (
|
|||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
|
"github.com/linkedin/goavro/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -86,18 +86,22 @@ var (
|
|||||||
|
|
||||||
// Checkpointing starts a background worker that periodically saves metric data to disk.
|
// Checkpointing starts a background worker that periodically saves metric data to disk.
|
||||||
//
|
//
|
||||||
// Format behaviour:
|
// The behavior depends on the configured file format:
|
||||||
// - "json": Periodic checkpointing based on Keys.Checkpoints.Interval
|
// - JSON: Periodic checkpointing based on Keys.Checkpoints.Interval
|
||||||
// - "wal": Periodic binary snapshots + WAL rotation at Keys.Checkpoints.Interval
|
// - Avro: Initial delay + periodic checkpointing at DefaultAvroCheckpointInterval
|
||||||
|
//
|
||||||
|
// The worker respects context cancellation and signals completion via the WaitGroup.
|
||||||
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
|
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
|
||||||
lastCheckpointMu.Lock()
|
lastCheckpointMu.Lock()
|
||||||
lastCheckpoint = time.Now()
|
lastCheckpoint = time.Now()
|
||||||
lastCheckpointMu.Unlock()
|
lastCheckpointMu.Unlock()
|
||||||
|
|
||||||
|
if Keys.Checkpoints.FileFormat == "json" {
|
||||||
ms := GetMemoryStore()
|
ms := GetMemoryStore()
|
||||||
|
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
d, err := time.ParseDuration(Keys.Checkpoints.Interval)
|
d, err := time.ParseDuration(Keys.Checkpoints.Interval)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Fatalf("[METRICSTORE]> invalid checkpoint interval '%s': %s", Keys.Checkpoints.Interval, err.Error())
|
cclog.Fatalf("[METRICSTORE]> invalid checkpoint interval '%s': %s", Keys.Checkpoints.Interval, err.Error())
|
||||||
@@ -119,23 +123,10 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
from := lastCheckpoint
|
from := lastCheckpoint
|
||||||
lastCheckpointMu.Unlock()
|
lastCheckpointMu.Unlock()
|
||||||
|
|
||||||
now := time.Now()
|
|
||||||
cclog.Infof("[METRICSTORE]> start checkpointing (starting at %s)...", from.Format(time.RFC3339))
|
cclog.Infof("[METRICSTORE]> start checkpointing (starting at %s)...", from.Format(time.RFC3339))
|
||||||
|
now := time.Now()
|
||||||
if Keys.Checkpoints.FileFormat == "wal" {
|
n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir,
|
||||||
n, hostDirs, err := ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())
|
from.Unix(), now.Unix())
|
||||||
if err != nil {
|
|
||||||
cclog.Errorf("[METRICSTORE]> binary checkpointing failed: %s", err.Error())
|
|
||||||
} else {
|
|
||||||
cclog.Infof("[METRICSTORE]> done: %d binary snapshot files created", n)
|
|
||||||
lastCheckpointMu.Lock()
|
|
||||||
lastCheckpoint = now
|
|
||||||
lastCheckpointMu.Unlock()
|
|
||||||
// Rotate WAL files for successfully checkpointed hosts.
|
|
||||||
RotateWALFiles(hostDirs)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
n, err := ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), now.Unix())
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Errorf("[METRICSTORE]> checkpointing failed: %s", err.Error())
|
cclog.Errorf("[METRICSTORE]> checkpointing failed: %s", err.Error())
|
||||||
} else {
|
} else {
|
||||||
@@ -146,8 +137,32 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}()
|
||||||
|
} else {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-time.After(time.Duration(CheckpointBufferMinutes) * time.Minute):
|
||||||
|
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
ticker := time.NewTicker(DefaultAvroCheckpointInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
}
|
}
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// MarshalJSON provides optimized JSON encoding for CheckpointMetrics.
|
// MarshalJSON provides optimized JSON encoding for CheckpointMetrics.
|
||||||
@@ -175,7 +190,7 @@ func (cm *CheckpointMetrics) MarshalJSON() ([]byte, error) {
|
|||||||
return buf, nil
|
return buf, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ToCheckpoint writes metric data to checkpoint files in parallel (JSON format).
|
// ToCheckpoint writes metric data to checkpoint files in parallel.
|
||||||
//
|
//
|
||||||
// Metrics at root and cluster levels are skipped. One file per host is created.
|
// Metrics at root and cluster levels are skipped. One file per host is created.
|
||||||
// Uses worker pool (Keys.NumWorkers) for parallel processing. Only locks one host
|
// Uses worker pool (Keys.NumWorkers) for parallel processing. Only locks one host
|
||||||
@@ -363,6 +378,7 @@ func enqueueCheckpointHosts(dir string, work chan<- [2]string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gcCounter := 0
|
||||||
for _, clusterDir := range clustersDir {
|
for _, clusterDir := range clustersDir {
|
||||||
if !clusterDir.IsDir() {
|
if !clusterDir.IsDir() {
|
||||||
return errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
|
return errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
|
||||||
@@ -378,6 +394,16 @@ func enqueueCheckpointHosts(dir string, work chan<- [2]string) error {
|
|||||||
return errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
|
return errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gcCounter++
|
||||||
|
// if gcCounter%GCTriggerInterval == 0 {
|
||||||
|
// Forcing garbage collection runs here regularly during the loading of checkpoints
|
||||||
|
// will decrease the total heap size after loading everything back to memory is done.
|
||||||
|
// While loading data, the heap will grow fast, so the GC target size will double
|
||||||
|
// almost always. By forcing GCs here, we can keep it growing more slowly so that
|
||||||
|
// at the end, less memory is wasted.
|
||||||
|
// runtime.GC()
|
||||||
|
// }
|
||||||
|
|
||||||
work <- [2]string{clusterDir.Name(), hostDir.Name()}
|
work <- [2]string{clusterDir.Name(), hostDir.Name()}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -387,8 +413,8 @@ func enqueueCheckpointHosts(dir string, work chan<- [2]string) error {
|
|||||||
|
|
||||||
// FromCheckpoint loads checkpoint files from disk into memory in parallel.
|
// FromCheckpoint loads checkpoint files from disk into memory in parallel.
|
||||||
//
|
//
|
||||||
// Uses worker pool to load cluster/host combinations. Returns number of files
|
// Uses worker pool to load cluster/host combinations. Periodically triggers GC
|
||||||
// loaded and any errors.
|
// to prevent excessive heap growth. Returns number of files loaded and any errors.
|
||||||
func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
|
func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
work := make(chan [2]string, Keys.NumWorkers*4)
|
work := make(chan [2]string, Keys.NumWorkers*4)
|
||||||
@@ -426,11 +452,13 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
|
|||||||
|
|
||||||
// FromCheckpointFiles is the main entry point for loading checkpoints at startup.
|
// FromCheckpointFiles is the main entry point for loading checkpoints at startup.
|
||||||
//
|
//
|
||||||
|
// Automatically detects checkpoint format (JSON vs Avro) and falls back if needed.
|
||||||
// Creates checkpoint directory if it doesn't exist. This function must be called
|
// Creates checkpoint directory if it doesn't exist. This function must be called
|
||||||
// before any writes or reads, and can only be called once.
|
// before any writes or reads, and can only be called once.
|
||||||
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
||||||
if _, err := os.Stat(dir); os.IsNotExist(err) {
|
if _, err := os.Stat(dir); os.IsNotExist(err) {
|
||||||
err := os.MkdirAll(dir, CheckpointDirPerms)
|
// The directory does not exist, so create it using os.MkdirAll()
|
||||||
|
err := os.MkdirAll(dir, CheckpointDirPerms) // CheckpointDirPerms sets the permissions for the directory
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
|
cclog.Fatalf("[METRICSTORE]> Error creating directory: %#v\n", err)
|
||||||
}
|
}
|
||||||
@@ -440,6 +468,146 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
|
|||||||
return m.FromCheckpoint(dir, from)
|
return m.FromCheckpoint(dir, from)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
|
||||||
|
br := bufio.NewReader(f)
|
||||||
|
|
||||||
|
fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
|
||||||
|
resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fromTimestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)
|
||||||
|
|
||||||
|
// Same logic according to lineprotocol
|
||||||
|
fromTimestamp -= (resolution / 2)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)
|
||||||
|
|
||||||
|
var recordCounter int64 = 0
|
||||||
|
|
||||||
|
// Create a new OCF reader from the buffered reader
|
||||||
|
ocfReader, err := goavro.NewOCFReader(br)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> error creating OCF reader: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
metricsData := make(map[string]schema.FloatArray)
|
||||||
|
|
||||||
|
for ocfReader.Scan() {
|
||||||
|
datum, err := ocfReader.Read()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
record, ok := datum.(map[string]any)
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> failed to assert datum as map[string]interface{}")
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, value := range record {
|
||||||
|
metricsData[key] = append(metricsData[key], schema.ConvertToFloat(value.(float64)))
|
||||||
|
}
|
||||||
|
|
||||||
|
recordCounter += 1
|
||||||
|
}
|
||||||
|
|
||||||
|
to := (fromTimestamp + (recordCounter / (60 / resolution) * 60))
|
||||||
|
if to < from {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for key, floatArray := range metricsData {
|
||||||
|
metricName := ReplaceKey(key)
|
||||||
|
|
||||||
|
if strings.Contains(metricName, SelectorDelimiter) {
|
||||||
|
subString := strings.Split(metricName, SelectorDelimiter)
|
||||||
|
|
||||||
|
lvl := l
|
||||||
|
|
||||||
|
for i := 0; i < len(subString)-1; i++ {
|
||||||
|
|
||||||
|
sel := subString[i]
|
||||||
|
|
||||||
|
if lvl.children == nil {
|
||||||
|
lvl.children = make(map[string]*Level)
|
||||||
|
}
|
||||||
|
|
||||||
|
child, ok := lvl.children[sel]
|
||||||
|
if !ok {
|
||||||
|
child = &Level{
|
||||||
|
metrics: make([]*buffer, len(m.Metrics)),
|
||||||
|
children: nil,
|
||||||
|
}
|
||||||
|
lvl.children[sel] = child
|
||||||
|
}
|
||||||
|
lvl = child
|
||||||
|
}
|
||||||
|
|
||||||
|
leafMetricName := subString[len(subString)-1]
|
||||||
|
err = lvl.createBuffer(m, leafMetricName, floatArray, fromTimestamp, resolution)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
err = l.createBuffer(m, metricName, floatArray, fromTimestamp, resolution)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
|
||||||
|
n := len(floatArray)
|
||||||
|
b := &buffer{
|
||||||
|
frequency: resolution,
|
||||||
|
start: from,
|
||||||
|
data: floatArray[0:n:n],
|
||||||
|
prev: nil,
|
||||||
|
next: nil,
|
||||||
|
archived: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
minfo, ok := m.Metrics[metricName]
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
prev := l.metrics[minfo.offset]
|
||||||
|
if prev == nil {
|
||||||
|
l.metrics[minfo.offset] = b
|
||||||
|
} else {
|
||||||
|
if prev.start > b.start {
|
||||||
|
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
|
||||||
|
}
|
||||||
|
|
||||||
|
b.prev = prev
|
||||||
|
prev.next = b
|
||||||
|
|
||||||
|
missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
|
||||||
|
if missingCount > 0 {
|
||||||
|
missingCount /= int(b.frequency)
|
||||||
|
|
||||||
|
for range missingCount {
|
||||||
|
prev.data = append(prev.data, schema.NaN)
|
||||||
|
}
|
||||||
|
|
||||||
|
prev.data = prev.data[0:len(prev.data):len(prev.data)]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
l.metrics[minfo.offset] = b
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {
|
func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {
|
||||||
br := bufio.NewReader(f)
|
br := bufio.NewReader(f)
|
||||||
cf := &CheckpointFile{}
|
cf := &CheckpointFile{}
|
||||||
@@ -511,37 +679,37 @@ func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// fromCheckpoint loads all checkpoint files (JSON, binary snapshot, WAL) for a
|
|
||||||
// single host directory. Snapshot files are loaded first (sorted by timestamp),
|
|
||||||
// then current.wal is replayed on top.
|
|
||||||
func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, error) {
|
func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, error) {
|
||||||
direntries, err := os.ReadDir(dir)
|
direntries, err := os.ReadDir(dir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0, err
|
return 0, err
|
||||||
}
|
}
|
||||||
|
|
||||||
allFiles := make([]fs.DirEntry, 0)
|
allFiles := make([]fs.DirEntry, 0)
|
||||||
var walEntry fs.DirEntry
|
|
||||||
filesLoaded := 0
|
filesLoaded := 0
|
||||||
|
|
||||||
for _, e := range direntries {
|
for _, e := range direntries {
|
||||||
if e.IsDir() {
|
if e.IsDir() {
|
||||||
// Legacy: skip subdirectories (only used by old Avro format).
|
child := &Level{
|
||||||
// These are ignored; their data is not loaded.
|
metrics: make([]*buffer, len(m.Metrics)),
|
||||||
cclog.Debugf("[METRICSTORE]> skipping subdirectory %s in checkpoint dir %s", e.Name(), dir)
|
children: make(map[string]*Level),
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
name := e.Name()
|
files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from)
|
||||||
if strings.HasSuffix(name, ".json") || strings.HasSuffix(name, ".bin") {
|
filesLoaded += files
|
||||||
allFiles = append(allFiles, e)
|
if err != nil {
|
||||||
} else if name == "current.wal" {
|
return filesLoaded, err
|
||||||
walEntry = e
|
}
|
||||||
|
|
||||||
|
l.children[e.Name()] = child
|
||||||
|
} else if strings.HasSuffix(e.Name(), ".json") || strings.HasSuffix(e.Name(), ".avro") {
|
||||||
|
allFiles = append(allFiles, e)
|
||||||
|
} else {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
// Silently ignore other files (e.g., .tmp, .bin.tmp from interrupted writes).
|
|
||||||
}
|
}
|
||||||
|
|
||||||
files, err := findFiles(allFiles, from, true)
|
files, err := findFiles(allFiles, from, true)
|
||||||
@@ -551,81 +719,54 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, err
|
|||||||
|
|
||||||
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
|
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
|
||||||
".json": l.loadJSONFile,
|
".json": l.loadJSONFile,
|
||||||
".bin": l.loadBinaryFile,
|
".avro": l.loadAvroFile,
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, filename := range files {
|
for _, filename := range files {
|
||||||
ext := filepath.Ext(filename)
|
ext := filepath.Ext(filename)
|
||||||
loader := loaders[ext]
|
loader := loaders[ext]
|
||||||
if loader == nil {
|
if loader == nil {
|
||||||
cclog.Warnf("[METRICSTORE]> unknown extension for checkpoint file %s", filename)
|
cclog.Warnf("Unknown extension for file %s", filename)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Use a closure to ensure file is closed immediately after use
|
||||||
err := func() error {
|
err := func() error {
|
||||||
f, err := os.Open(path.Join(dir, filename))
|
f, err := os.Open(path.Join(dir, filename))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer f.Close()
|
defer f.Close()
|
||||||
|
|
||||||
return loader(m, f, from)
|
return loader(m, f, from)
|
||||||
}()
|
}()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return filesLoaded, err
|
return filesLoaded, err
|
||||||
}
|
}
|
||||||
filesLoaded++
|
|
||||||
}
|
|
||||||
|
|
||||||
// Replay WAL after all snapshot files so it fills in data since the last snapshot.
|
filesLoaded += 1
|
||||||
if walEntry != nil {
|
|
||||||
err := func() error {
|
|
||||||
f, err := os.Open(path.Join(dir, walEntry.Name()))
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
return l.loadWALFile(m, f, from)
|
|
||||||
}()
|
|
||||||
if err != nil {
|
|
||||||
// WAL errors are non-fatal: the snapshot already loaded the bulk of data.
|
|
||||||
cclog.Warnf("[METRICSTORE]> WAL replay error for %s: %v (data since last snapshot may be missing)", dir, err)
|
|
||||||
} else {
|
|
||||||
filesLoaded++
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return filesLoaded, nil
|
return filesLoaded, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseTimestampFromFilename extracts a Unix timestamp from a checkpoint filename.
|
// This will probably get very slow over time!
|
||||||
// Supports ".json" (format: "<ts>.json") and ".bin" (format: "<ts>.bin").
|
// A solution could be some sort of an index file in which all other files
|
||||||
func parseTimestampFromFilename(name string) (int64, error) {
|
// and the timespan they contain is listed.
|
||||||
switch {
|
// NOTE: This now assumes that you have distinct timestamps for json and avro files
|
||||||
case strings.HasSuffix(name, ".json"):
|
// Also, it assumes that the timestamps are not overlapping/self-modified.
|
||||||
return strconv.ParseInt(name[:len(name)-5], 10, 64)
|
|
||||||
case strings.HasSuffix(name, ".bin"):
|
|
||||||
return strconv.ParseInt(name[:len(name)-4], 10, 64)
|
|
||||||
default:
|
|
||||||
return 0, fmt.Errorf("unknown checkpoint extension for file %q", name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// findFiles returns filenames from direntries whose timestamps satisfy the filter.
|
|
||||||
// If findMoreRecentFiles is true, returns files with timestamps >= t (plus the
|
|
||||||
// last file before t if t falls between two files).
|
|
||||||
func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {
|
func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {
|
||||||
nums := map[string]int64{}
|
nums := map[string]int64{}
|
||||||
for _, e := range direntries {
|
for _, e := range direntries {
|
||||||
name := e.Name()
|
if !strings.HasSuffix(e.Name(), ".json") && !strings.HasSuffix(e.Name(), ".avro") {
|
||||||
if !strings.HasSuffix(name, ".json") && !strings.HasSuffix(name, ".bin") {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
ts, err := parseTimestampFromFilename(name)
|
ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
nums[name] = ts
|
nums[e.Name()] = ts
|
||||||
}
|
}
|
||||||
|
|
||||||
sort.Slice(direntries, func(i, j int) bool {
|
sort.Slice(direntries, func(i, j int) bool {
|
||||||
@@ -642,12 +783,16 @@ func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]s
|
|||||||
for i, e := range direntries {
|
for i, e := range direntries {
|
||||||
ts1 := nums[e.Name()]
|
ts1 := nums[e.Name()]
|
||||||
|
|
||||||
|
// Logic to look for files in forward or direction
|
||||||
|
// If logic: All files greater than or after
|
||||||
|
// the given timestamp will be selected
|
||||||
|
// Else If logic: All files less than or before
|
||||||
|
// the given timestamp will be selected
|
||||||
if findMoreRecentFiles && t <= ts1 {
|
if findMoreRecentFiles && t <= ts1 {
|
||||||
filenames = append(filenames, e.Name())
|
filenames = append(filenames, e.Name())
|
||||||
} else if !findMoreRecentFiles && ts1 <= t && ts1 != 0 {
|
} else if !findMoreRecentFiles && ts1 <= t && ts1 != 0 {
|
||||||
filenames = append(filenames, e.Name())
|
filenames = append(filenames, e.Name())
|
||||||
}
|
}
|
||||||
|
|
||||||
if i == len(direntries)-1 {
|
if i == len(direntries)-1 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
// ├─ RetentionInMemory: How long to keep data in RAM
|
// ├─ RetentionInMemory: How long to keep data in RAM
|
||||||
// ├─ MemoryCap: Memory limit in bytes (triggers forceFree)
|
// ├─ MemoryCap: Memory limit in bytes (triggers forceFree)
|
||||||
// ├─ Checkpoints: Persistence configuration
|
// ├─ Checkpoints: Persistence configuration
|
||||||
// │ ├─ FileFormat: "json" or "wal"
|
// │ ├─ FileFormat: "avro" or "json"
|
||||||
// │ ├─ Interval: How often to save (e.g., "1h")
|
// │ ├─ Interval: How often to save (e.g., "1h")
|
||||||
// │ └─ RootDir: Checkpoint storage path
|
// │ └─ RootDir: Checkpoint storage path
|
||||||
// ├─ Cleanup: Long-term storage configuration
|
// ├─ Cleanup: Long-term storage configuration
|
||||||
@@ -55,13 +55,16 @@ const (
|
|||||||
DefaultMaxWorkers = 10
|
DefaultMaxWorkers = 10
|
||||||
DefaultBufferCapacity = 512
|
DefaultBufferCapacity = 512
|
||||||
DefaultGCTriggerInterval = 100
|
DefaultGCTriggerInterval = 100
|
||||||
|
DefaultAvroWorkers = 4
|
||||||
|
DefaultCheckpointBufferMin = 3
|
||||||
|
DefaultAvroCheckpointInterval = time.Minute
|
||||||
DefaultMemoryUsageTrackerInterval = 1 * time.Hour
|
DefaultMemoryUsageTrackerInterval = 1 * time.Hour
|
||||||
)
|
)
|
||||||
|
|
||||||
// Checkpoints configures periodic persistence of in-memory metric data.
|
// Checkpoints configures periodic persistence of in-memory metric data.
|
||||||
//
|
//
|
||||||
// Fields:
|
// Fields:
|
||||||
// - FileFormat: "json" (human-readable, periodic) or "wal" (binary snapshot + WAL, crash-safe)
|
// - FileFormat: "avro" (default, binary, compact) or "json" (human-readable, slower)
|
||||||
// - Interval: Duration string (e.g., "1h", "30m") between checkpoint saves
|
// - Interval: Duration string (e.g., "1h", "30m") between checkpoint saves
|
||||||
// - RootDir: Filesystem path for checkpoint files (created if missing)
|
// - RootDir: Filesystem path for checkpoint files (created if missing)
|
||||||
type Checkpoints struct {
|
type Checkpoints struct {
|
||||||
@@ -141,7 +144,7 @@ type MetricStoreConfig struct {
|
|||||||
// Accessed by Init(), Checkpointing(), and other lifecycle functions.
|
// Accessed by Init(), Checkpointing(), and other lifecycle functions.
|
||||||
var Keys MetricStoreConfig = MetricStoreConfig{
|
var Keys MetricStoreConfig = MetricStoreConfig{
|
||||||
Checkpoints: Checkpoints{
|
Checkpoints: Checkpoints{
|
||||||
FileFormat: "json",
|
FileFormat: "avro",
|
||||||
RootDir: "./var/checkpoints",
|
RootDir: "./var/checkpoints",
|
||||||
},
|
},
|
||||||
Cleanup: &Cleanup{
|
Cleanup: &Cleanup{
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ const configSchema = `{
|
|||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"file-format": {
|
"file-format": {
|
||||||
"description": "Specify the format for checkpoint files. Two variants: 'json' (human-readable, periodic) and 'wal' (binary snapshot + Write-Ahead Log, crash-safe). Default is 'json'.",
|
"description": "Specify the format for checkpoint files. There are 2 variants: 'avro' and 'json'. If nothing is specified, 'avro' is default.",
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
"interval": {
|
"interval": {
|
||||||
|
|||||||
@@ -42,7 +42,6 @@ package metricstore
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
@@ -193,7 +192,6 @@ func (l *Level) free(t int64) (int, error) {
|
|||||||
if cap(b.data) != BufferCap {
|
if cap(b.data) != BufferCap {
|
||||||
b.data = make([]schema.Float, 0, BufferCap)
|
b.data = make([]schema.Float, 0, BufferCap)
|
||||||
}
|
}
|
||||||
b.lastUsed = time.Now().Unix()
|
|
||||||
bufferPool.Put(b)
|
bufferPool.Put(b)
|
||||||
l.metrics[i] = nil
|
l.metrics[i] = nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,23 +3,9 @@
|
|||||||
// Use of this source code is governed by a MIT-style
|
// Use of this source code is governed by a MIT-style
|
||||||
// license that can be found in the LICENSE file.
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
// This file implements ingestion of InfluxDB line-protocol metric data received
|
|
||||||
// over NATS. Each line encodes one metric sample with the following structure:
|
|
||||||
//
|
|
||||||
// <measurement>[,cluster=<c>][,hostname=<h>][,type=<t>][,type-id=<id>][,subtype=<s>][,stype-id=<id>] value=<v> [<timestamp>]
|
|
||||||
//
|
|
||||||
// The measurement name identifies the metric (e.g. "cpu_load"). Tags provide
|
|
||||||
// routing information (cluster, host) and optional sub-device selectors (type,
|
|
||||||
// subtype). Only one field is expected per line: "value".
|
|
||||||
//
|
|
||||||
// After decoding, each sample is:
|
|
||||||
// 1. Written to the in-memory store via ms.WriteToLevel.
|
|
||||||
// 2. If the checkpoint format is "wal", also forwarded to the WAL staging
|
|
||||||
// goroutine via the WALMessages channel for durable write-ahead logging.
|
|
||||||
package metricstore
|
package metricstore
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -28,19 +14,9 @@ import (
|
|||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
"github.com/ClusterCockpit/cc-lib/v2/nats"
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
||||||
"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
|
"github.com/influxdata/line-protocol/v2/lineprotocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ReceiveNats subscribes to all configured NATS subjects and feeds incoming
|
|
||||||
// line-protocol messages into the MemoryStore.
|
|
||||||
//
|
|
||||||
// When workers > 1 a pool of goroutines drains a shared channel so that
|
|
||||||
// multiple messages can be decoded in parallel. With workers == 1 the NATS
|
|
||||||
// callback decodes inline (no channel overhead, lower latency).
|
|
||||||
//
|
|
||||||
// The function blocks until ctx is cancelled and all worker goroutines have
|
|
||||||
// finished. It returns nil when the NATS client is not configured; callers
|
|
||||||
// should treat that as a no-op rather than an error.
|
|
||||||
func ReceiveNats(ms *MemoryStore,
|
func ReceiveNats(ms *MemoryStore,
|
||||||
workers int,
|
workers int,
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
@@ -99,13 +75,8 @@ func ReceiveNats(ms *MemoryStore,
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// reorder prepends prefix to buf in-place when buf has enough spare capacity,
|
// Place `prefix` in front of `buf` but if possible,
|
||||||
// avoiding an allocation. Falls back to a regular append otherwise.
|
// do that inplace in `buf`.
|
||||||
//
|
|
||||||
// It is used to assemble the "type<type-id>" and "subtype<stype-id>" selector
|
|
||||||
// strings when the type tag arrives before the type-id tag in the line, so the
|
|
||||||
// two byte slices need to be concatenated in tag-declaration order regardless
|
|
||||||
// of wire order.
|
|
||||||
func reorder(buf, prefix []byte) []byte {
|
func reorder(buf, prefix []byte) []byte {
|
||||||
n := len(prefix)
|
n := len(prefix)
|
||||||
m := len(buf)
|
m := len(buf)
|
||||||
@@ -123,83 +94,17 @@ func reorder(buf, prefix []byte) []byte {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// decodeState holds the per-call scratch buffers used by DecodeLine.
|
// Decode lines using dec and make write calls to the MemoryStore.
|
||||||
// Instances are recycled via decodeStatePool to avoid repeated allocations
|
// If a line is missing its cluster tag, use clusterDefault as default.
|
||||||
// during high-throughput ingestion.
|
|
||||||
type decodeState struct {
|
|
||||||
// metricBuf holds a copy of the current measurement name (line-protocol
|
|
||||||
// measurement field). Copied because dec.Measurement() returns a slice
|
|
||||||
// that is invalidated by the next decoder call.
|
|
||||||
metricBuf []byte
|
|
||||||
|
|
||||||
// selector is the sub-device path passed to WriteToLevel and WALMessage
|
|
||||||
// (e.g. ["socket0"] or ["socket0", "memctrl1"]). Reused across lines.
|
|
||||||
selector []string
|
|
||||||
|
|
||||||
// typeBuf accumulates the concatenated "type"+"type-id" tag value for the
|
|
||||||
// current line. Reset at the start of each line's tag-decode loop.
|
|
||||||
typeBuf []byte
|
|
||||||
|
|
||||||
// subTypeBuf accumulates the concatenated "subtype"+"stype-id" tag value.
|
|
||||||
// Reset at the start of each line's tag-decode loop.
|
|
||||||
subTypeBuf []byte
|
|
||||||
|
|
||||||
// prevTypeBytes / prevTypeStr cache the last seen typeBuf content and its
|
|
||||||
// string conversion. Because consecutive lines in a batch typically address
|
|
||||||
// the same sub-device, the cache hit rate is very high and avoids
|
|
||||||
// repeated []byte→string allocations.
|
|
||||||
prevTypeBytes []byte
|
|
||||||
prevTypeStr string
|
|
||||||
|
|
||||||
// prevSubTypeBytes / prevSubTypeStr are the same cache for the subtype.
|
|
||||||
prevSubTypeBytes []byte
|
|
||||||
prevSubTypeStr string
|
|
||||||
}
|
|
||||||
|
|
||||||
// decodeStatePool recycles decodeState values across DecodeLine calls to
|
|
||||||
// reduce GC pressure during sustained metric ingestion.
|
|
||||||
var decodeStatePool = sync.Pool{
|
|
||||||
New: func() any {
|
|
||||||
return &decodeState{
|
|
||||||
metricBuf: make([]byte, 0, 16),
|
|
||||||
selector: make([]string, 0, 4),
|
|
||||||
typeBuf: make([]byte, 0, 16),
|
|
||||||
subTypeBuf: make([]byte, 0, 16),
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// DecodeLine reads all lines from dec (InfluxDB line-protocol) and writes each
|
|
||||||
// decoded metric sample into ms.
|
|
||||||
//
|
|
||||||
// clusterDefault is used as the cluster name for lines that do not carry a
|
|
||||||
// "cluster" tag. Callers typically supply the ClusterTag value from the NATS
|
|
||||||
// subscription configuration.
|
|
||||||
//
|
|
||||||
// Performance notes:
|
|
||||||
// - A decodeState is obtained from decodeStatePool to reuse scratch buffers.
|
|
||||||
// - The Level pointer (host-level node in the metric tree) is cached across
|
|
||||||
// consecutive lines that share the same cluster+host pair to avoid
|
|
||||||
// repeated lock acquisitions on the root and cluster levels.
|
|
||||||
// - []byte→string conversions for type/subtype selectors are cached via
|
|
||||||
// prevType*/prevSubType* fields because batches typically repeat the same
|
|
||||||
// sub-device identifiers.
|
|
||||||
// - Timestamp parsing tries Second precision first; if that fails it retries
|
|
||||||
// Millisecond, Microsecond, and Nanosecond in turn. A missing timestamp
|
|
||||||
// falls back to time.Now().
|
|
||||||
//
|
|
||||||
// When the checkpoint format is "wal" each successfully decoded sample is also
|
|
||||||
// sent to WALMessages so the WAL staging goroutine can persist it durably
|
|
||||||
// before the next binary snapshot.
|
|
||||||
func DecodeLine(dec *lineprotocol.Decoder,
|
func DecodeLine(dec *lineprotocol.Decoder,
|
||||||
ms *MemoryStore,
|
ms *MemoryStore,
|
||||||
clusterDefault string,
|
clusterDefault string,
|
||||||
) error {
|
) error {
|
||||||
// Reduce allocations in loop:
|
// Reduce allocations in loop:
|
||||||
t := time.Now()
|
t := time.Now()
|
||||||
metric := Metric{}
|
metric, metricBuf := Metric{}, make([]byte, 0, 16)
|
||||||
st := decodeStatePool.Get().(*decodeState)
|
selector := make([]string, 0, 4)
|
||||||
defer decodeStatePool.Put(st)
|
typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)
|
||||||
|
|
||||||
// Optimize for the case where all lines in a "batch" are about the same
|
// Optimize for the case where all lines in a "batch" are about the same
|
||||||
// cluster and host. By using `WriteToLevel` (level = host), we do not need
|
// cluster and host. By using `WriteToLevel` (level = host), we do not need
|
||||||
@@ -216,7 +121,7 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
|||||||
|
|
||||||
// Needs to be copied because another call to dec.* would
|
// Needs to be copied because another call to dec.* would
|
||||||
// invalidate the returned slice.
|
// invalidate the returned slice.
|
||||||
st.metricBuf = append(st.metricBuf[:0], rawmeasurement...)
|
metricBuf = append(metricBuf[:0], rawmeasurement...)
|
||||||
|
|
||||||
// The go compiler optimizes map[string(byteslice)] lookups:
|
// The go compiler optimizes map[string(byteslice)] lookups:
|
||||||
metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
|
metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
|
||||||
@@ -224,7 +129,7 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
st.typeBuf, st.subTypeBuf = st.typeBuf[:0], st.subTypeBuf[:0]
|
typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0]
|
||||||
cluster, host := clusterDefault, ""
|
cluster, host := clusterDefault, ""
|
||||||
for {
|
for {
|
||||||
key, val, err := dec.NextTag()
|
key, val, err := dec.NextTag()
|
||||||
@@ -257,49 +162,41 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We cannot be sure that the "type" tag comes before the "type-id" tag:
|
// We cannot be sure that the "type" tag comes before the "type-id" tag:
|
||||||
if len(st.typeBuf) == 0 {
|
if len(typeBuf) == 0 {
|
||||||
st.typeBuf = append(st.typeBuf, val...)
|
typeBuf = append(typeBuf, val...)
|
||||||
} else {
|
} else {
|
||||||
st.typeBuf = reorder(st.typeBuf, val)
|
typeBuf = reorder(typeBuf, val)
|
||||||
}
|
}
|
||||||
case "type-id":
|
case "type-id":
|
||||||
st.typeBuf = append(st.typeBuf, val...)
|
typeBuf = append(typeBuf, val...)
|
||||||
case "subtype":
|
case "subtype":
|
||||||
// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
|
// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
|
||||||
if len(st.subTypeBuf) == 0 {
|
if len(subTypeBuf) == 0 {
|
||||||
st.subTypeBuf = append(st.subTypeBuf, val...)
|
subTypeBuf = append(subTypeBuf, val...)
|
||||||
} else {
|
} else {
|
||||||
st.subTypeBuf = reorder(st.subTypeBuf, val)
|
subTypeBuf = reorder(subTypeBuf, val)
|
||||||
|
// subTypeBuf = reorder(typeBuf, val)
|
||||||
}
|
}
|
||||||
case "stype-id":
|
case "stype-id":
|
||||||
st.subTypeBuf = append(st.subTypeBuf, val...)
|
subTypeBuf = append(subTypeBuf, val...)
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the cluster or host changed, the lvl was set to nil
|
// If the cluster or host changed, the lvl was set to nil
|
||||||
if lvl == nil {
|
if lvl == nil {
|
||||||
st.selector = st.selector[:2]
|
selector = selector[:2]
|
||||||
st.selector[0], st.selector[1] = cluster, host
|
selector[0], selector[1] = cluster, host
|
||||||
lvl = ms.GetLevel(st.selector)
|
lvl = ms.GetLevel(selector)
|
||||||
prevCluster, prevHost = cluster, host
|
prevCluster, prevHost = cluster, host
|
||||||
}
|
}
|
||||||
|
|
||||||
// subtypes: cache []byte→string conversions; messages in a batch typically
|
// subtypes:
|
||||||
// share the same type/subtype so the hit rate is very high.
|
selector = selector[:0]
|
||||||
st.selector = st.selector[:0]
|
if len(typeBuf) > 0 {
|
||||||
if len(st.typeBuf) > 0 {
|
selector = append(selector, string(typeBuf)) // <- Allocation :(
|
||||||
if !bytes.Equal(st.typeBuf, st.prevTypeBytes) {
|
if len(subTypeBuf) > 0 {
|
||||||
st.prevTypeBytes = append(st.prevTypeBytes[:0], st.typeBuf...)
|
selector = append(selector, string(subTypeBuf))
|
||||||
st.prevTypeStr = string(st.typeBuf)
|
|
||||||
}
|
|
||||||
st.selector = append(st.selector, st.prevTypeStr)
|
|
||||||
if len(st.subTypeBuf) > 0 {
|
|
||||||
if !bytes.Equal(st.subTypeBuf, st.prevSubTypeBytes) {
|
|
||||||
st.prevSubTypeBytes = append(st.prevSubTypeBytes[:0], st.subTypeBuf...)
|
|
||||||
st.prevSubTypeStr = string(st.subTypeBuf)
|
|
||||||
}
|
|
||||||
st.selector = append(st.selector, st.prevSubTypeStr)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -347,18 +244,18 @@ func DecodeLine(dec *lineprotocol.Decoder,
|
|||||||
|
|
||||||
time := t.Unix()
|
time := t.Unix()
|
||||||
|
|
||||||
if Keys.Checkpoints.FileFormat == "wal" {
|
if Keys.Checkpoints.FileFormat != "json" {
|
||||||
WALMessages <- &WALMessage{
|
LineProtocolMessages <- &AvroStruct{
|
||||||
MetricName: string(st.metricBuf),
|
MetricName: string(metricBuf),
|
||||||
Cluster: cluster,
|
Cluster: cluster,
|
||||||
Node: host,
|
Node: host,
|
||||||
Selector: append([]string{}, st.selector...),
|
Selector: append([]string{}, selector...),
|
||||||
Value: metric.Value,
|
Value: metric.Value,
|
||||||
Timestamp: time,
|
Timestamp: time,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := ms.WriteToLevel(lvl, st.selector, time, []Metric{metric}); err != nil {
|
if err := ms.WriteToLevel(lvl, selector, time, []Metric{metric}); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -172,7 +172,7 @@ func Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.W
|
|||||||
Retention(wg, ctx)
|
Retention(wg, ctx)
|
||||||
Checkpointing(wg, ctx)
|
Checkpointing(wg, ctx)
|
||||||
CleanUp(wg, ctx)
|
CleanUp(wg, ctx)
|
||||||
WALStaging(wg, ctx)
|
DataStaging(wg, ctx)
|
||||||
MemoryUsageTracker(wg, ctx)
|
MemoryUsageTracker(wg, ctx)
|
||||||
|
|
||||||
// Note: Signal handling has been removed from this function.
|
// Note: Signal handling has been removed from this function.
|
||||||
@@ -264,7 +264,7 @@ func (ms *MemoryStore) SetNodeProvider(provider NodeProvider) {
|
|||||||
//
|
//
|
||||||
// The function will:
|
// The function will:
|
||||||
// 1. Cancel the context to stop all background workers
|
// 1. Cancel the context to stop all background workers
|
||||||
// 2. Close the WAL messages channel if using WAL format
|
// 2. Close NATS message channels if using Avro format
|
||||||
// 3. Write a final checkpoint to preserve in-memory data
|
// 3. Write a final checkpoint to preserve in-memory data
|
||||||
// 4. Log any errors encountered during shutdown
|
// 4. Log any errors encountered during shutdown
|
||||||
//
|
//
|
||||||
@@ -276,8 +276,8 @@ func Shutdown() {
|
|||||||
shutdownFunc()
|
shutdownFunc()
|
||||||
}
|
}
|
||||||
|
|
||||||
if Keys.Checkpoints.FileFormat == "wal" {
|
if Keys.Checkpoints.FileFormat != "json" {
|
||||||
close(WALMessages)
|
close(LineProtocolMessages)
|
||||||
}
|
}
|
||||||
|
|
||||||
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
|
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
|
||||||
@@ -286,18 +286,10 @@ func Shutdown() {
|
|||||||
|
|
||||||
ms := GetMemoryStore()
|
ms := GetMemoryStore()
|
||||||
|
|
||||||
lastCheckpointMu.Lock()
|
if Keys.Checkpoints.FileFormat == "json" {
|
||||||
from := lastCheckpoint
|
files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
|
||||||
lastCheckpointMu.Unlock()
|
|
||||||
|
|
||||||
if Keys.Checkpoints.FileFormat == "wal" {
|
|
||||||
var hostDirs []string
|
|
||||||
files, hostDirs, err = ms.ToCheckpointWAL(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())
|
|
||||||
if err == nil {
|
|
||||||
RotateWALFiles(hostDirs)
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, from.Unix(), time.Now().Unix())
|
files, err = GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -320,7 +312,9 @@ func Shutdown() {
|
|||||||
func Retention(wg *sync.WaitGroup, ctx context.Context) {
|
func Retention(wg *sync.WaitGroup, ctx context.Context) {
|
||||||
ms := GetMemoryStore()
|
ms := GetMemoryStore()
|
||||||
|
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
d, err := time.ParseDuration(Keys.RetentionInMemory)
|
d, err := time.ParseDuration(Keys.RetentionInMemory)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cclog.Fatal(err)
|
cclog.Fatal(err)
|
||||||
@@ -357,12 +351,9 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
state.mu.Unlock()
|
state.mu.Unlock()
|
||||||
|
|
||||||
// Clean up the buffer pool
|
|
||||||
bufferPool.Clean(state.lastRetentionTime)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
// MemoryUsageTracker starts a background goroutine that monitors memory usage.
|
// MemoryUsageTracker starts a background goroutine that monitors memory usage.
|
||||||
@@ -383,7 +374,9 @@ func Retention(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
func MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {
|
func MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {
|
||||||
ms := GetMemoryStore()
|
ms := GetMemoryStore()
|
||||||
|
|
||||||
wg.Go(func() {
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
d := DefaultMemoryUsageTrackerInterval
|
d := DefaultMemoryUsageTrackerInterval
|
||||||
|
|
||||||
if d <= 0 {
|
if d <= 0 {
|
||||||
@@ -428,9 +421,6 @@ func MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
runtime.ReadMemStats(&mem)
|
runtime.ReadMemStats(&mem)
|
||||||
actualMemoryGB = float64(mem.Alloc) / 1e9
|
actualMemoryGB = float64(mem.Alloc) / 1e9
|
||||||
|
|
||||||
bufferPool.CleanAll()
|
|
||||||
cclog.Infof("[METRICSTORE]> Cleaned up bufferPool\n")
|
|
||||||
|
|
||||||
if actualMemoryGB > float64(Keys.MemoryCap) {
|
if actualMemoryGB > float64(Keys.MemoryCap) {
|
||||||
cclog.Warnf("[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing", actualMemoryGB, Keys.MemoryCap)
|
cclog.Warnf("[METRICSTORE]> memory usage %.2f GB exceeds cap %d GB, starting emergency buffer freeing", actualMemoryGB, Keys.MemoryCap)
|
||||||
|
|
||||||
@@ -472,7 +462,7 @@ func MemoryUsageTracker(wg *sync.WaitGroup, ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Free removes metric data older than the given time while preserving data for active nodes.
|
// Free removes metric data older than the given time while preserving data for active nodes.
|
||||||
|
|||||||
@@ -464,53 +464,3 @@ func TestBufferHealthChecks(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBufferPoolClean(t *testing.T) {
|
|
||||||
// Use a fresh pool for testing
|
|
||||||
pool := NewPersistentBufferPool()
|
|
||||||
|
|
||||||
now := time.Now().Unix()
|
|
||||||
|
|
||||||
// Create some buffers and put them in the pool with different lastUsed times
|
|
||||||
b1 := &buffer{lastUsed: now - 3600, data: make([]schema.Float, 0)} // 1 hour ago
|
|
||||||
b2 := &buffer{lastUsed: now - 7200, data: make([]schema.Float, 0)} // 2 hours ago
|
|
||||||
b3 := &buffer{lastUsed: now - 180000, data: make([]schema.Float, 0)} // 50 hours ago
|
|
||||||
b4 := &buffer{lastUsed: now - 200000, data: make([]schema.Float, 0)} // 55 hours ago
|
|
||||||
b5 := &buffer{lastUsed: now, data: make([]schema.Float, 0)}
|
|
||||||
|
|
||||||
pool.Put(b1)
|
|
||||||
pool.Put(b2)
|
|
||||||
pool.Put(b3)
|
|
||||||
pool.Put(b4)
|
|
||||||
pool.Put(b5)
|
|
||||||
|
|
||||||
if pool.GetSize() != 5 {
|
|
||||||
t.Fatalf("Expected pool size 5, got %d", pool.GetSize())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean buffers older than 48 hours
|
|
||||||
timeUpdate := time.Now().Add(-48 * time.Hour).Unix()
|
|
||||||
pool.Clean(timeUpdate)
|
|
||||||
|
|
||||||
// Expected: b1, b2, b5 should remain. b3, b4 should be cleaned.
|
|
||||||
if pool.GetSize() != 3 {
|
|
||||||
t.Fatalf("Expected pool size 3 after clean, got %d", pool.GetSize())
|
|
||||||
}
|
|
||||||
|
|
||||||
validBufs := map[int64]bool{
|
|
||||||
b1.lastUsed: true,
|
|
||||||
b2.lastUsed: true,
|
|
||||||
b5.lastUsed: true,
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := 0; i < 3; i++ {
|
|
||||||
b := pool.Get()
|
|
||||||
if !validBufs[b.lastUsed] {
|
|
||||||
t.Errorf("Found unexpected buffer with lastUsed %d", b.lastUsed)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if pool.GetSize() != 0 {
|
|
||||||
t.Fatalf("Expected pool to be empty, got %d", pool.GetSize())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,213 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved. This file is part of cc-backend.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package metricstore
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"encoding/binary"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
|
||||||
pq "github.com/parquet-go/parquet-go"
|
|
||||||
)
|
|
||||||
|
|
||||||
// ParquetMetricRow is the long-format schema for archived metric data.
// One row per (host, metric, scope, scope_id, timestamp) data point.
// Sorted by (cluster, hostname, metric, timestamp) for optimal compression.
//
// The `parquet` struct tags give the column names used in the archive file.
type ParquetMetricRow struct {
	Cluster   string  `parquet:"cluster"`   // cluster name supplied by the archiver
	Hostname  string  `parquet:"hostname"`  // host the data point belongs to
	Metric    string  `parquet:"metric"`    // metric name (key in CheckpointFile.Metrics)
	Scope     string  `parquet:"scope"`     // "node" at host level, otherwise derived from the child level name
	ScopeID   string  `parquet:"scope_id"`  // id part of the child level name; empty at host level
	Timestamp int64   `parquet:"timestamp"` // series start plus sample index times frequency
	Frequency int64   `parquet:"frequency"` // sampling interval copied from the checkpoint metric
	Value     float32 `parquet:"value"`     // sample value narrowed to float32; NaN samples are not archived
}
|
|
||||||
|
|
||||||
// flattenCheckpointFile recursively converts a CheckpointFile tree into Parquet rows.
|
|
||||||
// The scope path is built from the hierarchy: host level is "node", then child names
|
|
||||||
// map to scope/scope_id (e.g., "socket0" → scope="socket", scope_id="0").
|
|
||||||
func flattenCheckpointFile(cf *CheckpointFile, cluster, hostname, scope, scopeID string, rows []ParquetMetricRow) []ParquetMetricRow {
|
|
||||||
for metricName, cm := range cf.Metrics {
|
|
||||||
ts := cm.Start
|
|
||||||
for _, v := range cm.Data {
|
|
||||||
if !v.IsNaN() {
|
|
||||||
rows = append(rows, ParquetMetricRow{
|
|
||||||
Cluster: cluster,
|
|
||||||
Hostname: hostname,
|
|
||||||
Metric: metricName,
|
|
||||||
Scope: scope,
|
|
||||||
ScopeID: scopeID,
|
|
||||||
Timestamp: ts,
|
|
||||||
Frequency: cm.Frequency,
|
|
||||||
Value: float32(v),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
ts += cm.Frequency
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for childName, childCf := range cf.Children {
|
|
||||||
childScope, childScopeID := parseScopeFromName(childName)
|
|
||||||
rows = flattenCheckpointFile(childCf, cluster, hostname, childScope, childScopeID, rows)
|
|
||||||
}
|
|
||||||
|
|
||||||
return rows
|
|
||||||
}
|
|
||||||
|
|
||||||
// parseScopeFromName splits a child level name into (scope, scope_id).
//
// A name matches when it starts with one of the known prefixes and the
// character right after the prefix is an ASCII digit; everything after the
// prefix is then returned as the id. The "cpu" prefix is reported as scope
// "hwthread". Examples: "socket0" → ("socket", "0"), "core12" → ("core", "12"),
// "cpu3" → ("hwthread", "3").
//
// Any other name is returned unchanged as the scope with an empty scope_id.
func parseScopeFromName(name string) (string, string) {
	known := [...]struct{ prefix, scope string }{
		{"socket", "socket"},
		{"memoryDomain", "memoryDomain"},
		{"core", "core"},
		{"hwthread", "hwthread"},
		{"cpu", "hwthread"},
		{"accelerator", "accelerator"},
	}

	for i := range known {
		n := len(known[i].prefix)
		// Need at least one character after the prefix to form an id.
		if len(name) <= n || name[:n] != known[i].prefix {
			continue
		}
		// Only the first id character is checked for being a digit.
		if c := name[n]; c >= '0' && c <= '9' {
			return known[i].scope, name[n:]
		}
	}

	return name, ""
}
|
|
||||||
|
|
||||||
// writeParquetArchive writes rows to a Parquet file with Zstd compression.
|
|
||||||
func writeParquetArchive(filename string, rows []ParquetMetricRow) error {
|
|
||||||
if err := os.MkdirAll(filepath.Dir(filename), CheckpointDirPerms); err != nil {
|
|
||||||
return fmt.Errorf("creating archive directory: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
f, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("creating parquet file: %w", err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
bw := bufio.NewWriterSize(f, 1<<20) // 1MB write buffer
|
|
||||||
|
|
||||||
writer := pq.NewGenericWriter[ParquetMetricRow](bw,
|
|
||||||
pq.Compression(&pq.Zstd),
|
|
||||||
pq.SortingWriterConfig(pq.SortingColumns(
|
|
||||||
pq.Ascending("cluster"),
|
|
||||||
pq.Ascending("hostname"),
|
|
||||||
pq.Ascending("metric"),
|
|
||||||
pq.Ascending("timestamp"),
|
|
||||||
)),
|
|
||||||
)
|
|
||||||
|
|
||||||
if _, err := writer.Write(rows); err != nil {
|
|
||||||
return fmt.Errorf("writing parquet rows: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := writer.Close(); err != nil {
|
|
||||||
return fmt.Errorf("closing parquet writer: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := bw.Flush(); err != nil {
|
|
||||||
return fmt.Errorf("flushing parquet file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// loadCheckpointFileFromDisk reads a JSON or binary checkpoint file and returns
|
|
||||||
// a CheckpointFile. Used by the Parquet archiver to read checkpoint data
|
|
||||||
// before converting it to Parquet format.
|
|
||||||
func loadCheckpointFileFromDisk(filename string) (*CheckpointFile, error) {
|
|
||||||
f, err := os.Open(filename)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
ext := filepath.Ext(filename)
|
|
||||||
switch ext {
|
|
||||||
case ".json":
|
|
||||||
cf := &CheckpointFile{}
|
|
||||||
br := bufio.NewReader(f)
|
|
||||||
if err := json.NewDecoder(br).Decode(cf); err != nil {
|
|
||||||
return nil, fmt.Errorf("decoding JSON checkpoint %s: %w", filename, err)
|
|
||||||
}
|
|
||||||
return cf, nil
|
|
||||||
|
|
||||||
case ".bin":
|
|
||||||
br := bufio.NewReader(f)
|
|
||||||
var magic uint32
|
|
||||||
if err := binary.Read(br, binary.LittleEndian, &magic); err != nil {
|
|
||||||
return nil, fmt.Errorf("reading magic from %s: %w", filename, err)
|
|
||||||
}
|
|
||||||
if magic != snapFileMagic {
|
|
||||||
return nil, fmt.Errorf("invalid snapshot magic in %s: 0x%08X", filename, magic)
|
|
||||||
}
|
|
||||||
var fileFrom, fileTo int64
|
|
||||||
if err := binary.Read(br, binary.LittleEndian, &fileFrom); err != nil {
|
|
||||||
return nil, fmt.Errorf("reading from-timestamp from %s: %w", filename, err)
|
|
||||||
}
|
|
||||||
if err := binary.Read(br, binary.LittleEndian, &fileTo); err != nil {
|
|
||||||
return nil, fmt.Errorf("reading to-timestamp from %s: %w", filename, err)
|
|
||||||
}
|
|
||||||
cf, err := readBinaryLevel(br)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("reading binary level from %s: %w", filename, err)
|
|
||||||
}
|
|
||||||
cf.From = fileFrom
|
|
||||||
cf.To = fileTo
|
|
||||||
return cf, nil
|
|
||||||
|
|
||||||
default:
|
|
||||||
return nil, fmt.Errorf("unsupported checkpoint extension: %s", ext)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// archiveCheckpointsToParquet reads checkpoint files for a host directory,
|
|
||||||
// converts them to Parquet rows. Returns the rows and filenames that were processed.
|
|
||||||
func archiveCheckpointsToParquet(dir, cluster, host string, from int64) ([]ParquetMetricRow, []string, error) {
|
|
||||||
entries, err := os.ReadDir(dir)
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
files, err := findFiles(entries, from, false)
|
|
||||||
if err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(files) == 0 {
|
|
||||||
return nil, nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var rows []ParquetMetricRow
|
|
||||||
|
|
||||||
for _, checkpoint := range files {
|
|
||||||
filename := filepath.Join(dir, checkpoint)
|
|
||||||
cf, err := loadCheckpointFileFromDisk(filename)
|
|
||||||
if err != nil {
|
|
||||||
cclog.Warnf("[METRICSTORE]> skipping unreadable checkpoint %s: %v", filename, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
rows = flattenCheckpointFile(cf, cluster, host, "node", "", rows)
|
|
||||||
}
|
|
||||||
|
|
||||||
return rows, files, nil
|
|
||||||
}
|
|
||||||
@@ -1,255 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved. This file is part of cc-backend.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
package metricstore
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
|
||||||
pq "github.com/parquet-go/parquet-go"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParseScopeFromName(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
wantScope string
|
|
||||||
wantID string
|
|
||||||
}{
|
|
||||||
{"socket0", "socket", "0"},
|
|
||||||
{"socket12", "socket", "12"},
|
|
||||||
{"core0", "core", "0"},
|
|
||||||
{"core127", "core", "127"},
|
|
||||||
{"cpu0", "hwthread", "0"},
|
|
||||||
{"hwthread5", "hwthread", "5"},
|
|
||||||
{"memoryDomain0", "memoryDomain", "0"},
|
|
||||||
{"accelerator0", "accelerator", "0"},
|
|
||||||
{"unknown", "unknown", ""},
|
|
||||||
{"socketX", "socketX", ""}, // not numeric suffix
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
scope, id := parseScopeFromName(tt.name)
|
|
||||||
if scope != tt.wantScope || id != tt.wantID {
|
|
||||||
t.Errorf("parseScopeFromName(%q) = (%q, %q), want (%q, %q)",
|
|
||||||
tt.name, scope, id, tt.wantScope, tt.wantID)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFlattenCheckpointFile(t *testing.T) {
|
|
||||||
cf := &CheckpointFile{
|
|
||||||
From: 1000,
|
|
||||||
To: 1060,
|
|
||||||
Metrics: map[string]*CheckpointMetrics{
|
|
||||||
"cpu_load": {
|
|
||||||
Frequency: 60,
|
|
||||||
Start: 1000,
|
|
||||||
Data: []schema.Float{0.5, 0.7, schema.NaN},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Children: map[string]*CheckpointFile{
|
|
||||||
"socket0": {
|
|
||||||
Metrics: map[string]*CheckpointMetrics{
|
|
||||||
"mem_bw": {
|
|
||||||
Frequency: 60,
|
|
||||||
Start: 1000,
|
|
||||||
Data: []schema.Float{100.0, schema.NaN, 200.0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Children: make(map[string]*CheckpointFile),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
rows := flattenCheckpointFile(cf, "fritz", "node001", "node", "", nil)
|
|
||||||
|
|
||||||
// cpu_load: 2 non-NaN values at node scope
|
|
||||||
// mem_bw: 2 non-NaN values at socket0 scope
|
|
||||||
if len(rows) != 4 {
|
|
||||||
t.Fatalf("expected 4 rows, got %d", len(rows))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify a node-scope row
|
|
||||||
found := false
|
|
||||||
for _, r := range rows {
|
|
||||||
if r.Metric == "cpu_load" && r.Timestamp == 1000 {
|
|
||||||
found = true
|
|
||||||
if r.Cluster != "fritz" || r.Hostname != "node001" || r.Scope != "node" || r.Value != 0.5 {
|
|
||||||
t.Errorf("unexpected row: %+v", r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !found {
|
|
||||||
t.Error("expected cpu_load row at timestamp 1000")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify a socket-scope row
|
|
||||||
found = false
|
|
||||||
for _, r := range rows {
|
|
||||||
if r.Metric == "mem_bw" && r.Scope == "socket" && r.ScopeID == "0" {
|
|
||||||
found = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !found {
|
|
||||||
t.Error("expected mem_bw row with scope=socket, scope_id=0")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParquetArchiveRoundtrip(t *testing.T) {
|
|
||||||
tmpDir := t.TempDir()
|
|
||||||
|
|
||||||
// Create checkpoint files on disk (JSON format)
|
|
||||||
cpDir := filepath.Join(tmpDir, "checkpoints", "testcluster", "node001")
|
|
||||||
if err := os.MkdirAll(cpDir, 0o755); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
cf := &CheckpointFile{
|
|
||||||
From: 1000,
|
|
||||||
To: 1180,
|
|
||||||
Metrics: map[string]*CheckpointMetrics{
|
|
||||||
"cpu_load": {
|
|
||||||
Frequency: 60,
|
|
||||||
Start: 1000,
|
|
||||||
Data: []schema.Float{0.5, 0.7, 0.9},
|
|
||||||
},
|
|
||||||
"mem_used": {
|
|
||||||
Frequency: 60,
|
|
||||||
Start: 1000,
|
|
||||||
Data: []schema.Float{45.0, 46.0, 47.0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Children: map[string]*CheckpointFile{
|
|
||||||
"socket0": {
|
|
||||||
Metrics: map[string]*CheckpointMetrics{
|
|
||||||
"mem_bw": {
|
|
||||||
Frequency: 60,
|
|
||||||
Start: 1000,
|
|
||||||
Data: []schema.Float{100.0, 110.0, 120.0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Children: make(map[string]*CheckpointFile),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write JSON checkpoint
|
|
||||||
cpFile := filepath.Join(cpDir, "1000.json")
|
|
||||||
data, err := json.Marshal(cf)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
if err := os.WriteFile(cpFile, data, 0o644); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Archive to Parquet
|
|
||||||
archiveDir := filepath.Join(tmpDir, "archive")
|
|
||||||
rows, files, err := archiveCheckpointsToParquet(cpDir, "testcluster", "node001", 2000)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
if len(files) != 1 || files[0] != "1000.json" {
|
|
||||||
t.Fatalf("expected 1 file, got %v", files)
|
|
||||||
}
|
|
||||||
|
|
||||||
parquetFile := filepath.Join(archiveDir, "testcluster", "1000.parquet")
|
|
||||||
if err := writeParquetArchive(parquetFile, rows); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read back and verify
|
|
||||||
f, err := os.Open(parquetFile)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
stat, _ := f.Stat()
|
|
||||||
pf, err := pq.OpenFile(f, stat.Size())
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
reader := pq.NewGenericReader[ParquetMetricRow](pf)
|
|
||||||
readRows := make([]ParquetMetricRow, 100)
|
|
||||||
n, err := reader.Read(readRows)
|
|
||||||
if err != nil && n == 0 {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
readRows = readRows[:n]
|
|
||||||
reader.Close()
|
|
||||||
|
|
||||||
// We expect: cpu_load(3) + mem_used(3) + mem_bw(3) = 9 rows
|
|
||||||
if n != 9 {
|
|
||||||
t.Fatalf("expected 9 rows in parquet file, got %d", n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify cluster and hostname are set correctly
|
|
||||||
for _, r := range readRows {
|
|
||||||
if r.Cluster != "testcluster" {
|
|
||||||
t.Errorf("expected cluster=testcluster, got %s", r.Cluster)
|
|
||||||
}
|
|
||||||
if r.Hostname != "node001" {
|
|
||||||
t.Errorf("expected hostname=node001, got %s", r.Hostname)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Verify parquet file is smaller than JSON (compression working)
|
|
||||||
if stat.Size() == 0 {
|
|
||||||
t.Error("parquet file is empty")
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Logf("Parquet file size: %d bytes for %d rows", stat.Size(), n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLoadCheckpointFileFromDisk_JSON(t *testing.T) {
|
|
||||||
tmpDir := t.TempDir()
|
|
||||||
|
|
||||||
cf := &CheckpointFile{
|
|
||||||
From: 1000,
|
|
||||||
To: 1060,
|
|
||||||
Metrics: map[string]*CheckpointMetrics{
|
|
||||||
"test_metric": {
|
|
||||||
Frequency: 60,
|
|
||||||
Start: 1000,
|
|
||||||
Data: []schema.Float{1.0, 2.0, 3.0},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Children: make(map[string]*CheckpointFile),
|
|
||||||
}
|
|
||||||
|
|
||||||
filename := filepath.Join(tmpDir, "1000.json")
|
|
||||||
data, err := json.Marshal(cf)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
if err := os.WriteFile(filename, data, 0o644); err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
loaded, err := loadCheckpointFileFromDisk(filename)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if loaded.From != 1000 || loaded.To != 1060 {
|
|
||||||
t.Errorf("expected From=1000, To=1060, got From=%d, To=%d", loaded.From, loaded.To)
|
|
||||||
}
|
|
||||||
|
|
||||||
m, ok := loaded.Metrics["test_metric"]
|
|
||||||
if !ok {
|
|
||||||
t.Fatal("expected test_metric in loaded checkpoint")
|
|
||||||
}
|
|
||||||
if m.Frequency != 60 || m.Start != 1000 || len(m.Data) != 3 {
|
|
||||||
t.Errorf("unexpected metric data: %+v", m)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,787 +0,0 @@
|
|||||||
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
|
|
||||||
// All rights reserved. This file is part of cc-backend.
|
|
||||||
// Use of this source code is governed by a MIT-style
|
|
||||||
// license that can be found in the LICENSE file.
|
|
||||||
|
|
||||||
// Package metricstore provides walCheckpoint.go: WAL-based checkpoint implementation.
|
|
||||||
//
|
|
||||||
// This replaces the Avro shadow tree with an append-only Write-Ahead Log (WAL)
|
|
||||||
// per host, eliminating the extra memory overhead of the AvroStore and providing
|
|
||||||
// truly continuous (per-write) crash safety.
|
|
||||||
//
|
|
||||||
// # Architecture
|
|
||||||
//
|
|
||||||
// Metric write (DecodeLine)
|
|
||||||
// │
|
|
||||||
// ├─► WriteToLevel() → main MemoryStore (unchanged)
|
|
||||||
// │
|
|
||||||
// └─► WALMessages channel
|
|
||||||
// │
|
|
||||||
// ▼
|
|
||||||
// WALStaging goroutine
|
|
||||||
// │
|
|
||||||
// ▼
|
|
||||||
// checkpoints/cluster/host/current.wal (append-only, binary)
|
|
||||||
//
|
|
||||||
// Periodic checkpoint (Checkpointing goroutine):
|
|
||||||
// 1. Write <timestamp>.bin snapshot (column-oriented, from main tree)
|
|
||||||
// 2. Signal WALStaging to truncate current.wal per host
|
|
||||||
//
|
|
||||||
// On restart (FromCheckpoint):
|
|
||||||
// 1. Load most recent <timestamp>.bin snapshot
|
|
||||||
// 2. Replay current.wal (overwrite-safe: buffer.write handles duplicate timestamps)
|
|
||||||
//
|
|
||||||
// # WAL Record Format
|
|
||||||
//
|
|
||||||
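// Every WAL file starts with a 4B file header magic (0xCC1DA701), written when
// the file is created. It is followed by zero or more records:
//
// [4B magic 0xCC1DA7A1][4B payload_len][payload][4B CRC32]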
|
|
||||||
//
|
|
||||||
// payload:
|
|
||||||
// [8B timestamp int64]
|
|
||||||
// [2B metric_name_len uint16][N metric name bytes]
|
|
||||||
// [1B selector_count uint8]
|
|
||||||
// per selector: [1B selector_len uint8][M selector bytes]
|
|
||||||
// [4B value float32 bits]
|
|
||||||
//
|
|
||||||
// # Binary Snapshot Format
|
|
||||||
//
|
|
||||||
// [4B magic 0xCC5B0001][8B from int64][8B to int64]
|
|
||||||
// Level tree (recursive):
|
|
||||||
// [4B num_metrics uint32]
|
|
||||||
// per metric:
|
|
||||||
// [2B name_len uint16][N name bytes]
|
|
||||||
// [8B frequency int64][8B start int64]
|
|
||||||
// [4B num_values uint32][num_values × 4B float32]
|
|
||||||
// [4B num_children uint32]
|
|
||||||
// per child: [2B name_len uint16][N name bytes] + Level (recursive)
|
|
||||||
package metricstore
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"context"
|
|
||||||
"encoding/binary"
|
|
||||||
"fmt"
|
|
||||||
"hash/crc32"
|
|
||||||
"io"
|
|
||||||
"math"
|
|
||||||
"os"
|
|
||||||
"path"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"sync/atomic"
|
|
||||||
|
|
||||||
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
|
|
||||||
"github.com/ClusterCockpit/cc-lib/v2/schema"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Magic numbers identifying the binary on-disk formats.
const (
	// walFileMagic is written once at the start of a newly created WAL file.
	walFileMagic uint32 = 0xCC1DA701
	// walRecordMagic prefixes every record inside a WAL file.
	walRecordMagic uint32 = 0xCC1DA7A1
	// snapFileMagic is the leading field of a binary snapshot file.
	snapFileMagic uint32 = 0xCC5B0001
)
|
|
||||||
|
|
||||||
// WALMessages is the channel for sending metric writes to the WAL staging goroutine.
|
|
||||||
// Buffered to allow burst writes without blocking the metric ingestion path.
|
|
||||||
var WALMessages = make(chan *WALMessage, 4096)
|
|
||||||
|
|
||||||
// walRotateCh is used by the checkpoint goroutine to request WAL file rotation
|
|
||||||
// (close, delete, reopen) after a binary snapshot has been written.
|
|
||||||
var walRotateCh = make(chan walRotateReq, 256)
|
|
||||||
|
|
||||||
// WALMessage represents a single metric write to be appended to the WAL.
// Cluster and Node are NOT stored in the WAL record (inferred from file path).
type WALMessage struct {
	MetricName string       // metric name; encoded with a 2-byte length prefix
	Cluster    string       // used only to build the host directory path
	Node       string       // used only to build the host directory path
	Selector   []string     // selector path components; each encoded with a 1-byte length prefix
	Value      schema.Float // sample value; stored in the record as float32 bits
	Timestamp  int64        // sample timestamp; stored as 8 little-endian bytes
}
|
|
||||||
|
|
||||||
// walRotateReq requests WAL file rotation for a specific host directory.
// The done channel is closed by the WAL goroutine when rotation is complete.
type walRotateReq struct {
	hostDir string        // host directory whose current.wal should be rotated
	done    chan struct{} // closed by the WAL goroutine once the request was handled
}
|
|
||||||
|
|
||||||
// walFileState holds an open WAL file handle for one host directory.
type walFileState struct {
	f *os.File // handle of the host's current.wal, opened in append mode
}
|
|
||||||
|
|
||||||
// WALStaging starts a background goroutine that receives WALMessage items
// and appends binary WAL records to per-host current.wal files.
// Also handles WAL rotation requests from the checkpoint goroutine.
//
// The goroutine is registered on wg and runs until ctx is cancelled or
// WALMessages is closed. On cancellation it first drains whatever is already
// queued on WALMessages and walRotateCh. All file handles it opened are closed
// when it exits.
//
// If the configured checkpoint file format is "json" the goroutine returns
// immediately; in that case nothing reads from WALMessages or walRotateCh here.
func WALStaging(wg *sync.WaitGroup, ctx context.Context) {
	wg.Go(func() {
		// WAL files are not used with JSON checkpoints.
		if Keys.Checkpoints.FileFormat == "json" {
			return
		}

		// Open WAL handles keyed by host directory. Only this goroutine
		// touches the map, so it needs no locking.
		hostFiles := make(map[string]*walFileState)

		// Close every handle that is still open when the goroutine exits.
		defer func() {
			for _, ws := range hostFiles {
				if ws.f != nil {
					ws.f.Close()
				}
			}
		}()

		// getOrOpenWAL returns the cached handle for hostDir or opens
		// (creating directory and file as needed) <hostDir>/current.wal in
		// append mode. Returns nil after logging if anything fails.
		getOrOpenWAL := func(hostDir string) *os.File {
			ws, ok := hostFiles[hostDir]
			if ok {
				return ws.f
			}

			if err := os.MkdirAll(hostDir, CheckpointDirPerms); err != nil {
				cclog.Errorf("[METRICSTORE]> WAL: mkdir %s: %v", hostDir, err)
				return nil
			}

			walPath := path.Join(hostDir, "current.wal")
			f, err := os.OpenFile(walPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, CheckpointFilePerms)
			if err != nil {
				cclog.Errorf("[METRICSTORE]> WAL: open %s: %v", walPath, err)
				return nil
			}

			// Write file header magic if file is new (empty).
			// A Stat error is not reported; the header is simply not written then.
			info, err := f.Stat()
			if err == nil && info.Size() == 0 {
				var hdr [4]byte
				binary.LittleEndian.PutUint32(hdr[:], walFileMagic)
				if _, err := f.Write(hdr[:]); err != nil {
					cclog.Errorf("[METRICSTORE]> WAL: write header %s: %v", walPath, err)
					f.Close()
					return nil
				}
			}

			hostFiles[hostDir] = &walFileState{f: f}
			return f
		}

		// processMsg appends one record to the WAL of the message's host.
		// Failures are logged and the message is dropped.
		processMsg := func(msg *WALMessage) {
			hostDir := path.Join(Keys.Checkpoints.RootDir, msg.Cluster, msg.Node)
			f := getOrOpenWAL(hostDir)
			if f == nil {
				return
			}
			if err := writeWALRecord(f, msg); err != nil {
				cclog.Errorf("[METRICSTORE]> WAL: write record: %v", err)
			}
		}

		// processRotate closes and deletes the host's current.wal and forgets
		// the handle, so the next message for that host recreates the file.
		// The file is only removed when a handle for it is currently open.
		// req.done is closed in every case to release the waiting caller.
		processRotate := func(req walRotateReq) {
			ws, ok := hostFiles[req.hostDir]
			if ok && ws.f != nil {
				ws.f.Close()
				walPath := path.Join(req.hostDir, "current.wal")
				if err := os.Remove(walPath); err != nil && !os.IsNotExist(err) {
					cclog.Errorf("[METRICSTORE]> WAL: remove %s: %v", walPath, err)
				}
				delete(hostFiles, req.hostDir)
			}
			close(req.done)
		}

		// drain handles everything already queued on both channels without
		// blocking; the default case ends it as soon as both are empty.
		drain := func() {
			for {
				select {
				case msg, ok := <-WALMessages:
					if !ok {
						return
					}
					processMsg(msg)
				case req := <-walRotateCh:
					processRotate(req)
				default:
					return
				}
			}
		}

		// Main loop: serve messages and rotation requests until shutdown.
		for {
			select {
			case <-ctx.Done():
				drain()
				return
			case msg, ok := <-WALMessages:
				if !ok {
					return
				}
				processMsg(msg)
			case req := <-walRotateCh:
				processRotate(req)
			}
		}
	})
}
|
|
||||||
|
|
||||||
// RotateWALFiles sends rotation requests for the given host directories
|
|
||||||
// and blocks until all rotations complete.
|
|
||||||
func RotateWALFiles(hostDirs []string) {
|
|
||||||
dones := make([]chan struct{}, len(hostDirs))
|
|
||||||
for i, dir := range hostDirs {
|
|
||||||
dones[i] = make(chan struct{})
|
|
||||||
walRotateCh <- walRotateReq{hostDir: dir, done: dones[i]}
|
|
||||||
}
|
|
||||||
for _, done := range dones {
|
|
||||||
<-done
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// buildWALPayload encodes a WALMessage into a binary payload (without magic/length/CRC).
|
|
||||||
func buildWALPayload(msg *WALMessage) []byte {
|
|
||||||
size := 8 + 2 + len(msg.MetricName) + 1 + 4
|
|
||||||
for _, s := range msg.Selector {
|
|
||||||
size += 1 + len(s)
|
|
||||||
}
|
|
||||||
|
|
||||||
buf := make([]byte, 0, size)
|
|
||||||
|
|
||||||
// Timestamp (8 bytes, little-endian int64)
|
|
||||||
var ts [8]byte
|
|
||||||
binary.LittleEndian.PutUint64(ts[:], uint64(msg.Timestamp))
|
|
||||||
buf = append(buf, ts[:]...)
|
|
||||||
|
|
||||||
// Metric name (2-byte length prefix + bytes)
|
|
||||||
var mLen [2]byte
|
|
||||||
binary.LittleEndian.PutUint16(mLen[:], uint16(len(msg.MetricName)))
|
|
||||||
buf = append(buf, mLen[:]...)
|
|
||||||
buf = append(buf, msg.MetricName...)
|
|
||||||
|
|
||||||
// Selector count (1 byte)
|
|
||||||
buf = append(buf, byte(len(msg.Selector)))
|
|
||||||
|
|
||||||
// Selectors (1-byte length prefix + bytes each)
|
|
||||||
for _, sel := range msg.Selector {
|
|
||||||
buf = append(buf, byte(len(sel)))
|
|
||||||
buf = append(buf, sel...)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Value (4 bytes, float32 bit representation)
|
|
||||||
var val [4]byte
|
|
||||||
binary.LittleEndian.PutUint32(val[:], math.Float32bits(float32(msg.Value)))
|
|
||||||
buf = append(buf, val[:]...)
|
|
||||||
|
|
||||||
return buf
|
|
||||||
}
|
|
||||||
|
|
||||||
// writeWALRecord appends a binary WAL record to the file.
|
|
||||||
// Format: [4B magic][4B payload_len][payload][4B CRC32]
|
|
||||||
func writeWALRecord(f *os.File, msg *WALMessage) error {
|
|
||||||
payload := buildWALPayload(msg)
|
|
||||||
crc := crc32.ChecksumIEEE(payload)
|
|
||||||
|
|
||||||
record := make([]byte, 0, 4+4+len(payload)+4)
|
|
||||||
|
|
||||||
var magic [4]byte
|
|
||||||
binary.LittleEndian.PutUint32(magic[:], walRecordMagic)
|
|
||||||
record = append(record, magic[:]...)
|
|
||||||
|
|
||||||
var pLen [4]byte
|
|
||||||
binary.LittleEndian.PutUint32(pLen[:], uint32(len(payload)))
|
|
||||||
record = append(record, pLen[:]...)
|
|
||||||
|
|
||||||
record = append(record, payload...)
|
|
||||||
|
|
||||||
var crcBytes [4]byte
|
|
||||||
binary.LittleEndian.PutUint32(crcBytes[:], crc)
|
|
||||||
record = append(record, crcBytes[:]...)
|
|
||||||
|
|
||||||
_, err := f.Write(record)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// readWALRecord reads one WAL record from the reader.
|
|
||||||
// Returns (nil, nil) on clean EOF. Returns error on data corruption.
|
|
||||||
// A CRC mismatch indicates a truncated trailing record (expected on crash).
|
|
||||||
func readWALRecord(r io.Reader) (*WALMessage, error) {
|
|
||||||
var magic uint32
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &magic); err != nil {
|
|
||||||
if err == io.EOF {
|
|
||||||
return nil, nil // Clean EOF
|
|
||||||
}
|
|
||||||
return nil, fmt.Errorf("read record magic: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if magic != walRecordMagic {
|
|
||||||
return nil, fmt.Errorf("invalid record magic 0x%08X (expected 0x%08X)", magic, walRecordMagic)
|
|
||||||
}
|
|
||||||
|
|
||||||
var payloadLen uint32
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &payloadLen); err != nil {
|
|
||||||
return nil, fmt.Errorf("read payload length: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if payloadLen > 1<<20 { // 1 MB sanity limit
|
|
||||||
return nil, fmt.Errorf("record payload too large: %d bytes", payloadLen)
|
|
||||||
}
|
|
||||||
|
|
||||||
payload := make([]byte, payloadLen)
|
|
||||||
if _, err := io.ReadFull(r, payload); err != nil {
|
|
||||||
return nil, fmt.Errorf("read payload: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var storedCRC uint32
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &storedCRC); err != nil {
|
|
||||||
return nil, fmt.Errorf("read CRC: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if crc32.ChecksumIEEE(payload) != storedCRC {
|
|
||||||
return nil, fmt.Errorf("CRC mismatch (truncated write or corruption)")
|
|
||||||
}
|
|
||||||
|
|
||||||
return parseWALPayload(payload)
|
|
||||||
}
|
|
||||||
|
|
||||||
// parseWALPayload decodes a binary payload into a WALMessage.
|
|
||||||
func parseWALPayload(payload []byte) (*WALMessage, error) {
|
|
||||||
if len(payload) < 8+2+1+4 {
|
|
||||||
return nil, fmt.Errorf("payload too short: %d bytes", len(payload))
|
|
||||||
}
|
|
||||||
|
|
||||||
offset := 0
|
|
||||||
|
|
||||||
// Timestamp (8 bytes)
|
|
||||||
ts := int64(binary.LittleEndian.Uint64(payload[offset : offset+8]))
|
|
||||||
offset += 8
|
|
||||||
|
|
||||||
// Metric name (2-byte length + bytes)
|
|
||||||
if offset+2 > len(payload) {
|
|
||||||
return nil, fmt.Errorf("metric name length overflows payload")
|
|
||||||
}
|
|
||||||
mLen := int(binary.LittleEndian.Uint16(payload[offset : offset+2]))
|
|
||||||
offset += 2
|
|
||||||
|
|
||||||
if offset+mLen > len(payload) {
|
|
||||||
return nil, fmt.Errorf("metric name overflows payload")
|
|
||||||
}
|
|
||||||
metricName := string(payload[offset : offset+mLen])
|
|
||||||
offset += mLen
|
|
||||||
|
|
||||||
// Selector count (1 byte)
|
|
||||||
if offset >= len(payload) {
|
|
||||||
return nil, fmt.Errorf("selector count overflows payload")
|
|
||||||
}
|
|
||||||
selCount := int(payload[offset])
|
|
||||||
offset++
|
|
||||||
|
|
||||||
selectors := make([]string, selCount)
|
|
||||||
for i := range selCount {
|
|
||||||
if offset >= len(payload) {
|
|
||||||
return nil, fmt.Errorf("selector[%d] length overflows payload", i)
|
|
||||||
}
|
|
||||||
sLen := int(payload[offset])
|
|
||||||
offset++
|
|
||||||
|
|
||||||
if offset+sLen > len(payload) {
|
|
||||||
return nil, fmt.Errorf("selector[%d] data overflows payload", i)
|
|
||||||
}
|
|
||||||
selectors[i] = string(payload[offset : offset+sLen])
|
|
||||||
offset += sLen
|
|
||||||
}
|
|
||||||
|
|
||||||
// Value (4 bytes, float32 bits)
|
|
||||||
if offset+4 > len(payload) {
|
|
||||||
return nil, fmt.Errorf("value overflows payload")
|
|
||||||
}
|
|
||||||
bits := binary.LittleEndian.Uint32(payload[offset : offset+4])
|
|
||||||
value := schema.Float(math.Float32frombits(bits))
|
|
||||||
|
|
||||||
return &WALMessage{
|
|
||||||
MetricName: metricName,
|
|
||||||
Timestamp: ts,
|
|
||||||
Selector: selectors,
|
|
||||||
Value: value,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// loadWALFile reads a WAL file and replays all valid records into the Level tree.
|
|
||||||
// l is the host-level node. Corrupt or partial trailing records are silently skipped
|
|
||||||
// (expected on crash). Records older than 'from' are skipped.
|
|
||||||
func (l *Level) loadWALFile(m *MemoryStore, f *os.File, from int64) error {
|
|
||||||
br := bufio.NewReader(f)
|
|
||||||
|
|
||||||
// Verify file header magic.
|
|
||||||
var fileMagic uint32
|
|
||||||
if err := binary.Read(br, binary.LittleEndian, &fileMagic); err != nil {
|
|
||||||
if err == io.EOF {
|
|
||||||
return nil // Empty file, no data
|
|
||||||
}
|
|
||||||
return fmt.Errorf("[METRICSTORE]> WAL: read file header: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if fileMagic != walFileMagic {
|
|
||||||
return fmt.Errorf("[METRICSTORE]> WAL: invalid file magic 0x%08X (expected 0x%08X)", fileMagic, walFileMagic)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cache level lookups to avoid repeated tree traversal.
|
|
||||||
lvlCache := make(map[string]*Level)
|
|
||||||
|
|
||||||
for {
|
|
||||||
msg, err := readWALRecord(br)
|
|
||||||
if err != nil {
|
|
||||||
// Truncated trailing record is expected after a crash; stop replaying.
|
|
||||||
cclog.Debugf("[METRICSTORE]> WAL: stopping replay at corrupted/partial record: %v", err)
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if msg == nil {
|
|
||||||
break // Clean EOF
|
|
||||||
}
|
|
||||||
|
|
||||||
if msg.Timestamp < from {
|
|
||||||
continue // Older than retention window
|
|
||||||
}
|
|
||||||
|
|
||||||
minfo, ok := m.Metrics[msg.MetricName]
|
|
||||||
if !ok {
|
|
||||||
continue // Unknown metric (config may have changed)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cache key is the null-separated selector path.
|
|
||||||
cacheKey := joinSelector(msg.Selector)
|
|
||||||
lvl, ok := lvlCache[cacheKey]
|
|
||||||
if !ok {
|
|
||||||
lvl = l.findLevelOrCreate(msg.Selector, len(m.Metrics))
|
|
||||||
lvlCache[cacheKey] = lvl
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write directly to the buffer, same as WriteToLevel but without the
|
|
||||||
// global level lookup (we already have the right level).
|
|
||||||
lvl.lock.Lock()
|
|
||||||
b := lvl.metrics[minfo.offset]
|
|
||||||
if b == nil {
|
|
||||||
b = newBuffer(msg.Timestamp, minfo.Frequency)
|
|
||||||
lvl.metrics[minfo.offset] = b
|
|
||||||
}
|
|
||||||
nb, writeErr := b.write(msg.Timestamp, msg.Value)
|
|
||||||
if writeErr == nil && b != nb {
|
|
||||||
lvl.metrics[minfo.offset] = nb
|
|
||||||
}
|
|
||||||
// Ignore write errors for timestamps before buffer start (can happen when
|
|
||||||
// replaying WAL entries that predate a loaded snapshot's start time).
|
|
||||||
lvl.lock.Unlock()
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// joinSelector builds a cache key from a selector slice by joining its
// elements with null bytes. An empty or nil selector yields the empty string.
func joinSelector(sel []string) string {
	// strings.Join sizes the result once and avoids building a temporary
	// "\x00"+element string per entry.
	return strings.Join(sel, "\x00")
}
|
|
||||||
|
|
||||||
// ToCheckpointWAL writes binary snapshot files for all hosts in parallel.
|
|
||||||
// Returns the number of files written, the list of host directories that were
|
|
||||||
// successfully checkpointed (for WAL rotation), and any errors.
|
|
||||||
func (m *MemoryStore) ToCheckpointWAL(dir string, from, to int64) (int, []string, error) {
|
|
||||||
// Collect all cluster/host pairs.
|
|
||||||
m.root.lock.RLock()
|
|
||||||
totalHosts := 0
|
|
||||||
for _, l1 := range m.root.children {
|
|
||||||
l1.lock.RLock()
|
|
||||||
totalHosts += len(l1.children)
|
|
||||||
l1.lock.RUnlock()
|
|
||||||
}
|
|
||||||
m.root.lock.RUnlock()
|
|
||||||
|
|
||||||
levels := make([]*Level, 0, totalHosts)
|
|
||||||
selectors := make([][]string, 0, totalHosts)
|
|
||||||
|
|
||||||
m.root.lock.RLock()
|
|
||||||
for sel1, l1 := range m.root.children {
|
|
||||||
l1.lock.RLock()
|
|
||||||
for sel2, l2 := range l1.children {
|
|
||||||
levels = append(levels, l2)
|
|
||||||
selectors = append(selectors, []string{sel1, sel2})
|
|
||||||
}
|
|
||||||
l1.lock.RUnlock()
|
|
||||||
}
|
|
||||||
m.root.lock.RUnlock()
|
|
||||||
|
|
||||||
type workItem struct {
|
|
||||||
level *Level
|
|
||||||
hostDir string
|
|
||||||
selector []string
|
|
||||||
}
|
|
||||||
|
|
||||||
n, errs := int32(0), int32(0)
|
|
||||||
var successDirs []string
|
|
||||||
var successMu sync.Mutex
|
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
wg.Add(Keys.NumWorkers)
|
|
||||||
work := make(chan workItem, Keys.NumWorkers*2)
|
|
||||||
|
|
||||||
for range Keys.NumWorkers {
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
for wi := range work {
|
|
||||||
err := wi.level.toCheckpointBinary(wi.hostDir, from, to, m)
|
|
||||||
if err != nil {
|
|
||||||
if err == ErrNoNewArchiveData {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
cclog.Errorf("[METRICSTORE]> binary checkpoint error for %s: %v", wi.hostDir, err)
|
|
||||||
atomic.AddInt32(&errs, 1)
|
|
||||||
} else {
|
|
||||||
atomic.AddInt32(&n, 1)
|
|
||||||
successMu.Lock()
|
|
||||||
successDirs = append(successDirs, wi.hostDir)
|
|
||||||
successMu.Unlock()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
for i := range levels {
|
|
||||||
hostDir := path.Join(dir, path.Join(selectors[i]...))
|
|
||||||
work <- workItem{
|
|
||||||
level: levels[i],
|
|
||||||
hostDir: hostDir,
|
|
||||||
selector: selectors[i],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
close(work)
|
|
||||||
wg.Wait()
|
|
||||||
|
|
||||||
if errs > 0 {
|
|
||||||
return int(n), successDirs, fmt.Errorf("[METRICSTORE]> %d errors during binary checkpoint (%d successes)", errs, n)
|
|
||||||
}
|
|
||||||
return int(n), successDirs, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// toCheckpointBinary writes a binary snapshot file for a single host-level node.
|
|
||||||
// Uses atomic rename (write to .tmp then rename) to avoid partial reads on crash.
|
|
||||||
func (l *Level) toCheckpointBinary(dir string, from, to int64, m *MemoryStore) error {
|
|
||||||
cf, err := l.toCheckpointFile(from, to, m)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if cf == nil {
|
|
||||||
return ErrNoNewArchiveData
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := os.MkdirAll(dir, CheckpointDirPerms); err != nil {
|
|
||||||
return fmt.Errorf("mkdir %s: %w", dir, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write to a temp file first, then rename (atomic on POSIX).
|
|
||||||
tmpPath := path.Join(dir, fmt.Sprintf("%d.bin.tmp", from))
|
|
||||||
finalPath := path.Join(dir, fmt.Sprintf("%d.bin", from))
|
|
||||||
|
|
||||||
f, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("open binary snapshot %s: %w", tmpPath, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
bw := bufio.NewWriter(f)
|
|
||||||
if err := writeBinarySnapshotFile(bw, cf); err != nil {
|
|
||||||
f.Close()
|
|
||||||
os.Remove(tmpPath)
|
|
||||||
return fmt.Errorf("write binary snapshot: %w", err)
|
|
||||||
}
|
|
||||||
if err := bw.Flush(); err != nil {
|
|
||||||
f.Close()
|
|
||||||
os.Remove(tmpPath)
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
f.Close()
|
|
||||||
|
|
||||||
return os.Rename(tmpPath, finalPath)
|
|
||||||
}
|
|
||||||
|
|
||||||
// writeBinarySnapshotFile writes the binary snapshot file header and level tree.
|
|
||||||
func writeBinarySnapshotFile(w io.Writer, cf *CheckpointFile) error {
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, snapFileMagic); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, cf.From); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, cf.To); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
return writeBinaryLevel(w, cf)
|
|
||||||
}
|
|
||||||
|
|
||||||
// writeBinaryLevel recursively writes a CheckpointFile level in binary format.
|
|
||||||
func writeBinaryLevel(w io.Writer, cf *CheckpointFile) error {
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Metrics))); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for name, metric := range cf.Metrics {
|
|
||||||
if err := writeString16(w, name); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, metric.Frequency); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, metric.Start); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, uint32(len(metric.Data))); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
for _, v := range metric.Data {
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, math.Float32bits(float32(v))); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := binary.Write(w, binary.LittleEndian, uint32(len(cf.Children))); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
for name, child := range cf.Children {
|
|
||||||
if err := writeString16(w, name); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if err := writeBinaryLevel(w, child); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// writeString16 writes s to w as a 2-byte little-endian length prefix followed
// by the string bytes.
//
// It returns an error, without writing anything, if s is longer than 65535
// bytes: such a length cannot be represented in the prefix, and truncating it
// would make the stream undecodable. Otherwise the first write error is
// returned.
func writeString16(w io.Writer, s string) error {
	if len(s) > math.MaxUint16 {
		return fmt.Errorf("string too long for 16-bit length prefix: %d bytes", len(s))
	}

	var prefix [2]byte
	binary.LittleEndian.PutUint16(prefix[:], uint16(len(s)))
	if _, err := w.Write(prefix[:]); err != nil {
		return err
	}

	_, err := io.WriteString(w, s)
	return err
}
|
|
||||||
|
|
||||||
// loadBinaryFile reads a binary snapshot file and loads data into the Level tree.
|
|
||||||
// The retention check (from) is applied to the file's 'to' timestamp.
|
|
||||||
func (l *Level) loadBinaryFile(m *MemoryStore, f *os.File, from int64) error {
|
|
||||||
br := bufio.NewReader(f)
|
|
||||||
|
|
||||||
var magic uint32
|
|
||||||
if err := binary.Read(br, binary.LittleEndian, &magic); err != nil {
|
|
||||||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read magic: %w", err)
|
|
||||||
}
|
|
||||||
if magic != snapFileMagic {
|
|
||||||
return fmt.Errorf("[METRICSTORE]> binary snapshot: invalid magic 0x%08X (expected 0x%08X)", magic, snapFileMagic)
|
|
||||||
}
|
|
||||||
|
|
||||||
var fileFrom, fileTo int64
|
|
||||||
if err := binary.Read(br, binary.LittleEndian, &fileFrom); err != nil {
|
|
||||||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read from: %w", err)
|
|
||||||
}
|
|
||||||
if err := binary.Read(br, binary.LittleEndian, &fileTo); err != nil {
|
|
||||||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read to: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if fileTo != 0 && fileTo < from {
|
|
||||||
return nil // File is older than retention window, skip it
|
|
||||||
}
|
|
||||||
|
|
||||||
cf, err := readBinaryLevel(br)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("[METRICSTORE]> binary snapshot: read level tree: %w", err)
|
|
||||||
}
|
|
||||||
cf.From = fileFrom
|
|
||||||
cf.To = fileTo
|
|
||||||
|
|
||||||
return l.loadFile(cf, m)
|
|
||||||
}
|
|
||||||
|
|
||||||
// readBinaryLevel recursively reads a level from the binary snapshot format.
|
|
||||||
func readBinaryLevel(r io.Reader) (*CheckpointFile, error) {
|
|
||||||
cf := &CheckpointFile{
|
|
||||||
Metrics: make(map[string]*CheckpointMetrics),
|
|
||||||
Children: make(map[string]*CheckpointFile),
|
|
||||||
}
|
|
||||||
|
|
||||||
var numMetrics uint32
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &numMetrics); err != nil {
|
|
||||||
return nil, fmt.Errorf("read num_metrics: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for range numMetrics {
|
|
||||||
name, err := readString16(r)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("read metric name: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var freq, start int64
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &freq); err != nil {
|
|
||||||
return nil, fmt.Errorf("read frequency for %s: %w", name, err)
|
|
||||||
}
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &start); err != nil {
|
|
||||||
return nil, fmt.Errorf("read start for %s: %w", name, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
var numValues uint32
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &numValues); err != nil {
|
|
||||||
return nil, fmt.Errorf("read num_values for %s: %w", name, err)
|
|
||||||
}
|
|
||||||
|
|
||||||
data := make([]schema.Float, numValues)
|
|
||||||
for i := range numValues {
|
|
||||||
var bits uint32
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &bits); err != nil {
|
|
||||||
return nil, fmt.Errorf("read value[%d] for %s: %w", i, name, err)
|
|
||||||
}
|
|
||||||
data[i] = schema.Float(math.Float32frombits(bits))
|
|
||||||
}
|
|
||||||
|
|
||||||
cf.Metrics[name] = &CheckpointMetrics{
|
|
||||||
Frequency: freq,
|
|
||||||
Start: start,
|
|
||||||
Data: data,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var numChildren uint32
|
|
||||||
if err := binary.Read(r, binary.LittleEndian, &numChildren); err != nil {
|
|
||||||
return nil, fmt.Errorf("read num_children: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for range numChildren {
|
|
||||||
childName, err := readString16(r)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("read child name: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
child, err := readBinaryLevel(r)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("read child %s: %w", childName, err)
|
|
||||||
}
|
|
||||||
cf.Children[childName] = child
|
|
||||||
}
|
|
||||||
|
|
||||||
return cf, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// readString16 reads a string from r that is encoded as a 2-byte
// little-endian length prefix followed by that many bytes.
//
// Read errors are returned unwrapped: io.EOF when no bytes were available for
// the part being read, io.ErrUnexpectedEOF when it was cut short.
func readString16(r io.Reader) (string, error) {
	var prefix [2]byte
	if _, err := io.ReadFull(r, prefix[:]); err != nil {
		return "", err
	}

	raw := make([]byte, binary.LittleEndian.Uint16(prefix[:]))
	_, err := io.ReadFull(r, raw)
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
|
|
||||||
@@ -36,9 +36,6 @@
|
|||||||
const { query: initq } = init();
|
const { query: initq } = init();
|
||||||
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
|
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
|
||||||
|
|
||||||
/* State Init */
|
|
||||||
let activeTab = $state("");
|
|
||||||
|
|
||||||
/* Derived */
|
/* Derived */
|
||||||
const subClusters = $derived($initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || []);
|
const subClusters = $derived($initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || []);
|
||||||
</script>
|
</script>
|
||||||
@@ -66,22 +63,22 @@
|
|||||||
</Row>
|
</Row>
|
||||||
{:else}
|
{:else}
|
||||||
<Card class="overflow-auto" style="height: auto;">
|
<Card class="overflow-auto" style="height: auto;">
|
||||||
<TabContent on:tab={(e) => (activeTab = e.detail)}>
|
<TabContent>
|
||||||
<TabPane tabId="status-dash" tab="Status" active>
|
<TabPane tabId="status-dash" tab="Status" active>
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<StatusDash clusters={$initq.data.clusters} {presetCluster} loadMe={(activeTab === "status-dash")}></StatusDash>
|
<StatusDash clusters={$initq.data.clusters} {presetCluster}></StatusDash>
|
||||||
</CardBody>
|
</CardBody>
|
||||||
</TabPane>
|
</TabPane>
|
||||||
|
|
||||||
<TabPane tabId="health-dash" tab="Metric Status">
|
<TabPane tabId="health-dash" tab="Metric Status">
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<HealthDash {presetCluster} loadMe={(activeTab === "health-dash")}></HealthDash>
|
<HealthDash {presetCluster}></HealthDash>
|
||||||
</CardBody>
|
</CardBody>
|
||||||
</TabPane>
|
</TabPane>
|
||||||
|
|
||||||
<TabPane tabId="usage-dash" tab="Cluster Usage">
|
<TabPane tabId="usage-dash" tab="Cluster Usage">
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<UsageDash {presetCluster} {useCbColors} loadMe={(activeTab === "usage-dash")}></UsageDash>
|
<UsageDash {presetCluster} {useCbColors}></UsageDash>
|
||||||
</CardBody>
|
</CardBody>
|
||||||
</TabPane>
|
</TabPane>
|
||||||
|
|
||||||
@@ -89,7 +86,7 @@
|
|||||||
{#each subClusters.map(sc => sc.name) as scn}
|
{#each subClusters.map(sc => sc.name) as scn}
|
||||||
<TabPane tabId="{scn}-usage-dash" tab="{scn.charAt(0).toUpperCase() + scn.slice(1)} Usage">
|
<TabPane tabId="{scn}-usage-dash" tab="{scn.charAt(0).toUpperCase() + scn.slice(1)} Usage">
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<UsageDash {presetCluster} presetSubCluster={scn} {useCbColors} loadMe={(activeTab === `${scn}-usage-dash`)}></UsageDash>
|
<UsageDash {presetCluster} presetSubCluster={scn} {useCbColors}></UsageDash>
|
||||||
</CardBody>
|
</CardBody>
|
||||||
</TabPane>
|
</TabPane>
|
||||||
{/each}
|
{/each}
|
||||||
@@ -97,7 +94,7 @@
|
|||||||
|
|
||||||
<TabPane tabId="metric-dash" tab="Statistics">
|
<TabPane tabId="metric-dash" tab="Statistics">
|
||||||
<CardBody>
|
<CardBody>
|
||||||
<StatisticsDash {presetCluster} {useCbColors} loadMe={(activeTab === "metric-dash")}></StatisticsDash>
|
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
|
||||||
</CardBody>
|
</CardBody>
|
||||||
</TabPane>
|
</TabPane>
|
||||||
</TabContent>
|
</TabContent>
|
||||||
|
|||||||
@@ -6,7 +6,6 @@
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
import { onMount } from "svelte";
|
|
||||||
import {
|
import {
|
||||||
Row,
|
Row,
|
||||||
Col,
|
Col,
|
||||||
@@ -29,7 +28,6 @@
|
|||||||
/* Svelte 5 Props */
|
/* Svelte 5 Props */
|
||||||
let {
|
let {
|
||||||
presetCluster,
|
presetCluster,
|
||||||
loadMe = false,
|
|
||||||
} = $props();
|
} = $props();
|
||||||
|
|
||||||
/* Const Init */
|
/* Const Init */
|
||||||
@@ -56,7 +54,7 @@
|
|||||||
/* Derived */
|
/* Derived */
|
||||||
let cluster = $derived(presetCluster);
|
let cluster = $derived(presetCluster);
|
||||||
|
|
||||||
const statusQuery = $derived(loadMe ? queryStore({
|
const statusQuery = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query (
|
query (
|
||||||
@@ -86,7 +84,7 @@
|
|||||||
sorting: querySorting,
|
sorting: querySorting,
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}) : null);
|
}));
|
||||||
|
|
||||||
let healthTableData = $derived.by(() => {
|
let healthTableData = $derived.by(() => {
|
||||||
if ($statusQuery?.data) {
|
if ($statusQuery?.data) {
|
||||||
@@ -142,9 +140,6 @@
|
|||||||
healthTableData = [...pendingHealthData];
|
healthTableData = [...pendingHealthData];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* On Mount */
|
|
||||||
onMount(() => sortBy('healthState'));
|
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<!-- Refresher and space for other options -->
|
<!-- Refresher and space for other options -->
|
||||||
@@ -162,16 +157,16 @@
|
|||||||
<hr/>
|
<hr/>
|
||||||
|
|
||||||
<!-- Node Health Pis, later Charts -->
|
<!-- Node Health Pis, later Charts -->
|
||||||
{#if $statusQuery?.fetching}
|
{#if $statusQuery.fetching}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Spinner />
|
<Spinner />
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery?.error}
|
{:else if $statusQuery.error}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">Status Query (States): {$statusQuery?.error?.message}</Card>
|
<Card body color="danger">Status Query (States): {$statusQuery.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery?.data?.nodeStates}
|
{:else if $statusQuery?.data?.nodeStates}
|
||||||
@@ -265,19 +260,19 @@
|
|||||||
<hr/>
|
<hr/>
|
||||||
|
|
||||||
<!-- Tabular Info About Node States and Missing Metrics -->
|
<!-- Tabular Info About Node States and Missing Metrics -->
|
||||||
{#if $statusQuery?.fetching}
|
{#if $statusQuery.fetching}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Spinner />
|
<Spinner />
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery?.error}
|
{:else if $statusQuery.error}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">Status Query (Details): {$statusQuery?.error?.message}</Card>
|
<Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery?.data}
|
{:else if $statusQuery.data}
|
||||||
<Row>
|
<Row>
|
||||||
<Col>
|
<Col>
|
||||||
<Card>
|
<Card>
|
||||||
|
|||||||
@@ -30,8 +30,7 @@
|
|||||||
|
|
||||||
/* Svelte 5 Props */
|
/* Svelte 5 Props */
|
||||||
let {
|
let {
|
||||||
presetCluster,
|
presetCluster
|
||||||
loadMe = false,
|
|
||||||
} = $props();
|
} = $props();
|
||||||
|
|
||||||
/* Const Init */
|
/* Const Init */
|
||||||
@@ -50,7 +49,7 @@
|
|||||||
: ccconfig['statusView_selectedHistograms'] || []);
|
: ccconfig['statusView_selectedHistograms'] || []);
|
||||||
|
|
||||||
// Note: nodeMetrics are requested on configured $timestep resolution
|
// Note: nodeMetrics are requested on configured $timestep resolution
|
||||||
const metricStatusQuery = $derived(loadMe ? queryStore({
|
const metricStatusQuery = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query (
|
query (
|
||||||
@@ -76,7 +75,7 @@
|
|||||||
selectedHistograms: selectedHistograms
|
selectedHistograms: selectedHistograms
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}) : null);
|
}));
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<!-- Loading indicators & Metric Select -->
|
<!-- Loading indicators & Metric Select -->
|
||||||
@@ -101,18 +100,18 @@
|
|||||||
</Row>
|
</Row>
|
||||||
|
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
{#if $metricStatusQuery?.fetching}
|
{#if $metricStatusQuery.fetching}
|
||||||
<Col>
|
<Col>
|
||||||
<Spinner />
|
<Spinner />
|
||||||
</Col>
|
</Col>
|
||||||
{:else if $metricStatusQuery?.error}
|
{:else if $metricStatusQuery.error}
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">{$metricStatusQuery.error.message}</Card>
|
<Card body color="danger">{$metricStatusQuery.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
{/if}
|
{/if}
|
||||||
</Row>
|
</Row>
|
||||||
|
|
||||||
{#if $metricStatusQuery?.data}
|
{#if $metricStatusQuery.data}
|
||||||
<!-- Selectable Stats as Histograms : Average Values of Running Jobs -->
|
<!-- Selectable Stats as Histograms : Average Values of Running Jobs -->
|
||||||
{#if selectedHistograms}
|
{#if selectedHistograms}
|
||||||
<!-- Note: Ignore '#snippet' Error in IDE -->
|
<!-- Note: Ignore '#snippet' Error in IDE -->
|
||||||
|
|||||||
@@ -32,7 +32,6 @@
|
|||||||
let {
|
let {
|
||||||
clusters,
|
clusters,
|
||||||
presetCluster,
|
presetCluster,
|
||||||
loadMe = false,
|
|
||||||
} = $props();
|
} = $props();
|
||||||
|
|
||||||
/* Const Init */
|
/* Const Init */
|
||||||
@@ -60,7 +59,7 @@
|
|||||||
/* Derived */
|
/* Derived */
|
||||||
let cluster = $derived(presetCluster);
|
let cluster = $derived(presetCluster);
|
||||||
// States for Stacked charts
|
// States for Stacked charts
|
||||||
const statesTimed = $derived(loadMe ? queryStore({
|
const statesTimed = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
|
query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
|
||||||
@@ -82,11 +81,11 @@
|
|||||||
typeHealth: "health"
|
typeHealth: "health"
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}) : null);
|
}));
|
||||||
|
|
||||||
// Note: nodeMetrics are requested on configured $timestep resolution
|
// Note: nodeMetrics are requested on configured $timestep resolution
|
||||||
// Result: The latest 5 minutes (datapoints) for each node independent of job
|
// Result: The latest 5 minutes (datapoints) for each node independent of job
|
||||||
const statusQuery = $derived(loadMe ? queryStore({
|
const statusQuery = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query (
|
query (
|
||||||
@@ -185,11 +184,11 @@
|
|||||||
sorting: { field: "startTime", type: "col", order: "DESC" }
|
sorting: { field: "startTime", type: "col", order: "DESC" }
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}) : null);
|
}));
|
||||||
|
|
||||||
/* Effects */
|
/* Effects */
|
||||||
$effect(() => {
|
$effect(() => {
|
||||||
if ($statusQuery?.data) {
|
if ($statusQuery.data) {
|
||||||
let subClusters = clusters.find(
|
let subClusters = clusters.find(
|
||||||
(c) => c.name == cluster,
|
(c) => c.name == cluster,
|
||||||
).subClusters;
|
).subClusters;
|
||||||
@@ -375,19 +374,19 @@
|
|||||||
<hr/>
|
<hr/>
|
||||||
|
|
||||||
<!-- Node Stack Charts -->
|
<!-- Node Stack Charts -->
|
||||||
{#if $statesTimed?.fetching}
|
{#if $statesTimed.fetching}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Spinner />
|
<Spinner />
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statesTimed?.error}
|
{:else if $statesTimed.error}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">States Timed: {$statesTimed?.error?.message}</Card>
|
<Card body color="danger">States Timed: {$statesTimed.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statesTimed?.data}
|
{:else if $statesTimed.data}
|
||||||
<Row cols={{ md: 2 , sm: 1}} class="mb-3 justify-content-center">
|
<Row cols={{ md: 2 , sm: 1}} class="mb-3 justify-content-center">
|
||||||
<Col class="px-3 mt-2 mt-lg-0">
|
<Col class="px-3 mt-2 mt-lg-0">
|
||||||
<div>
|
<div>
|
||||||
@@ -428,19 +427,19 @@
|
|||||||
|
|
||||||
<hr/>
|
<hr/>
|
||||||
<!-- Gauges & Roofline per Subcluster-->
|
<!-- Gauges & Roofline per Subcluster-->
|
||||||
{#if $statusQuery?.fetching}
|
{#if $statusQuery.fetching}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Spinner />
|
<Spinner />
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery?.error}
|
{:else if $statusQuery.error}
|
||||||
<Row cols={1} class="text-center mt-3">
|
<Row cols={1} class="text-center mt-3">
|
||||||
<Col>
|
<Col>
|
||||||
<Card body color="danger">Status Query (Details): {$statusQuery?.error?.message}</Card>
|
<Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
{:else if $statusQuery?.data}
|
{:else if $statusQuery.data}
|
||||||
{#each clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
|
{#each clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
|
||||||
<Row cols={{ lg: 3, md: 1 , sm: 1}} class="mb-3 justify-content-center">
|
<Row cols={{ lg: 3, md: 1 , sm: 1}} class="mb-3 justify-content-center">
|
||||||
<Col class="px-3">
|
<Col class="px-3">
|
||||||
|
|||||||
@@ -40,8 +40,7 @@
|
|||||||
presetCluster,
|
presetCluster,
|
||||||
presetSubCluster = null,
|
presetSubCluster = null,
|
||||||
useCbColors = false,
|
useCbColors = false,
|
||||||
useAltColors = false,
|
useAltColors = false
|
||||||
loadMe = false,
|
|
||||||
} = $props();
|
} = $props();
|
||||||
|
|
||||||
/* Const Init */
|
/* Const Init */
|
||||||
@@ -63,7 +62,7 @@
|
|||||||
? [{ state: ["running"] }, { cluster: { eq: presetCluster} }, { subCluster: { eq: presetSubCluster } }]
|
? [{ state: ["running"] }, { cluster: { eq: presetCluster} }, { subCluster: { eq: presetSubCluster } }]
|
||||||
: [{ state: ["running"] }, { cluster: { eq: presetCluster} }]
|
: [{ state: ["running"] }, { cluster: { eq: presetCluster} }]
|
||||||
);
|
);
|
||||||
const topJobsQuery = $derived(loadMe ? queryStore({
|
const topJobsQuery = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query (
|
query (
|
||||||
@@ -96,9 +95,9 @@
|
|||||||
paging: pagingState // Top 10
|
paging: pagingState // Top 10
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}) : null);
|
}));
|
||||||
|
|
||||||
const topNodesQuery = $derived(loadMe ? queryStore({
|
const topNodesQuery = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query (
|
query (
|
||||||
@@ -131,9 +130,9 @@
|
|||||||
paging: pagingState
|
paging: pagingState
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}) : null);
|
}));
|
||||||
|
|
||||||
const topAccsQuery = $derived(loadMe ? queryStore({
|
const topAccsQuery = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query (
|
query (
|
||||||
@@ -166,10 +165,10 @@
|
|||||||
paging: pagingState
|
paging: pagingState
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}): null);
|
}));
|
||||||
|
|
||||||
// Note: nodeMetrics are requested on configured $timestep resolution
|
// Note: nodeMetrics are requested on configured $timestep resolution
|
||||||
const nodeStatusQuery = $derived(loadMe ? queryStore({
|
const nodeStatusQuery = $derived(queryStore({
|
||||||
client: client,
|
client: client,
|
||||||
query: gql`
|
query: gql`
|
||||||
query (
|
query (
|
||||||
@@ -199,7 +198,7 @@
|
|||||||
numDurationBins: numDurationBins,
|
numDurationBins: numDurationBins,
|
||||||
},
|
},
|
||||||
requestPolicy: "network-only"
|
requestPolicy: "network-only"
|
||||||
}) : null);
|
}));
|
||||||
|
|
||||||
/* Functions */
|
/* Functions */
|
||||||
function legendColors(targetIdx) {
|
function legendColors(targetIdx) {
|
||||||
@@ -247,9 +246,9 @@
|
|||||||
<hr/>
|
<hr/>
|
||||||
|
|
||||||
<!-- Job Duration, Top Users and Projects-->
|
<!-- Job Duration, Top Users and Projects-->
|
||||||
{#if $topJobsQuery?.fetching || $nodeStatusQuery?.fetching}
|
{#if $topJobsQuery.fetching || $nodeStatusQuery.fetching}
|
||||||
<Spinner />
|
<Spinner />
|
||||||
{:else if $topJobsQuery?.data && $nodeStatusQuery?.data}
|
{:else if $topJobsQuery.data && $nodeStatusQuery.data}
|
||||||
<Row>
|
<Row>
|
||||||
<Col xs="12" lg="4" class="p-2">
|
<Col xs="12" lg="4" class="p-2">
|
||||||
{#key $nodeStatusQuery.data.jobsStatistics[0].histDuration}
|
{#key $nodeStatusQuery.data.jobsStatistics[0].histDuration}
|
||||||
@@ -355,9 +354,9 @@
|
|||||||
<hr/>
|
<hr/>
|
||||||
|
|
||||||
<!-- Node Distribution, Top Users and Projects-->
|
<!-- Node Distribution, Top Users and Projects-->
|
||||||
{#if $topNodesQuery?.fetching || $nodeStatusQuery?.fetching}
|
{#if $topNodesQuery.fetching || $nodeStatusQuery.fetching}
|
||||||
<Spinner />
|
<Spinner />
|
||||||
{:else if $topNodesQuery?.data && $nodeStatusQuery?.data}
|
{:else if $topNodesQuery.data && $nodeStatusQuery.data}
|
||||||
<Row>
|
<Row>
|
||||||
<Col xs="12" lg="4" class="p-2">
|
<Col xs="12" lg="4" class="p-2">
|
||||||
<Histogram
|
<Histogram
|
||||||
@@ -459,9 +458,9 @@
|
|||||||
<hr/>
|
<hr/>
|
||||||
|
|
||||||
<!-- Acc Distribution, Top Users and Projects-->
|
<!-- Acc Distribution, Top Users and Projects-->
|
||||||
{#if $topAccsQuery?.fetching || $nodeStatusQuery?.fetching}
|
{#if $topAccsQuery.fetching || $nodeStatusQuery.fetching}
|
||||||
<Spinner />
|
<Spinner />
|
||||||
{:else if $topAccsQuery?.data && $nodeStatusQuery?.data}
|
{:else if $topAccsQuery.data && $nodeStatusQuery.data}
|
||||||
<Row>
|
<Row>
|
||||||
<Col xs="12" lg="4" class="p-2">
|
<Col xs="12" lg="4" class="p-2">
|
||||||
<Histogram
|
<Histogram
|
||||||
|
|||||||
@@ -38,7 +38,7 @@
|
|||||||
<input class="form-control" type="password" id="password" name="password" required/>
|
<input class="form-control" type="password" id="password" name="password" required/>
|
||||||
</div>
|
</div>
|
||||||
<button type="submit" class="btn btn-success">Submit</button>
|
<button type="submit" class="btn btn-success">Submit</button>
|
||||||
{{if .Infos.hasOpenIDConnect}}
|
{{- if .Infos.hasOpenIDConnect}}
|
||||||
<a class="btn btn-primary" href="/oidc-login">OpenID Connect Login</a>
|
<a class="btn btn-primary" href="/oidc-login">OpenID Connect Login</a>
|
||||||
{{end}}
|
{{end}}
|
||||||
<input type="hidden" id="redirect" name="redirect" value="{{ .Redirect }}" />
|
<input type="hidden" id="redirect" name="redirect" value="{{ .Redirect }}" />
|
||||||
|
|||||||
Reference in New Issue
Block a user