8 Commits

Author SHA1 Message Date
Jan Eitzinger
a6e23dd52e Merge pull request #503 from ClusterCockpit/dev
Add csv export for user/project list
2026-02-24 20:29:51 +01:00
Jan Eitzinger
8bacffbd3e Merge pull request #502 from ClusterCockpit/dev
reintroduce state information for pie charts
2026-02-24 20:09:07 +01:00
Jan Eitzinger
248b923980 Merge pull request #501 from ClusterCockpit/dev
Update jobclass rules
2026-02-24 07:03:23 +01:00
Jan Eitzinger
ac5ee1564a Merge pull request #500 from ClusterCockpit/dev
Dev
2026-02-24 06:46:49 +01:00
Jan Eitzinger
2b56e02a3e Merge pull request #499 from ClusterCockpit/dev
Fix and extend jobclass rules
2026-02-23 17:58:14 +01:00
Jan Eitzinger
2b788f14ec Merge pull request #495 from ClusterCockpit/dev
Fix more bugs related to job_cache ids used in job table
2026-02-22 09:57:37 +01:00
Jan Eitzinger
5ee3bbdbf5 Merge pull request #494 from ClusterCockpit/dev
Dev
2026-02-20 08:43:54 +01:00
Jan Eitzinger
39c919bb0c Merge pull request #493 from ClusterCockpit/dev
Dev
2026-02-20 07:49:13 +01:00
20 changed files with 1267 additions and 830 deletions

16
go.mod
View File

@@ -9,8 +9,7 @@ tool (
require (
github.com/99designs/gqlgen v0.17.86
github.com/ClusterCockpit/cc-lib/v2 v2.7.0
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0
github.com/ClusterCockpit/cc-lib/v2 v2.6.0
github.com/Masterminds/squirrel v1.5.4
github.com/aws/aws-sdk-go-v2 v1.41.1
github.com/aws/aws-sdk-go-v2/config v1.32.8
@@ -26,8 +25,10 @@ require (
github.com/golang-migrate/migrate/v4 v4.19.1
github.com/google/gops v0.3.29
github.com/gorilla/sessions v1.4.0
github.com/influxdata/line-protocol/v2 v2.2.1
github.com/jmoiron/sqlx v1.4.0
github.com/joho/godotenv v1.5.1
github.com/linkedin/goavro/v2 v2.15.0
github.com/mattn/go-sqlite3 v1.14.34
github.com/parquet-go/parquet-go v0.27.0
github.com/qustavo/sqlhooks/v2 v2.1.0
@@ -79,6 +80,7 @@ require (
github.com/go-openapi/swag/yamlutils v0.25.4 // indirect
github.com/go-viper/mapstructure/v2 v2.5.0 // indirect
github.com/goccy/go-yaml v1.19.2 // indirect
github.com/golang/snappy v1.0.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/securecookie v1.1.2 // indirect
github.com/gorilla/websocket v1.5.3 // indirect
@@ -90,10 +92,10 @@ require (
github.com/kr/pretty v0.3.1 // indirect
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
github.com/nats-io/nats.go v1.49.0 // indirect
github.com/nats-io/nats.go v1.48.0 // indirect
github.com/nats-io/nkeys v0.4.15 // indirect
github.com/nats-io/nuid v1.0.1 // indirect
github.com/oapi-codegen/runtime v1.2.0 // indirect
github.com/oapi-codegen/runtime v1.1.2 // indirect
github.com/parquet-go/bitpack v1.0.0 // indirect
github.com/parquet-go/jsonlite v1.4.0 // indirect
github.com/pierrec/lz4/v4 v4.1.25 // indirect
@@ -102,7 +104,7 @@ require (
github.com/rogpeppe/go-internal v1.10.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sosodev/duration v1.3.1 // indirect
github.com/stmcginnis/gofish v0.21.3 // indirect
github.com/stmcginnis/gofish v0.21.1 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/swaggo/files v1.0.1 // indirect
github.com/twpayne/go-geom v1.6.1 // indirect
@@ -111,9 +113,9 @@ require (
github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 // indirect
go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa // indirect
golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a // indirect
golang.org/x/mod v0.33.0 // indirect
golang.org/x/net v0.51.0 // indirect
golang.org/x/net v0.50.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/sys v0.41.0 // indirect
golang.org/x/text v0.34.0 // indirect

54
go.sum
View File

@@ -4,10 +4,10 @@ github.com/99designs/gqlgen v0.17.86 h1:C8N3UTa5heXX6twl+b0AJyGkTwYL6dNmFrgZNLRc
github.com/99designs/gqlgen v0.17.86/go.mod h1:KTrPl+vHA1IUzNlh4EYkl7+tcErL3MgKnhHrBcV74Fw=
github.com/Azure/go-ntlmssp v0.1.0 h1:DjFo6YtWzNqNvQdrwEyr/e4nhU3vRiwenz5QX7sFz+A=
github.com/Azure/go-ntlmssp v0.1.0/go.mod h1:NYqdhxd/8aAct/s4qSYZEerdPuH1liG2/X9DiVTbhpk=
github.com/ClusterCockpit/cc-lib/v2 v2.7.0 h1:EMTShk6rMTR1wlfmQ8SVCawH1OdltUbD3kVQmaW+5pE=
github.com/ClusterCockpit/cc-lib/v2 v2.7.0/go.mod h1:0Etx8WMs0lYZ4tiOQizY18CQop+2i3WROvU9rMUxHA4=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0 h1:hIzxgTBWcmCIHtoDKDkSCsKCOCOwUC34sFsbD2wcW0Q=
github.com/ClusterCockpit/cc-line-protocol/v2 v2.4.0/go.mod h1:y42qUu+YFmu5fdNuUAS4VbbIKxVjxCvbVqFdpdh8ahY=
github.com/ClusterCockpit/cc-lib/v2 v2.5.1 h1:s6M9tyPDty+4zTdQGJYKpGJM9Nz7N6ITMdjPvNSLX5g=
github.com/ClusterCockpit/cc-lib/v2 v2.5.1/go.mod h1:DZ8OIHPUZJpWqErLITt0B8P6/Q7CBW2IQSQ5YiFFaG0=
github.com/ClusterCockpit/cc-lib/v2 v2.6.0 h1:Q7zvRAVhfYA9PDB18pfY9A/6Ws4oWpnv8+P9MBRUDzg=
github.com/ClusterCockpit/cc-lib/v2 v2.6.0/go.mod h1:DZ8OIHPUZJpWqErLITt0B8P6/Q7CBW2IQSQ5YiFFaG0=
github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc=
@@ -95,6 +95,8 @@ github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7c
github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/expr-lang/expr v1.17.8 h1:W1loDTT+0PQf5YteHSTpju2qfUfNoBt4yw9+wOEU9VM=
github.com/expr-lang/expr v1.17.8/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
github.com/frankban/quicktest v1.11.0/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.11.2/go.mod h1:K+q6oSqb0W0Ininfk863uOk1lMy69l/P6txr3mVT54s=
github.com/frankban/quicktest v1.13.0 h1:yNZif1OkDfNoDfb9zZa9aXIpejNR4F23Wely0c+Qdqk=
github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/r/VLSOOIySU=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
@@ -149,6 +151,9 @@ github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63Y
github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE=
github.com/golang-migrate/migrate/v4 v4.19.1 h1:OCyb44lFuQfYXYLx1SCxPZQGU7mcaZ7gH9yH4jSFbBA=
github.com/golang-migrate/migrate/v4 v4.19.1/go.mod h1:CTcgfjxhaUtsLipnLoQRWCrjYXycRz/g5+RWDuYgPrE=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v1.0.0 h1:Oy607GVXHs7RtbggtPBnr2RmDArIsAefDwvrdWvRhGs=
github.com/golang/snappy v1.0.0/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
@@ -179,8 +184,13 @@ github.com/influxdata/influxdb-client-go/v2 v2.14.0 h1:AjbBfJuq+QoaXNcrova8smSjw
github.com/influxdata/influxdb-client-go/v2 v2.14.0/go.mod h1:Ahpm3QXKMJslpXl3IftVLVezreAUtBOTZssDrjZEFHI=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/influxdata/line-protocol-corpus v0.0.0-20210519164801-ca6fa5da0184/go.mod h1:03nmhxzZ7Xk2pdG+lmMd7mHDfeVOYFyhOgwO61qWU98=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937 h1:MHJNQ+p99hFATQm6ORoLmpUCF7ovjwEFshs/NHzAbig=
github.com/influxdata/line-protocol-corpus v0.0.0-20210922080147-aa28ccfb8937/go.mod h1:BKR9c0uHSmRgM/se9JhFHtTT7JTO67X23MtKMHtZcpo=
github.com/influxdata/line-protocol/v2 v2.0.0-20210312151457-c52fdecb625a/go.mod h1:6+9Xt5Sq1rWx+glMgxhcg2c0DUaehK+5TDcPZ76GypY=
github.com/influxdata/line-protocol/v2 v2.1.0/go.mod h1:QKw43hdUBg3GTk2iC3iyCxksNj7PX9aUSeYOYE/ceHY=
github.com/influxdata/line-protocol/v2 v2.2.1 h1:EAPkqJ9Km4uAxtMRgUubJyqAr6zgWM0dznKMLRauQRE=
github.com/influxdata/line-protocol/v2 v2.2.1/go.mod h1:DmB3Cnh+3oxmG6LOBIxce4oaL4CPj3OmMPgvauXh+tM=
github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8=
github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs=
github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo=
@@ -202,8 +212,11 @@ github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7X
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c=
github.com/klauspost/compress v1.18.4/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq6+3iTQz8KNCLtVX6idSoTLdUw=
@@ -213,6 +226,8 @@ github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6Fm
github.com/lib/pq v1.2.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/linkedin/goavro/v2 v2.15.0 h1:pDj1UrjUOO62iXhgBiE7jQkpNIc5/tA5eZsgolMjgVI=
github.com/linkedin/goavro/v2 v2.15.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk=
github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/mattn/go-sqlite3 v1.14.34 h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk=
@@ -225,14 +240,15 @@ github.com/nats-io/jwt/v2 v2.8.0 h1:K7uzyz50+yGZDO5o772eRE7atlcSEENpL7P+b74JV1g=
github.com/nats-io/jwt/v2 v2.8.0/go.mod h1:me11pOkwObtcBNR8AiMrUbtVOUGkqYjMQZ6jnSdVUIA=
github.com/nats-io/nats-server/v2 v2.12.3 h1:KRv+1n7lddMVgkJPQer+pt36TcO0ENxjilBmeWdjcHs=
github.com/nats-io/nats-server/v2 v2.12.3/go.mod h1:MQXjG9WjyXKz9koWzUc3jYUMKD8x3CLmTNy91IQQz3Y=
github.com/nats-io/nats.go v1.49.0 h1:yh/WvY59gXqYpgl33ZI+XoVPKyut/IcEaqtsiuTJpoE=
github.com/nats-io/nats.go v1.49.0/go.mod h1:fDCn3mN5cY8HooHwE2ukiLb4p4G4ImmzvXyJt+tGwdw=
github.com/nats-io/nats.go v1.48.0 h1:pSFyXApG+yWU/TgbKCjmm5K4wrHu86231/w84qRVR+U=
github.com/nats-io/nats.go v1.48.0/go.mod h1:iRWIPokVIFbVijxuMQq4y9ttaBTMe0SFdlZfMDd+33g=
github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4=
github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/oapi-codegen/runtime v1.2.0 h1:RvKc1CVS1QeKSNzO97FBQbSMZyQ8s6rZd+LpmzwHMP4=
github.com/oapi-codegen/runtime v1.2.0/go.mod h1:Y7ZhmmlE8ikZOmuHRRndiIm7nf3xcVv+YMweKgG1DT0=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/oapi-codegen/runtime v1.1.2 h1:P2+CubHq8fO4Q6fV1tqDBZHCwpVpvPg7oKiYzQgXIyI=
github.com/oapi-codegen/runtime v1.1.2/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA=
github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs=
@@ -252,8 +268,8 @@ github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNw
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4=
github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw=
github.com/prometheus/procfs v0.20.0 h1:AA7aCvjxwAquZAlonN7888f2u4IN8WVeFgBi4k82M4Q=
github.com/prometheus/procfs v0.20.0/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo=
github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws=
github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw=
github.com/qustavo/sqlhooks/v2 v2.1.0 h1:54yBemHnGHp/7xgT+pxwmIlMSDNYKx5JW5dfRAiCZi0=
github.com/qustavo/sqlhooks/v2 v2.1.0/go.mod h1:aMREyKo7fOKTwiLuWPsaHRXEmtqG4yREztO0idF83AU=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
@@ -270,14 +286,17 @@ github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NF
github.com/sosodev/duration v1.3.1 h1:qtHBDMQ6lvMQsL15g4aopM4HEfOaYuhWBw3NPTtlqq4=
github.com/sosodev/duration v1.3.1/go.mod h1:RQIBBX0+fMLc/D9+Jb/fwvVmo0eZvDDEERAikUR6SDg=
github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0=
github.com/stmcginnis/gofish v0.21.3 h1:EBLCHfORnbx7MPw7lplOOVe9QAD1T3XRVz6+a1Z4z5Q=
github.com/stmcginnis/gofish v0.21.3/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
github.com/stmcginnis/gofish v0.21.1 h1:sutDvBhmLh4RDOZ1DN8GUyYRu7f1ggvKMMnSaiqhwn4=
github.com/stmcginnis/gofish v0.21.1/go.mod h1:PzF5i8ecRG9A2ol8XT64npKUunyraJ+7t0kYMpQAtqU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE=
@@ -309,8 +328,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa h1:Zt3DZoOFFYkKhDT3v7Lm9FDMEV06GpzjG2jrqW+QTE0=
golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA=
golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a h1:ovFr6Z0MNmU7nH8VaX5xqw+05ST2uO1exVfZPVqRC5o=
golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
@@ -318,8 +337,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
golang.org/x/net v0.50.0 h1:ucWh9eiCGyDR3vtzso0WMQinm2Dnt8cFMuQa9K33J60=
golang.org/x/net v0.50.0/go.mod h1:UgoSli3F/pBgdJBHCTc+tp3gmrU4XswgGRgtnwWTfyM=
golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ=
golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -351,13 +370,16 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=

View File

@@ -18,7 +18,7 @@ import (
"github.com/ClusterCockpit/cc-backend/pkg/metricstore"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
"github.com/influxdata/line-protocol/v2/lineprotocol"
)
// handleFree godoc

View File

@@ -21,7 +21,7 @@ import (
"github.com/ClusterCockpit/cc-lib/v2/nats"
"github.com/ClusterCockpit/cc-lib/v2/receivers"
"github.com/ClusterCockpit/cc-lib/v2/schema"
influx "github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
influx "github.com/influxdata/line-protocol/v2/lineprotocol"
)
// NatsAPI provides NATS subscription-based handlers for Job and Node operations.

View File

@@ -263,7 +263,7 @@ func GetAuthInstance() *Authentication {
}
// handleUserSync syncs or updates a user in the database based on configuration.
// This is used for LDAP, JWT and OIDC authentications when syncUserOnLogin or updateUserOnLogin is enabled.
// This is used for both JWT and OIDC authentication when syncUserOnLogin or updateUserOnLogin is enabled.
func handleUserSync(user *schema.User, syncUserOnLogin, updateUserOnLogin bool) {
r := repository.GetUserRepository()
dbUser, err := r.GetUser(user.Username)

View File

@@ -0,0 +1,481 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricstore
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"os"
"path"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
"github.com/linkedin/goavro/v2"
)
var (
NumAvroWorkers int = DefaultAvroWorkers
startUp bool = true
)
func (as *AvroStore) ToCheckpoint(dir string, dumpAll bool) (int, error) {
levels := make([]*AvroLevel, 0)
selectors := make([][]string, 0)
as.root.lock.RLock()
// Cluster
for sel1, l1 := range as.root.children {
l1.lock.RLock()
// Node
for sel2, l2 := range l1.children {
l2.lock.RLock()
// Frequency
for sel3, l3 := range l2.children {
levels = append(levels, l3)
selectors = append(selectors, []string{sel1, sel2, sel3})
}
l2.lock.RUnlock()
}
l1.lock.RUnlock()
}
as.root.lock.RUnlock()
type workItem struct {
level *AvroLevel
dir string
selector []string
}
n, errs := int32(0), int32(0)
var wg sync.WaitGroup
wg.Add(NumAvroWorkers)
work := make(chan workItem, NumAvroWorkers*2)
for range NumAvroWorkers {
go func() {
defer wg.Done()
for workItem := range work {
from := getTimestamp(workItem.dir)
if err := workItem.level.toCheckpoint(workItem.dir, from, dumpAll); err != nil {
if err == ErrNoNewArchiveData {
continue
}
cclog.Errorf("error while checkpointing %#v: %s", workItem.selector, err.Error())
atomic.AddInt32(&errs, 1)
} else {
atomic.AddInt32(&n, 1)
}
}
}()
}
for i := range len(levels) {
dir := path.Join(dir, path.Join(selectors[i]...))
work <- workItem{
level: levels[i],
dir: dir,
selector: selectors[i],
}
}
close(work)
wg.Wait()
if errs > 0 {
return int(n), fmt.Errorf("%d errors happened while creating avro checkpoints (%d successes)", errs, n)
}
startUp = false
return int(n), nil
}
// getTimestamp returns the newest checkpoint timestamp encoded in the avro
// filenames for the given directory.
func getTimestamp(dir string) int64 {
// The directory's base name is the resolution; existing avro files are named
// <resolution>_<epoch>.avro in the parent directory, so iterate over all
// files there and return the maximum embedded epoch timestamp.
resolution := path.Base(dir)
dir = path.Dir(dir)
files, err := os.ReadDir(dir)
if err != nil {
return 0
}
var maxTS int64 = 0
if len(files) == 0 {
return 0
}
for _, file := range files {
if file.IsDir() {
continue
}
name := file.Name()
if len(name) < 5 || !strings.HasSuffix(name, ".avro") || !strings.HasPrefix(name, resolution+"_") {
continue
}
ts, err := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
if err != nil {
fmt.Printf("error while parsing timestamp: %s\n", err.Error())
continue
}
if ts > maxTS {
maxTS = ts
}
}
interval, _ := time.ParseDuration(Keys.Checkpoints.Interval)
updateTime := time.Unix(maxTS, 0).Add(interval).Add(time.Duration(CheckpointBufferMinutes-1) * time.Minute).Unix()
if startUp {
return 0
}
if updateTime < time.Now().Unix() {
return 0
}
return maxTS
}
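// For illustration, the filename convention this relies on (values
// hypothetical): a directory .../fritz/f0101/60 yields resolution "60", and a
// file f0101/60_1700000000.avro encodes the checkpoint start epoch:
//
//	name := "60_1700000000.avro"
//	ts, _ := strconv.ParseInt(name[strings.Index(name, "_")+1:len(name)-5], 10, 64)
//	// ts == 1700000000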
func (l *AvroLevel) toCheckpoint(dir string, from int64, dumpAll bool) error {
l.lock.Lock()
defer l.lock.Unlock()
// fmt.Printf("Checkpointing directory: %s\n", dir)
// filepath contains the resolution
intRes, _ := strconv.Atoi(path.Base(dir))
// find smallest overall timestamp in l.data map and delete it from l.data
minTS := int64(1<<63 - 1)
for ts, dat := range l.data {
if ts < minTS && len(dat) != 0 {
minTS = ts
}
}
if from == 0 && minTS != int64(1<<63-1) {
from = minTS
}
if from == 0 {
return ErrNoNewArchiveData
}
var schema string
var codec *goavro.Codec
recordList := make([]map[string]any, 0)
var f *os.File
filePath := dir + fmt.Sprintf("_%d.avro", from)
var err error
fp_, err_ := os.Stat(filePath)
if errors.Is(err_, os.ErrNotExist) {
err = os.MkdirAll(path.Dir(dir), 0o755)
if err != nil {
return fmt.Errorf("failed to create directory: %v", err)
}
} else if fp_.Size() != 0 {
f, err = os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open existing avro file: %v", err)
}
defer f.Close()
br := bufio.NewReader(f)
reader, err := goavro.NewOCFReader(br)
if err != nil {
return fmt.Errorf("failed to create OCF reader: %v", err)
}
codec = reader.Codec()
schema = codec.Schema()
}
timeRef := time.Now().Add(time.Duration(-CheckpointBufferMinutes+1) * time.Minute).Unix()
if dumpAll {
timeRef = time.Now().Unix()
}
// Empty values
if len(l.data) == 0 {
// we checkpoint avro files every 60 seconds
repeat := 60 / intRes
for range repeat {
recordList = append(recordList, make(map[string]any))
}
}
readFlag := true
for ts := range l.data {
flag := false
if ts < timeRef {
data := l.data[ts]
schemaGen, err := generateSchema(data)
if err != nil {
return err
}
flag, schema, err = compareSchema(schema, schemaGen)
if err != nil {
return fmt.Errorf("failed to compare read and generated schema: %v", err)
}
if flag && readFlag && !errors.Is(err_, os.ErrNotExist) {
// Use closure to ensure file is closed even on error
err := func() error {
f2, err := os.Open(filePath)
if err != nil {
return fmt.Errorf("failed to open Avro file: %v", err)
}
defer f2.Close()
br := bufio.NewReader(f2)
ocfReader, err := goavro.NewOCFReader(br)
if err != nil {
return fmt.Errorf("failed to create OCF reader while changing schema: %v", err)
}
for ocfReader.Scan() {
record, err := ocfReader.Read()
if err != nil {
return fmt.Errorf("failed to read record: %v", err)
}
recordList = append(recordList, record.(map[string]any))
}
return nil
}()
if err != nil {
return err
}
err = os.Remove(filePath)
if err != nil {
return fmt.Errorf("failed to delete file: %v", err)
}
readFlag = false
}
codec, err = goavro.NewCodec(schema)
if err != nil {
return fmt.Errorf("failed to create codec after merged schema: %v", err)
}
recordList = append(recordList, generateRecord(data))
delete(l.data, ts)
}
}
if len(recordList) == 0 {
return ErrNoNewArchiveData
}
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0o644)
if err != nil {
return fmt.Errorf("failed to append new avro file: %v", err)
}
defer f.Close()
// fmt.Printf("Codec : %#v\n", codec)
writer, err := goavro.NewOCFWriter(goavro.OCFConfig{
W: f,
Codec: codec,
CompressionName: goavro.CompressionDeflateLabel,
})
if err != nil {
return fmt.Errorf("failed to create OCF writer: %v", err)
}
// Append the new record
if err := writer.Append(recordList); err != nil {
return fmt.Errorf("failed to append record: %v", err)
}
return nil
}
func compareSchema(schemaRead, schemaGen string) (bool, string, error) {
var genSchema, readSchema AvroSchema
if schemaRead == "" {
return false, schemaGen, nil
}
// Unmarshal the schema strings into AvroSchema structs
if err := json.Unmarshal([]byte(schemaGen), &genSchema); err != nil {
return false, "", fmt.Errorf("failed to parse generated schema: %v", err)
}
if err := json.Unmarshal([]byte(schemaRead), &readSchema); err != nil {
return false, "", fmt.Errorf("failed to parse read schema: %v", err)
}
sort.Slice(genSchema.Fields, func(i, j int) bool {
return genSchema.Fields[i].Name < genSchema.Fields[j].Name
})
sort.Slice(readSchema.Fields, func(i, j int) bool {
return readSchema.Fields[i].Name < readSchema.Fields[j].Name
})
// Check if schemas are identical
schemasEqual := true
if len(genSchema.Fields) <= len(readSchema.Fields) {
for i := range genSchema.Fields {
if genSchema.Fields[i].Name != readSchema.Fields[i].Name {
schemasEqual = false
break
}
}
// If schemas are identical, return the read schema
if schemasEqual {
return false, schemaRead, nil
}
}
// Create a map to hold unique fields from both schemas
fieldMap := make(map[string]AvroField)
// Add fields from the read schema
for _, field := range readSchema.Fields {
fieldMap[field.Name] = field
}
// Add or update fields from the generated schema
for _, field := range genSchema.Fields {
fieldMap[field.Name] = field
}
// Create a union schema by collecting fields from the map
var mergedFields []AvroField
for _, field := range fieldMap {
mergedFields = append(mergedFields, field)
}
// Sort fields by name for consistency
sort.Slice(mergedFields, func(i, j int) bool {
return mergedFields[i].Name < mergedFields[j].Name
})
// Create the merged schema
mergedSchema := AvroSchema{
Type: "record",
Name: genSchema.Name,
Fields: mergedFields,
}
// Check if schemas are identical
schemasEqual = len(mergedSchema.Fields) == len(readSchema.Fields)
if schemasEqual {
for i := range mergedSchema.Fields {
if mergedSchema.Fields[i].Name != readSchema.Fields[i].Name {
schemasEqual = false
break
}
}
if schemasEqual {
return false, schemaRead, nil
}
}
// Marshal the merged schema back to JSON
mergedSchemaJSON, err := json.Marshal(mergedSchema)
if err != nil {
return false, "", fmt.Errorf("failed to marshal merged schema: %v", err)
}
return true, string(mergedSchemaJSON), nil
}
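// Hypothetical merge walk-through: if the schema read from disk has fields
// {cpu_0x5F_load, mem_0x5F_used} and the freshly generated one has
// {cpu_0x5F_load, flops}, the sorted field lists differ at index 1, so the
// fields are unioned into {cpu_0x5F_load, flops, mem_0x5F_used} and
// compareSchema returns (true, mergedJSON, nil); the caller then rewrites the
// file under the widened schema. If the generated fields are a leading subset
// of the read fields, (false, schemaRead, nil) is returned and no rewrite
// happens.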
func generateSchema(data map[string]schema.Float) (string, error) {
// Define the Avro schema structure
schema := map[string]any{
"type": "record",
"name": "DataRecord",
"fields": []map[string]any{},
}
fieldTracker := make(map[string]struct{})
for key := range data {
if _, exists := fieldTracker[key]; !exists {
key = correctKey(key)
field := map[string]any{
"name": key,
"type": "double",
"default": -1.0,
}
schema["fields"] = append(schema["fields"].([]map[string]any), field)
fieldTracker[key] = struct{}{}
}
}
schemaString, err := json.Marshal(schema)
if err != nil {
return "", fmt.Errorf("failed to marshal schema: %v", err)
}
return string(schemaString), nil
}
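// As a hypothetical example, a data map with the single key "cpu_load"
// produces, after correctKey escaping and with the alphabetical field order
// that Go's map marshaling emits:
//
//	{"fields":[{"default":-1,"name":"cpu_0x5F_load","type":"double"}],"name":"DataRecord","type":"record"}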
func generateRecord(data map[string]schema.Float) map[string]any {
record := make(map[string]any)
// Iterate through each map in data
for key, value := range data {
key = correctKey(key)
// Set the value in the record
// avro only accepts basic types
record[key] = value.Double()
}
return record
}
func correctKey(key string) string {
key = strings.ReplaceAll(key, "_", "_0x5F_")
key = strings.ReplaceAll(key, ":", "_0x3A_")
key = strings.ReplaceAll(key, ".", "_0x2E_")
return key
}
func ReplaceKey(key string) string {
key = strings.ReplaceAll(key, "_0x2E_", ".")
key = strings.ReplaceAll(key, "_0x3A_", ":")
key = strings.ReplaceAll(key, "_0x5F_", "_")
return key
}
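A round-trip sketch (metric name hypothetical) shows why correctKey escapes "_" first while ReplaceKey restores it last; in any other order the underscores belonging to the escape markers themselves would be corrupted:
enc := correctKey("cpu_load:core0.avg") // "cpu_0x5F_load_0x3A_core0_0x2E_avg"
dec := ReplaceKey(enc) // "cpu_load:core0.avg" again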

View File

@@ -0,0 +1,130 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricstore
import (
"context"
"slices"
"strconv"
"strings"
"sync"
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
)
func DataStaging(wg *sync.WaitGroup, ctx context.Context) {
wg.Add(1)
go func() {
defer wg.Done()
if Keys.Checkpoints.FileFormat == "json" {
return
}
ms := GetMemoryStore()
var avroLevel *AvroLevel
oldSelector := make([]string, 0)
for {
select {
case <-ctx.Done():
// Drain any remaining messages in channel before exiting
for {
select {
case val, ok := <-LineProtocolMessages:
if !ok {
// Channel closed
return
}
// Process remaining message
freq, err := ms.GetMetricFrequency(val.MetricName)
if err != nil {
continue
}
var metricName strings.Builder
for _, selectorName := range val.Selector {
metricName.WriteString(selectorName + SelectorDelimiter)
}
metricName.WriteString(val.MetricName)
var selector []string
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
if !stringSlicesEqual(oldSelector, selector) {
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
if avroLevel == nil {
cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
}
oldSelector = slices.Clone(selector)
}
if avroLevel != nil {
avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq))
}
default:
// No more messages, exit
return
}
}
case val, ok := <-LineProtocolMessages:
if !ok {
// Channel closed, exit gracefully
return
}
// Fetch the frequency of the metric from the global configuration
freq, err := ms.GetMetricFrequency(val.MetricName)
if err != nil {
cclog.Errorf("Error fetching metric frequency: %s\n", err)
continue
}
var metricName strings.Builder
for _, selectorName := range val.Selector {
metricName.WriteString(selectorName + SelectorDelimiter)
}
metricName.WriteString(val.MetricName)
// Create a new selector for the Avro level
// The selector is a slice of strings that represents the path to the
// Avro level. It is created by appending the cluster, node, and metric
// name to the selector.
var selector []string
selector = append(selector, val.Cluster, val.Node, strconv.FormatInt(freq, 10))
if !stringSlicesEqual(oldSelector, selector) {
// Get the Avro level for the metric
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
// If the Avro level is nil, create a new one
if avroLevel == nil {
cclog.Errorf("Error creating or finding the level with cluster : %s, node : %s, metric : %s\n", val.Cluster, val.Node, val.MetricName)
}
oldSelector = slices.Clone(selector)
}
if avroLevel != nil {
avroLevel.addMetric(metricName.String(), val.Value, val.Timestamp, int(freq))
}
}
}
}()
}
func stringSlicesEqual(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
}
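Two details in DataStaging are worth calling out. The selector built for each message is the path [cluster, node, frequency], e.g. a hypothetical ["fritz", "f0101", "60"], mirroring the checkpoint directory layout consumed by ToCheckpoint. And since the file already imports the standard slices package for slices.Clone, the hand-rolled stringSlicesEqual is equivalent to slices.Equal; a condensed sketch of the hot path (omitting the nil check):
if !slices.Equal(oldSelector, selector) {
avroLevel = avroStore.root.findAvroLevelOrCreate(selector)
oldSelector = slices.Clone(selector)
}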

View File

@@ -0,0 +1,167 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package metricstore
import (
"sync"
"github.com/ClusterCockpit/cc-lib/v2/schema"
)
var (
LineProtocolMessages = make(chan *AvroStruct)
// SelectorDelimiter separates hierarchical selector components in metric names for Avro encoding
SelectorDelimiter = "_SEL_"
)
var CheckpointBufferMinutes = DefaultCheckpointBufferMin
type AvroStruct struct {
MetricName string
Cluster string
Node string
Selector []string
Value schema.Float
Timestamp int64
}
type AvroStore struct {
root AvroLevel
}
var avroStore AvroStore
type AvroLevel struct {
children map[string]*AvroLevel
data map[int64]map[string]schema.Float
lock sync.RWMutex
}
type AvroField struct {
Name string `json:"name"`
Type any `json:"type"`
Default any `json:"default,omitempty"`
}
type AvroSchema struct {
Type string `json:"type"`
Name string `json:"name"`
Fields []AvroField `json:"fields"`
}
func (l *AvroLevel) findAvroLevelOrCreate(selector []string) *AvroLevel {
if len(selector) == 0 {
return l
}
// Allow concurrent reads:
l.lock.RLock()
var child *AvroLevel
var ok bool
if l.children == nil {
// Children map needs to be created...
l.lock.RUnlock()
} else {
child, ok := l.children[selector[0]]
l.lock.RUnlock()
if ok {
return child.findAvroLevelOrCreate(selector[1:])
}
}
// The level does not exist, take write lock for unique access:
l.lock.Lock()
// While this thread waited for the write lock, another thread
// could have created the child node.
if l.children != nil {
child, ok = l.children[selector[0]]
if ok {
l.lock.Unlock()
return child.findAvroLevelOrCreate(selector[1:])
}
}
child = &AvroLevel{
data: make(map[int64]map[string]schema.Float, 0),
children: nil,
}
if l.children != nil {
l.children[selector[0]] = child
} else {
l.children = map[string]*AvroLevel{selector[0]: child}
}
l.lock.Unlock()
return child.findAvroLevelOrCreate(selector[1:])
}
func (l *AvroLevel) addMetric(metricName string, value schema.Float, timestamp int64, Freq int) {
l.lock.Lock()
defer l.lock.Unlock()
KeyCounter := int(CheckpointBufferMinutes * 60 / Freq)
// Create keys in advance for the given amount of time
if len(l.data) != KeyCounter {
if len(l.data) == 0 {
for i := range KeyCounter {
l.data[timestamp+int64(i*Freq)] = make(map[string]schema.Float, 0)
}
} else {
// Get the last timestamp
var lastTS int64
for ts := range l.data {
if ts > lastTS {
lastTS = ts
}
}
// Extend the window by one more interval past the last existing timestamp
l.data[lastTS+int64(Freq)] = make(map[string]schema.Float, 0)
}
}
closestTS := int64(0)
minDiff := int64(Freq) + 1 // Start with diff just outside the valid range
found := false
// Iterate over the pre-created timestamps and choose the closest one in range.
// Timestamps are epoch seconds, so a sample fits a slot when the difference
// is within one frequency interval.
for ts, dat := range l.data {
// Check if timestamp is within range
diff := timestamp - ts
if diff < -int64(Freq) || diff > int64(Freq) {
continue
}
// Metric already present at this timestamp — skip
if _, ok := dat[metricName]; ok {
continue
}
// Check if this is the closest timestamp so far
if Abs(diff) < minDiff {
minDiff = Abs(diff)
closestTS = ts
found = true
}
}
if found {
l.data[closestTS][metricName] = value
}
}
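// Worked example of the slot selection (numbers hypothetical): with Freq = 60
// and slots pre-created at epochs 1000, 1060, 1120, ..., a sample stamped
// 1075 yields diffs of 75, 15 and -45 against the first three slots; 75 lies
// outside the ±60 window, leaving 1060 (|diff| = 15) and 1120 (|diff| = 45)
// as candidates, so the value lands in the 1060 slot unless that slot already
// holds this metric.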
func GetAvroStore() *AvroStore {
return &avroStore
}
// Abs returns the absolute value of x.
func Abs(x int64) int64 {
if x < 0 {
return -x
}
return x
}

View File

@@ -1,274 +0,0 @@
// Copyright (C) NHR@FAU, University Erlangen-Nuremberg.
// All rights reserved. This file is part of cc-backend.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// This file implements the binary checkpoint format for fast loading.
//
// The binary format stores metric data in column-oriented layout (per-metric
// float64 arrays) for maximum load speed. Float64 arrays are read/written
// as raw bytes, avoiding per-element parsing overhead.
//
// File format:
//
// Header (28 bytes):
// magic: [4]byte "CCMS"
// version: uint32 LE
// from: int64 LE
// to: int64 LE
//
// Body (recursive):
// nmetrics: uint32 LE
// Per metric:
// name_len: uint16 LE
// name: []byte
// freq: int64 LE
// start: int64 LE
// nvalues: uint32 LE
// data: []float64 LE (NaN = missing)
// nchildren: uint32 LE
// Per child:
// name_len: uint16 LE
// name: []byte
// (recursive body)
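//
// For concreteness, a hypothetical header for from=1700000000 and
// to=1700000060 serializes to (bytes in hex):
//
//	43 43 4D 53              magic "CCMS"
//	01 00 00 00              version = 1 (uint32 LE)
//	00 F1 53 65 00 00 00 00  from = 1700000000 (int64 LE)
//	3C F1 53 65 00 00 00 00  to = 1700000060 (int64 LE)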
package metricstore
import (
"bufio"
"encoding/binary"
"fmt"
"io"
"os"
"path"
"unsafe"
"github.com/ClusterCockpit/cc-lib/v2/schema"
)
var (
binaryMagic = [4]byte{'C', 'C', 'M', 'S'}
binaryVersion = uint32(1)
binaryByteOrder = binary.LittleEndian
floatSize = int(unsafe.Sizeof(schema.Float(0))) // schema.Float is float64
)
// writeBinaryCheckpoint writes a CheckpointFile to a binary checkpoint file on disk.
func writeBinaryCheckpoint(filePath string, cf *CheckpointFile) error {
f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
if err != nil && os.IsNotExist(err) {
if err2 := os.MkdirAll(path.Dir(filePath), CheckpointDirPerms); err2 != nil {
return err2
}
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
}
if err != nil {
return err
}
defer f.Close()
bw := bufio.NewWriter(f)
// Write header
if _, err := bw.Write(binaryMagic[:]); err != nil {
return err
}
if err := binary.Write(bw, binaryByteOrder, binaryVersion); err != nil {
return err
}
if err := binary.Write(bw, binaryByteOrder, cf.From); err != nil {
return err
}
if err := binary.Write(bw, binaryByteOrder, cf.To); err != nil {
return err
}
// Write body (metrics + children recursively)
if err := writeBinaryBody(bw, cf); err != nil {
return err
}
return bw.Flush()
}
// writeBinaryBody writes the metrics and children of a CheckpointFile.
func writeBinaryBody(w io.Writer, cf *CheckpointFile) error {
if err := binary.Write(w, binaryByteOrder, uint32(len(cf.Metrics))); err != nil {
return err
}
for name, metric := range cf.Metrics {
nameBytes := []byte(name)
if err := binary.Write(w, binaryByteOrder, uint16(len(nameBytes))); err != nil {
return err
}
if _, err := w.Write(nameBytes); err != nil {
return err
}
if err := binary.Write(w, binaryByteOrder, metric.Frequency); err != nil {
return err
}
if err := binary.Write(w, binaryByteOrder, metric.Start); err != nil {
return err
}
if err := binary.Write(w, binaryByteOrder, uint32(len(metric.Data))); err != nil {
return err
}
if err := writeFloatArray(w, metric.Data); err != nil {
return err
}
}
if err := binary.Write(w, binaryByteOrder, uint32(len(cf.Children))); err != nil {
return err
}
for name, child := range cf.Children {
nameBytes := []byte(name)
if err := binary.Write(w, binaryByteOrder, uint16(len(nameBytes))); err != nil {
return err
}
if _, err := w.Write(nameBytes); err != nil {
return err
}
if err := writeBinaryBody(w, child); err != nil {
return err
}
}
return nil
}
// writeFloatArray writes a schema.Float slice as raw little-endian float64 bytes.
func writeFloatArray(w io.Writer, data []schema.Float) error {
if len(data) == 0 {
return nil
}
buf := unsafe.Slice((*byte)(unsafe.Pointer(&data[0])), len(data)*floatSize)
_, err := w.Write(buf)
return err
}
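// Note: the unsafe.Slice trick above writes the floats in host byte order, so
// the format is little-endian only on little-endian hosts (the targeted
// platforms). A portable, slower sketch of the same write would be:
//
//	if err := binary.Write(w, binaryByteOrder, data); err != nil {
//		return err
//	}
//
// binary.Write handles []schema.Float directly because its underlying type is
// float64.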
// loadBinaryFile reads a binary checkpoint file into a CheckpointFile.
func loadBinaryFile(filePath string) (*CheckpointFile, error) {
f, err := os.Open(filePath)
if err != nil {
return nil, err
}
defer f.Close()
br := bufio.NewReader(f)
var magic [4]byte
if _, err := io.ReadFull(br, magic[:]); err != nil {
return nil, fmt.Errorf("reading magic: %w", err)
}
if magic != binaryMagic {
return nil, fmt.Errorf("[METRICSTORE]> invalid binary checkpoint magic in %s", filePath)
}
var version uint32
if err := binary.Read(br, binaryByteOrder, &version); err != nil {
return nil, fmt.Errorf("reading version: %w", err)
}
if version != binaryVersion {
return nil, fmt.Errorf("[METRICSTORE]> unsupported binary checkpoint version %d in %s", version, filePath)
}
cf := &CheckpointFile{}
if err := binary.Read(br, binaryByteOrder, &cf.From); err != nil {
return nil, fmt.Errorf("reading from: %w", err)
}
if err := binary.Read(br, binaryByteOrder, &cf.To); err != nil {
return nil, fmt.Errorf("reading to: %w", err)
}
if err := readBinaryBody(br, cf); err != nil {
return nil, err
}
return cf, nil
}
// readBinaryBody reads the metrics and children of a CheckpointFile.
func readBinaryBody(r io.Reader, cf *CheckpointFile) error {
var nmetrics uint32
if err := binary.Read(r, binaryByteOrder, &nmetrics); err != nil {
return fmt.Errorf("reading metric count: %w", err)
}
cf.Metrics = make(map[string]*CheckpointMetrics, nmetrics)
for range nmetrics {
var nameLen uint16
if err := binary.Read(r, binaryByteOrder, &nameLen); err != nil {
return fmt.Errorf("reading metric name length: %w", err)
}
nameBytes := make([]byte, nameLen)
if _, err := io.ReadFull(r, nameBytes); err != nil {
return fmt.Errorf("reading metric name: %w", err)
}
cm := &CheckpointMetrics{}
if err := binary.Read(r, binaryByteOrder, &cm.Frequency); err != nil {
return fmt.Errorf("reading frequency: %w", err)
}
if err := binary.Read(r, binaryByteOrder, &cm.Start); err != nil {
return fmt.Errorf("reading start: %w", err)
}
var nvalues uint32
if err := binary.Read(r, binaryByteOrder, &nvalues); err != nil {
return fmt.Errorf("reading value count: %w", err)
}
var err error
cm.Data, err = readFloatArray(r, int(nvalues))
if err != nil {
return fmt.Errorf("reading data for %s: %w", string(nameBytes), err)
}
cf.Metrics[string(nameBytes)] = cm
}
var nchildren uint32
if err := binary.Read(r, binaryByteOrder, &nchildren); err != nil {
return fmt.Errorf("reading children count: %w", err)
}
cf.Children = make(map[string]*CheckpointFile, nchildren)
for range nchildren {
var nameLen uint16
if err := binary.Read(r, binaryByteOrder, &nameLen); err != nil {
return fmt.Errorf("reading child name length: %w", err)
}
nameBytes := make([]byte, nameLen)
if _, err := io.ReadFull(r, nameBytes); err != nil {
return fmt.Errorf("reading child name: %w", err)
}
child := &CheckpointFile{}
if err := readBinaryBody(r, child); err != nil {
return fmt.Errorf("reading child %s: %w", string(nameBytes), err)
}
cf.Children[string(nameBytes)] = child
}
return nil
}
// readFloatArray reads n float64 values from raw little-endian bytes.
func readFloatArray(r io.Reader, n int) ([]schema.Float, error) {
if n == 0 {
return nil, nil
}
data := make([]schema.Float, n)
buf := unsafe.Slice((*byte)(unsafe.Pointer(&data[0])), n*floatSize)
if _, err := io.ReadFull(r, buf); err != nil {
return nil, err
}
return data, nil
}

View File

@@ -6,16 +6,15 @@
// This file implements checkpoint persistence for the in-memory metric store.
//
// Checkpoints enable graceful restarts by periodically saving in-memory metric
// data to disk. The checkpoint system supports two write formats:
// - binary (default): fast loading via raw float64 arrays
// - json: human-readable, slightly slower to load
// data to disk in either JSON or Avro format. The checkpoint system:
//
// Key Features:
// - Periodic background checkpointing via the Checkpointing() worker
// - Two formats: JSON (human-readable) and Avro (compact, efficient)
// - Parallel checkpoint creation and loading using worker pools
// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{bin|json}
// - Hierarchical file organization: checkpoint_dir/cluster/host/timestamp.{json|avro}
// - Only saves unarchived data (archived data is already persisted elsewhere)
// - Automatic format detection during loading (supports bin, json, and legacy avro)
// - Automatic format detection and fallback during loading
// - GC optimization during loading to prevent excessive heap growth
//
// Checkpoint Workflow:
@@ -28,8 +27,8 @@
// checkpoints/
// cluster1/
// host001/
// 1234567890.bin (timestamp = checkpoint start time)
// 1234567950.bin
// 1234567890.json (timestamp = checkpoint start time)
// 1234567950.json
// host002/
// ...
package metricstore
@@ -53,11 +52,13 @@ import (
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/schema"
"github.com/linkedin/goavro/v2"
)
const (
CheckpointFilePerms = 0o644 // File permissions for checkpoint files
CheckpointDirPerms = 0o755 // Directory permissions for checkpoint directories
GCTriggerInterval = DefaultGCTriggerInterval // Interval for triggering GC during checkpoint loading
)
// CheckpointMetrics represents metric data in a checkpoint file.
@@ -85,17 +86,22 @@ var (
// Checkpointing starts a background worker that periodically saves metric data to disk.
//
// Checkpoints are written at the configured interval (Keys.Checkpoints.Interval) in
// either binary or JSON format. The worker respects context cancellation and signals
// completion via the WaitGroup.
// The behavior depends on the configured file format:
// - JSON: Periodic checkpointing based on Keys.Checkpoints.Interval
// - Avro: Initial delay + periodic checkpointing at DefaultAvroCheckpointInterval
//
// The worker respects context cancellation and signals completion via the WaitGroup.
func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
lastCheckpointMu.Lock()
lastCheckpoint = time.Now()
lastCheckpointMu.Unlock()
if Keys.Checkpoints.FileFormat == "json" {
ms := GetMemoryStore()
wg.Go(func() {
wg.Add(1)
go func() {
defer wg.Done()
d, err := time.ParseDuration(Keys.Checkpoints.Interval)
if err != nil {
cclog.Fatalf("[METRICSTORE]> invalid checkpoint interval '%s': %s", Keys.Checkpoints.Interval, err.Error())
@@ -131,154 +137,32 @@ func Checkpointing(wg *sync.WaitGroup, ctx context.Context) {
}
}
}
})
}
// UnmarshalJSON provides optimized JSON decoding for CheckpointMetrics.
//
// Mirrors the optimized MarshalJSON by manually parsing JSON to avoid
// per-element interface dispatch and allocation overhead of the generic
// json.Unmarshal path for []schema.Float.
func (cm *CheckpointMetrics) UnmarshalJSON(input []byte) error {
// Minimal manual JSON parsing for the known structure:
// {"frequency":N,"start":N,"data":[...]}
// Field order may vary, so we parse field names.
if len(input) < 2 || input[0] != '{' {
return fmt.Errorf("expected JSON object")
}
i := 1 // skip '{'
for i < len(input) {
// Skip whitespace
for i < len(input) && (input[i] == ' ' || input[i] == '\t' || input[i] == '\n' || input[i] == '\r') {
i++
}
if i >= len(input) || input[i] == '}' {
break
}
if input[i] == ',' {
i++
continue
}
// Parse field name
if input[i] != '"' {
return fmt.Errorf("expected field name at pos %d", i)
}
i++
nameStart := i
for i < len(input) && input[i] != '"' {
i++
}
fieldName := string(input[nameStart:i])
i++ // skip closing '"'
// Skip ':'
for i < len(input) && (input[i] == ' ' || input[i] == ':') {
i++
}
switch fieldName {
case "frequency":
numStart := i
for i < len(input) && input[i] != ',' && input[i] != '}' {
i++
}
v, err := strconv.ParseInt(string(input[numStart:i]), 10, 64)
if err != nil {
return fmt.Errorf("invalid frequency: %w", err)
}
cm.Frequency = v
case "start":
numStart := i
for i < len(input) && input[i] != ',' && input[i] != '}' {
i++
}
v, err := strconv.ParseInt(string(input[numStart:i]), 10, 64)
if err != nil {
return fmt.Errorf("invalid start: %w", err)
}
cm.Start = v
case "data":
if input[i] != '[' {
return fmt.Errorf("expected '[' for data array at pos %d", i)
}
i++ // skip '['
cm.Data = make([]schema.Float, 0, 256)
for i < len(input) {
// Skip whitespace
for i < len(input) && (input[i] == ' ' || input[i] == '\t' || input[i] == '\n' || input[i] == '\r') {
i++
}
if i >= len(input) {
break
}
if input[i] == ']' {
i++
break
}
if input[i] == ',' {
i++
continue
}
// Parse value: number or null
if input[i] == 'n' {
// "null"
cm.Data = append(cm.Data, schema.NaN)
i += 4
}()
} else {
numStart := i
for i < len(input) && input[i] != ',' && input[i] != ']' && input[i] != ' ' {
i++
}
v, err := strconv.ParseFloat(string(input[numStart:i]), 64)
if err != nil {
return fmt.Errorf("invalid data value: %w", err)
}
cm.Data = append(cm.Data, schema.Float(v))
}
wg.Add(1)
go func() {
defer wg.Done()
select {
case <-ctx.Done():
return
case <-time.After(time.Duration(CheckpointBufferMinutes) * time.Minute):
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
}
default:
// Skip unknown field value
depth := 0
inStr := false
for i < len(input) {
if inStr {
if input[i] == '\\' {
i++
} else if input[i] == '"' {
inStr = false
}
} else {
switch input[i] {
case '"':
inStr = true
case '{', '[':
depth++
case '}', ']':
if depth == 0 {
goto doneSkip
}
depth--
case ',':
if depth == 0 {
goto doneSkip
}
}
}
i++
}
doneSkip:
}
}
ticker := time.NewTicker(DefaultAvroCheckpointInterval)
defer ticker.Stop()
return nil
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, false)
}
}
}()
}
}
// MarshalJSON provides optimized JSON encoding for CheckpointMetrics.
@@ -453,8 +337,7 @@ func (l *Level) toCheckpointFile(from, to int64, m *MemoryStore) (*CheckpointFil
return retval, nil
}
// toCheckpoint writes a Level's data to a checkpoint file.
// The format (binary or JSON) is determined by Keys.Checkpoints.FileFormat.
// toCheckpoint writes a Level's data to a JSON checkpoint file.
// Creates directory if needed. Returns ErrNoNewArchiveData if nothing to save.
func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
cf, err := l.toCheckpointFile(from, to, m)
@@ -466,23 +349,12 @@ func (l *Level) toCheckpoint(dir string, from, to int64, m *MemoryStore) error {
return ErrNoNewArchiveData
}
if Keys.Checkpoints.FileFormat == "json" {
return writeJSONCheckpoint(dir, from, cf)
}
// Default: binary format
filePath := path.Join(dir, fmt.Sprintf("%d.bin", from))
return writeBinaryCheckpoint(filePath, cf)
}
// writeJSONCheckpoint writes a CheckpointFile in JSON format.
func writeJSONCheckpoint(dir string, from int64, cf *CheckpointFile) error {
filePath := path.Join(dir, fmt.Sprintf("%d.json", from))
f, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
filepath := path.Join(dir, fmt.Sprintf("%d.json", from))
f, err := os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
if err != nil && os.IsNotExist(err) {
err = os.MkdirAll(dir, CheckpointDirPerms)
if err == nil {
f, err = os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
f, err = os.OpenFile(filepath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, CheckpointFilePerms)
}
}
if err != nil {
@@ -499,56 +371,51 @@ func writeJSONCheckpoint(dir string, from int64, cf *CheckpointFile) error {
}
// enqueueCheckpointHosts traverses checkpoint directory and enqueues cluster/host pairs.
// Returns the set of cluster names found and any error if directory structure is invalid.
func enqueueCheckpointHosts(dir string, work chan<- [2]string) (map[string]struct{}, error) {
// Returns error if directory structure is invalid.
func enqueueCheckpointHosts(dir string, work chan<- [2]string) error {
clustersDir, err := os.ReadDir(dir)
if err != nil {
return nil, err
return err
}
clusters := make(map[string]struct{}, len(clustersDir))
gcCounter := 0
for _, clusterDir := range clustersDir {
if !clusterDir.IsDir() {
return nil, errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
return errors.New("[METRICSTORE]> expected only directories at first level of checkpoints/ directory")
}
clusters[clusterDir.Name()] = struct{}{}
hostsDir, err := os.ReadDir(filepath.Join(dir, clusterDir.Name()))
if err != nil {
return nil, err
return err
}
for _, hostDir := range hostsDir {
if !hostDir.IsDir() {
return nil, errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
return errors.New("[METRICSTORE]> expected only directories at second level of checkpoints/ directory")
}
gcCounter++
// if gcCounter%GCTriggerInterval == 0 {
// Forcing garbage collection runs here regularly during checkpoint loading
// decreases the total heap size once everything is back in memory.
// While loading data, the heap grows fast, so the GC target size almost
// always doubles. Forcing GCs here keeps it growing more slowly, so less
// memory is wasted in the end.
// runtime.GC()
// }
work <- [2]string{clusterDir.Name(), hostDir.Name()}
}
}
return clusters, nil
return nil
}
// FromCheckpoint loads checkpoint files from disk into memory in parallel.
//
// Pre-creates cluster-level entries to reduce lock contention during parallel loading.
// Uses worker pool to load cluster/host combinations. Returns number of files loaded and any errors.
// Uses worker pool to load cluster/host combinations. Periodically triggers GC
// to prevent excessive heap growth. Returns number of files loaded and any errors.
func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
// Pre-create cluster-level entries to eliminate write-lock contention on m.root
// during parallel loading. Workers only contend at the cluster level (independent).
clusterDirs, err := os.ReadDir(dir)
if err != nil && !os.IsNotExist(err) {
return 0, err
}
for _, d := range clusterDirs {
if d.IsDir() {
m.root.findLevelOrCreate([]string{d.Name()}, len(m.Metrics))
}
}
var wg sync.WaitGroup
work := make(chan [2]string, Keys.NumWorkers*4)
n, errs := int32(0), int32(0)
@@ -569,7 +436,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
}()
}
_, err = enqueueCheckpointHosts(dir, work)
err := enqueueCheckpointHosts(dir, work)
close(work)
wg.Wait()
@@ -585,7 +452,7 @@ func (m *MemoryStore) FromCheckpoint(dir string, from int64) (int, error) {
// FromCheckpointFiles is the main entry point for loading checkpoints at startup.
//
// Automatically detects checkpoint format (binary, JSON, or legacy Avro).
// Automatically detects checkpoint format (JSON vs Avro) and falls back if needed.
// Creates checkpoint directory if it doesn't exist. This function must be called
// before any writes or reads, and can only be called once.
func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
@@ -601,11 +468,150 @@ func (m *MemoryStore) FromCheckpointFiles(dir string, from int64) (int, error) {
return m.FromCheckpoint(dir, from)
}
// loadBinaryCheckpointFile loads a binary checkpoint file into the Level tree.
// Binary files are decoded in the same way as JSON files (via loadFile).
func (l *Level) loadBinaryCheckpointFile(m *MemoryStore, filePath string, from int64) error {
cf, err := loadBinaryFile(filePath)
func (l *Level) loadAvroFile(m *MemoryStore, f *os.File, from int64) error {
br := bufio.NewReader(f)
fileName := f.Name()[strings.LastIndex(f.Name(), "/")+1:]
resolution, err := strconv.ParseInt(fileName[0:strings.Index(fileName, "_")], 10, 64)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while reading avro file (resolution parsing) : %s", err)
}
fromTimestamp, err := strconv.ParseInt(fileName[strings.Index(fileName, "_")+1:len(fileName)-5], 10, 64)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error converting timestamp from the avro file : %s", err)
}
// Shift back by half a resolution interval, matching the line-protocol ingest logic
fromTimestamp -= (resolution / 2)
// fmt.Printf("File : %s with resolution : %d\n", fileName, resolution)
var recordCounter int64 = 0
// Create a new OCF reader from the buffered reader
ocfReader, err := goavro.NewOCFReader(br)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error creating OCF reader: %w", err)
}
metricsData := make(map[string]schema.FloatArray)
for ocfReader.Scan() {
datum, err := ocfReader.Read()
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while reading avro file : %s", err)
}
record, ok := datum.(map[string]any)
if !ok {
return fmt.Errorf("[METRICSTORE]> failed to assert datum as map[string]interface{}")
}
for key, value := range record {
v, ok := value.(float64)
if !ok {
return fmt.Errorf("[METRICSTORE]> unexpected avro value of type %T for metric %s", value, key)
}
metricsData[key] = append(metricsData[key], schema.ConvertToFloat(v))
}
recordCounter++
}
// Last timestamp covered by this file; if it ends before 'from', skip the whole file.
to := (fromTimestamp + (recordCounter / (60 / resolution) * 60))
if to < from {
return nil
}
for key, floatArray := range metricsData {
metricName := ReplaceKey(key)
if strings.Contains(metricName, SelectorDelimiter) {
subString := strings.Split(metricName, SelectorDelimiter)
lvl := l
for i := 0; i < len(subString)-1; i++ {
sel := subString[i]
if lvl.children == nil {
lvl.children = make(map[string]*Level)
}
child, ok := lvl.children[sel]
if !ok {
child = &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: nil,
}
lvl.children[sel] = child
}
lvl = child
}
leafMetricName := subString[len(subString)-1]
err = lvl.createBuffer(m, leafMetricName, floatArray, fromTimestamp, resolution)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
}
} else {
err = l.createBuffer(m, metricName, floatArray, fromTimestamp, resolution)
if err != nil {
return fmt.Errorf("[METRICSTORE]> error while creating buffers from avroReader : %s", err)
}
}
}
return nil
}
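For orientation, the OCF read loop above can be exercised in isolation. A minimal sketch against the goavro v2 API; the file name and record layout are illustrative only:

package main

import (
	"fmt"
	"os"

	"github.com/linkedin/goavro/v2"
)

func main() {
	// Hypothetical checkpoint file named "<resolution>_<fromTimestamp>.avro".
	f, err := os.Open("60_1700000000.avro")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	r, err := goavro.NewOCFReader(f)
	if err != nil {
		panic(err)
	}
	for r.Scan() {
		datum, err := r.Read()
		if err != nil {
			panic(err)
		}
		// Each datum is one record: metric name -> value for one timestep.
		record := datum.(map[string]any)
		fmt.Println(record)
	}
}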
// createBuffer appends a restored data segment as a new buffer at the end of the
// metric's buffer chain, padding any gap to the previous buffer with NaNs.
func (l *Level) createBuffer(m *MemoryStore, metricName string, floatArray schema.FloatArray, from int64, resolution int64) error {
n := len(floatArray)
b := &buffer{
frequency: resolution,
start: from,
data: floatArray[0:n:n],
prev: nil,
next: nil,
archived: true,
}
minfo, ok := m.Metrics[metricName]
if !ok {
// This metric is not configured in the store: skip it silently.
return nil
}
prev := l.metrics[minfo.offset]
if prev != nil {
if prev.start > b.start {
return fmt.Errorf("[METRICSTORE]> buffer start time %d is before previous buffer start %d", b.start, prev.start)
}
b.prev = prev
prev.next = b
// Pad the gap between the end of prev and the start of b with NaNs.
missingCount := ((int(b.start) - int(prev.start)) - len(prev.data)*int(b.frequency))
if missingCount > 0 {
missingCount /= int(b.frequency)
for range missingCount {
prev.data = append(prev.data, schema.NaN)
}
prev.data = prev.data[0:len(prev.data):len(prev.data)]
}
}
l.metrics[minfo.offset] = b
return nil
}
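To make the gap padding above concrete, a small worked example with invented numbers (10 points at 60 s resolution, new segment starting at t=780):

package main

import "fmt"

func main() {
	// Invented values: the previous buffer starts at t=0 with 10 points at
	// 60 s resolution, so it ends at t=600; the restored segment starts at t=780.
	prevStart, prevLen, freq := int64(0), int64(10), int64(60)
	newStart := int64(780)

	missingSeconds := (newStart - prevStart) - prevLen*freq // 180
	missingPoints := missingSeconds / freq                  // 3 NaN values are appended
	fmt.Println(missingSeconds, missingPoints)
}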
func (l *Level) loadJSONFile(m *MemoryStore, f *os.File, from int64) error {
br := bufio.NewReader(f)
cf := &CheckpointFile{}
if err := json.NewDecoder(br).Decode(cf); err != nil {
return err
}
@@ -613,7 +619,11 @@ func (l *Level) loadBinaryCheckpointFile(m *MemoryStore, filePath string, from i
return nil
}
if err := l.loadFile(cf, m); err != nil {
return err
}
return nil
}
func (l *Level) loadFile(cf *CheckpointFile, m *MemoryStore) error {
@@ -679,14 +689,26 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, err
return 0, err
}
allFiles := make([]fs.DirEntry, 0)
filesLoaded := 0
for _, e := range direntries {
if e.IsDir() {
cclog.Warnf("[METRICSTORE]> unexpected subdirectory '%s' in checkpoint dir '%s', skipping", e.Name(), dir)
continue
} else if strings.HasSuffix(e.Name(), ".bin") || strings.HasSuffix(e.Name(), ".json") {
child := &Level{
metrics: make([]*buffer, len(m.Metrics)),
children: make(map[string]*Level),
}
files, err := child.fromCheckpoint(m, path.Join(dir, e.Name()), from)
filesLoaded += files
if err != nil {
return filesLoaded, err
}
l.children[e.Name()] = child
} else if strings.HasSuffix(e.Name(), ".json") || strings.HasSuffix(e.Name(), ".avro") {
allFiles = append(allFiles, e)
} else {
continue
}
}
@@ -695,189 +717,94 @@ func (l *Level) fromCheckpoint(m *MemoryStore, dir string, from int64) (int, err
return filesLoaded, err
}
loaders := map[string]func(*MemoryStore, *os.File, int64) error{
".json": l.loadJSONFile,
".avro": l.loadAvroFile,
}
for _, filename := range files {
ext := filepath.Ext(filename)
loader := loaders[ext]
if loader == nil {
cclog.Warnf("[METRICSTORE]> unknown extension for file %s", filename)
continue
}
// Use a closure to ensure the file is closed immediately after use
err := func() error {
f, err := os.Open(path.Join(dir, filename))
if err != nil {
return err
}
defer f.Close()
return loader(m, f, from)
}()
if err != nil {
return filesLoaded, err
}
filesLoaded++
}
return filesLoaded, nil
}
// findFiles returns the checkpoint files in direntries that are relevant
// relative to the given timestamp.
//
// This will probably get very slow over time!
// A solution could be some sort of index file in which all other files
// and the timespans they contain are listed.
// NOTE: This assumes distinct timestamps for JSON and Avro files, and that
// the timestamps are not overlapping or self-modified.
func findFiles(direntries []fs.DirEntry, t int64, findMoreRecentFiles bool) ([]string, error) {
nums := map[string]int64{}
for _, e := range direntries {
if !strings.HasSuffix(e.Name(), ".json") && !strings.HasSuffix(e.Name(), ".avro") {
continue
}
// Parse the timestamp from "RESOLUTION_TIMESTAMP.avro" or "TIMESTAMP.json"
// (no underscore: Index returns -1, so the slice starts at 0).
ts, err := strconv.ParseInt(e.Name()[strings.Index(e.Name(), "_")+1:len(e.Name())-5], 10, 64)
if err != nil {
return nil, err
}
nums[e.Name()] = ts
}
sort.Slice(direntries, func(i, j int) bool {
a, b := direntries[i], direntries[j]
return nums[a.Name()] < nums[b.Name()]
})
if len(nums) == 0 {
return nil, nil
}
filenames := make([]string, 0)
for i, e := range direntries {
ts1 := nums[e.Name()]
// Select files in the forward or backward direction:
// if findMoreRecentFiles is set, all files at or after the given
// timestamp are selected; otherwise all files at or before it.
if findMoreRecentFiles && t <= ts1 {
filenames = append(filenames, e.Name())
} else if !findMoreRecentFiles && ts1 <= t && ts1 != 0 {
filenames = append(filenames, e.Name())
}
if i == len(direntries)-1 {
continue
}
enext := direntries[i+1]
ts2 := nums[enext.Name()]
// Additionally keep the file that straddles the boundary: its timestamp
// is before t, but the next file starts after t.
if findMoreRecentFiles {
if ts1 < t && t < ts2 {
filenames = append(filenames, e.Name())
}
}
}
return filenames, nil
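As a rough illustration of the selection rule, the following sketch (assuming it lives in the same package as findFiles; file names and the cutoff are invented) shows how the file just before the cutoff is kept because the requested time falls inside it:

package metricstore

import (
	"fmt"
	"io/fs"
	"testing/fstest"
)

func ExampleFindFiles() {
	// Three invented JSON checkpoints, named by their start timestamp.
	fsys := fstest.MapFS{
		"100.json": &fstest.MapFile{},
		"200.json": &fstest.MapFile{},
		"300.json": &fstest.MapFile{},
	}
	entries, _ := fs.ReadDir(fsys, ".")

	// With t=250, 300.json matches directly; 200.json is kept as well because
	// the cutoff falls between its start (200) and the next file's start (300).
	files, _ := findFiles(entries, 250, true)
	fmt.Println(files)
	// Output: [200.json 300.json]
}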

View File

@@ -14,7 +14,7 @@
// ├─ RetentionInMemory: How long to keep data in RAM
// ├─ MemoryCap: Memory limit in bytes (triggers forceFree)
// ├─ Checkpoints: Persistence configuration
// │ ├─ FileFormat: "binary" or "json"
// │ ├─ FileFormat: "avro" or "json"
// │ ├─ Interval: How often to save (e.g., "1h")
// │ └─ RootDir: Checkpoint storage path
// ├─ Cleanup: Long-term storage configuration
@@ -54,13 +54,17 @@ import (
const (
DefaultMaxWorkers = 10
DefaultBufferCapacity = 512
DefaultGCTriggerInterval = 100
DefaultAvroWorkers = 4
DefaultCheckpointBufferMin = 3
DefaultAvroCheckpointInterval = time.Minute
DefaultMemoryUsageTrackerInterval = 1 * time.Hour
)
// Checkpoints configures periodic persistence of in-memory metric data.
//
// Fields:
// - FileFormat: "binary" (default, fast loading) or "json" (human-readable)
// - FileFormat: "avro" (default, binary, compact) or "json" (human-readable, slower)
// - Interval: Duration string (e.g., "1h", "30m") between checkpoint saves
// - RootDir: Filesystem path for checkpoint files (created if missing)
type Checkpoints struct {
@@ -140,7 +144,7 @@ type MetricStoreConfig struct {
// Accessed by Init(), Checkpointing(), and other lifecycle functions.
var Keys MetricStoreConfig = MetricStoreConfig{
Checkpoints: Checkpoints{
FileFormat: "binary",
FileFormat: "avro",
RootDir: "./var/checkpoints",
},
Cleanup: &Cleanup{

View File

@@ -18,9 +18,8 @@ const configSchema = `{
"type": "object",
"properties": {
"file-format": {
"description": "Specify the format for checkpoint files: 'binary' (default, fast loading) or 'json' (human-readable).",
"type": "string",
"enum": ["binary", "json"]
"description": "Specify the format for checkpoint files. There are 2 variants: 'avro' and 'json'. If nothing is specified, 'avro' is default.",
"type": "string"
},
"interval": {
"description": "Interval at which the metrics should be checkpointed.",

View File

@@ -6,7 +6,6 @@
package metricstore
import (
"bytes"
"context"
"fmt"
"sync"
@@ -15,7 +14,7 @@ import (
cclog "github.com/ClusterCockpit/cc-lib/v2/ccLogger"
"github.com/ClusterCockpit/cc-lib/v2/nats"
"github.com/ClusterCockpit/cc-lib/v2/schema"
"github.com/ClusterCockpit/cc-line-protocol/v2/lineprotocol"
"github.com/influxdata/line-protocol/v2/lineprotocol"
)
func ReceiveNats(ms *MemoryStore,
@@ -95,28 +94,6 @@ func reorder(buf, prefix []byte) []byte {
}
}
// Decode lines using dec and make write calls to the MemoryStore.
// If a line is missing its cluster tag, use clusterDefault as default.
func DecodeLine(dec *lineprotocol.Decoder,
@@ -125,9 +102,9 @@ func DecodeLine(dec *lineprotocol.Decoder,
) error {
// Reduce allocations in loop:
t := time.Now()
metric, metricBuf := Metric{}, make([]byte, 0, 16)
selector := make([]string, 0, 4)
typeBuf, subTypeBuf := make([]byte, 0, 16), make([]byte, 0)
// Optimize for the case where all lines in a "batch" are about the same
// cluster and host. By using `WriteToLevel` (level = host), we do not need
@@ -144,7 +121,7 @@ func DecodeLine(dec *lineprotocol.Decoder,
// Needs to be copied because another call to dec.* would
// invalidate the returned slice.
metricBuf = append(metricBuf[:0], rawmeasurement...)
// The go compiler optimizes map[string(byteslice)] lookups:
metric.MetricConfig, ok = ms.Metrics[string(rawmeasurement)]
@@ -152,7 +129,7 @@ func DecodeLine(dec *lineprotocol.Decoder,
continue
}
typeBuf, subTypeBuf := typeBuf[:0], subTypeBuf[:0]
cluster, host := clusterDefault, ""
for {
key, val, err := dec.NextTag()
@@ -185,49 +162,41 @@ func DecodeLine(dec *lineprotocol.Decoder,
}
// We cannot be sure that the "type" tag comes before the "type-id" tag:
if len(typeBuf) == 0 {
typeBuf = append(typeBuf, val...)
} else {
typeBuf = reorder(typeBuf, val)
}
case "type-id":
typeBuf = append(typeBuf, val...)
case "subtype":
// We cannot be sure that the "subtype" tag comes before the "stype-id" tag:
if len(subTypeBuf) == 0 {
subTypeBuf = append(subTypeBuf, val...)
} else {
subTypeBuf = reorder(subTypeBuf, val)
}
case "stype-id":
subTypeBuf = append(subTypeBuf, val...)
default:
}
}
// If the cluster or host changed, the lvl was set to nil
if lvl == nil {
selector = selector[:2]
selector[0], selector[1] = cluster, host
lvl = ms.GetLevel(selector)
prevCluster, prevHost = cluster, host
}
// Build the metric selector from the type/subtype tags:
selector = selector[:0]
if len(typeBuf) > 0 {
selector = append(selector, string(typeBuf)) // <- Allocation :(
if len(subTypeBuf) > 0 {
selector = append(selector, string(subTypeBuf))
}
}
@@ -275,7 +244,18 @@ func DecodeLine(dec *lineprotocol.Decoder,
time := t.Unix()
if Keys.Checkpoints.FileFormat != "json" {
LineProtocolMessages <- &AvroStruct{
MetricName: string(metricBuf),
Cluster: cluster,
Node: host,
Selector: append([]string{}, selector...),
Value: metric.Value,
Timestamp: time,
}
}
if err := ms.WriteToLevel(lvl, selector, time, []Metric{metric}); err != nil {
return err
}
}
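The decoder side can be tried standalone. A minimal sketch of the influxdata line-protocol v2 API used by DecodeLine; the measurement, tags, and timestamp are invented:

package main

import (
	"fmt"

	"github.com/influxdata/line-protocol/v2/lineprotocol"
)

func main() {
	// One invented measurement in Influx line protocol.
	line := []byte("flops_any,cluster=alex,hostname=a0603,type=hwthread,type-id=0 value=42 1700000000000000000\n")
	dec := lineprotocol.NewDecoderWithBytes(line)
	for dec.Next() {
		name, err := dec.Measurement()
		if err != nil {
			panic(err)
		}
		fmt.Printf("measurement: %s\n", name)
		for {
			key, val, err := dec.NextTag()
			if err != nil {
				panic(err)
			}
			if key == nil {
				break // no more tags
			}
			fmt.Printf("  tag %s=%s\n", key, val)
		}
		for {
			key, val, err := dec.NextField()
			if err != nil {
				panic(err)
			}
			if key == nil {
				break // no more fields
			}
			fmt.Printf("  field %s=%v\n", key, val)
		}
	}
}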

View File

@@ -8,7 +8,7 @@
//
// The package organizes metrics in a tree structure (cluster → host → component) and
// provides concurrent read/write access to metric data with configurable aggregation strategies.
// Background goroutines handle periodic checkpointing (JSON or Avro format), archiving old data,
// and enforcing retention policies.
//
// Key features:
@@ -151,12 +151,6 @@ func Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.W
restoreFrom := startupTime.Add(-d)
cclog.Infof("[METRICSTORE]> Loading checkpoints newer than %s\n", restoreFrom.Format(time.RFC3339))
files, err := ms.FromCheckpointFiles(Keys.Checkpoints.RootDir, restoreFrom.Unix())
loadedData := ms.SizeInBytes() / 1024 / 1024 // In MB
if err != nil {
@@ -165,16 +159,20 @@ func Init(rawConfig json.RawMessage, metrics map[string]MetricConfig, wg *sync.W
cclog.Infof("[METRICSTORE]> Checkpoints loaded (%d files, %d MB, that took %fs)\n", files, loadedData, time.Since(startupTime).Seconds())
}
// Try to use less memory by forcing a GC run here and then
// lowering the target percentage. The default of 100 means
// that a GC is only triggered once the ratio of new allocations
// to the previously active heap exceeds 1.
// Forcing a GC here will set the "previously active heap"
// to a minimum.
// runtime.GC()
ctx, shutdown := context.WithCancel(context.Background())
Retention(wg, ctx)
Checkpointing(wg, ctx)
CleanUp(wg, ctx)
DataStaging(wg, ctx)
MemoryUsageTracker(wg, ctx)
// Note: Signal handling has been removed from this function.
@@ -278,10 +276,22 @@ func Shutdown() {
shutdownFunc()
}
if Keys.Checkpoints.FileFormat != "json" {
close(LineProtocolMessages)
}
cclog.Infof("[METRICSTORE]> Writing to '%s'...\n", Keys.Checkpoints.RootDir)
var files int
var err error
ms := GetMemoryStore()
if Keys.Checkpoints.FileFormat == "json" {
files, err = ms.ToCheckpoint(Keys.Checkpoints.RootDir, lastCheckpoint.Unix(), time.Now().Unix())
} else {
files, err = GetAvroStore().ToCheckpoint(Keys.Checkpoints.RootDir, true)
}
if err != nil {
cclog.Errorf("[METRICSTORE]> Writing checkpoint failed: %s\n", err.Error())
}

View File

@@ -36,9 +36,6 @@
const { query: initq } = init();
const useCbColors = getContext("cc-config")?.plotConfiguration_colorblindMode || false
/* Derived */
const subClusters = $derived($initq?.data?.clusters?.find((c) => c.name == presetCluster)?.subClusters || []);
</script>
@@ -66,22 +63,22 @@
</Row>
{:else}
<Card class="overflow-auto" style="height: auto;">
<TabContent>
<TabPane tabId="status-dash" tab="Status" active>
<CardBody>
<StatusDash clusters={$initq.data.clusters} {presetCluster}></StatusDash>
</CardBody>
</TabPane>
<TabPane tabId="health-dash" tab="Metric Status">
<CardBody>
<HealthDash {presetCluster}></HealthDash>
</CardBody>
</TabPane>
<TabPane tabId="usage-dash" tab="Cluster Usage">
<CardBody>
<UsageDash {presetCluster} {useCbColors}></UsageDash>
</CardBody>
</TabPane>
@@ -89,7 +86,7 @@
{#each subClusters.map(sc => sc.name) as scn}
<TabPane tabId="{scn}-usage-dash" tab="{scn.charAt(0).toUpperCase() + scn.slice(1)} Usage">
<CardBody>
<UsageDash {presetCluster} presetSubCluster={scn} {useCbColors}></UsageDash>
</CardBody>
</TabPane>
{/each}
@@ -97,7 +94,7 @@
<TabPane tabId="metric-dash" tab="Statistics">
<CardBody>
<StatisticsDash {presetCluster} {useCbColors}></StatisticsDash>
</CardBody>
</TabPane>
</TabContent>

View File

@@ -6,7 +6,6 @@
-->
<script>
import { onMount } from "svelte";
import {
Row,
Col,
@@ -29,7 +28,6 @@
/* Svelte 5 Props */
let {
presetCluster,
} = $props();
/* Const Init */
@@ -56,7 +54,7 @@
/* Derived */
let cluster = $derived(presetCluster);
const statusQuery = $derived(queryStore({
client: client,
query: gql`
query (
@@ -86,7 +84,7 @@
sorting: querySorting,
},
requestPolicy: "network-only"
}));
let healthTableData = $derived.by(() => {
if ($statusQuery?.data) {
@@ -142,9 +140,6 @@
healthTableData = [...pendingHealthData];
}
</script>
<!-- Refresher and space for other options -->
@@ -162,16 +157,16 @@
<hr/>
<!-- Node Health Pis, later Charts -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (States): {$statusQuery?.error?.message}</Card>
<Card body color="danger">Status Query (States): {$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery?.data?.nodeStates}
@@ -265,19 +260,19 @@
<hr/>
<!-- Tabular Info About Node States and Missing Metrics -->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (Details): {$statusQuery?.error?.message}</Card>
<Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery.data}
<Row>
<Col>
<Card>

View File

@@ -30,8 +30,7 @@
/* Svelte 5 Props */
let {
presetCluster
} = $props();
/* Const Init */
@@ -50,7 +49,7 @@
: ccconfig['statusView_selectedHistograms'] || []);
// Note: nodeMetrics are requested on configured $timestep resolution
const metricStatusQuery = $derived(queryStore({
client: client,
query: gql`
query (
@@ -76,7 +75,7 @@
selectedHistograms: selectedHistograms
},
requestPolicy: "network-only"
}) : null);
}));
</script>
<!-- Loading indicators & Metric Select -->
@@ -101,18 +100,18 @@
</Row>
<Row cols={1} class="text-center mt-3">
{#if $metricStatusQuery.fetching}
<Col>
<Spinner />
</Col>
{:else if $metricStatusQuery.error}
<Col>
<Card body color="danger">{$metricStatusQuery.error.message}</Card>
</Col>
{/if}
</Row>
{#if $metricStatusQuery.data}
<!-- Selectable Stats as Histograms : Average Values of Running Jobs -->
{#if selectedHistograms}
<!-- Note: Ignore '#snippet' Error in IDE -->

View File

@@ -32,7 +32,6 @@
let {
clusters,
presetCluster,
} = $props();
/* Const Init */
@@ -60,7 +59,7 @@
/* Derived */
let cluster = $derived(presetCluster);
// States for Stacked charts
const statesTimed = $derived(queryStore({
client: client,
query: gql`
query ($filter: [NodeFilter!], $typeNode: String!, $typeHealth: String!) {
@@ -82,11 +81,11 @@
typeHealth: "health"
},
requestPolicy: "network-only"
}));
// Note: nodeMetrics are requested on configured $timestep resolution
// Result: The latest 5 minutes (datapoints) for each node independent of job
const statusQuery = $derived(queryStore({
client: client,
query: gql`
query (
@@ -185,11 +184,11 @@
sorting: { field: "startTime", type: "col", order: "DESC" }
},
requestPolicy: "network-only"
}));
/* Effects */
$effect(() => {
if ($statusQuery.data) {
let subClusters = clusters.find(
(c) => c.name == cluster,
).subClusters;
@@ -375,19 +374,19 @@
<hr/>
<!-- Node Stack Charts -->
{#if $statesTimed.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statesTimed.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">States Timed: {$statesTimed?.error?.message}</Card>
<Card body color="danger">States Timed: {$statesTimed.error.message}</Card>
</Col>
</Row>
{:else if $statesTimed.data}
<Row cols={{ md: 2 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3 mt-2 mt-lg-0">
<div>
@@ -428,19 +427,19 @@
<hr/>
<!-- Gauges & Roofline per Subcluster-->
{#if $statusQuery.fetching}
<Row cols={1} class="text-center mt-3">
<Col>
<Spinner />
</Col>
</Row>
{:else if $statusQuery.error}
<Row cols={1} class="text-center mt-3">
<Col>
<Card body color="danger">Status Query (Details): {$statusQuery?.error?.message}</Card>
<Card body color="danger">Status Query (Details): {$statusQuery.error.message}</Card>
</Col>
</Row>
{:else if $statusQuery.data}
{#each clusters.find((c) => c.name == cluster).subClusters as subCluster, i}
<Row cols={{ lg: 3, md: 1 , sm: 1}} class="mb-3 justify-content-center">
<Col class="px-3">

View File

@@ -40,8 +40,7 @@
presetCluster,
presetSubCluster = null,
useCbColors = false,
useAltColors = false
} = $props();
/* Const Init */
@@ -63,7 +62,7 @@
? [{ state: ["running"] }, { cluster: { eq: presetCluster} }, { subCluster: { eq: presetSubCluster } }]
: [{ state: ["running"] }, { cluster: { eq: presetCluster} }]
);
const topJobsQuery = $derived(queryStore({
client: client,
query: gql`
query (
@@ -96,9 +95,9 @@
paging: pagingState // Top 10
},
requestPolicy: "network-only"
}));
const topNodesQuery = $derived(queryStore({
client: client,
query: gql`
query (
@@ -131,9 +130,9 @@
paging: pagingState
},
requestPolicy: "network-only"
}));
const topAccsQuery = $derived(queryStore({
client: client,
query: gql`
query (
@@ -166,10 +165,10 @@
paging: pagingState
},
requestPolicy: "network-only"
}));
// Note: nodeMetrics are requested on configured $timestep resolution
const nodeStatusQuery = $derived(queryStore({
client: client,
query: gql`
query (
@@ -199,7 +198,7 @@
numDurationBins: numDurationBins,
},
requestPolicy: "network-only"
}));
/* Functions */
function legendColors(targetIdx) {
@@ -247,9 +246,9 @@
<hr/>
<!-- Job Duration, Top Users and Projects-->
{#if $topJobsQuery.fetching || $nodeStatusQuery.fetching}
<Spinner />
{:else if $topJobsQuery.data && $nodeStatusQuery.data}
<Row>
<Col xs="12" lg="4" class="p-2">
{#key $nodeStatusQuery.data.jobsStatistics[0].histDuration}
@@ -355,9 +354,9 @@
<hr/>
<!-- Node Distribution, Top Users and Projects-->
{#if $topNodesQuery.fetching || $nodeStatusQuery.fetching}
<Spinner />
{:else if $topNodesQuery.data && $nodeStatusQuery.data}
<Row>
<Col xs="12" lg="4" class="p-2">
<Histogram
@@ -459,9 +458,9 @@
<hr/>
<!-- Acc Distribution, Top Users and Projects-->
{#if $topAccsQuery.fetching || $nodeStatusQuery.fetching}
<Spinner />
{:else if $topAccsQuery.data && $nodeStatusQuery.data}
<Row>
<Col xs="12" lg="4" class="p-2">
<Histogram

View File

@@ -38,7 +38,7 @@
<input class="form-control" type="password" id="password" name="password" required/>
</div>
<button type="submit" class="btn btn-success">Submit</button>
{{- if .Infos.hasOpenIDConnect}}
<a class="btn btn-primary" href="/oidc-login">OpenID Connect Login</a>
{{end}}
<input type="hidden" id="redirect" name="redirect" value="{{ .Redirect }}" />